In [1]:
from IPython.core.display import HTML
# Read ../style.css and wrap its contents in an HTML display object.  Because
# HTML(css) is the last expression of the cell, the notebook renders it.
# NOTE(review): presumably the file holds a <style> block -- confirm.
with open('../style.css') as file:
    css = file.read()
HTML(css)

# An Array-Based Implementation of Dual-Pivot-Quicksort

In [2]:
import random as rnd

The function $\texttt{sort}(L)$ sorts the list $L$ in place.

In [3]:
def sort(L):
    """Sort the list L in place by running quickSort over its whole index range."""
    first, last = 0, len(L) - 1
    quickSort(first, last, L)

The function $\texttt{quickSort}(a, b, L)$ sorts the sublist $L[a:b+1]$ in place.

In [4]:
def quickSort(a, b, L):
    """Sort the sublist L[a:b+1] in place using dual-pivot quicksort.

    The index ranges that still have to be sorted are kept on an explicit
    stack instead of the Python call stack.  Since the first and the last
    element of a range serve as pivots, an already sorted range places all
    remaining elements between the two pivots; a recursive formulation then
    nests roughly (b - a) / 2 calls deep and, with Python's default recursion
    limit, raises RecursionError for sorted lists of a few thousand elements.

    Parameters:
        a: index of the first element of the range to sort.
        b: index of the last element of the range to sort (inclusive).
        L: the list to sort; it is modified in place.

    Returns:
        None.
    """
    pending = [(a, b)]  # ranges (lo, hi) that still need sorting
    while pending:
        lo, hi = pending.pop()
        if hi <= lo:
            continue  # at most one element, nothing to do
        if L[lo] > L[hi]:
            swap(lo, hi, L)  # partition expects L[lo] <= L[hi]
        m1, m2 = partition(lo, hi, L)  # m1 and m2 are the split indices
        # The three sub-ranges are disjoint, so the order in which they are
        # processed does not affect the result.  They are pushed right to left
        # so that the leftmost range is handled first.
        pending.append((m2 + 1, hi))
        if L[m1] != L[m2]:
            # When both pivots are equal, every element between them equals
            # the pivots as well and the middle range is already sorted.
            pending.append((m1 + 1, m2 - 1))
        pending.append((lo, m1 - 1))

The function $\texttt{partition}(\texttt{start}, \texttt{end}, L)$ returns two indices $m_1$ and $m_2$ 
into the list $L$ and regroups the elements of $L$ such that after the function returns the following holds:
 
  - $\forall i \in \{\texttt{start}, \cdots, m_1-1\} : L[i]    <   L[m_1]$,
  - $\forall i \in \{ m_1+1, \cdots, m_2-1 \}        : L[m_1] \leq L[i] \leq L[m_2]$,
  - $\forall i \in \{ m_2+1, \cdots, \texttt{end} \} : L[m_2]  <   L[i]$,
  - $L[m_1] = p_1$,
  - $L[m_2] = p_2$.

Here, $p_1$ is the element that is at the index $\texttt{start}$ at the time of the invocation 
of the function, while $p_2$ is the element at index $\texttt{end}$.  It is assumed that $p_1 \leq p_2$.
  
The while-loop of `partition` maintains the following invariants:

 - $L[\texttt{start}] = p_1$,
 - $L[\texttt{end}]   = p_2$,
 - $\forall i \in \{\texttt{start} + 1, \cdots, \texttt{idxLeft} \} : L[i] < p_1$
 - $\forall i \in \{\texttt{idxLeft} + 1, \cdots, \texttt{idxMiddle} - 1\} : p_1 \leq L[i] \leq p_2$
 - $\forall i \in \{\texttt{idxRight}, \cdots, \texttt{end}-1\} : p_2 < L[i]$
 

In [5]:
def partition(start, end, L):
    """Regroup L[start:end+1] around the two pivots L[start] and L[end].

    The pivots are p1 = L[start] and p2 = L[end] as found on entry; the
    caller is expected to ensure p1 <= p2.  Elements smaller than p1 are
    collected on the left, elements greater than p2 on the right, and all
    others stay in between.  Finally p1 and p2 are swapped to the borders
    between the groups.

    Returns:
        A pair (m1, m2) of indices such that L[m1] is p1 and L[m2] is p2.
    """
    p1, p2 = L[start], L[end]
    idxLeft, idxMiddle, idxRight = start, start + 1, end
    while idxMiddle < idxRight:
        x = L[idxMiddle]
        if x < p1:
            # x belongs to the left group: extend that group by one slot
            # and move x into it.
            idxLeft += 1
            swap(idxLeft, idxMiddle, L)
        elif not (x <= p2):
            # x belongs to the right group.  The element swapped into
            # position idxMiddle has not been examined yet, so idxMiddle
            # must not advance.
            idxRight -= 1
            swap(idxMiddle, idxRight, L)
            continue
        idxMiddle += 1
    # Move the pivots from the ends of the range to the group borders.
    swap(start, idxLeft, L)
    swap(end, idxRight, L)
    return idxLeft, idxRight

The function $\texttt{swap}(x, y, L)$ swaps the elements at index $x$ and $y$ in $L$.

In [6]:
def swap(x, y, L):
    """Exchange the elements stored at the indices x and y of the list L in place."""
    old_y, old_x = L[y], L[x]
    L[x] = old_y
    L[y] = old_x

## Testing

In [7]:
# Partition a sample list once and print the three groups with the two pivots
# between them.  The first element (5) is not greater than the last one (9),
# as partition assumes.
L = [5, 7, 9, 1, 24, 11, 5, 2, 5, 8, 2, 13, 9]
print(L)
# partition returns the two split indices, so p1 and p2 are positions in L
# here, not the pivot values themselves.
p1, p2 = partition(0, len(L) - 1, L)
print(L[:p1], L[p1], L[p1+1:p2], L[p2], L[p2+1:])

[5, 7, 9, 1, 24, 11, 5, 2, 5, 8, 2, 13, 9]
[2, 1, 2] 5 [9, 8, 5, 7, 5] 9 [13, 24, 11]


In [8]:
def demo():
    """Print a list of 15 random integers from 1 to 199, sort it, and print it again."""
    L = [rnd.randrange(1, 200) for _ in range(15)]
    print("L = ", L)
    sort(L)
    print("L = ", L)

In [9]:
# Run the demo once: a random list is printed before and after sorting.
demo()

L =  [103, 151, 184, 119, 192, 28, 134, 4, 22, 128, 170, 109, 9, 93, 145]
L =  [4, 9, 22, 28, 93, 103, 109, 119, 128, 134, 145, 151, 170, 184, 192]


In [10]:
def isOrdered(L):
    """Assert that L is in ascending order.

    Raises an AssertionError at the first pair of neighbours that is out of
    order; returns None otherwise.
    """
    for idx in range(1, len(L)):
        assert L[idx - 1] <= L[idx]

In [11]:
from collections import Counter

In [12]:
def sameElements(L, S):
    """Assert that L and S contain the same elements with the same multiplicities."""
    countsL, countsS = Counter(L), Counter(S)
    assert countsL == countsS

The function $\texttt{testSort}(n, k)$ generates $n$ random lists of length $k$, sorts them, and checks whether the output is sorted and contains the same elements as the input.

In [13]:
def testSort(n, k):
    """Sort n random lists of length k and check every result.

    Each list holds k integers drawn from range(2 * k).  After sorting, the
    list must be in ascending order and must contain the same elements as
    before.  One dot is printed per list that passes, followed by a final
    success message; a failed check raises an AssertionError.
    """
    for _ in range(n):
        sample = [rnd.randrange(2 * k) for _ in range(k)]
        original = list(sample)  # untouched copy to compare against
        sort(sample)
        isOrdered(sample)
        sameElements(original, sample)
        assert len(sample) == len(original)
        print('.', end='')
    print()
    print("All tests successful!")

In [14]:
%%time
# Sort and check 100 random lists of 20,000 elements each.
testSort(100, 20_000)

....................................................................................................
All tests successful!
CPU times: user 8.02 s, sys: 94.9 ms, total: 8.12 s
Wall time: 8.18 s


Next, we sort a million random integers.

In [15]:
%%timeit
# sort works in place, so a fresh list is built on every run; the measured
# time therefore includes generating the random numbers.
k = 1_000_000
L = [ rnd.randrange(2 * k) for x in range(k) ]
sort(L)

5.65 s ± 138 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


Again, we sort a million integers.  This time, many of the integers have the same value.

In [16]:
%%timeit
# Only the values 0..999 can occur, so the million elements contain many
# duplicates.  The list is rebuilt on every run, so its construction is
# part of the measured time.
k = 1_000_000
L = [ rnd.randrange(1000) for x in range(k) ]
sort(L)

3.35 s ± 104 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
