# 1 Best cases

Given a sorted list of size $n$ as input, what are the running times of the following algorithms, if the list is already sorted?

1. Bubble sort (the "short" version): $O(n)$, it is possible to make it optimal by using FLAG.
2. Selection sort: $O(n^2)$
3. Merge sort: $O(n\log n)$

In [0]:
def shortBubbleSort(alist):
    exchanges = True
    passnum = len(alist)-1
    while passnum > 0 and exchanges:
        exchanges = False
        for i in range(passnum):
            if alist[i]>alist[i+1]:
                exchanges = True
                temp = alist[i]
                alist[i] = alist[i+1]
                alist[i+1] = temp
        passnum = passnum-1

# 2 Merge sort without slicing

The online book shows an implementation of merge sort that slices lists: https://runestone.academy/runestone/static/pythonds/SortSearch/TheMergeSort.html

Using this code as a starting point, write a version indexMergeSort of merge sort that does not use slicing, but uses list indices instead. You are allowed to use a temporary array in every merging operation.

In [30]:
def indexMergeSort(a, start=0, end=None):
    if end is None:
        end = len(a)-1
    size = end-start+1
    if size > 1:
        mid = (start+end)//2
        left_arr = indexMergeSort(a, start, mid)
        left_size = len(left_arr)
        right_arr = indexMergeSort(a, mid+1, end)
        right_size = len(right_arr)
        new_list = [None]*size
        
        
        # Merge
        lp = 0
        rp = 0
        np = 0
        while lp < left_size and rp < right_size:
            if left_arr[lp] < right_arr[rp]:
                new_list[np] = left_arr[lp]
                lp += 1
            else:
                new_list[np] = right_arr[rp]
                rp += 1
            np += 1
        while lp < left_size:
            new_list[np] = left_arr[lp]
            lp += 1
            np += 1
        while rp < right_size:
            new_list[np] = right_arr[rp]
            rp += 1
            np += 1
        #print(left_arr, right_arr, size, new_list)
        return new_list
    else:
        return [a[start]]

aList = [1, 4, 5, 2, 10, 3, 7, 2]
print(indexMergeSort(aList))
#print(aList)

[1, 2, 2, 3, 4, 5, 7, 10]


# 3 Benchmarking sorting algorithms

Generate random lists of integers and measure the running times of these sorting algorithm (given in the online book) as the size of the list grows:
```
sorting_algorithms = [bubbleSort, shortBubbleSort, selectionSort, insertionSort, shellSort, mergeSort, indexMergeSort, quickSort]
```

We recommend you use the modules timeit, random and matplotlib. You may look at the solutions to the Week 3 tutes for inspiration on how to use these modules and set up the benchmark.

### Sort algorithm

In [0]:
def bubbleSort(alist):
    for passnum in range(len(alist)-1,0,-1):
        for i in range(passnum):
            if alist[i]>alist[i+1]:
                temp = alist[i]
                alist[i] = alist[i+1]
                alist[i+1] = temp
                
def selectionSort(alist):
    for fillslot in range(len(alist)-1,0,-1):
        positionOfMax=0
        for location in range(1,fillslot+1):
            if alist[location]>alist[positionOfMax]:
                positionOfMax = location

        temp = alist[fillslot]
        alist[fillslot] = alist[positionOfMax]
        alist[positionOfMax] = temp
        
def insertionSort(alist):
    for index in range(1,len(alist)):

        currentvalue = alist[index]
        position = index

        while position>0 and alist[position-1]>currentvalue:
            alist[position]=alist[position-1]
            position = position-1

        alist[position]=currentvalue
        
def shellSort(alist):
    sublistcount = len(alist)//2
    while sublistcount > 0:

        for startposition in range(sublistcount):
            gapInsertionSort(alist,startposition,sublistcount)

        sublistcount = sublistcount // 2

def gapInsertionSort(alist,start,gap):
    for i in range(start+gap,len(alist),gap):

        currentvalue = alist[i]
        position = i

        while position>=gap and alist[position-gap]>currentvalue:
            alist[position]=alist[position-gap]
            position = position-gap

        alist[position]=currentvalue
        
def mergeSort(alist):
    if len(alist)>1:
        mid = len(alist)//2
        lefthalf = alist[:mid]
        righthalf = alist[mid:]

        mergeSort(lefthalf)
        mergeSort(righthalf)

        i=0
        j=0
        k=0
        while i < len(lefthalf) and j < len(righthalf):
            if lefthalf[i] <= righthalf[j]:
                alist[k]=lefthalf[i]
                i=i+1
            else:
                alist[k]=righthalf[j]
                j=j+1
            k=k+1

        while i < len(lefthalf):
            alist[k]=lefthalf[i]
            i=i+1
            k=k+1

        while j < len(righthalf):
            alist[k]=righthalf[j]
            j=j+1
            k=k+1
            
def quickSort(alist):
    quickSortHelper(alist,0,len(alist)-1)

def quickSortHelper(alist,first,last):
    if first<last:

        splitpoint = partition(alist,first,last)

        quickSortHelper(alist,first,splitpoint-1)
        quickSortHelper(alist,splitpoint+1,last)


def partition(alist,first,last):
    pivotvalue = alist[first]

    leftmark = first+1
    rightmark = last

    done = False
    while not done:

        while leftmark <= rightmark and alist[leftmark] <= pivotvalue:
            leftmark = leftmark + 1

        while alist[rightmark] >= pivotvalue and rightmark >= leftmark:
            rightmark = rightmark -1

        if rightmark < leftmark:
            done = True
        else:
            temp = alist[leftmark]
            alist[leftmark] = alist[rightmark]
            alist[rightmark] = temp

    temp = alist[first]
    alist[first] = alist[rightmark]
    alist[rightmark] = temp


    return rightmark

### Random generator

In [0]:
import random
import time

def GenerateList(seedValue, length):
    l = [None]*length
    random.seed(seedValue)
    for i in range(length):
        l[i] = random.randint(0, seedValue)
    return l
        
# Generate random data
seedValue = math.floor(114514*time.time())

### Benchmark

In [44]:
import math
import matplotlib.pyplot as plt
import timeit

def sorting_benchmark(sort_alg, sample, times):
    sample_copy = [sample]*times
    ts = time.time()
    for th in range(times):
        sort_alg(sample_copy[th])
    te = time.time()
    return te-ts

sorting_algorithms = [bubbleSort, shortBubbleSort, selectionSort, insertionSort, \
                      shellSort, mergeSort, indexMergeSort, quickSort]
data_length = [100, 200, 300, 400, 500, 600, 700, 800]
time_costs = [[None]*8]*8
times = 100
for i in range(8):
    sample_data = str(GenerateList(seedValue, data_length[i]))
    time_costs[0][i] = timeit.timeit(stmt=sorting_algorithms[0],setup='alist=%s'%sample_data,number=1000)
    sample_data = GenerateList(seedValue, data_length[i])
    time_costs[1][i] = sorting_benchmark(sorting_algorithms[1], sample_data, times)
    sample_data = GenerateList(seedValue, data_length[i])
    time_costs[2][i] = sorting_benchmark(sorting_algorithms[2], sample_data, times)
    sample_data = GenerateList(seedValue, data_length[i])
    time_costs[3][i] = sorting_benchmark(sorting_algorithms[3], sample_data, times)
    sample_data = GenerateList(seedValue, data_length[i])
    time_costs[4][i] = sorting_benchmark(sorting_algorithms[4], sample_data, times)
    sample_data = GenerateList(seedValue, data_length[i])
    time_costs[5][i] = sorting_benchmark(sorting_algorithms[5], sample_data, times)
    sample_data = GenerateList(seedValue, data_length[i])
    time_costs[6][i] = sorting_benchmark(sorting_algorithms[6], sample_data, times)
    sample_data = GenerateList(seedValue, data_length[i])
    time_costs[7][i] = sorting_benchmark(sorting_algorithms[7], sample_data, times)
    

plt.plot(data_length, time_costs[0], 'g')
plt.plot(data_length, time_costs[1], 'r')
plt.plot(data_length, time_costs[2], 'y')
plt.plot(data_length, time_costs[3], 'b')
plt.plot(data_length, time_costs[4], 'c')
plt.plot(data_length, time_costs[5], 'm')
plt.plot(data_length, time_costs[6], 'k')
plt.plot(data_length, time_costs[7], 'g--')
plt.show()

print(time_costs)

TypeError: ignored

# 4 Sorting small integers


1. Suppose that the input list to sort only contains integers in ${0, ..., maxv}$, where maxv is not very large. Using an auxiliary list of size $maxv+1$, write a sorting algorithm with worst-case complexity $O(n)$.

In [0]:
# For case in which the list contains unique integers
def sort_unique_list(a, maxv):
    sorted_list = [None]*(maxv+1)
    for v in a:
        sorted_list[v] = v
    result = []
    for v in sorted_list:
        if v != None:
            result.append(v)
    return result

def sort_using_counter(a, maxv):
    count = [0]*(maxv+1)
    for v in a:
        count[v] += 1
    if False:    # Print counting list switch
        print(count)
    # Re-consturct the list in ascending order
    result = []
    for i in range(maxv+1):
        while count[i] != 0:
            result.append(i)
            count[i] -= 1
    return result

In [0]:
print(sort_using_counter([1, 3, 2, 2, 0, 12, 6, 9, 7, 12, 4, 12, 3, 0, 8, 5, 6, 8], 12))

[2, 1, 2, 2, 1, 1, 2, 1, 2, 1, 0, 0, 3]
[0, 0, 1, 2, 2, 3, 3, 4, 5, 6, 6, 7, 8, 8, 9, 12, 12, 12]


2. Use the code you wrote for Exercise 3 to plot the running time of your implementation.

# 5 Find the duplicates

Given a Python list $l$ of $n$ integers, design an algorithm that removes all duplicates in $O(nlogn)$ average time. You may not sort the list first, but you may modify a sorting algorithm.

In [58]:
def remove_duplicates(aList, start=0, end=None):
    if end is None:
        end = len(aList)-1
    size = end-start+1
    if size > 1:
        mid = (start+end)//2
        left_arr = indexMergeSort(aList, start, mid)
        left_size = len(left_arr)
        right_arr = indexMergeSort(aList, mid+1, end)
        right_size = len(right_arr)
        
        new_list = []
        
        # Merge
        lp = 0
        rp = 0
        
        # For case in which new_list is empty
        if left_arr[lp] < right_arr[rp]:
            new_list.append(left_arr[lp])
            lp += 1
        elif left_arr[lp] > right_arr[rp]:
            new_list.append(right_arr[rp])
            rp += 1
        else:
            new_list.append(left_arr[lp])
            lp += 1
            rp += 1
        
        while lp < left_size and rp < right_size:
            if left_arr[lp] < right_arr[rp] and left_arr[lp] != new_list[-1]:
                new_list.append(left_arr[lp])
                lp += 1
            elif left_arr[lp] > right_arr[rp] and right_arr[rp] != new_list[-1]:
                new_list.append(right_arr[rp])
                rp += 1
            else:
                if left_arr[lp] != new_list[-1]:
                    new_list.append(left_arr[lp])
                lp += 1
                rp += 1
            
        while lp < left_size:
            if left_arr[lp] != new_list[-1]:
                new_list.append(left_arr[lp])
            lp += 1
        while rp < right_size:
            if right_arr[rp] != new_list[-1]:
                new_list.append(right_arr[rp])
            rp += 1
        #print(left_arr, right_arr, size, new_list)
        return new_list
    else:
        return [aList[start]]

remove_duplicates([1,0,-1,2,3,4,3,5,6,4,1,2,12,-1,2,5,7])

[-1, 0, 1, 2, 3, 4, 5, 6, 7, 12]