# Experimento sobre algoritmos de ordenação

##### Autor: Estevão de Carvalho Costa

In [1]:
import random
import time
import matplotlib.pyplot as plt
import numpy as np

## 1. Implementações

### 1.1. Heap sort

In [2]:
## Utility functions

def index_of_left_child(i):
    """Return the index of the left child of node ``i`` in a 1-based heap array."""
    left_child = i * 2
    return left_child
def index_of_right_child(i):
    """Return the index of the right child of node ``i`` in a 1-based heap array."""
    right_child = (i * 2) + 1
    return right_child

def heapify(heap_array, i, size):
    """Sift the value at index ``i`` down until the max-heap property holds.

    The heap occupies indices ``1..size`` of ``heap_array``; entries past
    ``size`` are never read or moved. The list is modified in place.

    Args:
        heap_array: list holding the heap (children of ``k`` are at ``2k``
            and ``2k + 1``).
        i: index of the node to sift down.
        size: index of the last element that belongs to the heap.
    """
    while i <= size:
        left_child = index_of_left_child(i)
        right_child = index_of_right_child(i)

        # Pick the largest among the node and whichever children exist.
        largest = i
        if left_child <= size and heap_array[left_child] > heap_array[largest]:
            largest = left_child
        if right_child <= size and heap_array[right_child] > heap_array[largest]:
            largest = right_child

        if largest == i:
            # Node already dominates its children: nothing left to fix.
            return

        heap_array[i], heap_array[largest] = heap_array[largest], heap_array[i]
        i = largest

def build_heap(array, number_of_elements):
    """Rearrange ``array`` in place into a 1-based max-heap.

    A ``None`` placeholder is inserted at index 0 so that the heap occupies
    indices ``1..number_of_elements``. The placeholder is NOT removed here.

    Args:
        array: list to turn into a heap (mutated in place).
        number_of_elements: number of real elements in ``array`` before the
            placeholder is inserted.
    """
    array.insert(0, None)
    # The last node that has a child is floor(n / 2). Integer division avoids
    # float arithmetic and round()'s half-to-even behaviour, which would
    # sometimes start one position too late, on a leaf.
    index_of_last_parent = number_of_elements // 2
    for i in range(index_of_last_parent, 0, -1):
        heapify(array, i, number_of_elements)
        
def show_heap(heap_array, size=None):
    """Print a 1-based heap level by level, one tree level per output line.

    Each value is followed by a single space; each level ends with a newline.
    Nothing is printed when the heap is empty.

    Args:
        heap_array: list holding the heap in positions ``1..size`` (index 0
            is a placeholder and is never printed).
        size: index of the last heap element. Defaults to
            ``len(heap_array) - 1``, i.e. everything after the placeholder.
    """
    if size is None:
        size = len(heap_array) - 1

    # Level k of a complete binary tree stored 1-based starts at index 2**k
    # and holds up to 2**k nodes, so consecutive index ranges are the levels.
    level_start = 1
    level_width = 1
    while level_start <= size:
        level_end = min(level_start + level_width - 1, size)
        for index in range(level_start, level_end + 1):
            print(heap_array[index], end=' ')
        print()
        level_start += level_width
        level_width *= 2
        
## main heap sort function:

def heap_sort(array):
    """Sort ``array`` in place, in ascending order, using heap sort.

    Args:
        array: list of mutually comparable items; it is reordered in place
            and nothing is returned.
    """
    heap_size = len(array)
    build_heap(array, heap_size)

    # Move the current maximum (the root, at index 1) to the last slot of the
    # heap, shrink the heap by one, and restore the heap property at the root.
    while heap_size > 1:
        array[1], array[heap_size] = array[heap_size], array[1]
        heap_size -= 1
        heapify(array, 1, heap_size)

    # Drop the placeholder that build_heap put at index 0.
    del array[0]


In [3]:
# Sanity check: heap_sort reorders the list in place, so the same list object
# is printed after the call.
array = [4,10,50,20,1,3,5]
heap_sort(array)
print(array)

[1, 3, 4, 5, 10, 20, 50]


### 1.2. Quick Sort

In [4]:
def partition(array, low, high):
    """Partition ``array[low..high]`` in place around its last element.

    After the call every element less than or equal to the pivot sits to the
    left of the pivot and every greater element sits to its right.

    Args:
        array: list being sorted (mutated in place).
        low: first index of the range (inclusive).
        high: last index of the range (inclusive); ``array[high]`` is the pivot.

    Returns:
        The final index of the pivot.
    """
    pivot = array[high]
    # Next free slot in the "less than or equal to pivot" region.
    boundary = low
    for scan in range(low, high):
        if array[scan] <= pivot:
            array[boundary], array[scan] = array[scan], array[boundary]
            boundary += 1
    # Drop the pivot right after the region of smaller-or-equal elements.
    array[boundary], array[high] = array[high], array[boundary]
    return boundary

def helper_quick_sort(array, low, high):
    """Sort ``array[low..high]`` (both ends inclusive) in place with quick sort.

    Args:
        array: list being sorted (mutated in place).
        low: first index of the range to sort.
        high: last index of the range to sort.
    """
    # Recurse only into the smaller side and keep looping on the larger one.
    # This keeps the recursion depth logarithmic even when the partitions are
    # maximally unbalanced (already-sorted input, many equal keys), where
    # recursing on both sides would nest one call per element and overflow
    # Python's recursion limit on arrays of about a thousand items.
    while high > low:
        middle_index = partition(array, low, high)
        if middle_index - low < high - middle_index:
            helper_quick_sort(array, low, middle_index - 1)
            low = middle_index + 1
        else:
            helper_quick_sort(array, middle_index + 1, high)
            high = middle_index - 1

def quick_sort(array):
    """Sort ``array`` in place, in ascending order, using quick sort."""
    first_index = 0
    last_index = len(array) - 1
    helper_quick_sort(array, first_index, last_index)
    

In [5]:
# Sanity check on a list with repeated values: quick_sort reorders the list in
# place, so the same list object is printed after the call.
array = [78,5,578,8,6,4,5,7,8,5,4,3,56]
quick_sort(array)
print(array)

[3, 4, 4, 5, 5, 5, 6, 7, 8, 8, 56, 78, 578]


### 1.3. Counting Sort e Radix Sort

Os algoritmos Counting Sort e Radix Sort assumem que as chaves de `array` pertencem ao intervalo `[0, maximum_key_value]`

In [42]:
def counting_sort(array, maximum_key_value, key):
    """Return a new list with the items of ``array`` sorted by ``key``.

    Items are distributed into one bucket per possible key value and the
    buckets are then concatenated in key order. Items with equal keys keep
    their relative input order (the sort is stable). ``array`` itself is not
    modified.

    Args:
        array: iterable of items to sort.
        maximum_key_value: largest key that can occur; keys must be integers
            in ``[0, maximum_key_value]``.
        key: function mapping an item to its integer key.

    Returns:
        A new list containing the same items in ascending key order.

    Raises:
        ValueError: if ``key`` returns a negative value.
        IndexError: if ``key`` returns a value above ``maximum_key_value``.
    """
    buckets = [[] for _ in range(maximum_key_value + 1)]
    for item in array:
        item_key = key(item)
        if item_key < 0:
            # A negative index would silently address a bucket from the end
            # of the list and yield a wrongly ordered result.
            raise ValueError(
                'key {} is outside the range [0, {}]'.format(item_key, maximum_key_value)
            )
        buckets[item_key].append(item)

    sorted_array = []
    for bucket in buckets:
        sorted_array.extend(bucket)
    return sorted_array

In [43]:
# Sanity check: each number is its own key and the largest key present is 6.
# counting_sort returns the sorted result as a new list.
sorted_array = counting_sort([1,0,4,6,4,3,6,6], 6, lambda number : number)
print(sorted_array)

[0, 1, 3, 4, 4, 6, 6, 6]


In [49]:
def radix_sort(array, digits, base = 10):
    """Return the integers of ``array`` sorted in ascending order (LSD radix sort).

    One counting-sort pass is run per digit, from the least significant digit
    to the most significant one. ``array`` itself is not modified.

    Args:
        array: list of integers to sort; assumed non-negative, since digits
            are extracted with ``//`` and ``%``.
        digits: number of base-``base`` digits to process; must cover the
            largest value in ``array``.
        base: radix used to split the numbers into digits.

    Returns:
        The sorted list. When ``digits`` is 0 or less no pass runs and
        ``array`` itself is returned.
    """
    sorted_array = array
    place_value = 1  # base ** pass_number, updated once per pass
    for _ in range(digits):
        # Bind the current place value as a default argument so the key
        # function is independent of later loop iterations, and so the power
        # is not recomputed for every element.
        def digit_at_place(number, place_value=place_value):
            return (number // place_value) % base

        sorted_array = counting_sort(sorted_array, base - 1, digit_at_place)
        place_value *= base
    return sorted_array

In [50]:
# Sanity check: 4 digits are requested because the largest values in the list
# (1233 and 6664) have four decimal digits.
array = [176,220,123,40,50,1,0,123,312,3,12,3,123,12,123,12,445,67,1233,6664]
sorted_array = radix_sort(array, 4)
print(sorted_array)

[0, 1, 3, 3, 12, 12, 12, 40, 50, 67, 123, 123, 123, 123, 176, 220, 312, 445, 1233, 6664]


#### Nos experimentos a seguir, vamos plotar gráficos comparando em cada algoritmo implementado o **tempo de execução** em função do **tamanho do vetor de entrada**

## 2. Experimento 1

#### Deve-se ordenar vetores numéricos onde, em média, 90% dos elementos têm o mesmo valor. Os demais apresentam valores distintos e distribuídos de maneira uniforme ao longo do vetor.

## 3. Experimento 2

#### Ordenar vetores onde não há elementos repetidos e as chaves foram inseridas de maneira aleatória.

In [56]:
def __is_sorted(array):
    """Return True when ``array`` is in non-decreasing order, False otherwise."""
    # An inversion is any adjacent pair whose left item is greater than the
    # right one; the sequence is sorted exactly when there is none.
    inversions = (
        array[position - 1] > array[position]
        for position in range(1, len(array))
    )
    return not any(inversions)
        
def __generate_unique_random_key(keys_already_chosen, maximum_key_value):
    """Draw a random integer in ``[0, maximum_key_value]`` that is not yet used.

    Candidates are drawn until one is found that is not in
    ``keys_already_chosen``, so the caller must make sure at least one value
    in the range is still free; otherwise this loops forever.

    Args:
        keys_already_chosen: container of keys that must not be returned
            (a dict keyed by the used keys, or a set).
        maximum_key_value: inclusive upper bound for the generated key.

    Returns:
        An integer in the range that is not in ``keys_already_chosen``.
    """
    while True:
        candidate = random.randint(0, maximum_key_value)
        # Membership is what matters, not the stored value: a key mapped to
        # None (or a key held in a set) still counts as already chosen.
        if candidate not in keys_already_chosen:
            return candidate
    
def generate_array_with_unique_random_keys(n, maximum_key_value = 1000):
    """Return a list of ``n`` distinct random integers in random order.

    Keys are drawn from ``[0, maximum_key_value]``. When that range is too
    small to provide ``n`` distinct keys (``maximum_key_value < n``), the
    upper bound is raised to ``n + 100``.

    Args:
        n: number of keys to generate; 0 or less yields an empty list.
        maximum_key_value: inclusive upper bound for the keys.

    Returns:
        A new list with ``n`` pairwise distinct integers.
    """
    if n <= 0:
        return []
    if maximum_key_value < n:
        maximum_key_value = n + 100
    # random.sample draws without replacement, so the keys are unique by
    # construction and no retry loop is needed.
    return random.sample(range(maximum_key_value + 1), n)

def _elapsed_seconds(function, *args):
    """Call ``function(*args)`` and return how long the call took, in seconds."""
    # perf_counter is the clock meant for measuring short durations: it is
    # monotonic and uses the highest resolution available, unlike time.time().
    start_time = time.perf_counter()
    function(*args)
    return time.perf_counter() - start_time


def execution_times_to_sort_random_arrays(maximun_array_size):
    """Time heap sort, quick sort and radix sort on random arrays of growing size.

    For every size ``n`` in ``1, 11, 21, ...`` up to ``maximun_array_size`` an
    array of ``n`` distinct random keys is generated and each algorithm sorts
    its own copy of it.

    Args:
        maximun_array_size: largest array size to consider (inclusive bound
            of the size range, which advances in steps of 10).

    Returns:
        A dict mapping each size ``n`` to a dict with the keys
        ``'heap_sort'``, ``'quick_sort'`` and ``'radix_sort'``, whose values
        are the measured execution times in seconds.
    """
    execution_times = {}
    for n in range(1, maximun_array_size + 1, 10):
        # heap_sort and quick_sort reorder their argument in place, so every
        # algorithm must receive its own copy of the same unsorted data.
        heap_input = generate_array_with_unique_random_keys(n)
        quick_input = heap_input.copy()
        radix_input = heap_input.copy()

        # The digit count is an input of radix_sort; computing it is kept
        # outside of the timed region.
        digits = len(str(max(radix_input)))

        execution_times[n] = {
            'heap_sort': _elapsed_seconds(heap_sort, heap_input),
            'quick_sort': _elapsed_seconds(quick_sort, quick_input),
            'radix_sort': _elapsed_seconds(radix_sort, radix_input, digits),
        }

    return execution_times

def describe_results(execution_times):
    """Print one line per array size with the timings recorded for that size.

    Args:
        execution_times: dict mapping an array size to its results.
    """
    for array_size, timings in execution_times.items():
        line = 'n = {}, results: {}'.format(array_size, timings)
        print(line)


In [57]:
# Run experiment 2: time the three sorts on random arrays of sizes
# 1, 11, 21, ..., 491 and keep the results for later inspection.
execution_times = execution_times_to_sort_random_arrays(500)