<a href="https://colab.research.google.com/github/Ryan-M-Smith/CS315/blob/main/InClass/mergesort.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Template for testing Merge Sort on small lists, outputting each intermediate step

In [4]:
import random
import time
import numpy as np
from typing import Any

In [5]:
# Merge provided as a convenience
def merge(arr: list[Any], p: int, q: int, r: int) -> None:
    """Merge two adjacent runs of ``arr`` back into ``arr[p..r]`` in place.

    The left run is ``arr[p..q]`` and the right run is ``arr[q+1..r]`` (all
    bounds inclusive). Each run is assumed to already be in ascending order.
    On equal elements the left run's element is written first.

    Args:
        arr: List containing both runs; it is modified in place.
        p: Index of the first element of the left run.
        q: Index of the last element of the left run.
        r: Index of the last element of the right run.
    """
    left_len = q - p + 1
    right_len = r - q

    # Copy both runs out first so that writing into arr cannot overwrite
    # values that have not been consumed yet.
    left = [arr[p + offset] for offset in range(left_len)]
    right = [arr[q + 1 + offset] for offset in range(right_len)]

    li = ri = 0
    out = p

    # Emit the smaller of the two head elements until one run is exhausted.
    while li < left_len and ri < right_len:
        if left[li] <= right[ri]:
            arr[out] = left[li]
            li += 1
        else:
            arr[out] = right[ri]
            ri += 1
        out += 1

    # At most one of the runs still has elements; append whatever remains.
    for value in left[li:]:
        arr[out] = value
        out += 1

    for value in right[ri:]:
        arr[out] = value
        out += 1

In [6]:
def merge_sort(arr: list[Any], p: int, r: int, *, print_flag: bool = False):
    """Sort ``arr[p..r]`` (inclusive bounds) in place with recursive Merge Sort.

    Args:
        arr: List to sort; it is modified in place.
        p: Index of the first element of the range to sort.
        r: Index of the last element of the range to sort.
        print_flag: When true, print the whole list after every merge so the
            intermediate states can be inspected.
    """
    # A range of zero or one elements is already sorted; only split otherwise.
    if p < r:
        mid = (p + r) // 2

        # Sort each half independently, then combine the two sorted halves.
        merge_sort(arr, p, mid, print_flag=print_flag)
        merge_sort(arr, mid + 1, r, print_flag=print_flag)
        merge(arr, p, mid, r)

        if print_flag:
            print(arr)

In [7]:
# Demo: sort a shuffled permutation of 0..n-1, printing the list after
# every merge so each intermediate step is visible.
n = 8
arr = [*range(n)]
random.shuffle(arr)

print(arr)  # shuffled input
merge_sort(arr, 0, len(arr) - 1, print_flag=True)
print(arr)  # final sorted result

[2, 5, 1, 6, 7, 0, 4, 3]
[2, 5, 1, 6, 7, 0, 4, 3]
[2, 5, 1, 6, 7, 0, 4, 3]
[1, 2, 5, 6, 7, 0, 4, 3]
[1, 2, 5, 6, 0, 7, 4, 3]
[1, 2, 5, 6, 0, 7, 3, 4]
[1, 2, 5, 6, 0, 3, 4, 7]
[0, 1, 2, 3, 4, 5, 6, 7]
[0, 1, 2, 3, 4, 5, 6, 7]


In [25]:
#
# HW04 - time Merge Sort on random data for a variety of array sizes.
#

rng = np.random.default_rng()
sizes = [10, 50, 100, 1000, 2500, 7500, 10000, 12500, 17500, 20000]
# Derived from `sizes` so the run count can never drift out of sync with the
# list (a hand-maintained count would raise IndexError or skip entries).
TEST_RUNS = len(sizes)

# Iterate the sizes directly instead of indexing with a separate counter.
for n in sizes:
    # Values are drawn from [0, 100), so larger inputs contain many duplicates.
    arr = rng.integers(low=0, high=100, size=n).tolist()

    # Time only the sort itself; building the input list is excluded.
    start = time.perf_counter()
    merge_sort(arr, 0, n - 1)
    end = time.perf_counter()

    # perf_counter() returns seconds; report milliseconds.
    print(f"n = {n:<8,} time = {(end - start)*1_000:.3f} ms")


n = 10       time = 0.019 ms
n = 50       time = 0.077 ms
n = 100      time = 0.150 ms
n = 1,000    time = 1.627 ms
n = 2,500    time = 4.554 ms
n = 7,500    time = 14.377 ms
n = 10,000   time = 22.863 ms
n = 12,500   time = 26.663 ms
n = 17,500   time = 35.169 ms
n = 20,000   time = 42.591 ms


## Results

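Because Merge Sort has an expected runtime of $\Theta\left(n \log n\right)$, we will calculate ratios with a denominator of $n \log_2 n$. The ratio against $n^2$ is kept alongside for comparison. Both ratio columns are reported in microseconds (the time in ms multiplied by $10^3$).

| $n$    | Time (ms) | $\dfrac{T(n)}{n^2}$ (µs) | $\dfrac{T(n)}{n \log_2 n}$ (µs) |
|--------|-----------|--------------------------|---------------------------------|
| 10     | 0.019     | 0.190                    | 0.572                           |
| 50     | 0.077     | 0.0308                   | 0.273                           |
| 100    | 0.150     | 0.0150                   | 0.226                           |
| 1,000  | 1.627     | 0.001627                 | 0.163                           |
| 2,500  | 4.554     | 0.000729                 | 0.161                           |
| 7,500  | 14.377    | 0.000256                 | 0.149                           |
| 10,000 | 22.863    | 0.000229                 | 0.172                           |
| 12,500 | 26.663    | 0.000171                 | 0.157                           |
| 17,500 | 35.169    | 0.000115                 | 0.143                           |
| 20,000 | 42.591    | 0.000107                 | 0.149                           |

Looking at our ratios, as $n$ gets larger, $\dfrac{T(n)}{n^2}$ does not settle at a constant: it keeps shrinking, roughly halving each time $n$ doubles (from 0.000229 at $n = 10{,}000$ to 0.000107 at $n = 20{,}000$), so the runtime grows more slowly than $n^2$.
By contrast, $\dfrac{T(n)}{n \log_2 n}$ stabilizes around $1.5 \times 10^{-1}$ µs for $n \ge 1{,}000$ (all values fall between 0.143 and 0.172). This strongly suggests that the algorithm runs in $\Theta\left(n \log n\right)$ time.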


In [26]:
#
# HW04 - time Merge Sort on sorted data for a variety of array sizes.
#

# Same measurement as the random-data run, but on already-sorted input.
# Iterate the sizes directly so this loop cannot go out of range or skip
# entries if the list of sizes is edited.
for n in sizes:
    arr = list(range(n))

    # Time only the sort itself; building the input list is excluded.
    start = time.perf_counter()
    merge_sort(arr, 0, n - 1)
    end = time.perf_counter()

    # perf_counter() returns seconds; report milliseconds.
    print(f"n = {n:<8,} time = {(end - start)*1_000:.3f} ms")

n = 10       time = 0.016 ms
n = 50       time = 0.066 ms
n = 100      time = 0.111 ms
n = 1,000    time = 1.644 ms
n = 2,500    time = 4.349 ms
n = 7,500    time = 15.848 ms
n = 10,000   time = 18.193 ms
n = 12,500   time = 22.459 ms
n = 17,500   time = 32.376 ms
n = 20,000   time = 37.496 ms
