<a href="https://colab.research.google.com/github/Bayhaqieee/SearchSort_OlistDatasets/blob/main/SearchSort_Olist_Team_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# sorting algortihms

In [1]:
def heap_sort(data, key):
    import heapq
    return [item for _, item in heapq.nsmallest(len(data), [(key(d), d) for d in data])]

def merge_sort(data, key):
    if len(data) <= 1:
        return data
    mid = len(data) // 2
    left = merge_sort(data[:mid], key)
    right = merge_sort(data[mid:], key)
    return merge(left, right, key)

def merge(left, right, key):
    result = []
    i = j = 0
    while i < len(left) and j < len(right):
        if key(left[i]) <= key(right[j]):
            result.append(left[i])
            i += 1
        else:
            result.append(right[j])
            j += 1
    result.extend(left[i:])
    result.extend(right[j:])
    return result

def selection_sort(data, key):
    n = len(data)
    for i in range(n):
        min_idx = i
        for j in range(i + 1, n):
            if key(data[j]) < key(data[min_idx]):
                min_idx = j
        data[i], data[min_idx] = data[min_idx], data[i]
    return data

def bubble_sort(data, key):
    n = len(data)
    for i in range(n):
        for j in range(0, n - i - 1):
            if key(data[j]) > key(data[j + 1]):
                data[j], data[j + 1] = data[j + 1], data[j]
    return data

def counting_sort(data, key):
    values = [key(d) for d in data]
    unique_values = sorted(set(values))
    value_to_index = {v: i for i, v in enumerate(unique_values)}

    count = [0] * len(unique_values)
    output = [None] * len(data)

    for val in values:
        count[value_to_index[val]] += 1

    for i in range(1, len(count)):
        count[i] += count[i - 1]

    for d in reversed(data):
        idx = value_to_index[key(d)]
        count[idx] -= 1
        output[count[idx]] = d

    return output

# searching algorithms

In [2]:
def jump_search(data, target, key):
    import math
    n = len(data)
    step = int(math.sqrt(n))
    prev = 0

    while key(data[min(step, n) - 1]) < target:
        prev = step
        step += int(math.sqrt(n))
        if prev >= n:
            return None

    for i in range(prev, min(step, n)):
        if key(data[i]) == target:
            return data[i]
    return None

def hash_search(data, target, key):
    hashmap = {key(item): item for item in data}
    return hashmap.get(target, None)

def binary_search(data, target, key):
    low = 0
    high = len(data) - 1
    while low <= high:
        mid = (low + high) // 2
        mid_val = key(data[mid])
        if mid_val == target:
            return data[mid]
        elif mid_val < target:
            low = mid + 1
        else:
            high = mid - 1
    return None

# Testing Combination

In [3]:
import pandas as pd
import time

def test_algorithm_combinations_multi(file_path, key_columns, algorithm_combinations, target_dict):
    df = pd.read_csv(file_path)

    for key_column in key_columns:
        data = df[[key_column]].dropna().to_dict(orient="records")
        target = target_dict.get(key_column, None)
        print(f"\n=== Testing Column: {key_column} ===")

        for search_func, search_name, sort_func, sort_name in algorithm_combinations:
            try:
                start_time = time.time()
                sorted_data = sort_func(data, key=lambda x: x[key_column])
                _ = search_func(sorted_data, key=lambda x: x[key_column], target=target)
                end_time = time.time()
                print(f"{search_name} + {sort_name} => Time: {end_time - start_time:.8f} seconds")
            except Exception as e:
                print(f"{search_name} + {sort_name} => ERROR in column '{key_column}': {e}")

# running

In [4]:
algorithm_combinations = [
    (jump_search, "Jump Search", heap_sort, "Heap Sort"),
    (jump_search, "Jump Search", merge_sort, "Merge Sort"),
    (hash_search, "Hash Search", selection_sort, "Selection Sort"),
    (jump_search, "Jump Search", bubble_sort, "Bubble Sort"),
    (binary_search, "Binary Search", counting_sort, "Counting Sort"),
]

target_dict = {
    "product_category_name": "telefonia",
    "product_category_name_english": "telephony"
}

test_algorithm_combinations_multi(
    file_path="/content/drive/MyDrive/Analisis Algoritma/product_category_name_translation.csv",
    key_columns=["product_category_name", "product_category_name_english"],
    algorithm_combinations=algorithm_combinations,
    target_dict=target_dict
)


=== Testing Column: product_category_name ===
Jump Search + Heap Sort => Time: 0.00008249 seconds
Jump Search + Merge Sort => Time: 0.00019026 seconds
Hash Search + Selection Sort => Time: 0.00056767 seconds
Jump Search + Bubble Sort => Time: 0.00047636 seconds
Binary Search + Counting Sort => Time: 0.00005627 seconds

=== Testing Column: product_category_name_english ===
Jump Search + Heap Sort => Time: 0.00004315 seconds
Jump Search + Merge Sort => Time: 0.00014758 seconds
Hash Search + Selection Sort => Time: 0.00047874 seconds
Jump Search + Bubble Sort => Time: 0.00049591 seconds
Binary Search + Counting Sort => Time: 0.00005102 seconds
