<a href="https://colab.research.google.com/github/Bayhaqieee/SearchSort_OlistDatasets/blob/main/SearchSort_Olist_Team_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Sorting algorithms

In [None]:
def heap_sort(data, key):
    """Return a new list with the items of ``data`` in ascending ``key`` order.

    The input is left untouched. Items whose keys compare equal keep their
    original relative order.

    Args:
        data: Iterable of items to sort.
        key: One-argument callable returning the value an item is ordered by.

    Returns:
        A new list containing every item of ``data`` in sorted order.
    """
    import heapq

    # The position sits between the key and the item so that ties on the key
    # are settled by position. Without it, equal keys would make the tuple
    # comparison fall through to the items themselves, which fails for
    # records such as dicts that do not support "<".
    heap = [(key(item), position, item) for position, item in enumerate(data)]
    heapq.heapify(heap)
    return [heapq.heappop(heap)[2] for _ in range(len(heap))]

def merge_sort(data, key):
    """Sort ``data`` by ``key`` with a recursive, top-down merge sort.

    An input holding at most one element is returned as-is (the very same
    object). A longer input is cut in half, each half is sorted recursively,
    and the two sorted halves are combined by ``merge``.

    Args:
        data: Sliceable sequence of items to sort.
        key: One-argument callable returning the value an item is ordered by.

    Returns:
        The sorted items.
    """
    size = len(data)
    if size < 2:
        return data

    half = size // 2
    sorted_left = merge_sort(data[:half], key)
    sorted_right = merge_sort(data[half:], key)
    return merge(sorted_left, sorted_right, key)

def merge(left, right, key):
    """Combine two ``key``-sorted sequences into one sorted list.

    When the heads of both sequences have equal keys, the element from
    ``left`` is taken first.

    Args:
        left: First sequence, already ordered by ``key``.
        right: Second sequence, already ordered by ``key``.
        key: One-argument callable returning the value an item is ordered by.

    Returns:
        A new list holding every element of ``left`` and ``right``.
    """
    merged = []
    li, ri = 0, 0
    left_len, right_len = len(left), len(right)

    while li < left_len and ri < right_len:
        head_left, head_right = left[li], right[ri]
        if key(head_left) <= key(head_right):
            merged.append(head_left)
            li += 1
        else:
            merged.append(head_right)
            ri += 1

    # At most one of these tails is non-empty once the loop has finished.
    merged += left[li:]
    merged += right[ri:]
    return merged

def selection_sort(data, key):
    """Sort ``data`` in place in ascending ``key`` order and return it.

    For each slot, the first element with the smallest key in the
    not-yet-sorted tail is swapped into that slot.

    Args:
        data: Mutable sequence to sort; it is modified in place.
        key: One-argument callable returning the value an item is ordered by.

    Returns:
        The same ``data`` object, now sorted.
    """
    size = len(data)
    for slot in range(size):
        # min() keeps the earliest index among equal keys, which is the same
        # choice a strict "<" scan over the tail makes.
        smallest = min(range(slot, size), key=lambda pos: key(data[pos]))
        data[slot], data[smallest] = data[smallest], data[slot]
    return data

def bubble_sort(data, key):
    """Sort ``data`` in place in ascending ``key`` order and return it.

    Each pass walks the unsorted prefix and swaps adjacent elements that are
    out of order, so the largest remaining key ends up at the end of the
    prefix. Elements with equal keys are never swapped.

    Args:
        data: Mutable sequence to sort; it is modified in place.
        key: One-argument callable returning the value an item is ordered by.

    Returns:
        The same ``data`` object, now sorted.
    """
    # ``boundary`` is the number of adjacent pairs to inspect in the current
    # pass; it shrinks by one per pass, down to zero.
    for boundary in range(len(data) - 1, -1, -1):
        for pos in range(boundary):
            current, following = data[pos], data[pos + 1]
            if key(current) > key(following):
                data[pos], data[pos + 1] = following, current
    return data

def counting_sort(data, key):
    """Return a new list with ``data`` ordered by ``key`` via counting sort.

    The distinct key values are ranked by sorting them, so the keys only
    need to be hashable and mutually comparable. The input is not modified
    and elements with equal keys keep their original relative order.

    Args:
        data: Sequence of items to sort.
        key: One-argument callable returning the value an item is ordered by.

    Returns:
        A new list containing the sorted items.
    """
    keys = [key(item) for item in data]
    distinct = sorted(set(keys))
    rank_of = {value: rank for rank, value in enumerate(distinct)}

    # How many items fall into each rank.
    tallies = [0] * len(distinct)
    for value in keys:
        tallies[rank_of[value]] += 1

    # Running totals: ends[r] is one past the last output slot of rank r.
    ends = []
    running = 0
    for tally in tallies:
        running += tally
        ends.append(running)

    # Filling each bucket from its end while walking the input backwards
    # keeps equal-key items in their original order.
    ordered = [None] * len(data)
    for item in reversed(data):
        rank = rank_of[key(item)]
        ends[rank] -= 1
        ordered[ends[rank]] = item

    return ordered

# searching algorithms

In [None]:
def jump_search(data, target, key):
    """Find an item whose key equals ``target`` in a ``key``-sorted sequence.

    The sequence is probed in blocks of about ``sqrt(len(data))`` elements
    until a block whose last key is not smaller than ``target`` is reached;
    that block is then scanned linearly.

    Args:
        data: Sequence sorted in ascending ``key`` order.
        target: Key value to look for.
        key: One-argument callable returning the value an item is ordered by.

    Returns:
        The first item in the scanned block whose key equals ``target``, or
        ``None`` when there is no such item (including when ``data`` is
        empty).
    """
    import math

    n = len(data)
    if n == 0:
        # Nothing to probe; without this guard the first probe would index
        # data[-1] on an empty sequence and raise IndexError.
        return None

    block = int(math.sqrt(n))  # at least 1 because n >= 1
    prev = 0
    step = block

    # Jump forward while the last element of the current block is too small.
    while key(data[min(step, n) - 1]) < target:
        prev = step
        step += block
        if prev >= n:
            return None

    # The target, if present, lies inside data[prev:min(step, n)].
    for i in range(prev, min(step, n)):
        if key(data[i]) == target:
            return data[i]
    return None

def hash_search(data, target, key):
    """Look up the item whose key equals ``target`` through a dictionary.

    A key-to-item dictionary is built from ``data`` on every call. When
    several items share a key, the one that appears last in ``data`` is the
    one kept.

    Args:
        data: Iterable of items; their keys must be hashable.
        target: Key value to look for.
        key: One-argument callable returning the lookup value of an item.

    Returns:
        The matching item, or ``None`` when no item has that key.
    """
    index = {}
    for item in data:
        # Later items overwrite earlier ones that share the same key.
        index[key(item)] = item
    return index.get(target)

def binary_search(data, target, key):
    """Find an item whose key equals ``target`` in a ``key``-sorted sequence.

    The closed index range still under consideration is halved on every
    step by comparing the key of its middle element with ``target``.

    Args:
        data: Sequence sorted in ascending ``key`` order.
        target: Key value to look for.
        key: One-argument callable returning the value an item is ordered by.

    Returns:
        The first probed item whose key equals ``target``, or ``None`` when
        the range becomes empty without a match.
    """
    lo, hi = 0, len(data) - 1
    while lo <= hi:
        middle = (lo + hi) // 2
        candidate = data[middle]
        probe = key(candidate)
        if probe == target:
            return candidate
        if probe < target:
            lo = middle + 1  # match can only be to the right of middle
        else:
            hi = middle - 1  # match can only be to the left of middle
    return None

# Testing Combination

In [None]:
def uji_kombinasi_algoritma_multi(file_path, kolom_kunci_list, kombinasi_algoritma):
    """Time every sort + search pairing on each requested column of a CSV.

    The CSV is loaded once and turned into a list of row dictionaries. For
    each column and each pairing, a shallow copy of the rows is sorted by
    that column, the column value of the last sorted row is used as the
    search target, and the elapsed time of sort plus search is printed.

    Args:
        file_path: Path of the CSV file to load with ``pandas.read_csv``.
        kolom_kunci_list: Column names to use, one at a time, as the
            sort/search key.
        kombinasi_algoritma: Iterable of
            ``(search_func, label_search, sort_func, label_sort)`` tuples.
            ``sort_func`` is called as ``sort_func(rows, key=...)`` and
            ``search_func`` as ``search_func(sorted_rows, target, key=...)``.

    Returns:
        None. Results and errors are printed.
    """
    import pandas as pd
    import time

    df = pd.read_csv(file_path)
    records = df.to_dict(orient="records")

    for kolom in kolom_kunci_list:
        print(f"\n=== Testing: {kolom} ===")

        # One key function per column; the column is bound as a default so
        # the function does not depend on the loop variable being rebound.
        def ambil_kunci(row, kolom=kolom):
            return row[kolom]

        for search_func, label_search, sort_func, label_sort in kombinasi_algoritma:
            try:
                # perf_counter() is a monotonic, high-resolution clock meant
                # for measuring short intervals; time.time() is a wall clock
                # that can be coarse and can jump when the system time changes.
                start = time.perf_counter()
                hasil_sort = sort_func(records.copy(), key=ambil_kunci)
                target = hasil_sort[-1][kolom]
                _ = search_func(hasil_sort, target, key=ambil_kunci)
                total_time = time.perf_counter() - start
                print(f"{label_search} + {label_sort} => Time: {total_time:.8f} detik")
            except Exception as e:
                # Deliberately broad: a failing pairing (e.g. incomparable
                # values in a column) is reported and the run continues with
                # the remaining pairings.
                print(f"{label_search} + {label_sort} => ERROR in column '{kolom}': {e}")

# running

In [None]:
# Search/sort pairings to benchmark. Each entry is
# (search function, search label, sort function, sort label), which is the
# order in which uji_kombinasi_algoritma_multi unpacks it.
kombinasi_algoritma = [
    (jump_search, "Jump Search", heap_sort, "Heap Sort"),
    (jump_search, "Jump Search", merge_sort, "Merge Sort"),
    (hash_search, "Hash Search", selection_sort, "Selection Sort"),
    (jump_search, "Jump Search", bubble_sort, "Bubble Sort"),
    (binary_search, "Binary Search", counting_sort, "Counting Sort"),
]

# Run the benchmark once per key column and print the timings.
uji_kombinasi_algoritma_multi(
    file_path="/content/drive/MyDrive/Analisis Algoritma/product_category_name_translation.csv", # CSV dataset path (presumably a Google Drive mount in Colab; adjust for other environments)
    kolom_kunci_list=["product_category_name", "product_category_name_english"], # columns used, one at a time, as the sort/search key
    kombinasi_algoritma=kombinasi_algoritma
)


=== Uji Kolom: product_category_name ===
Jump Search + Heap Sort => Waktu: 0.00011539 detik
Jump Search + Merge Sort => Waktu: 0.00025940 detik
Hash Search + Selection Sort => Waktu: 0.00087214 detik
Jump Search + Bubble Sort => Waktu: 0.00099754 detik
Binary Search + Counting Sort => Waktu: 0.00009584 detik

=== Uji Kolom: product_category_name_english ===
Jump Search + Heap Sort => Waktu: 0.00005817 detik
Jump Search + Merge Sort => Waktu: 0.00025296 detik
Hash Search + Selection Sort => Waktu: 0.00067163 detik
Jump Search + Bubble Sort => Waktu: 0.00057888 detik
Binary Search + Counting Sort => Waktu: 0.00005007 detik
