<a href="https://colab.research.google.com/github/Andrew09268/search-sort-diabetes/blob/main/search_sort.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import time

def load_data():
    return pd.read_csv('diabetes.csv')

def linear_search(data, column, value):
    for index, row in data.iterrows():
        if row[column] == value:
            return index
    return -1

def binary_search(data, column, value):
    left, right = 0, len(data) - 1
    while left <= right:
        mid = (left + right) // 2
        if data[column].iloc[mid] == value:
            while mid > 0 and data[column].iloc[mid - 1] == value:
                mid -= 1
            return mid
        elif data[column].iloc[mid] < value:
            left = mid + 1
        else:
            right = mid - 1
    return -1

def bubble_sort(data, column):
    sorted_data = data.copy()
    for i in range(len(sorted_data)):
        for j in range(len(sorted_data) - i - 1):
            if sorted_data[column].iloc[j] > sorted_data[column].iloc[j + 1]:
                sorted_data.iloc[j], sorted_data.iloc[j + 1] = sorted_data.iloc[j + 1], sorted_data.iloc[j]
    return sorted_data

def selection_sort(data, column):
    sorted_data = data.copy()
    for i in range(len(sorted_data)):
        min_idx = i
        for j in range(i + 1, len(sorted_data)):
            if sorted_data[column].iloc[j] < sorted_data[column].iloc[min_idx]:
                min_idx = j
        sorted_data.iloc[i], sorted_data.iloc[min_idx] = sorted_data.iloc[min_idx], sorted_data.iloc[i]
    return sorted_data

def insertion_sort(data, column):
    sorted_data = data.copy()
    for i in range(1, len(sorted_data)):
        key_row = sorted_data.iloc[i].copy()
        j = i - 1
        while j >= 0 and sorted_data[column].iloc[j] > key_row[column]:
            sorted_data.iloc[j + 1] = sorted_data.iloc[j]
            j -= 1
        sorted_data.iloc[j + 1] = key_row
    return sorted_data

def quick_sort(data, column):
    if len(data) <= 1:
        return data
    pivot = data.iloc[len(data) // 2]
    left = data[data[column] < pivot[column]]
    middle = data[data[column] == pivot[column]]
    right = data[data[column] > pivot[column]]
    return pd.concat([quick_sort(left, column), middle, quick_sort(right, column)])

def get_valid_column(data, user_input):
    user_input = user_input.strip().lower()
    column_mapping = {col.lower(): col for col in data.columns}
    return column_mapping.get(user_input, None)

def main():
    data = load_data()

    column = get_valid_column(data, input("Choose a column to search (Glucose, Age, BMI, etc.): "))
    if column is None:
        print("Invalid column name. Please check spelling and capitalization.")
        return

    value = float(input("Enter a value to search for: "))

    print("\nChoose search method:\n1. Linear Search\n2. Binary Search")
    search_choice = int(input("Enter choice: "))

    start_time = time.time()
    if search_choice == 2:
        print(f"Sorting \"{column}\" column before Binary Search...")
        data = quick_sort(data, column)

    index = linear_search(data, column, value) if search_choice == 1 else binary_search(data, column, value)
    end_time = time.time()

    if index != -1:
        print(f"Found at row index {index}")
    else:
        print("Value not found.")
    print(f"Time taken: {end_time - start_time:.5f}s\n")

    column = get_valid_column(data, input("Choose a column to sort (Glucose, Age, BMI, etc.): "))
    if column is None:
        print("Invalid column name. Please check spelling and capitalization.")
        return

    print("\nChoose sorting algorithm:\n1. Bubble Sort\n2. Selection Sort\n3. Insertion Sort\n4. Quick Sort")
    sort_choice = int(input("Enter choice: "))

    start_time = time.time()
    sorting_algorithms = {1: bubble_sort, 2: selection_sort, 3: insertion_sort, 4: quick_sort}
    sorted_data = sorting_algorithms[sort_choice](data, column)
    sorted_data.to_csv("sorted-diabetes.csv", index=False)
    end_time = time.time()

    print(f"Sorting by \"{column}\" using chosen method...")
    print("Sorting completed.")
    print("Sorted data saved as \"sorted-diabetes.csv\".")
    print(f"Time taken: {end_time - start_time:.5f}s")

if __name__ == "__main__":
    main()

Choose a column to search (Glucose, Age, BMI, etc.): bmi
Enter a value to search for: 45

Choose search method:
1. Linear Search
2. Binary Search
Enter choice: 1
Found at row index 178
Time taken: 0.00813s

Choose a column to sort (Glucose, Age, BMI, etc.): bmi

Choose sorting algorithm:
1. Bubble Sort
2. Selection Sort
3. Insertion Sort
4. Quick Sort
Enter choice: 4
Sorting by "BMI" using chosen method...
Sorting completed.
Sorted data saved as "sorted-diabetes.csv".
Time taken: 0.25604s
