In [1]:
import pandas as pd
import numpy as np
import requests
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics.pairwise import euclidean_distances, manhattan_distances

In [2]:
def load_glass_dataset(url):
    """Load the Glass dataset and split it into features and labels.

    Parameters
    ----------
    url : str or path-like
        Location of the CSV file (remote URL or local path); forwarded
        unchanged to ``pd.read_csv``.

    Returns
    -------
    X : numpy.ndarray
        Values of every column except ``Type``.
    y : numpy.ndarray
        Values of the ``Type`` column.

    Side effects
    ------------
    Prints the first rows of the loaded frame.
    """
    # Honor the caller-supplied location instead of a hard-coded file name,
    # so the function actually loads what it was asked to load.
    df = pd.read_csv(url)
    print(df.head())
    X = df.drop(columns=['Type']).values
    y = df['Type'].values
    return X, y

In [3]:
def load_fruit_dataset(url):
    """Read the fruit CSV at ``url`` and return ``(features, labels)``.

    The label/name/subtype columns are removed from the feature matrix;
    ``fruit_label`` is returned as the target vector. The head of the
    loaded frame is printed as a quick sanity check.
    """
    fruit_frame = pd.read_csv(url)
    print(fruit_frame.head())

    # Columns that identify the fruit rather than describe it.
    non_feature_cols = ['fruit_label', 'fruit_name', 'fruit_subtype']
    targets = fruit_frame['fruit_label'].values
    features = fruit_frame.drop(columns=non_feature_cols).values
    return features, targets

In [4]:
def knn_classification(X_train, y_train, X_test, k, metric):
    """Classify test samples by majority vote among their k nearest training samples.

    Parameters
    ----------
    X_train : array-like
        Training feature matrix.
    y_train : array-like
        Training labels, one per row of ``X_train``.
    X_test : array-like
        Feature matrix of the samples to classify.
    k : int
        Number of neighbours that vote; must be at least 1. If ``k`` exceeds
        the number of training samples, every training sample votes.
    metric : {'euclidean', 'manhattan'}
        Distance used to rank neighbours.

    Returns
    -------
    numpy.ndarray
        Predicted label for each row of ``X_test``.

    Raises
    ------
    ValueError
        If ``metric`` is not supported or ``k`` is smaller than 1.
    """
    # Validate arguments up front, before any distance computation.
    if metric not in ('euclidean', 'manhattan'):
        raise ValueError("Invalid distance metric. Choose 'euclidean' or 'manhattan'.")
    # k == 0 leaves nobody to vote; a negative k would, through slicing,
    # silently drop the farthest neighbours instead of failing.
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}.")

    # The fancy indexing below needs an ndarray; accept lists / Series too.
    y_train = np.asarray(y_train)

    if metric == 'euclidean':
        distances = euclidean_distances(X_test, X_train)
    else:
        distances = manhattan_distances(X_test, X_train)

    # A stable sort keeps equidistant training samples in their original
    # order, so the selected neighbours are deterministic.
    indices = np.argsort(distances, axis=1, kind='stable')
    knn_indices = indices[:, :k]
    knn_labels = y_train[knn_indices]

    predictions = []
    for neighbors in knn_labels:
        unique, counts = np.unique(neighbors, return_counts=True)
        # np.unique returns sorted labels and argmax picks the first maximum,
        # so a tied vote resolves to the smallest label.
        predictions.append(unique[np.argmax(counts)])

    return np.array(predictions)

In [5]:
def evaluate(X, y, k_values, metrics, test_size=0.1, n_splits=3, random_state=None):
    """Print the mean k-NN accuracy for every (k, metric) combination.

    For each combination the data is randomly split ``n_splits`` times with
    ``train_test_split``, classified with ``knn_classification``, and the
    accuracies are averaged.

    Parameters
    ----------
    X, y : array-like
        Feature matrix and labels.
    k_values : iterable of int
        Neighbour counts to try.
    metrics : iterable of str
        Distance metric names passed on to ``knn_classification``.
    test_size : float, default 0.1
        Passed to ``train_test_split`` as ``test_size``.
    n_splits : int, default 3
        Number of random train/test splits averaged per combination.
    random_state : int, numpy.random.RandomState or None, default None
        Seed or generator for the splits. ``None`` keeps the splits unseeded.

    Raises
    ------
    ValueError
        If ``n_splits`` is smaller than 1 (there would be nothing to average).
    """
    if n_splits < 1:
        raise ValueError(f"n_splits must be at least 1, got {n_splits!r}.")

    # An integer seed is turned into one shared generator so that the run is
    # reproducible while successive splits still differ from each other.
    # Passing the same int to every call would repeat one identical split.
    if isinstance(random_state, (int, np.integer)):
        split_rng = np.random.RandomState(random_state)
    else:
        split_rng = random_state

    for k in k_values:
        for metric in metrics:
            print(f"K = {k}, Metric = {metric}:")
            accuracy_scores = []
            for _ in range(n_splits):
                X_train, X_test, y_train, y_test = train_test_split(
                    X, y, test_size=test_size, random_state=split_rng
                )
                y_pred = knn_classification(X_train, y_train, X_test, k, metric)
                accuracy_scores.append(accuracy_score(y_test, y_pred))
            print("Accuracy:", np.mean(accuracy_scores))
            print()

In [None]:
def main():
    """Load both datasets and print k-NN accuracies for every configuration.

    Each dataset is evaluated for every test size, and within that for
    every (k, metric) pair, via ``evaluate``.
    """
    # (label, loader, location) — loaded in this order: glass first, then fruit.
    sources = (
        ('glass', load_glass_dataset,
         "https://github.com/jasp9559/KNN-Classifier-R-Python/raw/main/glass.csv"),
        ('fruit', load_fruit_dataset,
         "https://github.com/saanyalasod/Fruits/raw/main/fruit%20data.csv"),
    )
    loaded = {label: loader(location) for label, loader, location in sources}

    neighbor_counts = [3, 5, 7]
    distance_names = ['euclidean', 'manhattan']
    holdout_fractions = [0.1, 0.3]

    for label, (features, targets) in loaded.items():
        print(f"Dataset: {label.capitalize()}")
        for holdout in holdout_fractions:
            print(f"Test Size: {holdout}")
            evaluate(features, targets, neighbor_counts, distance_names, holdout)
            print("=" * 30)
        print("*" * 50)

# Entry point: run the full evaluation when this code is executed directly
# (not when it is imported as a module).
if __name__ == "__main__":
    main()

        RI     Na    Mg    Al     Si     K    Ca   Ba   Fe  Type
0  1.52101  13.64  4.49  1.10  71.78  0.06  8.75  0.0  0.0     1
1  1.51761  13.89  3.60  1.36  72.73  0.48  7.83  0.0  0.0     1
2  1.51618  13.53  3.55  1.54  72.99  0.39  7.78  0.0  0.0     1
3  1.51766  13.21  3.69  1.29  72.61  0.57  8.22  0.0  0.0     1
4  1.51742  13.27  3.62  1.24  73.08  0.55  8.07  0.0  0.0     1
