In [41]:
import pandas as pd
import numpy as np

print('\nName:- Abhishikth Boda')
print('Roll Number:- S20210010044')
print('Course:- Machine Learning')
print('Section:- 2')
print('Assignment Number:- 4')
print('Date:- 31th August 2023\n')

print('Program is being executed\n')

# Load data from the provided dataset
data = pd.read_csv('seeds.csv')

# Assuming 'Type' is the label column
features = data.drop('Type', axis=1).values
labels = data['Type'].values

# Shuffle the data
shuffled_indices = np.random.permutation(len(features))
shuffled_features = features[shuffled_indices]
shuffled_labels = labels[shuffled_indices]

# Euclidean distance calculation
def euclidean_distance(x1, x2):
    return np.sqrt(np.sum((x1 - x2)**2))

# K-Nearest Neighbors algorithm
def knn_algorithm(X_train, y_train, X_test, k, p):
    predictions = []

    for test_sample in X_test:
        distances = [euclidean_distance(test_sample, train_sample) for train_sample in X_train]
        sorted_indices = np.argsort(distances)
        k_nearest_labels = [y_train[i] for i in sorted_indices[:k]]

        # Perform majority vote
        unique_labels, counts = np.unique(k_nearest_labels, return_counts=True)
        predicted_label = unique_labels[np.argmax(counts)]

        predictions.append(predicted_label)

    return predictions

# Split the shuffled dataset into training and test sets
train_size = 150
train_features = shuffled_features[:train_size]
test_features = shuffled_features[train_size:]
train_labels = shuffled_labels[:train_size]
test_labels = shuffled_labels[train_size:]

best_accuracy = 0
best_k = 0
best_p = 0
best_precision = 0
best_recall = 0
best_f1 = 0

# Evaluate KNN for a range of k and p values
k_range = range(1, 11)
p_range = range(1, 11)

for k in k_range:
    for p in p_range:
        predictions = knn_algorithm(train_features, train_labels, test_features, k, p)

        correct_predictions = np.sum(predictions == test_labels)
        acc = correct_predictions / len(test_labels)

        unique_labels = np.unique(test_labels)
        precision_sum = 0
        recall_sum = 0
        for label in unique_labels:
            true_positive = np.sum((predictions == label) & (test_labels == label))
            false_positive = np.sum((predictions == label) & (test_labels != label))
            false_negative = np.sum((predictions != label) & (test_labels == label))

            precision = true_positive / (true_positive + false_positive + 1e-10)
            recall = true_positive / (true_positive + false_negative + 1e-10)

            precision_sum += precision
            recall_sum += recall

        precision = precision_sum / len(unique_labels)
        recall = recall_sum / len(unique_labels)
        f1 = 2 * (precision * recall) / (precision + recall + 1e-10)

        # Check if this combination is the best
        if acc > best_accuracy:
            best_accuracy = acc
            best_k = k
            best_p = p
            best_precision = precision
            best_recall = recall
            best_f1 = f1

print("Best K:", best_k)
print("Best p:", best_p)
print("Best Accuracy:", best_accuracy)
print("Best Precision:", best_precision)
print("Best Recall:", best_recall)
print("Best F1-score:", best_f1)

# Confusion matrix calculation
confusion_matrix = np.zeros((len(unique_labels), len(unique_labels)), dtype=int)
for i in range(len(test_labels)):
    true_label_index = np.where(unique_labels == test_labels[i])[0][0]
    predicted_label_index = np.where(unique_labels == predictions[i])[0][0]
    confusion_matrix[true_label_index][predicted_label_index] += 1

print("\nConfusion Matrix:")
for i in range(len(unique_labels)):
    print("True Label", unique_labels[i], ":", end=" ")
    for j in range(len(unique_labels)):
        print(confusion_matrix[i][j], end=" ")
    print()



Name:- Abhishikth Boda
Roll Number:- S20210010044
Course:- Machine Learning
Section:- 2
Assignment Number:- 4
Date:- 31th August 2023

Program is being executed

Best K: 1
Best p: 1
Best Accuracy: 0.9387755102040817
Best Precision: 0.9440789473625174
Best Recall: 0.938562091497503
Best F1-score: 0.9413124361347166

Confusion Matrix:
True Label 1 : 16 0 1 
True Label 2 : 1 14 0 
True Label 3 : 1 0 16 
