In [9]:
# Lab Assignment 2: K-Nearest Neighbors vs Radius Neighbors
# Name: Nischal Joshi
# Course: MSCS 634 Advanced Data Mining and Big Data
# Lab Title: Classification with KNN and RNN using the Wine Dataset

In [None]:
# Importing the libraries for the task
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Fetch the wine dataset bundled with scikit-learn and split it into
# the feature matrix (X) and the class labels (y).
wine = load_wine()
X, y = wine.data, wine.target

# Build a DataFrame (named feature columns plus a 'target' column) for exploration
df = pd.DataFrame(X, columns=wine.feature_names).assign(target=y)

# Basic exploration: overall dimensions and number of samples per class
print("Dataset shape:", df.shape)
print("\nClass Distribution:")
class_counts = df['target'].value_counts()
print(class_counts)
df.head()

In [None]:
# Split the data into train and test sets (80/20).
# stratify=y keeps the class proportions the same in both subsets, which
# matters for a small dataset where a plain shuffle can leave a class
# under-represented in the 20% test split. random_state fixes the shuffle
# so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Feature scaling: the scaler is fitted on the training set only and the
# same transformation is then applied to the test set, so no statistics of
# the test data leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
print("train test Completed")

In [None]:
#Implementing K Nearest Neighbors
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Neighbor counts to evaluate, and the test accuracy obtained for each one
# (knn_accuracies[i] corresponds to k_values[i]).
k_values = [1, 5, 11, 15, 21]
knn_accuracies = []

for k in k_values:
    # fit() returns the estimator itself, so construction and training chain.
    knn = KNeighborsClassifier(n_neighbors=k).fit(X_train_scaled, y_train)

    # Score the model on the held-out (scaled) test set.
    y_pred = knn.predict(X_test_scaled)
    acc = accuracy_score(y_test, y_pred)

    knn_accuracies.append(acc)
    print(f"K={k}, Accuracy={acc:.4f}")

In [None]:
from sklearn.neighbors import RadiusNeighborsClassifier

# Radii to evaluate, and the test accuracy obtained for each one
# (rnn_accuracies[i] corresponds to r_values[i]).
r_values = [350, 400, 450, 500, 550, 600]
rnn_accuracies = []

for r in r_values:
    rnn = RadiusNeighborsClassifier(radius=r)

    # These radii only make sense on the raw feature scale. On the
    # StandardScaler output every feature has unit variance, so distances
    # between samples are only a few units; a radius of 350+ would then
    # contain the whole training set and the classifier would return the
    # training majority class for every test sample, whatever r is.
    # The model is therefore trained and evaluated on the unscaled features.
    # (Alternative: keep the scaled features and use radii of a few units.)
    rnn.fit(X_train, y_train)

    try:
        y_pred = rnn.predict(X_test)
        acc = accuracy_score(y_test, y_pred)
    except ValueError:
        # With the default outlier_label=None, predict() raises ValueError
        # when a test sample has no training neighbor within the radius.
        # Such a radius is recorded as a failed run with accuracy 0.
        acc = 0
    rnn_accuracies.append(acc)
    print(f"Radius={r}, Accuracy={acc:.4f}")


In [None]:
# Plot KNN test accuracy against the number of neighbors (explicit Axes API)
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(k_values, knn_accuracies, marker='o', color='red', label='KNN Accuracy')
ax.set_title('KNN-Accuracy vs K-Value')
ax.set_xlabel('No of Neighbors (K)')
ax.set_ylabel('Accuracy')
ax.grid(True)
ax.legend()
plt.show()

In [None]:
# Plot RNN test accuracy against the radius (explicit Axes API)
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(r_values, rnn_accuracies, marker='s', color='green', label='RNN Accuracy')
ax.set_title('RNN-Accuracy vs Radius-Value')
ax.set_xlabel('Radius')
ax.set_ylabel('Accuracy')
ax.grid(True)
ax.legend()
plt.show()

In [1]:
# Analysis and Observations
#
# Performance summary for both classifiers, computed from the accuracy
# lists filled in by the KNN and RNN evaluation cells.

# --- KNN ---------------------------------------------------------------
# Positions of the best and worst accuracy (first occurrence on ties).
best_knn_idx = np.argmax(knn_accuracies)
worst_knn_idx = np.argmin(knn_accuracies)
knn_best_acc = knn_accuracies[best_knn_idx]
knn_worst_acc = knn_accuracies[worst_knn_idx]

print("KNN Performance Analysis:")
print(f" Best Performance: K={k_values[best_knn_idx]}, Accuracy={knn_best_acc:.4f}")
print(f" Worst Performance: K={k_values[worst_knn_idx]}, Accuracy={knn_worst_acc:.4f}")
print(f" Performance Range: {knn_best_acc - knn_worst_acc:.4f}")
print(f" Average Accuracy: {np.mean(knn_accuracies):.4f}")

# --- RNN ---------------------------------------------------------------
# An accuracy of 0 marks a radius whose run failed, so only strictly
# positive accuracies count as valid results.
valid_rnn_accuracies = [score for score in rnn_accuracies if score > 0]
failed_predictions = rnn_accuracies.count(0)

print("\nRNN Performance Analysis:")
if not valid_rnn_accuracies:
    print(" All radius values failed to make predictions")
else:
    n_valid = len(valid_rnn_accuracies)
    n_radii = len(r_values)
    rnn_best_acc = max(valid_rnn_accuracies)
    best_rnn_idx = rnn_accuracies.index(rnn_best_acc)

    print(f" Best Performance: Radius={r_values[best_rnn_idx]}, Accuracy={rnn_best_acc:.4f}")
    print(f" Failed Predictions: {failed_predictions}/{n_radii} radius values")
    print(f" Success Rate: {n_valid}/{n_radii} ({n_valid/n_radii*100:.1f}%)")
    # A range is only meaningful with at least two valid results.
    if n_valid > 1:
        print(f" Performance Range (valid): {rnn_best_acc - min(valid_rnn_accuracies):.4f}")
    print(f" Average Accuracy (valid only): {np.mean(valid_rnn_accuracies):.4f}")



NameError: name 'np' is not defined

In [None]:
# Parameter Sensitivity Analysis
#
# Sensitivity is judged from the spread (standard deviation) of the test
# accuracies obtained across the tested parameter values.

def _sensitivity_level(std):
    """Map a standard deviation of accuracies to a qualitative label.

    Returns "Low" for std < 0.05, "Moderate" for std < 0.1, else "High".
    """
    if std < 0.05:
        return "Low"
    if std < 0.1:
        return "Moderate"
    return "High"


# KNN Sensitivity
knn_std = np.std(knn_accuracies)
print(" KNN Parameter Sensitivity:")
print(f" Standard Deviation: {knn_std:.4f}")
print(f" Coefficient of Variation: {knn_std/np.mean(knn_accuracies)*100:.2f}%")
knn_sensitivity = _sensitivity_level(knn_std)
print(f" Sensitivity Level: {knn_sensitivity}")

# RNN Sensitivity
if len(valid_rnn_accuracies) > 1:
    rnn_std = np.std(valid_rnn_accuracies)
    print("\nRNN Parameter Sensitivity:")
    print(f"Standard Deviation (valid): {rnn_std:.4f}")
    print(f"Coefficient of Variation: {rnn_std/np.mean(valid_rnn_accuracies)*100:.2f}%")
    print(f"Failure Rate: {failed_predictions/len(r_values)*100:.1f}%")
    if failed_predictions > 0:
        # At least one radius produced no usable prediction.
        rnn_sensitivity = "Very High (due to failures)"
    else:
        # No failures: rate RNN from its accuracy spread, with the same
        # thresholds used for KNN, instead of always reporting failures.
        rnn_sensitivity = _sensitivity_level(rnn_std)
else:
    # Fewer than two radii gave a valid result, so no spread can be computed.
    rnn_sensitivity = "Extremely High (mostly failed)"

print(f" Sensitivity Level: {rnn_sensitivity}")