In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.colors import LinearSegmentedColormap
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler


In [None]:
# Load the dataset arrays. np.load on an .npz returns a lazy archive handle
# that keeps the file open, so read both arrays inside a context manager and
# let it close the archive afterwards.
# X: one flattened image per row (reshaped to 20x20 further down);
# y: the label printed for each image.
with np.load("dataset.npz") as dataset:
    X, y = dataset["X"], dataset["y"]

In [None]:
# Single seed reused by train_test_split and the MLP so runs are repeatable.
random_state = 42

# Rescale the pixel values by 1/255 (the images are displayed with
# vmin=0, vmax=255 below, so this presumably maps them into [0, 1]).
X_scaled = np.divide(X, 255.0)

In [None]:
# Visualize one sample as a sanity check on the raw (unscaled) data.

# Index of the image to display.
k = 10

# Each row of X is a flattened image; restore its 20x20 layout for plotting.
image = X[k].reshape(20, 20)

print(f"Label for image {k}: {y[k]}")
# Raw X is plotted here, hence the 0-255 display range.
plt.imshow(image, vmin=0, vmax=255, cmap="gray")
plt.show()

# KNN

In [None]:
# Hold out 20% of the scaled samples for the final evaluation.
x_train, x_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=random_state
)

# Hyper-parameter grid for the 5-fold search.
# 'minkowski' is intentionally not listed: KNeighborsClassifier uses p=2 by
# default, which makes it the same distance as 'euclidean' and would only
# repeat those fits.
param_grid_knn = {
    'n_neighbors': [2, 3, 4, 5, 6, 7],
    'metric': ['euclidean', 'manhattan', 'chebyshev'],
}

grid_knn = GridSearchCV(
    KNeighborsClassifier(),
    param_grid_knn,
    cv=5,
    scoring='f1_weighted',
    n_jobs=-1,
)
grid_knn.fit(x_train, y_train)

best_params_knn = grid_knn.best_params_
print(f"Best KNN parameters: {best_params_knn}")

# GridSearchCV refits the best configuration on all of x_train by default
# (refit=True), so reuse that fitted model instead of training it again.
knn = grid_knn.best_estimator_

y_pred = knn.predict(x_test)
# NOTE: .score() reports plain accuracy, whereas the search above optimised
# the weighted F1 score.
score = knn.score(x_test, y_test)
print(f"KNN Classifier score: {score * 100:.2f}%")


## Visualisation of parameters 

In [None]:
# Tabulate the mean cross-validated score of every grid point:
# one row per parameter combination, plus its score.
results = grid_knn.cv_results_
df = pd.DataFrame(results['params'])
df['score'] = results['mean_test_score']

# Rows: n_neighbors, columns: distance metric, cells: mean CV score.
pivot = df.pivot(index='n_neighbors', columns='metric', values='score')

# Custom light-pink -> hot-pink colormap for the heatmap.
# NOTE(review): the last colour is an 8-digit hex, i.e. it carries an alpha
# channel (0xca) - confirm the partial transparency is intended.
bubble_to_hot_pink = LinearSegmentedColormap.from_list(
    "bubble_to_hot_pink",
    ["#fec1d9", "#ffa6cb", "#ff82c3", "#ff0080ca"],
    N=256,
)

# Explicit figure/axes so every label is attached to this heatmap's axes.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(pivot, annot=True, fmt=".3f", cmap=bubble_to_hot_pink, ax=ax)
ax.set_title("KNN Grid Search Mean F1 Score", fontsize=14)
ax.set_ylabel("n_neighbors")
ax.set_xlabel("metric")
plt.show()



## Misclassified images for KNN

In [None]:

# Positions in the KNN test split where the prediction differs from the
# true label.
misclassified = np.where(y_pred != y_test)[0]

# Show the first ten mistakes, one figure each.
for idx in misclassified[:10]:
    # x_test was scaled by 1/255; multiply back to the 0-255 display range.
    img = x_test[idx].reshape(20, 20) * 255
    plt.imshow(img, vmin=0, vmax=255, cmap="gray")
    plt.title(f"True: {y_test[idx]}, Predicted: {y_pred[idx]}")
    plt.show()

# Corrupted Dataset

In [None]:
# Load the corrupted variant of the dataset. This rebinds X and y; the
# context manager closes the .npz archive once both arrays have been read.
with np.load("dataset_corrupted.npz") as dataset:
    X, y = dataset["X"], dataset["y"]

In [None]:
# MLP on the corrupted dataset

# Scale the corrupted images that are currently bound to X. The X_scaled
# computed near the top of the notebook comes from the clean dataset and must
# not be paired with the corrupted labels in y.
# Assumes the corrupted file uses the same 0-255 pixel range - TODO confirm.
X_corrupted_scaled = X / 255.0

X_train, X_test, y_train, y_test = train_test_split(
    X_corrupted_scaled, y, test_size=0.2, random_state=random_state
)

# early_stopping=True makes the MLP hold out part of the training data and
# stop once the validation score stops improving. max_iter set here is
# overridden by the grid below.
mlp = MLPClassifier(max_iter=1000, solver='adam', random_state=random_state, early_stopping=True)

paramgrid_mlp = {
    'max_iter': [500, 1000, 2000, 3000],
    'hidden_layer_sizes': [(100,50), (200,100,50), (300,200,100), (400,300,200,100)],
    # Each regularisation strength is listed exactly once
    # (1e-4 == 0.0001, 1e-3 == 0.001, 1e-2 == 0.01).
    'alpha': [1e-5, 1e-4, 1e-3, 1e-2, 1e-1],
    'activation': ['relu', 'tanh'],
    'learning_rate_init': [0.001, 0.01, 0.05]
}

# Exhaustive 5-fold search over the grid, using all available cores.
grid_mlp = GridSearchCV(mlp, paramgrid_mlp, cv=5, n_jobs=-1)
grid_mlp.fit(X_train, y_train)

# grid_mlp.predict delegates to the best configuration found by the search.
y_pred_mlp = grid_mlp.predict(X_test)
accuracy_mlp = accuracy_score(y_test, y_pred_mlp)

print("Best parameters for MLP:", grid_mlp.best_params_)
print("Best MLP Accuracy:", accuracy_mlp)

# Training accuracy, to compare against the held-out accuracy.
y_train_pred_mlp = grid_mlp.predict(X_train)
accuracy_train_mlp = accuracy_score(y_train, y_train_pred_mlp)
print("Training Accuracy:", accuracy_train_mlp)

# Test accuracy (same value as printed above).
print("Test Accuracy:", accuracy_mlp)

# Loss recorded at each training iteration of the selected model.
plt.plot(grid_mlp.best_estimator_.loss_curve_)
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("MLP Training Loss Curve")
plt.show()