<a href="https://colab.research.google.com/github/RaspyPiano24270/ML-Assignment_2/blob/main/ML_Assignment2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from collections import Counter
from math import dist

In [None]:
# Fetch the scikit-learn digits dataset and separate features from labels.
digits = load_digits()
X = digits.data
y = digits.target

# Stage 1: keep 70% of the samples for training and set the other 30% aside.
X_train, X_temp, y_train, y_temp = train_test_split(
  X, y, test_size=0.3, shuffle=True, random_state=42
)

# Stage 2: cut the held-out 30% in half -> validation and test (15% each overall).
X_val, X_test, y_val, y_test = train_test_split(
  X_temp, y_temp, test_size=0.5, shuffle=True, random_state=42
)

# Sanity check: show the shape of each split.
print(X_train.shape, X_val.shape, X_test.shape)

(1257, 64) (270, 64) (270, 64)


In [None]:
#distance function 1
def euclidean_distance(a, b):
  """Return the Euclidean (L2) distance between arrays `a` and `b`.

  Computed as the square root of the sum of squared element-wise differences.
  """
  diff = a - b
  squared_total = np.sum(np.square(diff))
  return np.sqrt(squared_total)

#distance function 2
def manhattan_distance(a, b):
  """Return the Manhattan (L1) distance between arrays `a` and `b`.

  Computed as the sum of absolute element-wise differences.
  """
  gaps = np.abs(a - b)
  return np.sum(gaps)

#distance function 3
def cosine_distance(a, b):
  """Return the cosine distance (1 - cosine similarity) between `a` and `b`.

  Parameters:
    a, b: 1-D numeric arrays of equal length.

  Returns:
    1 - (a . b) / (||a|| * ||b||). If either vector has zero norm the
    direction is undefined; the similarity is then treated as 0 and the
    distance returned is 1.0 rather than NaN.
  """
  denom = np.linalg.norm(a) * np.linalg.norm(b)
  # A zero-length vector would trigger a 0/0 division and yield NaN, which
  # then sorts unpredictably when distances are ranked.
  if denom == 0:
    return 1.0
  return 1 - np.dot(a, b) / denom

In [None]:
#KNN class skeleton
class KNNClassifier:
  """Brute-force k-nearest-neighbours classifier with a pluggable distance.

  Parameters:
    k: number of nearest training samples that vote on each prediction (>= 1).
    distance_func: callable taking two 1-D samples and returning a scalar
      distance; smaller means closer.
  """

  def __init__(self, k=3, distance_func=euclidean_distance):
    # k = 0 would leave no voters, and a negative k would silently slice
    # "all but the last |k|" neighbours, so reject both up front.
    if k < 1:
      raise ValueError(f"k must be at least 1, got {k}")
    self.k = k
    self.distance_func = distance_func

  def fit(self, X, y):
    """Memorise the training set.

    Parameters:
      X: training samples, one row per sample.
      y: labels, one per row of X.

    Returns:
      self, so calls can be chained.

    Raises:
      ValueError: if X is empty or X and y differ in length.
    """
    # Convert to arrays so that row iteration and positional label lookup
    # work the same for lists, ndarrays and pandas objects (a DataFrame
    # iterates over column names and a Series indexes by label).
    X = np.asarray(X)
    y = np.asarray(y)
    if len(X) != len(y):
      raise ValueError(
        f"X and y must have the same length, got {len(X)} and {len(y)}"
      )
    if len(X) == 0:
      raise ValueError("cannot fit on an empty training set")
    self.X_train = X
    self.y_train = y
    return self

  def predict(self, X):
    """Predict a label for every row of X by majority vote of its k nearest
    training samples.

    Ties in the vote go to the label that appears first among the neighbours
    when ordered from nearest to farthest.

    Returns:
      numpy array with one predicted label per row of X.
    """
    predictions = []
    for x in np.asarray(X):
      distances = [self.distance_func(x, x_train) for x_train in self.X_train]
      # Indices of the k smallest distances, nearest first.
      k_indices = np.argsort(distances)[:self.k]
      k_labels = [self.y_train[i] for i in k_indices]
      predictions.append(Counter(k_labels).most_common(1)[0][0])
    return np.array(predictions)

# KNN Tests

In [None]:
# Validation run: custom KNN with k=3 and Euclidean distance.
knn = KNNClassifier(distance_func=euclidean_distance, k=3)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_val)

# Accuracy first, then the macro-averaged scores in the same order as before.
print("Accuracy:", accuracy_score(y_val, y_pred))
for metric_label, metric_fn in (
  ("F1 score:", f1_score),
  ("Precision:", precision_score),
  ("Recall:", recall_score),
):
  print(metric_label, metric_fn(y_val, y_pred, average='macro'))

Accuracy: 0.9925925925925926
F1 score: 0.9937373737373738
Precision: 0.9940178571428572
Recall: 0.9935714285714287


In [None]:
# Validation run: custom KNN with k=5 and Manhattan distance.
knn = KNNClassifier(distance_func=manhattan_distance, k=5)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_val)

# Accuracy first, then the macro-averaged scores in the same order as before.
print("Accuracy:", accuracy_score(y_val, y_pred))
for metric_label, metric_fn in (
  ("F1 score:", f1_score),
  ("Precision:", precision_score),
  ("Recall:", recall_score),
):
  print(metric_label, metric_fn(y_val, y_pred, average='macro'))

Accuracy: 0.9851851851851852
F1 score: 0.9863265278731662
Precision: 0.9876157407407407
Recall: 0.9854517704517705


In [None]:
# Validation run: custom KNN with k=7 and cosine distance.
knn = KNNClassifier(distance_func=cosine_distance, k=7)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_val)

# Accuracy first, then the macro-averaged scores in the same order as before.
print("Accuracy:", accuracy_score(y_val, y_pred))
for metric_label, metric_fn in (
  ("F1 score:", f1_score),
  ("Precision:", precision_score),
  ("Recall:", recall_score),
):
  print(metric_label, metric_fn(y_val, y_pred, average='macro'))

Accuracy: 0.9925925925925926
F1 score: 0.9942568542568543
Precision: 0.9940178571428572
Recall: 0.9945787545787546


# Other Algorithm Tests


In [None]:
# Decision tree: depth capped at 10, Gini impurity criterion, fixed seed.
dt = DecisionTreeClassifier(criterion='gini', max_depth=10, random_state=42)
y_pred_dt = dt.fit(X_train, y_train).predict(X_val)

# SVM: RBF kernel with C=1.0.
svm = SVC(kernel='rbf', C=1.0)
y_pred_svm = svm.fit(X_train, y_train).predict(X_val)

# Report validation accuracy and macro-averaged F1 / precision / recall per model.
macro_metrics = (("F1:", f1_score), ("Precision:", precision_score), ("Recall:", recall_score))
for name, y_pred in (("Decision Tree", y_pred_dt), ("SVM", y_pred_svm)):
  print(name)
  print("Accuracy:", accuracy_score(y_val, y_pred))
  for metric_label, metric_fn in macro_metrics:
    print(metric_label, metric_fn(y_val, y_pred, average='macro'))

Decision Tree
Accuracy: 0.8740740740740741
F1: 0.8731401279570659
Precision: 0.8801512565196775
Recall: 0.869648111744886
SVM
Accuracy: 0.9851851851851852
F1: 0.9854916559174857
Precision: 0.9850824642250788
Recall: 0.9862454212454214


In [None]:
# Decision tree: depth capped at 5, entropy criterion, fixed seed.
dt = DecisionTreeClassifier(criterion='entropy', max_depth=5, random_state=42)
y_pred_dt = dt.fit(X_train, y_train).predict(X_val)

# SVM: linear kernel with C=2.0.
svm = SVC(kernel='linear', C=2.0)
y_pred_svm = svm.fit(X_train, y_train).predict(X_val)

# Report validation accuracy and macro-averaged F1 / precision / recall per model.
macro_metrics = (("F1:", f1_score), ("Precision:", precision_score), ("Recall:", recall_score))
for name, y_pred in (("Decision Tree", y_pred_dt), ("SVM", y_pred_svm)):
  print(name)
  print("Accuracy:", accuracy_score(y_val, y_pred))
  for metric_label, metric_fn in macro_metrics:
    print(metric_label, metric_fn(y_val, y_pred, average='macro'))

Decision Tree
Accuracy: 0.8333333333333334
F1: 0.8276540661538558
Precision: 0.8557182608455953
Recall: 0.823680190776965
SVM
Accuracy: 0.9777777777777777
F1: 0.977244728102024
Precision: 0.977991452991453
Recall: 0.9768437118437119


In [None]:
# Decision tree: depth capped at 20, log_loss criterion, fixed seed.
# NOTE(review): scikit-learn documents 'log_loss' and 'entropy' as the same
# Shannon information gain criterion - confirm this run is meant to differ
# from the entropy run only in max_depth.
dt = DecisionTreeClassifier(criterion='log_loss', max_depth=20, random_state=42)
y_pred_dt = dt.fit(X_train, y_train).predict(X_val)

# SVM: polynomial kernel (library-default degree) with C=3.0.
svm = SVC(kernel='poly', C=3.0)
y_pred_svm = svm.fit(X_train, y_train).predict(X_val)

# Report validation accuracy and macro-averaged F1 / precision / recall per model.
macro_metrics = (("F1:", f1_score), ("Precision:", precision_score), ("Recall:", recall_score))
for name, y_pred in (("Decision Tree", y_pred_dt), ("SVM", y_pred_svm)):
  print(name)
  print("Accuracy:", accuracy_score(y_val, y_pred))
  for metric_label, metric_fn in macro_metrics:
    print(metric_label, metric_fn(y_val, y_pred, average='macro'))

Decision Tree
Accuracy: 0.8888888888888888
F1: 0.8857965167942871
Precision: 0.883784277504105
Recall: 0.8916519322970936
SVM
Accuracy: 0.9851851851851852
F1: 0.9857627909242476
Precision: 0.985609857978279
Recall: 0.9862454212454214
