In [1]:
import os
from pprint import pprint

import numpy as np

from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import euclidean_distances, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.utils.multiclass import unique_labels
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted

In [2]:
def write_answer(data, filename):
    """Write an answer to ``./output/1nn_vs_random_forest/<filename>.txt``.

    Parameters
    ----------
    data : object
        Value to store. A list is written as its items, each converted with
        ``str`` and joined by single spaces; anything else is written as
        ``str(data)``.
    filename : object
        Base name of the target file, without the ``.txt`` extension. It is
        interpolated into the path, so non-string values such as ints work.

    Notes
    -----
    The output directory is created on demand, so the call also succeeds when
    ``./output/1nn_vs_random_forest`` does not exist yet. An existing file
    with the same name is overwritten.
    """
    output_dir = "./output/1nn_vs_random_forest"
    os.makedirs(output_dir, exist_ok=True)

    # isinstance (rather than an exact type comparison) also accepts list subclasses.
    if isinstance(data, list):
        text = " ".join(str(item) for item in data)
    else:
        text = str(data)

    with open(f"{output_dir}/{filename}.txt", "w") as f:
        f.write(text)

# Loading data

In [3]:
# Load the digits dataset and pull out the feature matrix and the label vector.
data = load_digits()
X, y = data["data"], data["target"]

# Hold out 25% of the samples for testing; shuffle=False disables shuffling,
# so the split is deterministic.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    shuffle=False,
)

# 1NN (one-nearest-neighbour classifier)

In [4]:
class OneNNClassifier(BaseEstimator, ClassifierMixin):
    """Single-nearest-neighbour classifier based on Euclidean distance.

    ``fit`` stores the validated training set. ``predict`` returns, for every
    query row, the label of the stored training row that lies at the smallest
    Euclidean distance from it.
    """

    def __init__(self):
        """Create the estimator; it has no hyper-parameters to configure."""

    def fit(self, X, y):
        """Validate and memorise the training data.

        Parameters
        ----------
        X : array-like
            Training samples, validated together with ``y`` by ``check_X_y``.
        y : array-like
            Target label of each training sample.

        Returns
        -------
        self : OneNNClassifier
            The fitted estimator, with ``classes_``, ``X_`` and ``y_`` set.
        """
        X_checked, y_checked = check_X_y(X, y)

        self.classes_ = unique_labels(y_checked)
        self.X_, self.y_ = X_checked, y_checked
        return self

    def predict(self, X):
        """Label each query row with the target of its closest training row.

        Parameters
        ----------
        X : array-like
            Query samples, validated by ``check_array``.

        Returns
        -------
        ndarray
            One entry of the stored ``y_`` per query row.
        """
        check_is_fitted(self)
        queries = check_array(X)

        # Rows index the queries and columns index the stored training samples,
        # so the arg-min along axis 1 is the position of the nearest neighbour.
        distances = euclidean_distances(queries, self.X_)
        nearest = distances.argmin(axis=1)
        return self.y_[nearest]

In [5]:
# Fit the 1NN classifier on the training split and label the held-out samples.
onennclassifier = OneNNClassifier()
onennclassifier.fit(X_train, y_train)
prediction = onennclassifier.predict(X_test)

# accuracy_score is documented as (y_true, y_pred): the ground truth goes first.
accuracy = accuracy_score(y_test, prediction)
print(accuracy)

# The stored answer is the error rate (1 - accuracy), written to 1.txt.
write_answer(1 - accuracy, 1)

0.9622222222222222


# RandomForestClassifier

In [6]:
# Fit a 1000-tree random forest on the same split. random_state pins the
# forest's randomness so the reported accuracy (and the written answer) is
# reproducible across runs; the value may differ from an earlier unseeded run.
randomforestclassifier = RandomForestClassifier(
    n_estimators=1000,
    n_jobs=-1,
    random_state=42,
)
randomforestclassifier.fit(X_train, y_train)
prediction = randomforestclassifier.predict(X_test)

# accuracy_score is documented as (y_true, y_pred): the ground truth goes first.
accuracy = accuracy_score(y_test, prediction)
print(accuracy)

# The stored answer is the error rate (1 - accuracy), written to 2.txt.
write_answer(1 - accuracy, 2)

0.9333333333333333
