In [2]:
from sklearn.datasets import fetch_openml
from sklearn.neighbors import KNeighborsClassifier
from joblib import Parallel, delayed

In [3]:
# Fetch MNIST (784 features per sample) from OpenML: https://www.openml.org/d/554
# return_X_y=True yields the feature matrix and the label vector as a pair.
X, y = fetch_openml(
    name='mnist_784',
    version=1,
    return_X_y=True,
)
# Rescale every feature by 1/255 (presumably maps 0-255 pixel values to [0, 1]).
X = X / 255.0

In [4]:
# Traditional MNIST split by position:
#   first 60,000 samples -> training set (60K)
#   everything after     -> test set (10K)
# The data was already rescaled when it was loaded.
X_train, y_train = X[:60000], y[:60000]
X_test, y_test = X[60000:], y[60000:]

# Parameters

### Neighbours (3 values)
1. n_neighbors = 3
2. n_neighbors = 5
3. n_neighbors = 7


### Weight function used for prediction (2 values used)
1. ‘uniform’ : uniform weights. All points in each neighborhood are weighted equally.
2. ‘distance’ : weight points by the inverse of their distance. In this case, closer neighbors of a query point will have a greater influence than neighbors which are further away.

### Algorithm used to compute the nearest neighbours (only the first 3 are used, since ‘auto’ simply selects one of them)

1. ‘ball_tree’ will use BallTree
2. ‘kd_tree’ will use KDTree
3. ‘brute’ will use a brute-force search.
4. ‘auto’ will attempt to decide the most appropriate algorithm based on the values passed to fit method.

Note: fitting on sparse input will override the setting of this parameter, using brute force.

### p values: p = 1 is manhattan_distance and p = 2 is euclidean_distance (2 values)
1. p = 1
2. p = 2

### Leaf size: this parameter only has an effect when a tree-based algorithm (KDTree or BallTree) is selected (will do this if I have time)
1. Leaf size = 30
2. Leaf size = 50
3. Leaf size  = 100


In [5]:
# Experiment: k=3, uniform weights, Manhattan distance (p=1), KD-tree search.
# Each configuration lives in its own cell so a mistake only affects one run.
neigh3uniform1kdtree = KNeighborsClassifier(
    n_neighbors=3,
    weights='uniform',
    algorithm='kd_tree',
    p=1,
    n_jobs=-1,
)
neigh3uniform1kdtree.fit(X_train, y_train)
# Report the test error rate: 1 - score on the held-out test split.
print(1.0 - neigh3uniform1kdtree.score(X_test, y_test))

0.036699999999999955


In [45]:
# Experiment: k=5, uniform weights, Manhattan distance (p=1), KD-tree search.
# NOTE: the variable name is reused from an earlier experiment; this model
# uses n_neighbors=5, not 3.
neigh3uniform1kdtree = KNeighborsClassifier(
    n_neighbors=5,
    weights='uniform',
    algorithm='kd_tree',
    p=1,
    n_jobs=-1,
)
neigh3uniform1kdtree.fit(X_train, y_train)
# Report the test error rate: 1 - score on the held-out test split.
print(1.0 - neigh3uniform1kdtree.score(X_test, y_test))

0.03820000000000001


TypeError: ignored

In [46]:
# Experiment: k=7, uniform weights, Manhattan distance (p=1), KD-tree search.
# NOTE: the variable name is reused from an earlier experiment; this model
# uses n_neighbors=7, not 3.
neigh3uniform1kdtree = KNeighborsClassifier(
    n_neighbors=7,
    weights='uniform',
    algorithm='kd_tree',
    p=1,
    n_jobs=-1,
)
neigh3uniform1kdtree.fit(X_train, y_train)
# Report the test error rate: 1 - score on the held-out test split.
print(1.0 - neigh3uniform1kdtree.score(X_test, y_test))

0.03849999999999998


In [47]:
# Experiment: k=3, distance weights, Manhattan distance (p=1), KD-tree search.
# NOTE: the variable name is reused from an earlier experiment; this model
# uses weights='distance', not 'uniform'.
neigh3uniform1kdtree = KNeighborsClassifier(
    n_neighbors=3,
    weights='distance',
    algorithm='kd_tree',
    p=1,
    n_jobs=-1,
)
neigh3uniform1kdtree.fit(X_train, y_train)
# Report the test error rate: 1 - score on the held-out test split.
print(1.0 - neigh3uniform1kdtree.score(X_test, y_test))

0.03600000000000003


In [48]:
# Experiment: k=3, distance weights, Manhattan distance (p=1), ball-tree search.
# NOTE: the variable name is reused from an earlier experiment; this model
# uses weights='distance' and algorithm='ball_tree'.
neigh3uniform1kdtree = KNeighborsClassifier(
    n_neighbors=3,
    weights='distance',
    algorithm='ball_tree',
    p=1,
    n_jobs=-1,
)
neigh3uniform1kdtree.fit(X_train, y_train)
# Report the test error rate: 1 - score on the held-out test split.
print(1.0 - neigh3uniform1kdtree.score(X_test, y_test))

0.03600000000000003


In [49]:
# Experiment: k=3, distance weights, Manhattan distance (p=1), brute-force search.
# NOTE: the variable name is reused from an earlier experiment; this model
# uses weights='distance' and algorithm='brute'.
neigh3uniform1kdtree = KNeighborsClassifier(
    n_neighbors=3,
    weights='distance',
    algorithm='brute',
    p=1,
    n_jobs=-1,
)
neigh3uniform1kdtree.fit(X_train, y_train)
# Report the test error rate: 1 - score on the held-out test split.
print(1.0 - neigh3uniform1kdtree.score(X_test, y_test))

0.03600000000000003


In [50]:
# Experiment: k=3, uniform weights, Euclidean distance (p=2), brute-force search.
# NOTE: the variable name is reused from an earlier experiment; this model
# uses p=2 and algorithm='brute'.
neigh3uniform1kdtree = KNeighborsClassifier(
    n_neighbors=3,
    weights='uniform',
    algorithm='brute',
    p=2,
    n_jobs=-1,
)
neigh3uniform1kdtree.fit(X_train, y_train)
# Report the test error rate: 1 - score on the held-out test split.
print(1.0 - neigh3uniform1kdtree.score(X_test, y_test))

0.02949999999999997


In [51]:
# Experiment: k=3, distance weights, Euclidean distance (p=2), KD-tree search.
# NOTE: the variable name is reused from an earlier experiment; this model
# uses weights='distance' and p=2.
neigh3uniform1kdtree = KNeighborsClassifier(
    n_neighbors=3,
    weights='distance',
    algorithm='kd_tree',
    p=2,
    n_jobs=-1,
)
neigh3uniform1kdtree.fit(X_train, y_train)
# Report the test error rate: 1 - score on the held-out test split.
print(1.0 - neigh3uniform1kdtree.score(X_test, y_test))

0.028299999999999992


In [52]:
# Experiment: k=5, uniform weights, Euclidean distance (p=2), KD-tree search.
# NOTE: the variable name is reused from an earlier experiment; this model
# uses n_neighbors=5 and p=2.
neigh3uniform1kdtree = KNeighborsClassifier(
    n_neighbors=5,
    weights='uniform',
    algorithm='kd_tree',
    p=2,
    n_jobs=-1,
)
neigh3uniform1kdtree.fit(X_train, y_train)
# Report the test error rate: 1 - score on the held-out test split.
print(1.0 - neigh3uniform1kdtree.score(X_test, y_test))

0.031200000000000006


In [5]:
# Experiment: k=3, uniform weights, Euclidean distance (p=2), KD-tree search.
# NOTE: the variable name is reused from an earlier experiment; this model
# uses p=2, not 1.
neigh3uniform1kdtree = KNeighborsClassifier(
    n_neighbors=3,
    weights='uniform',
    algorithm='kd_tree',
    p=2,
    n_jobs=-1,
)
neigh3uniform1kdtree.fit(X_train, y_train)
# Report the test error rate: 1 - score on the held-out test split.
print(1.0 - neigh3uniform1kdtree.score(X_test, y_test))

0.02949999999999997


In [6]:
# Experiment: k=5, uniform weights, Euclidean distance (p=2), brute-force search.
# NOTE: the variable name is reused from an earlier experiment; this model
# uses n_neighbors=5, p=2 and algorithm='brute'.
neigh3uniform1kdtree = KNeighborsClassifier(
    n_neighbors=5,
    weights='uniform',
    algorithm='brute',
    p=2,
    n_jobs=-1,
)
neigh3uniform1kdtree.fit(X_train, y_train)
# Report the test error rate: 1 - score on the held-out test split.
print(1.0 - neigh3uniform1kdtree.score(X_test, y_test))

0.031200000000000006


In [9]:
# Experiment: k=7, uniform weights, Euclidean distance (p=2), KD-tree search.
# NOTE: the variable name is reused from an earlier experiment; this model
# uses n_neighbors=7 and p=2.
neigh3uniform1kdtree = KNeighborsClassifier(
    n_neighbors=7,
    weights='uniform',
    algorithm='kd_tree',
    p=2,
    n_jobs=-1,
)
neigh3uniform1kdtree.fit(X_train, y_train)
# Report the test error rate: 1 - score on the held-out test split.
print(1.0 - neigh3uniform1kdtree.score(X_test, y_test))

0.03059999999999996


In [10]:
# Experiment: k=5, distance weights, Euclidean distance (p=2), KD-tree search.
# NOTE: the variable name is reused from an earlier experiment; this model
# uses n_neighbors=5, weights='distance' and p=2.
neigh3uniform1kdtree = KNeighborsClassifier(
    n_neighbors=5,
    weights='distance',
    algorithm='kd_tree',
    p=2,
    n_jobs=-1,
)
neigh3uniform1kdtree.fit(X_train, y_train)
# Report the test error rate: 1 - score on the held-out test split.
print(1.0 - neigh3uniform1kdtree.score(X_test, y_test))

0.03090000000000004


In [11]:
# Experiment: k=3, distance weights, Euclidean distance (p=2), KD-tree search
# with leaf_size=60.
# NOTE: the variable name is reused from an earlier experiment; this model
# uses weights='distance' and p=2.
neigh3uniform1kdtree = KNeighborsClassifier(
    n_neighbors=3,
    weights='distance',
    algorithm='kd_tree',
    leaf_size=60,
    p=2,
    n_jobs=-1,
)
neigh3uniform1kdtree.fit(X_train, y_train)
# Report the test error rate: 1 - score on the held-out test split.
print(1.0 - neigh3uniform1kdtree.score(X_test, y_test))

0.028299999999999992


In [12]:
# Experiment: k=5, distance weights, Euclidean distance (p=2), KD-tree search
# with leaf_size=60.
# NOTE: the variable name is reused from an earlier experiment; this model
# uses n_neighbors=5, weights='distance' and p=2.
neigh3uniform1kdtree = KNeighborsClassifier(
    n_neighbors=5,
    weights='distance',
    algorithm='kd_tree',
    leaf_size=60,
    p=2,
    n_jobs=-1,
)
neigh3uniform1kdtree.fit(X_train, y_train)
# Report the test error rate: 1 - score on the held-out test split.
print(1.0 - neigh3uniform1kdtree.score(X_test, y_test))

0.03090000000000004
