# KNN Exhibition

In [1]:
# Import packages used throughout
import numpy as np
import matplotlib.pyplot as plt
from csv import reader

from sklearn import model_selection
from from_scratch.import_data import train_test_split

## Part 1: Class Prediction

In [2]:
# Step 0: Import the necessary classes
from from_scratch.knn import KNN
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import make_blobs

### Section 1a: Blobs

In [None]:
# Step 1: Generate the synthetic blob data
# Three 3-D cluster centers; make_blobs labels the clusters 0, 1, 2 in this order.
centers = [(-2, -2, -2), (-2, 2, 2), (2, 2, 2)]
# Standard deviation of each cluster, listed in the same order as `centers`.
clusters_std = [2, 2, 2]

# Seed the generator so that Restart & Run All reproduces the same 90 samples
# (and therefore comparable plots) on every run. The value 42 is arbitrary.
features, targets = make_blobs(n_samples=90,
                               cluster_std=clusters_std,
                               centers=centers,
                               n_features=3,
                               random_state=42)

In [None]:
# Step 2: Define visualization function
def visualize_blobs(train_features, train_targets, test_features, test_targets, predictions):
    """Draw a 3-D scatter plot of the training points and the test predictions.

    Feature arrays are indexed column-wise (``features[:, mask]``) and their
    rows 0, 1 and 2 are used as the x, y and z coordinates. Label arrays are
    read through their first row (``labels[0, :]``). Labels 0, 1 and 2 are
    drawn in blue, green and red and appear in the legend as "Class 1",
    "Class 2" and "Class 3".

    Markers:
        'o' - training sample of the class
        'D' - test sample predicted as the class, and the true label agrees
        'x' - test sample predicted as the class, but the true label differs

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    ax = plt.axes(projection="3d")

    colours = ('b', 'g', 'r')
    train_labels = train_targets[0, :]
    true_labels = test_targets[0, :]
    predicted_labels = predictions[0, :]

    ### Collect the point groups (training / correct / incorrect) class by class
    layers = []
    for label, colour in enumerate(colours):
        name = "Class " + str(label + 1)
        predicted_here = predicted_labels == label
        hits = np.logical_and(predicted_here, true_labels == label)
        misses = np.logical_and(predicted_here, true_labels != label)

        layers.append((train_features[:, train_labels == label],
                       colour, 'o', name + " - Training"))
        layers.append((test_features[:, hits],
                       colour, 'D', name + " - Predicted (Correct)"))
        layers.append((test_features[:, misses],
                       colour, 'x', name + " - Predicted (Incorrect)"))

    ### One scatter call per group, in the order collected above
    for points, colour, marker, legend_label in layers:
        ax.scatter(points[0, :], points[1, :], points[2, :],
                   c=colour, marker=marker, label=legend_label)

    ### Set up plot
    ax.legend(bbox_to_anchor=(-0.35, 1), loc="upper left", fontsize="xx-small")
    ax.set_title("Feature 3 over (Feature 1, Feature 2) with Class Predictions")
    for setter, axis_name in ((ax.set_xlabel, "Feature 1"),
                              (ax.set_ylabel, "Feature 2"),
                              (ax.set_zlabel, "Feature 3")):
        setter(axis_name, fontsize=12)
    plt.show()

#### From Scratch Implementation

In [None]:
# Step 2: Extract training and test features/targets
# The from-scratch splitter is given the transposed feature matrix and the
# labels lifted from a 1-D vector to a single row.
train_features, train_targets, test_features, test_targets = train_test_split(
    features.T,
    targets.reshape(1, -1),
)

In [None]:
# Step 3: Fit a KNN model to the data and visualize
# %matplotlib qt <- ENABLE TO ROTATE
%matplotlib inline

## Create and fit model
model = KNN(n_neighbors = 4)
model.fit(train_features, train_targets)
predictions = model.predict(test_features)

## Visualize
visualize_blobs(train_features, train_targets, test_features, test_targets, predictions)

#### Scikit Implementation

In [None]:
# Step 2: Extract training and test features/targets
# scikit-learn returns the pieces as (X_train, X_test, y_train, y_test);
# 80% of the samples go to training and 20% to testing.
train_features, test_features, train_targets, test_targets = (
    model_selection.train_test_split(
        features,
        targets,
        train_size=0.8,
        test_size=0.2,
    )
)

In [None]:
# Step 3: Fit a KNN model to the data and visualize
# %matplotlib qt <- ENABLE TO ROTATE
%matplotlib inline

## Create and fit model
model = KNeighborsClassifier(n_neighbors = 4, metric = "euclidean")
model.fit(train_features, train_targets)
predictions = model.predict(test_features)

## Visualize
visualize_blobs(train_features.T,
 train_targets.reshape((1, train_targets.shape[0])),
 test_features.T,
 test_targets.reshape((1, test_targets.shape[0])),
 predictions.reshape((1, predictions.shape[0])))

### Section 1b: IRIS.csv

In [18]:
# Step 1: Import "IRIS.csv"
# newline="" is what the csv module asks for, so that the reader itself (not
# the text layer) interprets line endings, e.g. inside quoted fields.
with open("IRIS.csv", 'r', newline="") as file:
    csv_reader = reader(file)
    # The first row is the header row.
    feature_names = list(next(csv_reader))
    # csv.reader yields [] for blank lines (such as a trailing newline at the
    # end of the file); drop them so every row has the same number of columns.
    data = np.array([row for row in csv_reader if row])

In [19]:
# Step 2: Wrangle data
# Every column except the last is treated as a feature; the last is the label.
n_features = data.shape[1] - 1

# Slice by count instead of with [:-1] so that re-running this cell does not
# strip one more name off the list each time.
feature_names = feature_names[:n_features]
# Features become floats, transposed so that each column is one sample.
features = data[:, :n_features].astype("float").T
# Labels stay as read from the file and are stored as a single row, shape (1, n).
targets = data[:, -1]
targets = targets.reshape((1, targets.shape[0]))

#### From Scratch Implementation

In [20]:
# Step 3: Extract training and test features/targets
# `features` and `targets` are passed to the from-scratch splitter as prepared
# in the wrangling step.
train_features, train_targets, test_features, test_targets = train_test_split(
    features,
    targets,
)

In [21]:
# Step 4: Fit a KNN model to the data and predict values
model = KNN(n_neighbors=5)
model.fit(train_features, train_targets)
predictions = model.predict(test_features)

# Accuracy in percent: matching entries divided by the number of prediction columns.
n_correct = np.sum(predictions == test_targets)
n_test = predictions.shape[1]
print("Accuracy = " + str(100 * n_correct/n_test) + "%")

Accuracy = 96.66666666666667%


#### Scikit Implementation

In [24]:
# Step 3: Extract training and test features/targets
# Both arrays are transposed before the split, and scikit-learn returns the
# pieces as (X_train, X_test, y_train, y_test) with an 80% / 20% split.
train_features, test_features, train_targets, test_targets = (
    model_selection.train_test_split(
        features.T,
        targets.T,
        train_size=0.8,
        test_size=0.2,
    )
)

In [25]:
# Step 4: Fit a KNN model to the data and predict values
model = KNeighborsClassifier(n_neighbors = 4, metric = "euclidean")
# train_targets comes out of the split as a column vector (n, 1); scikit-learn
# expects a 1-D label array and otherwise emits a DataConversionWarning, so
# flatten it at the call site without changing the variable itself.
model.fit(train_features, train_targets.ravel())
predictions = model.predict(test_features)

# predictions is 1-D, so the true labels are flattened before comparing.
# Accuracy in percent: matching entries divided by the number of test samples.
print("Accuracy = " + str(100 * np.sum(predictions == test_targets.flatten())/predictions.shape[0]) + "%")

Accuracy = 96.66666666666667%


## Part 2: Collaborative Filter