In [17]:
import pandas as pd

# Training data: space-separated values with no header row. The first column
# is named as the class label and the remaining five as features.
train = pd.read_csv("vertigo_train.txt", header=None, sep=" ")
train.columns = ["label", "feat1", "feat2", "feat3", "feat4", "feat5"]

# pop() removes the label column from the frame and returns it as a Series,
# leaving `train` holding only the feature columns.
labels_train = train.pop("label")
train

Unnamed: 0,feat1,feat2,feat3,feat4,feat5
0,4,1,1,5,0
1,5,2,3,5,0
2,4,5,1,2,0
3,0,0,0,5,0
4,0,0,0,0,0
...,...,...,...,...,...
441,2,1,5,0,0
442,3,1,5,0,0
443,2,1,5,0,0
444,5,1,2,0,0


In [18]:
# Feature rows to predict on: space-separated values, no header row, so the
# five column names are assigned by hand.
test = pd.read_csv("vertigo_predict.txt", header=None, sep=" ")
test.columns = ["feat1", "feat2", "feat3", "feat4", "feat5"]
test

Unnamed: 0,feat1,feat2,feat3,feat4,feat5
0,3,1,4,0,0
1,3,6,1,6,0
2,6,5,2,6,0
3,6,7,4,6,0
4,3,5,1,3,1
...,...,...,...,...,...
189,0,0,0,5,0
190,3,6,1,0,0
191,3,1,5,0,0
192,5,3,1,5,0


In [19]:
# Answer file read as a single-column frame (no header row); the column is
# named "label".
labels_test = pd.read_csv("vertigo_answers.txt", header=None, sep=" ")
labels_test.columns = ["label"]
labels_test

Unnamed: 0,label
0,6
1,4
2,3
3,3
4,5
...,...
189,1
190,1
191,2
192,3


In [20]:
from sklearn.linear_model import Perceptron

# Baseline: a perceptron with scikit-learn's default hyperparameters.
# fit() returns the estimator itself, so construction and training are chained.
model = Perceptron().fit(train, labels_train)

# Predicted class for every row of the prediction set.
preds = model.predict(test)
preds

array([6, 3, 3, 3, 5, 3, 3, 3, 1, 6, 3, 3, 2, 3, 3, 2, 3, 1, 2, 1, 3, 3,
       5, 2, 5, 2, 6, 4, 1, 3, 3, 1, 5, 5, 6, 2, 3, 3, 5, 1, 6, 3, 3, 3,
       3, 2, 3, 3, 3, 3, 1, 1, 3, 1, 3, 6, 3, 3, 3, 5, 3, 3, 3, 3, 3, 1,
       3, 6, 2, 3, 3, 5, 6, 3, 3, 6, 3, 3, 6, 3, 2, 3, 3, 3, 3, 3, 3, 5,
       2, 3, 2, 2, 5, 3, 3, 3, 5, 5, 4, 3, 3, 5, 3, 3, 2, 6, 3, 2, 5, 5,
       3, 3, 6, 3, 3, 2, 1, 3, 3, 6, 3, 6, 2, 3, 3, 3, 6, 1, 1, 3, 5, 3,
       5, 2, 5, 6, 1, 1, 1, 3, 3, 6, 2, 3, 4, 3, 1, 2, 2, 3, 5, 2, 3, 3,
       3, 1, 3, 3, 3, 1, 3, 2, 1, 6, 3, 3, 5, 1, 2, 1, 3, 3, 3, 2, 1, 6,
       3, 5, 1, 3, 1, 1, 3, 1, 3, 3, 3, 1, 3, 1, 2, 6, 3, 3])

In [21]:
from sklearn.metrics import accuracy_score

# Fraction of predictions that match the answer labels, reported as a percentage.
acc = accuracy_score(y_true=labels_test, y_pred=preds)
print(f"Accuracy: {100 * acc:.2f} %")

Accuracy: 80.41 %


In [24]:
import warnings
from sklearn.exceptions import ConvergenceWarning

# Compare test accuracy across iteration caps. A small cap can stop training
# before convergence, which makes scikit-learn emit a ConvergenceWarning.
# Only that category is silenced, and only around fit(), so unrelated
# warnings remain visible in this cell and in every later cell (a global
# filterwarnings("ignore") would hide them for the rest of the session).
for arg in [10, 50, 100, 250, 500, 1000]: # default is 1000
    model = Perceptron(max_iter=arg)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=ConvergenceWarning)
        model.fit(train, labels_train)
    preds = model.predict(test)
    acc = accuracy_score(labels_test, preds)
    print(f"Perceptron (max_iter = {arg}): {100 * acc:.2f} % correct")

Perceptron (max_iter = 10): 73.71 % correct
Perceptron (max_iter = 50): 80.41 % correct
Perceptron (max_iter = 100): 80.41 % correct
Perceptron (max_iter = 250): 80.41 % correct
Perceptron (max_iter = 500): 80.41 % correct
Perceptron (max_iter = 1000): 80.41 % correct


In [19]:
# DistanceMetric lives in sklearn.metrics as of scikit-learn 1.0; the old
# sklearn.neighbors alias is deprecated and goes away in later releases.
# Try the current location first and fall back for older installations.
try:
    from sklearn.metrics import DistanceMetric
except ImportError:
    from sklearn.neighbors import DistanceMetric

dist = DistanceMetric.get_metric("manhattan") # dist = sum(|x - y|)
# Square matrix of pairwise distances between all training rows.
dist.pairwise(train) # dist between first two rows is 4

array([[ 0.,  4.,  7., ..., 11.,  7., 10.],
       [ 4.,  0.,  9., ..., 11.,  7., 10.],
       [ 7.,  9.,  0., ..., 12.,  8., 11.],
       ...,
       [11., 11., 12., ...,  0.,  6.,  1.],
       [ 7.,  7.,  8., ...,  6.,  0.,  5.],
       [10., 10., 11., ...,  1.,  5.,  0.]])

In [25]:
from sklearn.neighbors import KNeighborsClassifier

# 1-nearest-neighbour baseline: each test row takes the label of its single
# closest training row.
# NOTE(review): no `metric` argument is passed, so scikit-learn's default
# distance applies (Minkowski with p=2 per the docs), not the Manhattan
# metric computed in the previous cell. Confirm this is intended.
model_knn = KNeighborsClassifier(n_neighbors=1).fit(train, labels_train)

preds = model_knn.predict(test)
acc = accuracy_score(y_true=labels_test, y_pred=preds)
print(f"Nearest neighbour: {100 * acc:.2f} % correct")

Nearest neighbour: 74.74 % correct
