In [1]:
# Increase the width of the notebook

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
# import required libraries

import pandas as pd
import numpy as np
import seaborn as sns

from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression

from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix
from sklearn.metrics import log_loss, make_scorer
from scipy.spatial import distance_matrix

import time

In [3]:
# Read Data for Train

# X Train
gesture_x_train = pd.read_csv("HW3-data/uWaveGestureLibrary_X_TRAIN", header=None, sep=" ",
                             prefix="xt", skipinitialspace=True)
gesture_x_train.rename(columns={"xt0":"class"}, inplace=True)
gesture_x_train["class"] = gesture_x_train["class"].astype("int").astype("category")

# Y Train
gesture_y_train = pd.read_csv("HW3-data/uWaveGestureLibrary_Y_TRAIN", header=None, sep=" ",
                             prefix="yt", skipinitialspace=True)
gesture_y_train.rename(columns={"yt0":"class"}, inplace=True)
gesture_y_train["class"] = gesture_y_train["class"].astype("int").astype("category")
gesture_y_train.drop(["class"], axis=1, inplace=True)

# Z Train
gesture_z_train = pd.read_csv("HW3-data/uWaveGestureLibrary_Z_TRAIN", header=None, sep=" ",
                             prefix="zt", skipinitialspace=True)
gesture_z_train.rename(columns={"zt0":"class"}, inplace=True)
gesture_z_train["class"] = gesture_z_train["class"].astype("int").astype("category")
gesture_z_train.drop(["class"], axis=1, inplace=True)

In [4]:
# Concatenate the variables for train
concat_gesture_train = pd.concat([gesture_x_train, gesture_y_train, gesture_z_train], join="inner", axis=1)
X_train = concat_gesture_train.drop("class", axis=1)
y_train = concat_gesture_train["class"]
X_train.head()

Unnamed: 0,xt1,xt2,xt3,xt4,xt5,xt6,xt7,xt8,xt9,xt10,...,zt306,zt307,zt308,zt309,zt310,zt311,zt312,zt313,zt314,zt315
0,-0.304243,-0.304243,-0.304243,-0.304243,-0.304243,-0.304243,-0.304243,-0.304243,-0.304243,-0.304243,...,0.523217,0.513994,0.503481,0.492967,0.474522,0.456077,0.437632,0.419187,0.400743,0.382298
1,1.627311,1.627311,1.627311,1.627311,1.627311,1.627311,1.627311,1.627311,1.627311,1.627311,...,-0.42701,-0.42701,-0.42701,-0.427172,-0.428773,-0.44072,-0.452667,-0.464613,-0.47656,-0.488507
2,0.661277,0.661277,0.661277,0.661277,0.661277,0.661277,0.661277,0.661277,0.661277,0.661277,...,-0.862717,-0.862717,-0.862717,-0.862717,-0.862717,-0.862717,-0.862717,-0.862717,-0.862717,-0.862717
3,0.005185,0.005185,0.005185,0.005185,0.005185,0.005185,0.005185,0.005185,0.005185,0.005185,...,-0.187384,-0.123549,-0.05587,0.011808,0.079487,0.157056,0.25374,0.445503,0.648538,0.851573
4,1.286198,1.286198,1.286198,1.286198,1.286198,1.286198,1.286198,1.286198,1.286198,1.286198,...,1.867473,1.834319,1.756,1.638582,1.521164,1.453266,1.515219,1.632637,1.750054,1.867473


In [5]:
# Read Data for Test

# X Test
gesture_x_test = pd.read_csv("HW3-data/uWaveGestureLibrary_X_TEST", header=None, sep=" ",
                             prefix="xt", skipinitialspace=True)
gesture_x_test.rename(columns={"xt0":"class"}, inplace=True)
gesture_x_test["class"] = gesture_x_test["class"].astype("int").astype("category")

# Y Test
gesture_y_test = pd.read_csv("HW3-data/uWaveGestureLibrary_Y_TEST", header=None, sep=" ",
                             prefix="yt", skipinitialspace=True)
gesture_y_test.rename(columns={"yt0":"class"}, inplace=True)
gesture_y_test["class"] = gesture_y_test["class"].astype("int").astype("category")
gesture_y_test.drop(["class"], axis=1, inplace=True)

# Z Test
gesture_z_test = pd.read_csv("HW3-data/uWaveGestureLibrary_Z_TEST", header=None, sep=" ",
                             prefix="zt", skipinitialspace=True)
gesture_z_test.rename(columns={"zt0":"class"}, inplace=True)
gesture_z_test["class"] = gesture_z_test["class"].astype("int").astype("category")
gesture_z_test.drop(["class"], axis=1, inplace=True)

In [6]:
# Concatenate the variables for test
concat_gesture_test = pd.concat([gesture_x_test, gesture_y_test, gesture_z_test], join="inner", axis=1)
X_test = concat_gesture_test.drop("class", axis=1)
y_test = concat_gesture_test["class"]
X_test.head()

Unnamed: 0,xt1,xt2,xt3,xt4,xt5,xt6,xt7,xt8,xt9,xt10,...,zt306,zt307,zt308,zt309,zt310,zt311,zt312,zt313,zt314,zt315
0,1.557018,1.557018,1.557018,1.557018,1.557018,1.557018,1.557018,1.557018,1.557018,1.557018,...,-0.348423,-0.348423,-0.344675,-0.340728,-0.333147,-0.325423,-0.3177,-0.309976,-0.302252,-0.294528
1,-0.075096,-0.075096,-0.075096,-0.075096,-0.075096,-0.075096,-0.075096,-0.075096,-0.075096,-0.075096,...,0.96135,0.849142,0.725546,0.589277,0.539075,0.51581,0.51581,0.51581,0.51581,0.51581
2,1.509012,1.526296,1.54358,1.560865,1.573811,1.576913,1.578592,1.578592,1.578592,1.578592,...,-0.565962,-0.47568,-0.399089,-0.344074,-0.289058,-0.234042,-0.179026,-0.124011,-0.068995,-0.013979
3,1.303974,1.303974,1.303974,1.303974,1.303974,1.303974,1.303974,1.303974,1.303974,1.303974,...,1.04495,1.04495,1.04495,1.04495,1.079339,1.115085,1.171001,1.227442,1.283883,1.340324
4,1.298089,1.298089,1.298089,1.298089,1.298089,1.298089,1.298089,1.298089,1.298089,1.298089,...,-0.69346,-0.689765,-0.68607,-0.682375,-0.67868,-0.674985,-0.67129,-0.667595,-0.6639,-0.660205


# TASK 1 - Nearest Neighbors Applications on the Gesture Data

### a) 

In [7]:
# Two distance measures will be used for the KNN:
# Euclidean Distance, because it is the most used distance measure (default measure of the algorithm)
# Manhattan Distance, because it is said to perform better on a high dimensional data.

In [8]:
knn_l1 = KNeighborsClassifier(p=1)
knn_l2 = KNeighborsClassifier(p=2)

In [9]:
param_grid = {"n_neighbors": list(np.arange(1,21,1))}

# n-jobs allows the computer to use all the processors simultaneously.
l1_cv_model = GridSearchCV(knn_l1, cv=10, n_jobs=-1, verbose=2, param_grid= param_grid)
l2_cv_model = GridSearchCV(knn_l2, cv=10, n_jobs=-1, verbose=2, param_grid= param_grid)

In [10]:
np.random.seed(582) # For consistent results

l1_cv_model.fit(X_train, y_train)
l1_cv_model.best_params_

Fitting 10 folds for each of 20 candidates, totalling 200 fits


{'n_neighbors': 1}

In [11]:
np.random.seed(582) # For consistent results

l2_cv_model.fit(X_train, y_train)
l2_cv_model.best_params_

Fitting 10 folds for each of 20 candidates, totalling 200 fits


{'n_neighbors': 3}

### For Manhattan Distance, k=1 gives the least error.
### For Euclidean Distance, k=3 gives the least error.

### b)

In [12]:
# Perform predictions on the test data with the tuned models and check the model runtimes.
start = time.time()
y_pred_l1 = l1_cv_model.best_estimator_.predict(X_test)
end = time.time()
print("Runtime For KNN with L1-norm: {:.3f} seconds".format(end-start))

start = time.time()
y_pred_l2 = l2_cv_model.best_estimator_.predict(X_test)
end = time.time()
print("Runtime For KNN with L2-norm: {:.3f} seconds".format(end-start))

Runtime For KNN with L1-norm: 3.628 seconds
Runtime For KNN with L2-norm: 0.495 seconds


In [13]:
confusion_df_l1 = pd.DataFrame({"Actual":y_test, "Predicted":y_pred_l1})
print("Confusion Matrix For KNN with L1-norm and k=1")
c_matrix_l1 = pd.crosstab(confusion_df_l1["Actual"], confusion_df_l1["Predicted"])
c_matrix_l1

Confusion Matrix For KNN with L1-norm and k=1


Predicted,1,2,3,4,5,6,7,8
Actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,430,0,0,2,0,5,0,0
2,1,451,0,0,0,0,0,0
3,2,0,418,0,16,14,4,0
4,3,0,0,392,40,11,0,4
5,3,0,9,5,415,1,0,0
6,3,0,5,13,16,411,0,1
7,0,0,2,0,0,0,445,0
8,0,0,0,3,1,0,0,456


In [14]:
true_pred_l1=0
for i in range(0,8):
    true_pred_l1+= c_matrix_l1.iloc[i].iloc[i]
acc_score_l2 = true_pred_l1/c_matrix_l1.sum().sum()
print("Accuracy Score for KNN with L1 Norm is: {:.3f}".format(acc_score_l2))

Accuracy Score for KNN with L1 Norm is: 0.954


In [15]:
confusion_df_l2 = pd.DataFrame({"Actual":y_test, "Predicted":y_pred_l2})
print("Confusion Matrix For KNN with L2-norm and k=3")
c_matrix_l2 = pd.crosstab(confusion_df_l2["Actual"], confusion_df_l2["Predicted"])
c_matrix_l2

Confusion Matrix For KNN with L2-norm and k=3


Predicted,1,2,3,4,5,6,7,8
Actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,431,0,0,2,0,4,0,0
2,1,449,0,0,0,0,2,0
3,2,0,413,0,15,20,4,0
4,7,0,0,370,60,6,0,7
5,3,0,6,1,422,1,0,0
6,6,0,7,15,28,392,1,0
7,0,0,1,0,0,0,446,0
8,0,0,0,1,1,0,0,458


In [16]:
true_pred_l2=0
for i in range(0,8):
    true_pred_l2+= c_matrix_l2.iloc[i].iloc[i]
acc_score_l2 = true_pred_l2/c_matrix_l2.sum().sum()
print("Accuracy Score for KNN with L2 Norm is: {:.3f}".format(acc_score_l2))

Accuracy Score for KNN with L2 Norm is: 0.944


### c)

### In my opinion, it is not reasonable to weight the different axis equally. I think that it makes sense to weight the distances over different axes. For example, to weight x and y axis more than the z-axis might perform better because gestures are defined by how they accelerate at these axes (Which can be see on the Figure 1 of the HW3 instructions). Information of the z axis is more like an extra information used due to the fact that people do not present gestures perfectly perpendicular to their devices.

# TASK 2 - Logistic Regression Applications on the Gesture Data

In [17]:
# Convert targets to indicate if the class is 3 or not.
y_train_class3 = (y_train==3)*1
y_test_class3 = (y_test==3)*1

### a)

In [18]:
# Find the ratio of Class 3 instances in the training data
ratio_class3 = y_train_class3.sum()/len(y_train)
ratio_class3

0.11830357142857142

In [19]:
# Create Logistic Regression model with no penalization.
lr_model = LogisticRegression(max_iter=100000, penalty='none')
lr_model.fit(X_train, y_train_class3)

LogisticRegression(max_iter=100000, penalty='none')

In [20]:
# Get prediction probabilities of the test data and convert them to binary predictions by using the Class3 ratio as threshold.
y_pred_prob = lr_model.predict_proba(X_test)
y_pred_lr = (y_pred_prob[:,1]>ratio_class3)*1

In [21]:
confusion_lr = pd.DataFrame({"Actual":y_test_class3, "Predicted":y_pred_lr})
print("Confusion Matrix For Logistic Regression on Class3")
c_matrix_lr = pd.crosstab(confusion_lr["Actual"], confusion_lr["Predicted"])
c_matrix_lr

Confusion Matrix For Logistic Regression on Class3


Predicted,0,1
Actual,Unnamed: 1_level_1,Unnamed: 2_level_1
0,2953,175
1,165,289


In [22]:
# Calculate the accuracy of the LR model
accuracy = (c_matrix_lr[0][0] + c_matrix_lr[1][1])/(c_matrix_lr[0][0] + c_matrix_lr[1][1] + c_matrix_lr[1][0] + c_matrix_lr[0][1])
accuracy

0.9050809603573423

### Accuracy for LR with no penalty is 0.905

### b)

In [23]:
# Create LR model with Lasso Penalty
lr_lasso_model = LogisticRegression(max_iter=100000, penalty='l1', solver='liblinear')

In [24]:
# Perform 10-fold Cross-Validation with the Binomial Deviance as the loss function
param_grid = {'C': [0.0001, 0.001, 0.05, 0.01, 0.5, 0.1, 1, 5, 10, 100]}
LogLoss = make_scorer(log_loss, greater_is_better=False, needs_proba=True)
lr_lasso_cv_model = GridSearchCV(lr_lasso_model, cv=10, n_jobs=-1, verbose=2, param_grid=param_grid, scoring=LogLoss)
lr_lasso_cv_model.fit(X_train, y_train_class3)

Fitting 10 folds for each of 10 candidates, totalling 100 fits


GridSearchCV(cv=10,
             estimator=LogisticRegression(max_iter=100000, penalty='l1',
                                          solver='liblinear'),
             n_jobs=-1,
             param_grid={'C': [0.0001, 0.001, 0.05, 0.01, 0.5, 0.1, 1, 5, 10,
                               100]},
             scoring=make_scorer(log_loss, greater_is_better=False, needs_proba=True),
             verbose=2)

In [25]:
lr_lasso_cv_model.best_params_

{'C': 0.5}

### We find that best C value is 0.5 . It is defined as 'Inverse of Regularization Strength', therefore lambda= 2 is our parameter.

In [26]:
# Get prediction probabilities of the test data and convert them to binary predictions by using the Class3 ratio as threshold.

lr_lasso_tuned = lr_lasso_cv_model.best_estimator_
y_pred_prob = lr_lasso_tuned.predict_proba(X_test)
y_pred_lr_lasso = (y_pred_prob[:,1]>ratio_class3)*1

In [27]:
confusion_lr_lasso = pd.DataFrame({"Actual":y_test_class3, "Predicted":y_pred_lr_lasso})
print("Confusion Matrix For Logistic Regression w/Lasso on Class3")
c_matrix_lr_lasso = pd.crosstab(confusion_lr_lasso["Actual"], confusion_lr_lasso["Predicted"])
c_matrix_lr_lasso

Confusion Matrix For Logistic Regression w/Lasso on Class3


Predicted,0,1
Actual,Unnamed: 1_level_1,Unnamed: 2_level_1
0,2871,257
1,33,421


In [28]:
# Calculate the accuracy score for the penalized LR
accuracy_lasso = (c_matrix_lr_lasso[0][0] + c_matrix_lr_lasso[1][1])/(c_matrix_lr_lasso[0][0] + c_matrix_lr_lasso[1][1] + c_matrix_lr_lasso[1][0] + c_matrix_lr_lasso[0][1])
accuracy_lasso

0.9190396426577331

### Accuracy for LR with L1 penalization is 0.919

### Logistic Regression model with Lasso regularization has a better accuracy score and it is better at predicting the Class3 gestures. However, Logistic Regression in part a) is better at detecting the observations that are not Class 3.

In [29]:
# Print the coefficients of the LR with Lasso penalty, ones that have positive values.
lr_lasso_coefs = pd.DataFrame({"Coef":lr_lasso_tuned.coef_[0]}, index=X_train.columns)
lr_lasso_coefs[lr_lasso_coefs["Coef"]>0]

Unnamed: 0,Coef
xt127,0.044427
xt128,0.014068
xt208,0.091109
xt235,0.131123
xt236,0.334157
xt237,0.27788
xt238,0.19276
xt248,0.083842
xt249,0.146576
xt258,0.373261


### We see that nearly half of the significant features are from the x-axis. It makes sense because if we look at the shape of the Class 3 gesture, it is a straight line from left to right. Therefore, we mostly need x-axis coordinate to define it. Also, another interesting thing is that initial timepoints are not significant (s.t. xt127, yt160 and zt87 are the earliest timepoints for the respective axes) at determining if a gesture belongs to class 3. We need y values from t=160 to t=231. In my opinion, it is because if there is no vertical movement in the middle of the gesture, it is more likely that the gesture is performed on the horizontal line (Class 3 or Class 4) and thus, it narrows down the choice of the gestures.

### c)

In [30]:
# Create copies of original datasets
X_train_copy = X_train.copy()
X_test_copy = X_test.copy()

In [31]:
X_train_copy.head()

Unnamed: 0,xt1,xt2,xt3,xt4,xt5,xt6,xt7,xt8,xt9,xt10,...,zt306,zt307,zt308,zt309,zt310,zt311,zt312,zt313,zt314,zt315
0,-0.304243,-0.304243,-0.304243,-0.304243,-0.304243,-0.304243,-0.304243,-0.304243,-0.304243,-0.304243,...,0.523217,0.513994,0.503481,0.492967,0.474522,0.456077,0.437632,0.419187,0.400743,0.382298
1,1.627311,1.627311,1.627311,1.627311,1.627311,1.627311,1.627311,1.627311,1.627311,1.627311,...,-0.42701,-0.42701,-0.42701,-0.427172,-0.428773,-0.44072,-0.452667,-0.464613,-0.47656,-0.488507
2,0.661277,0.661277,0.661277,0.661277,0.661277,0.661277,0.661277,0.661277,0.661277,0.661277,...,-0.862717,-0.862717,-0.862717,-0.862717,-0.862717,-0.862717,-0.862717,-0.862717,-0.862717,-0.862717
3,0.005185,0.005185,0.005185,0.005185,0.005185,0.005185,0.005185,0.005185,0.005185,0.005185,...,-0.187384,-0.123549,-0.05587,0.011808,0.079487,0.157056,0.25374,0.445503,0.648538,0.851573
4,1.286198,1.286198,1.286198,1.286198,1.286198,1.286198,1.286198,1.286198,1.286198,1.286198,...,1.867473,1.834319,1.756,1.638582,1.521164,1.453266,1.515219,1.632637,1.750054,1.867473


In [32]:
# Create Distance Matrices

X_train_dist = pd.DataFrame(distance_matrix(X_train_copy, X_train_copy, p=2))
X_test_dist = pd.DataFrame(distance_matrix(X_test_copy, X_train_copy, p=2))

In [33]:
X_test_dist

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,886,887,888,889,890,891,892,893,894,895
0,51.687559,13.263419,28.321555,39.541255,34.286788,38.211025,36.045863,48.764229,37.568076,59.428712,...,44.366890,49.494730,32.777330,51.240891,35.257192,43.642435,51.738237,41.011374,41.870528,28.349480
1,34.947350,54.334337,54.694516,46.732487,45.800128,46.545913,41.582900,37.517528,46.173264,31.870273,...,37.760887,42.294340,44.207834,38.800478,50.753346,42.983088,38.362504,44.871331,37.517618,55.648530
2,42.171916,25.570757,35.860929,42.167440,28.760080,37.984422,41.091221,44.311017,31.095472,50.164764,...,46.952663,50.504509,30.280558,39.453718,45.695170,47.538985,50.386797,40.439901,43.688492,35.511051
3,41.876963,34.269027,41.884193,44.840555,25.517543,43.336345,38.773427,38.422948,20.204116,50.381287,...,45.197983,53.934857,28.368976,42.632988,48.775380,49.697657,44.197285,42.866915,42.667953,41.614170
4,49.642237,14.399477,16.490769,41.308488,32.737889,37.112181,44.964582,47.155949,37.856534,57.156578,...,47.806687,46.909740,32.783804,49.979809,34.794183,40.932294,53.618109,46.638917,47.715370,22.199250
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3577,46.919133,46.765478,51.150679,42.750231,49.449106,51.989018,32.597605,43.849780,47.833537,39.578175,...,24.917959,36.331291,46.715895,42.166525,44.962159,45.163310,41.930947,42.661861,33.012213,50.092056
3578,47.290969,46.446540,52.547933,41.966591,48.338859,51.043864,29.589466,45.947603,47.185385,41.416932,...,25.853045,41.150591,46.721835,42.480450,45.848125,46.424821,43.050711,43.155771,30.219649,50.473456
3579,30.910078,56.796213,53.676871,43.537100,48.571039,50.366663,48.056857,38.580897,47.209050,9.944823,...,41.886535,33.476275,49.285624,32.852784,49.330139,41.213599,37.312409,45.346724,43.464060,54.690024
3580,49.808545,37.031432,41.234293,37.913667,38.738186,24.698258,43.717791,49.216430,41.037846,49.819356,...,47.958613,51.547784,39.629474,40.324372,42.509035,54.469956,39.716742,40.802395,43.870367,33.544740


In [34]:
# Create LR model with Lasso Penalty
lr_lasso_dist_model = LogisticRegression(max_iter=100000, penalty='l1', solver='liblinear')

In [35]:
# 10-fold Cross-Validation with distance matrix as input and Binomial Deviance as the loss function

param_grid = {'C': [0.0001, 0.001, 0.05, 0.01, 0.5, 0.1, 1, 5, 10, 100]}
LogLoss = make_scorer(log_loss, greater_is_better=False, needs_proba=True)
lr_lasso_dist_cv_model = GridSearchCV(lr_lasso_dist_model, cv=10, n_jobs=-1, verbose=2, param_grid=param_grid, scoring=LogLoss)
lr_lasso_dist_cv_model.fit(X_train_dist, y_train_class3)

Fitting 10 folds for each of 10 candidates, totalling 100 fits


GridSearchCV(cv=10,
             estimator=LogisticRegression(max_iter=100000, penalty='l1',
                                          solver='liblinear'),
             n_jobs=-1,
             param_grid={'C': [0.0001, 0.001, 0.05, 0.01, 0.5, 0.1, 1, 5, 10,
                               100]},
             scoring=make_scorer(log_loss, greater_is_better=False, needs_proba=True),
             verbose=2)

In [36]:
lr_lasso_dist_cv_model.best_params_

{'C': 5}

### We find that best C value is 5 . It is defined as 'Inverse of Regularization Strength', therefore lambda= 1/5 is our parameter.

In [37]:
# Get prediction probabilities of the test data and convert them to binary predictions by using the Class3 ratio as threshold.

lr_lasso_dist_tuned = lr_lasso_dist_cv_model.best_estimator_
y_pred_prob_dist = lr_lasso_dist_tuned.predict_proba(X_test_dist)
y_pred_lr_lasso_dist = (y_pred_prob_dist[:,1]>ratio_class3)*1

In [38]:
confusion_lr_lasso_dist = pd.DataFrame({"Actual":y_test_class3, "Predicted":y_pred_lr_lasso_dist})
print("Confusion Matrix For Logistic Regression w/Lasso on Class3")
c_matrix_lr_lasso_dist = pd.crosstab(confusion_lr_lasso_dist["Actual"], confusion_lr_lasso_dist["Predicted"])
c_matrix_lr_lasso_dist

Confusion Matrix For Logistic Regression w/Lasso on Class3


Predicted,0,1
Actual,Unnamed: 1_level_1,Unnamed: 2_level_1
0,3099,29
1,24,430


In [39]:
# Calculate the accuracy for the LR with distance as input
accuracy_dist = (c_matrix_lr_lasso_dist[0][0] + c_matrix_lr_lasso_dist[1][1])/(c_matrix_lr_lasso_dist[0][0] + c_matrix_lr_lasso_dist[1][1] + c_matrix_lr_lasso_dist[1][0] + c_matrix_lr_lasso_dist[0][1])
accuracy_dist

0.9852037967615858

### Accuracy for the LR with distance matrix as input and L1 penalization is 0.9852

In [40]:
# Print the coefficients of the LR with Lasso penalty, ones that have positive values.
lr_lasso_dist_coefs = pd.DataFrame({"Coef":lr_lasso_dist_tuned.coef_[0]}, index=X_train_dist.columns)
lr_lasso_dist_coefs[lr_lasso_dist_coefs["Coef"]>0]

Unnamed: 0,Coef
1,0.010742
2,0.010108
6,0.000027
7,0.002900
9,0.063880
...,...
889,0.008591
890,0.097688
892,0.000037
894,0.004418


### Regression coefficients are the scalers for the distances between the test instance and the corresponding training instances. For example, if we choose a point from the test data that belongs to the class 3 (such as the 12th point), and multiply its distances (from the training data instances) with regression coefficients, sum these multiplications and transform the result with the logit function, we should get a probability that is close to 1, because class 3 is our target class (and model has an almost perfect accuracy).

In [41]:
# Test points that belongs to class 3. (We pick the 12th point for demonstration)
(y_test.where(y_test==3).dropna())[:10]

12    3
19    3
24    3
28    3
50    3
56    3
59    3
63    3
67    3
98    3
Name: class, dtype: category
Categories (8, int64): [1, 2, 3, 4, 5, 6, 7, 8]

In [42]:
x = (lr_lasso_dist_coefs["Coef"]*X_test_dist.loc[12]).sum()
1/(1+np.e**-x)

0.9741422285207803

##### As we see, the resulting probability is 0.974.

### d) Comparisons of the Models

#### Accuracy Score for KNN with L1 Norm is: 0.954
#### Accuracy Score for KNN with L2 Norm is: 0.944
#### Accuracy for LR with no penalty is 0.905
#### Accuracy for LR with L1 penalization is 0.919
#### Accuracy for the LR with distance matrix as input and L1 penalization is 0.9852

### As we see, KNN models outperformed the Logistic Regression models by default. This is due to the relation between the inputs and output of the data, which is nonlinear, and KNN algorithm is a distance-based algorithm and can catch these types of relations. However, when we introduce nonlinearity to the Logistic Regression with the distance matrix as an input, it's performance drastically increased to almost 1, which is better than the both KNN models.

In [43]:
# This line is used to create html version of the notebook.

In [45]:
import os

os.system('jupyter nbconvert --to html HW3.ipynb')

0