In [63]:
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

In [51]:
# Load the pre-split train/test data from CSV files in the working directory.
# Feature files are kept as DataFrames; label files are flattened to 1-D
# arrays with .values.ravel().
X_train, X_test = [pd.read_csv(csv_path)
                   for csv_path in ('X_train.csv', 'X_test.csv')]
y_train, y_test = [pd.read_csv(csv_path).values.ravel()
                   for csv_path in ('y_train.csv', 'y_test.csv')]

In [85]:
def report_classifier(title, scaling_phrase, model, X, y_true):
    """Print an evaluation report for a fitted classifier and return its predictions.

    The report consists of a banner, the value of ``model.score(X, y_true)``,
    the confusion matrix and the classification report, in that order.

    Parameters
    ----------
    title : str
        Name used in the section headings, e.g. ``'Scaled'``. It is
        upper-cased for the banner line.
    scaling_phrase : str
        Word inserted into the accuracy line, e.g. ``'with'`` or ``'without'``.
    model : fitted estimator
        Object exposing ``score(X, y)`` and ``predict(X)``.
    X : features to evaluate on.
    y_true : ground-truth labels for ``X``.

    Returns
    -------
    The predictions ``model.predict(X)``.
    """
    print("\n---------------------{} REPORT-------------------\n".format(title.upper()))

    print('Accuracy {} Scaling: {}'.format(scaling_phrase, model.score(X, y_true)))

    y_pred = model.predict(X)

    print('\n---{} confusion matrix---\n'.format(title))

    print(confusion_matrix(y_true, y_pred))

    print('\n---{} classification report---\n'.format(title))

    print(classification_report(y_true, y_pred))

    return y_pred


# KNN preceded by a StandardScaler step, versus a plain KNN on the raw features.
steps = [('scaler', StandardScaler()),
         ('knn', KNeighborsClassifier())]

pipeline = Pipeline(steps)

# knn_scaled is bound to whatever pipeline.fit returns (the fitted pipeline).
knn_scaled = pipeline.fit(X_train, y_train)

knn_unscaled = KNeighborsClassifier().fit(X_train, y_train)

# One helper call per model replaces the two copy-pasted reporting sections;
# the printed text is unchanged.
y_pred_scaled = report_classifier('Scaled', 'with', knn_scaled, X_test, y_test)

y_pred_unscaled = report_classifier('Unscaled', 'without', knn_unscaled, X_test, y_test)



---------------------SCALED REPORT-------------------

Accuracy with Scaling: 0.861244019138756

---Scaled confusion matrix---

[[241  25]
 [ 33 119]]

---Scaled classification report---

              precision    recall  f1-score   support

           0       0.88      0.91      0.89       266
           1       0.83      0.78      0.80       152

    accuracy                           0.86       418
   macro avg       0.85      0.84      0.85       418
weighted avg       0.86      0.86      0.86       418


---------------------UNSCALED REPORT-------------------

Accuracy without Scaling: 0.6483253588516746

---Unscaled confusion matrix---

[[243  23]
 [124  28]]

---Unscaled classification report---

              precision    recall  f1-score   support

           0       0.66      0.91      0.77       266
           1       0.55      0.18      0.28       152

    accuracy                           0.65       418
   macro avg       0.61      0.55      0.52       418
weighted avg       0.62      0.65      0.59       418

In [70]:
# Pipeline: StandardScaler followed by an SVC with default settings.
steps = [
    ('scaler', StandardScaler()),
    ('SVM', SVC()),
]
pipeline = Pipeline(steps)

# Grid of candidate values; each key is '<step name>__<parameter name>',
# so both entries target the 'SVM' step of the pipeline.
parameters = {
    'SVM__C': [1, 10, 100],
    'SVM__gamma': [0.1, 0.01],
}

# Search the grid with 3-fold cross-validation on the training data.
cv = GridSearchCV(pipeline, parameters, cv=3)
cv.fit(X_train, y_train)

# Evaluate the fitted search object on the held-out test set.
y_pred = cv.predict(X_test)
test_accuracy = cv.score(X_test, y_test)

print("Accuracy: {}".format(test_accuracy))

print(classification_report(y_test, y_pred))

print("Tuned Model Parameters: {}".format(cv.best_params_))

Accuracy: 0.8875598086124402
              precision    recall  f1-score   support

           0       0.88      0.96      0.92       266
           1       0.91      0.76      0.83       152

    accuracy                           0.89       418
   macro avg       0.89      0.86      0.87       418
weighted avg       0.89      0.89      0.89       418

Tuned Model Parameters: {'SVM__C': 1, 'SVM__gamma': 0.1}


In [33]:
# Print the confusion matrix, then the classification report, for the
# current global y_pred against y_test.
# NOTE(review): y_pred is whatever an earlier-executed cell last assigned.
# The output saved with this cell (execution count 33) does not match the
# SVM grid-search cell's results, so it looks stale -- re-run to confirm.
for metric_fn in (confusion_matrix, classification_report):
    print(metric_fn(y_test, y_pred))


[[253  13]
 [133  19]]
              precision    recall  f1-score   support

           0       0.66      0.95      0.78       266
           1       0.59      0.12      0.21       152

    accuracy                           0.65       418
   macro avg       0.62      0.54      0.49       418
weighted avg       0.63      0.65      0.57       418

