### SVM

In [None]:
##### SVM Classifier

from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC


# Hold out a stratified 20% test split dedicated to the SVM experiment.
X_train_svm, X_test_svm, y_train_svm, y_test_svm = train_test_split(
    X_final, y, test_size=0.2, random_state=0, stratify=y)

print(X_train_svm.shape, X_test_svm.shape)

# Create the 10-fold stratified CV object used by the grid search
kFold_svm = StratifiedKFold(n_splits=10)


# Exhaustive Grid Search
# Pipeline steps: random-forest based feature selection (median importance
# threshold) -> standardisation -> SVM. The forest is seeded so that the
# selected features (and therefore the CV/test scores) are reproducible,
# matching the fixed seeds used for the split and the SVC.
pipe_svm = Pipeline([('feature_selection', SelectFromModel(RandomForestClassifier(random_state=0), threshold='median')),
                     ('scaler', StandardScaler()),
                     ('svm', SVC(random_state=0, class_weight='balanced'))])

# One sub-grid per kernel so that only the hyper-parameters relevant to that
# kernel are searched.
param_grid_svm = [{'svm__C': [1, 10, 100, 1000], 'svm__kernel': ['linear']},
                  {'svm__C': [1, 10, 100, 1000], 'svm__gamma': [0.01, 0.001, 0.0001], 'svm__kernel': ['rbf']},
                  {'svm__C': [1, 10, 100, 1000], 'svm__gamma': [0.01, 0.001, 0.0001], 'svm__degree': [2, 3, 5, 7], 'svm__coef0': [0, 1, 2], 'svm__kernel': ['poly']},
                  {'svm__C': [1, 10, 100, 1000], 'svm__gamma': [0.01, 0.001, 0.0001], 'svm__coef0': [0, 1, 2], 'svm__kernel': ['sigmoid']}]

grid_svm = GridSearchCV(estimator=pipe_svm,
                        param_grid=param_grid_svm,
                        scoring='accuracy',
                        cv=kFold_svm,
                        n_jobs=-1)

# Fit on the SVM-specific training split created above (previously this used
# X_train / y_train, which are not the split produced in this cell).
grid_svm.fit(X_train_svm, y_train_svm)

print('Best CV accuracy: {:.4f}'.format(grid_svm.best_score_))
# Score on the matching held-out split of the same train/test partition.
print('Test score: {:.4f}'.format(grid_svm.score(X_test_svm, y_test_svm)))
print('Best parameters: {}'.format(grid_svm.best_params_))

In [None]:
# Take the winning pipeline from the finished grid search
# (best_estimator_ is the full Pipeline object, not just a parameter dict)
extr_bp_svm = grid_svm.best_estimator_

# Train that pipeline on the SVM training split
extr_bp_svm.fit(X=X_train_svm, y=y_train_svm)


In [None]:
from sklearn.metrics import confusion_matrix
from sklearn import metrics

# Refit the selected pipeline on the SVM training split before evaluating it
extr_bp_svm.fit(X=X_train_svm, y=y_train_svm)

# Accuracy on the training split and on the held-out split
print("Train score: {:.4f}".format(
    extr_bp_svm.score(X=X_train_svm, y=y_train_svm)))
print("Test score: {:.4f}".format(
    extr_bp_svm.score(X=X_test_svm, y=y_test_svm)))

# Class predictions for the held-out split
y_pred_svm = extr_bp_svm.predict(X=X_test_svm)

# Confusion matrix (true labels first, predictions second)
print(confusion_matrix(y_true=y_test_svm, y_pred=y_pred_svm))

# Per-class precision / recall / F1 summary
print(metrics.classification_report(y_true=y_test_svm, y_pred=y_pred_svm))


### KNN 

In [None]:
##### KNN Classifier

from sklearn import neighbors 
from sklearn.neighbors import KNeighborsClassifier

# Hold out a stratified 20% test split dedicated to the KNN experiment.
X_train_knn, X_test_knn, y_train_knn, y_test_knn = train_test_split(
    X_final, y, test_size=0.2, random_state=0, stratify=y)

# Report the shapes of the KNN split (previously printed the SVM split's
# shapes due to a copy-paste slip).
print(X_train_knn.shape, X_test_knn.shape)

# Create the 10-fold stratified CV object used by the grid search
kFold_knn = StratifiedKFold(n_splits=10)


# Exhaustive Grid Search
# Pipeline steps: random-forest based feature selection (median importance
# threshold) -> standardisation -> k-nearest-neighbours. The forest is seeded
# so that the selected features (and therefore the scores) are reproducible,
# matching the fixed seed used for the split.
pipe_knn = Pipeline([('feature_selection', SelectFromModel(RandomForestClassifier(random_state=0), threshold='median')),
                     ('scaler', StandardScaler()),
                     ('knn', KNeighborsClassifier())])

param_grid_knn = [{'knn__leaf_size': [10, 20, 30, 40, 50],
                   'knn__n_neighbors': [5, 10, 20, 30],
                   'knn__p': [1, 2],
                   'knn__weights': ['uniform', 'distance']}]

grid_knn = GridSearchCV(estimator=pipe_knn,
                        param_grid=param_grid_knn,
                        scoring='accuracy',
                        cv=kFold_knn,
                        n_jobs=-1)

grid_knn.fit(X_train_knn, y_train_knn)

print('Best CV accuracy: {:.4f}'.format(grid_knn.best_score_))
print('Test score: {:.4f}'.format(grid_knn.score(X_test_knn, y_test_knn)))
print('Best parameters: {}'.format(grid_knn.best_params_))



In [None]:
# Take the winning pipeline from the finished grid search
# (best_estimator_ is the full Pipeline object, not just a parameter dict)
extr_bp_knn = grid_knn.best_estimator_

# Train that pipeline on the KNN training split
extr_bp_knn.fit(X=X_train_knn, y=y_train_knn)


In [None]:
# Refit the selected pipeline on the KNN training split before evaluating it
extr_bp_knn.fit(X=X_train_knn, y=y_train_knn)

# Accuracy on the training split and on the held-out split
print("Train score: {:.4f}".format(
    extr_bp_knn.score(X=X_train_knn, y=y_train_knn)))
print("Test score: {:.4f}".format(
    extr_bp_knn.score(X=X_test_knn, y=y_test_knn)))

# Class predictions for the held-out split
y_pred_knn = extr_bp_knn.predict(X=X_test_knn)

# Confusion matrix (true labels first, predictions second)
print(metrics.confusion_matrix(y_true=y_test_knn, y_pred=y_pred_knn))

# Per-class precision / recall / F1 summary
print(metrics.classification_report(y_true=y_test_knn, y_pred=y_pred_knn))