# Practical 7 Machine Learning

- Aayush Shah
- 19BCE245

## Grid Search with KNN

In [31]:
from pprint import pprint

import numpy as np
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import matplotlib.pyplot as plt

In [32]:
X, y = load_iris(return_X_y=True)

X_train, y_train = X[0:150:2, ...], y[0:150:2]
X_test, y_test = X[1:150:2, ...], y[1:150:2]

In [33]:
param_grid = {
    'n_neighbors': [1, 5, 10, 20],
    'weights'    : ['distance', 'uniform']
}

In [34]:
knn_model = KNeighborsClassifier()

In [35]:
search_model = GridSearchCV(knn_model, param_grid, n_jobs=-1, scoring='accuracy')

In [36]:
search_model.fit(X_train, y_train)

GridSearchCV(cv=None, error_score=nan,
             estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30,
                                            metric='minkowski',
                                            metric_params=None, n_jobs=None,
                                            n_neighbors=5, p=2,
                                            weights='uniform'),
             iid='deprecated', n_jobs=-1,
             param_grid={'n_neighbors': [1, 5, 10, 20],
                         'weights': ['distance', 'uniform']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='accuracy', verbose=0)

In [37]:
print("Best param : ",search_model.best_params_)

Best param :  {'n_neighbors': 5, 'weights': 'distance'}


In [38]:
y_pred = search_model.predict(X_test)

In [39]:
print(f"accuracy : {accuracy_score(y_test, y_pred)}")

accuracy : 0.9866666666666667


In [40]:
print("Confusion matrix : ",confusion_matrix(y_test, y_pred))

Confusion matrix :  [[25  0  0]
 [ 0 24  1]
 [ 0  0 25]]


In [41]:
print("Classification report : ",classification_report(y_test, y_pred))

Classification report :                precision    recall  f1-score   support

           0       1.00      1.00      1.00        25
           1       1.00      0.96      0.98        25
           2       0.96      1.00      0.98        25

    accuracy                           0.99        75
   macro avg       0.99      0.99      0.99        75
weighted avg       0.99      0.99      0.99        75



In [42]:
pprint(search_model.cv_results_)

{'mean_fit_time': array([0.00087857, 0.00068216, 0.00081472, 0.00067987, 0.00072827,
       0.00100598, 0.00064368, 0.00093093]),
 'mean_score_time': array([0.00211287, 0.00222936, 0.00172081, 0.00320048, 0.00264702,
       0.00330009, 0.00161638, 0.00227656]),
 'mean_test_score': array([0.94666667, 0.94666667, 0.96      , 0.96      , 0.96      ,
       0.94666667, 0.93333333, 0.92      ]),
 'param_n_neighbors': masked_array(data=[1, 1, 5, 5, 10, 10, 20, 20],
             mask=[False, False, False, False, False, False, False, False],
       fill_value='?',
            dtype=object),
 'param_weights': masked_array(data=['distance', 'uniform', 'distance', 'uniform',
                   'distance', 'uniform', 'distance', 'uniform'],
             mask=[False, False, False, False, False, False, False, False],
       fill_value='?',
            dtype=object),
 'params': [{'n_neighbors': 1, 'weights': 'distance'},
            {'n_neighbors': 1, 'weights': 'uniform'},
            {'n_neighbors'

In [43]:
search_model.best_index_

2

## Grid Search with SVM

In [44]:
param_grid = {
    'penalty': ['l1', 'l2'],
    'loss'   : ['hinge', 'squared_hinge'],
    'C'      : [0.5, 1.0, 2.0, 5.0, 10.0]
}

In [45]:
svc_model = LinearSVC()

In [46]:
search_model = GridSearchCV(svc_model, param_grid, n_jobs=-1, scoring='accuracy')

In [47]:
search_model.fit(X_train, y_train)



GridSearchCV(cv=None, error_score=nan,
             estimator=LinearSVC(C=1.0, class_weight=None, dual=True,
                                 fit_intercept=True, intercept_scaling=1,
                                 loss='squared_hinge', max_iter=1000,
                                 multi_class='ovr', penalty='l2',
                                 random_state=None, tol=0.0001, verbose=0),
             iid='deprecated', n_jobs=-1,
             param_grid={'C': [0.5, 1.0, 2.0, 5.0, 10.0],
                         'loss': ['hinge', 'squared_hinge'],
                         'penalty': ['l1', 'l2']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='accuracy', verbose=0)

In [48]:
print("Best param : ",search_model.best_params_)

Best param :  {'C': 2.0, 'loss': 'squared_hinge', 'penalty': 'l2'}


In [49]:
y_pred = search_model.predict(X_test)

In [50]:
print(f"accuracy : {accuracy_score(y_test, y_pred)}")

accuracy : 0.9466666666666667


In [51]:
print("Confusion matrix : ",confusion_matrix(y_test, y_pred))

Confusion matrix :  [[25  0  0]
 [ 0 24  1]
 [ 0  3 22]]


In [52]:
print("Classification report : ",classification_report(y_test, y_pred))

Classification report :                precision    recall  f1-score   support

           0       1.00      1.00      1.00        25
           1       0.89      0.96      0.92        25
           2       0.96      0.88      0.92        25

    accuracy                           0.95        75
   macro avg       0.95      0.95      0.95        75
weighted avg       0.95      0.95      0.95        75



In [53]:
pprint(search_model.cv_results_)

{'mean_fit_time': array([0.00065522, 0.0026547 , 0.00053196, 0.00932889, 0.00049925,
       0.00223246, 0.00066447, 0.01215959, 0.00045056, 0.00361872,
       0.00052738, 0.00588355, 0.00084724, 0.00364995, 0.00047655,
       0.010116  , 0.00050411, 0.0051692 , 0.00072131, 0.00755215]),
 'mean_score_time': array([0.        , 0.00193143, 0.        , 0.00054579, 0.        ,
       0.00043759, 0.        , 0.00106173, 0.        , 0.00055356,
       0.        , 0.00056076, 0.        , 0.00047431, 0.        ,
       0.00075426, 0.        , 0.00066524, 0.        , 0.0005393 ]),
 'mean_test_score': array([       nan, 0.86666667,        nan, 0.96      ,        nan,
       0.93333333,        nan, 0.96      ,        nan, 0.93333333,
              nan, 0.97333333,        nan, 0.94666667,        nan,
       0.97333333,        nan, 0.93333333,        nan, 0.93333333]),
 'param_C': masked_array(data=[0.5, 0.5, 0.5, 0.5, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0,
                   2.0, 5.0, 5.0, 5.0, 5.0, 10

In [54]:
search_model.best_index_

11