In [None]:
# Importing important libraries

import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

# Show every column and every row when a DataFrame is displayed.
# Full option names are used: abbreviated keys are resolved by pattern matching
# and stop working if pandas ever adds another option that matches the same pattern.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [None]:
# Read the training table from the CSV in the working directory,
# then preview its first rows as the cell output.
train_csv_path = 'train.csv'
df_train = pd.read_csv(train_csv_path)
df_train.head(n=5)

In [None]:
# Checking dimensions of the train data: (number of rows, number of columns)

df_train.shape

In [None]:
# Checking column-wise information (pandas' summary of the frame's columns)

df_train.info()

In [None]:
# Checking summary statistics of the numerical columns of the train data

df_train.describe()

In [None]:
# Distribution of the target variable 'label':
# one bar per distinct label value, bar height = number of rows with that value.

label_counts = df_train['label'].value_counts()
label_counts.plot.bar(edgecolor='black', color='cyan')
plt.show()

In [None]:
# Distinct values of the target column, in ascending order.
distinct_labels = df_train['label'].unique()
distinct_labels.sort()  # sorts the freshly returned array in place
list(distinct_labels)

In [None]:
# Mean of every other column within each 'label' group (one row per label value).
# NOTE(review): the remaining columns are presumably pixel intensities — confirm against the data set.
digit_df = df_train.groupby('label').mean()
digit_df

In [None]:
# Separate the target column from the predictors:
# y holds 'label', X holds every other column of df_train.
y = df_train['label']
X = df_train.drop(columns='label')

In [None]:
# Feature matrix dimensions: (rows, columns) after dropping 'label'.
X.shape

In [None]:
# Length of the target series, reported as a 1-tuple.
y.shape

In [None]:
# Preview the first few target values.
y.head()

In [None]:
from sklearn.preprocessing import scale

In [None]:
# Feature matrix dimensions once more, displayed right before scaling
# (X has not been modified since the previous shape check).
X.shape

In [None]:
# Standardise the features with sklearn's scale().
# NOTE(review): the scaling statistics are computed from every row of X, so any
# subset of X_scaled used for training has already seen information from the
# remaining rows.
X_scaled = scale(X)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Split the *unscaled* features first (70 % train / 30 % test, fixed seed), so the
# held-out rows cannot influence the scaling statistics.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 100)

# Learn the per-column mean and standard deviation from the training rows only,
# then apply that same transformation to both splits.
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

In [None]:
# Print the shape of each piece of the split, one per line,
# in the order X_train, X_test, y_train, y_test.
for split_part in (X_train, X_test, y_train, y_test):
    print(split_part.shape)

### Model Building

In [None]:
from sklearn.svm import SVC

# Baseline model: support vector classifier with a linear kernel;
# every other setting is left at its default.
model_svm = SVC(kernel = 'linear')

# Fit on the training split. As the last expression of the cell, the value
# returned by fit() is also displayed.
model_svm.fit(X_train, y_train)

In [None]:
# Predict labels for the held-out test split with the linear-kernel model

y_pred = model_svm.predict(X_test)

In [None]:
# Evaluate the predictions in y_pred against the true test labels.

from sklearn import metrics

# Accuracy, reported as a percentage
linear_accuracy = metrics.accuracy_score(y_test, y_pred)
print('Accuracy : ', 100*(linear_accuracy), '\n')

# Confusion matrix of true labels (y_test) against predictions (y_pred)
linear_confusion = metrics.confusion_matrix(y_test, y_pred)
print(linear_confusion)

## Grid Search: Hyperparameter Tuning

In [None]:
# Creating a KFold object with 5 splits.
# shuffle=True with a fixed random_state shuffles the rows before splitting while
# keeping the fold assignment reproducible across runs.
from sklearn.model_selection import KFold

folds = KFold(n_splits = 5, shuffle = True, random_state = 101)

# Display the configured splitter as the cell output.
folds

In [None]:
# Candidate values for the two RBF-kernel hyperparameters that will be tuned.
gamma_values = [1e-2, 1e-3, 1e-4]
C_values = [1, 10, 100, 1000]
hyper_params = [{'gamma': gamma_values, 'C': C_values}]

# Estimator to be tuned: RBF-kernel support vector classifier.
model = SVC(kernel='rbf')

# Display the estimator as the cell output.
model

In [None]:
from sklearn.model_selection import GridSearchCV

# Grid search over hyper_params for the RBF model, cross-validated with `folds`.
# return_train_score=True additionally records training-fold scores in cv_results_,
# so training and validation accuracy can be compared afterwards.
# No `scoring` is passed, so the estimator's own default scorer is used.
model_cv = GridSearchCV(estimator = model,
                       param_grid = hyper_params,
                       cv = folds,
                       verbose = 1,
                       return_train_score = True)


# Display the configured (not yet fitted) search object.
model_cv

In [None]:
# Run the grid search on the training split: every parameter combination is
# fitted once per fold, so this is the most expensive cell of the notebook.
model_cv.fit(X_train, y_train)

In [None]:
# CV results: one row per parameter combination tried by the grid search,
# collected into a DataFrame for inspection and plotting.

cv_results = pd.DataFrame(model_cv.cv_results_)
cv_results

In [None]:
# Accuracy-vs-C curves from the grid search, one panel per gamma value.

# converting C to numeric type for plotting on x-axis
cv_results['param_C'] = cv_results['param_C'].astype('int')

# One filtered frame per gamma value in the grid. These names are kept as
# notebook-level variables so that any later cell referring to them still works.
gamma_01 = cv_results[cv_results['param_gamma'] == 0.01]
gamma_001 = cv_results[cv_results['param_gamma'] == 0.001]
gamma_0001 = cv_results[cv_results['param_gamma'] == 0.0001]

# (panel title, rows for that gamma), in left-to-right panel order
panels = [
    ("Gamma=0.01", gamma_01),
    ("Gamma=0.001", gamma_001),
    ("Gamma=0.0001", gamma_0001),
]

# A single loop draws all three panels, so formatting changes are made in one place.
fig, axes = plt.subplots(1, 3, figsize=(16, 6))
for ax, (panel_title, subset) in zip(axes, panels):
    # Plot order must match the legend order below: test first, then train.
    ax.plot(subset["param_C"], subset["mean_test_score"])
    ax.plot(subset["param_C"], subset["mean_train_score"])
    ax.set_xlabel('C')
    ax.set_ylabel('Accuracy')
    ax.set_title(panel_title)
    ax.set_ylim([0.60, 1])
    ax.legend(['test accuracy', 'train accuracy'], loc='upper left')
    # C spans several orders of magnitude, hence the logarithmic x-axis.
    ax.set_xscale('log')


In [None]:
# Report the best score found by the grid search together with the
# hyperparameter combination that produced it.
best_hyperparams = model_cv.best_params_
best_score = model_cv.best_score_

summary_template = "The best test score is {0} corresponding to hyperparameters {1}"
print(summary_template.format(best_score, best_hyperparams))

In [None]:
# model with optimal hyperparameters

# C and gamma are read from the fitted grid search rather than typed in by hand,
# so this cell always uses whatever combination the search selected.
best_params = model_cv.best_params_
model = SVC(C=best_params['C'], gamma=best_params['gamma'], kernel="rbf")

# Refit on the training split and predict the held-out test split.
# Note: this replaces the y_pred produced earlier by the linear-kernel model.
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# metrics
print("accuracy", metrics.accuracy_score(y_test, y_pred), "\n")
print(metrics.confusion_matrix(y_test, y_pred), "\n")