In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sbn
import warnings
# Silence every Python warning for the rest of the session.
# NOTE(review): this blanket filter also hides scikit-learn diagnostics
# (e.g. undefined-metric or failed-fit warnings); consider narrowing it.
warnings.filterwarnings("ignore")

In [2]:
# Location of the glass dataset CSV.
# NOTE(review): absolute local path - it will not resolve on another machine.
GLASS_CSV_PATH = r'D:\Datasets\glass.csv'

data = pd.read_csv(GLASS_CSV_PATH)
data.head()  # preview the first five rows

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0,1
1,1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,1
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0,1
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0,1
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0,1


In [3]:
# (number of rows, number of columns) of the loaded frame.
data.shape

(214, 10)

In [4]:
# Total number of cells in the frame (rows x columns).
data.size

2140

In [5]:
# Per-column dtype and non-null count; a quick check for missing values.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 214 entries, 0 to 213
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   RI      214 non-null    float64
 1   Na      214 non-null    float64
 2   Mg      214 non-null    float64
 3   Al      214 non-null    float64
 4   Si      214 non-null    float64
 5   K       214 non-null    float64
 6   Ca      214 non-null    float64
 7   Ba      214 non-null    float64
 8   Fe      214 non-null    float64
 9   Type    214 non-null    int64  
dtypes: float64(9), int64(1)
memory usage: 16.8 KB


In [6]:
# Distribution of the raw multi-class label before it is binarized.
data['Type'].value_counts()

2    76
1    70
7    29
3    17
5    13
6     9
Name: Type, dtype: int64

In [7]:
# Collapse the multi-class label into a binary target:
# Type values below 3 become 0, every other value becomes 1.
# NOTE: `Type` is overwritten in place, so re-running this cell maps every
# row to 0 (both 0 and 1 are < 3). Reload `data` before executing it again.
is_low_type = data['Type'] < 3
data['Type'] = np.where(is_low_type, 0, 1)

# Class balance after binarization.
data['Type'].value_counts()

0    146
1     68
Name: Type, dtype: int64

In [8]:
# Separate the binary target from the feature columns.
y = data['Type']
x = data.drop(columns='Type')

In [9]:
from imblearn.over_sampling import SMOTE

# Oversample the minority class with synthetic samples until both classes
# have the same number of rows.
# The sampler is seeded so the resampled data - and therefore every score
# computed from it - is reproducible across kernel restarts.
# NOTE(review): resampling happens before the train/test split in the next
# cell, so synthetic points interpolated from rows that end up in the test
# set can leak into training. Consider splitting first and resampling only
# the training portion.
s = SMOTE(random_state=101)
x, y = s.fit_resample(x, y)
print(x.shape, y.shape)

(292, 9) (292,)


In [10]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for evaluation; the fixed seed keeps the split
# identical between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=101,
)

In [11]:
from sklearn.svm import SVC

# Baseline model: support vector classifier with all-default hyperparameters.
svcl = SVC().fit(x_train, y_train)  # fit() returns the estimator itself
svcl

SVC()

In [12]:
# Baseline predictions on the held-out test rows.
y_pred = svcl.predict(x_test)

In [13]:
from sklearn import metrics

# Test-set evaluation of the baseline model: accuracy, confusion matrix
# (rows = true class, columns = predicted class) and the per-class report.
print(
    metrics.accuracy_score(y_test, y_pred),
    metrics.confusion_matrix(y_test, y_pred),
    metrics.classification_report(y_test, y_pred),
    sep='\n',
)

0.4657534246575342
[[34  0]
 [39  0]]
              precision    recall  f1-score   support

           0       0.47      1.00      0.64        34
           1       0.00      0.00      0.00        39

    accuracy                           0.47        73
   macro avg       0.23      0.50      0.32        73
weighted avg       0.22      0.47      0.30        73



In [14]:
# Mean accuracy on the training rows, for comparison with the test accuracy.
svcl.score(x_train,y_train)

0.5114155251141552

# Manual HyperParameter Tuning

## 1 - C (default=1)

In [15]:
# C is the regularization parameter; regularization strength is inversely
# proportional to C, so C=3 penalizes margin violations more than the default.
svc1 = SVC(C=3).fit(x_train, y_train)
svc1

SVC(C=3)

In [16]:
# Test-set predictions of the C=3 model.
y_pred1 = svc1.predict(x_test)

In [17]:
from sklearn import metrics

# Test-set evaluation of the C=3 model.
print(
    metrics.accuracy_score(y_test, y_pred1),
    metrics.confusion_matrix(y_test, y_pred1),
    metrics.classification_report(y_test, y_pred1),
    sep='\n',
)

0.726027397260274
[[32  2]
 [18 21]]
              precision    recall  f1-score   support

           0       0.64      0.94      0.76        34
           1       0.91      0.54      0.68        39

    accuracy                           0.73        73
   macro avg       0.78      0.74      0.72        73
weighted avg       0.79      0.73      0.72        73



In [18]:
# Training accuracy of the C=3 model, to compare with its test accuracy.
svc1.score(x_train,y_train)

0.776255707762557

## 2 - kernel (default='rbf')

In [19]:
# Swap the default RBF kernel for a polynomial one (degree stays at its
# default value).
svc2 = SVC(kernel='poly').fit(x_train, y_train)
svc2

SVC(kernel='poly')

In [20]:
# Test-set predictions of the polynomial-kernel model.
y_pred2 = svc2.predict(x_test)

In [21]:
from sklearn import metrics

# Test-set evaluation of the polynomial-kernel model.
print(
    metrics.accuracy_score(y_test, y_pred2),
    metrics.confusion_matrix(y_test, y_pred2),
    metrics.classification_report(y_test, y_pred2),
    sep='\n',
)

0.6575342465753424
[[33  1]
 [24 15]]
              precision    recall  f1-score   support

           0       0.58      0.97      0.73        34
           1       0.94      0.38      0.55        39

    accuracy                           0.66        73
   macro avg       0.76      0.68      0.64        73
weighted avg       0.77      0.66      0.63        73



In [22]:
# Training accuracy of the polynomial-kernel model.
svc2.score(x_train,y_train)

0.7214611872146118

## 3 - degree (default=3; only used when kernel='poly')

In [23]:
# Polynomial kernel restricted to degree 1 instead of the default degree 3.
svc3 = SVC(kernel='poly', degree=1).fit(x_train, y_train)
svc3

SVC(degree=1, kernel='poly')

In [24]:
# Test-set predictions of the degree-1 polynomial model.
y_pred3 = svc3.predict(x_test)

In [25]:
from sklearn import metrics

# Test-set evaluation of the degree-1 polynomial model.
print(
    metrics.accuracy_score(y_test, y_pred3),
    metrics.confusion_matrix(y_test, y_pred3),
    metrics.classification_report(y_test, y_pred3),
    sep='\n',
)

0.4657534246575342
[[34  0]
 [39  0]]
              precision    recall  f1-score   support

           0       0.47      1.00      0.64        34
           1       0.00      0.00      0.00        39

    accuracy                           0.47        73
   macro avg       0.23      0.50      0.32        73
weighted avg       0.22      0.47      0.30        73



In [26]:
# Training accuracy of the degree-1 polynomial model.
svc3.score(x_train,y_train)

0.5114155251141552

## 4 - coef0 (default = 0.0) (works only when kernel='poly' or 'sigmoid')

In [27]:
# coef0 is the independent term of the kernel function; it is tried here with
# the sigmoid kernel, one of the two kernels that use it.
svc4 = SVC(coef0=0.05, kernel="sigmoid").fit(x_train, y_train)
svc4

SVC(coef0=0.05, kernel='sigmoid')

In [28]:
# Test-set predictions of the sigmoid-kernel model.
y_pred4 = svc4.predict(x_test)

In [29]:
from sklearn import metrics

# Test-set evaluation of the sigmoid-kernel model.
print(
    metrics.accuracy_score(y_test, y_pred4),
    metrics.confusion_matrix(y_test, y_pred4),
    metrics.classification_report(y_test, y_pred4),
    sep='\n',
)

0.4657534246575342
[[34  0]
 [39  0]]
              precision    recall  f1-score   support

           0       0.47      1.00      0.64        34
           1       0.00      0.00      0.00        39

    accuracy                           0.47        73
   macro avg       0.23      0.50      0.32        73
weighted avg       0.22      0.47      0.30        73



In [30]:
# Training accuracy of the sigmoid-kernel model.
svc4.score(x_train,y_train)

0.5114155251141552

## 5 - gamma (default='scale')

In [31]:
# gamma is the kernel coefficient. 'auto' uses 1 / n_features, whereas the
# default 'scale' additionally divides by the variance of the training data.
svc5 = SVC(gamma='auto').fit(x_train, y_train)
svc5

SVC(gamma='auto')

In [32]:
# Test-set predictions of the gamma='auto' model.
y_pred5 = svc5.predict(x_test)

In [33]:
from sklearn import metrics

# Test-set evaluation of the gamma='auto' model.
print(
    metrics.accuracy_score(y_test, y_pred5),
    metrics.confusion_matrix(y_test, y_pred5),
    metrics.classification_report(y_test, y_pred5),
    sep='\n',
)

0.8493150684931506
[[32  2]
 [ 9 30]]
              precision    recall  f1-score   support

           0       0.78      0.94      0.85        34
           1       0.94      0.77      0.85        39

    accuracy                           0.85        73
   macro avg       0.86      0.86      0.85        73
weighted avg       0.86      0.85      0.85        73



In [34]:
# Training accuracy of the gamma='auto' model.
svc5.score(x_train,y_train)

0.8584474885844748

## 6 - tol (default=1e-3)

In [35]:
# tol is the tolerance of the solver's stopping criterion; 3.0 is far looser
# than the default 1e-3, so optimization stops much earlier.
svc6 = SVC(tol=3.0).fit(x_train, y_train)
svc6

SVC(tol=3.0)

In [36]:
# Test-set predictions of the tol=3.0 model.
y_pred6 = svc6.predict(x_test)

In [37]:
from sklearn import metrics

# Test-set evaluation of the tol=3.0 model.
print(
    metrics.accuracy_score(y_test, y_pred6),
    metrics.confusion_matrix(y_test, y_pred6),
    metrics.classification_report(y_test, y_pred6),
    sep='\n',
)

0.5342465753424658
[[ 0 34]
 [ 0 39]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        34
           1       0.53      1.00      0.70        39

    accuracy                           0.53        73
   macro avg       0.27      0.50      0.35        73
weighted avg       0.29      0.53      0.37        73



In [38]:
# Training accuracy of the tol=3.0 model.
svc6.score(x_train,y_train)

0.4885844748858447

## 7 - decision_function_shape (default='ovr'; ignored for binary classification)

In [39]:
# Request the one-vs-one decision function shape. scikit-learn documents this
# parameter as ignored for binary classification, which is the case here, so
# the model should match the all-default baseline.
svc7 = SVC(decision_function_shape='ovo').fit(x_train, y_train)
svc7

SVC(decision_function_shape='ovo')

In [40]:
# Test-set predictions of the decision_function_shape='ovo' model.
y_pred7 = svc7.predict(x_test)

In [41]:
from sklearn import metrics

# Test-set evaluation of the decision_function_shape='ovo' model.
print(
    metrics.accuracy_score(y_test, y_pred7),
    metrics.confusion_matrix(y_test, y_pred7),
    metrics.classification_report(y_test, y_pred7),
    sep='\n',
)

0.4657534246575342
[[34  0]
 [39  0]]
              precision    recall  f1-score   support

           0       0.47      1.00      0.64        34
           1       0.00      0.00      0.00        39

    accuracy                           0.47        73
   macro avg       0.23      0.50      0.32        73
weighted avg       0.22      0.47      0.30        73



In [42]:
# Training accuracy of the decision_function_shape='ovo' model.
svc7.score(x_train,y_train)

0.5114155251141552

# HyperParameter Tuning using RandomizedSearchCV

In [43]:
from sklearn.model_selection import RandomizedSearchCV

In [44]:
# Candidate values sampled by RandomizedSearchCV for each SVC hyperparameter.
params = {
    # C must be strictly positive: C=0 is rejected by SVC, and with warnings
    # suppressed those candidates would fail silently and waste iterations.
    'C'      : list(range(1, 10)),
    'kernel' : ['rbf', 'sigmoid', 'poly'],
    # Only used by the 'poly' kernel.
    'degree' : list(range(1, 10)),
    # 100 evenly spaced Python floats in [0, 5]; only used by 'poly'/'sigmoid'.
    'coef0'  : np.linspace(0, 5, 100).tolist(),
    'gamma'  : ['scale', 'auto'],
    'tol'    : [0.001, 0.01, 0.1, 1, 2, 3, 4, 5, 6],
    'decision_function_shape' : ['ovo', 'ovr'],
}

In [45]:
#params

In [None]:
# Randomized search: draw 100 hyperparameter combinations from `params` and
# score each with 5-fold cross-validation on the training split.
svcclf = SVC()
random_cv_svcclf = RandomizedSearchCV(
    estimator=svcclf,
    param_distributions=params,
    n_iter=100,
    cv=5,
    n_jobs=7,          # number of parallel worker processes
    verbose=True,
    random_state=101,  # seed the candidate sampling so the search is repeatable
)
random_cv_svcclf.fit(x_train, y_train)

Fitting 5 folds for each of 100 candidates, totalling 500 fits


[Parallel(n_jobs=7)]: Using backend LokyBackend with 7 concurrent workers.
[Parallel(n_jobs=7)]: Done  36 tasks      | elapsed:    1.8s


In [None]:
# NOTE: despite its name, `best_params` holds the best *estimator* found by
# the search (best_estimator_), not a dict of parameters.
best_params = random_cv_svcclf.best_estimator_
best_params

In [None]:
# Dict of the hyperparameter values of the best-scoring candidate.
best_p = random_cv_svcclf.best_params_
best_p

In [None]:
# Rebuild the final model from every hyperparameter selected by the search.
# Unpacking the whole dict guarantees nothing is dropped: the hand-written
# keyword list previously omitted `kernel`, so the final model always used the
# default RBF kernel regardless of what the search had chosen.
svc_final = SVC(**best_p)
svc_final.fit(x_train, y_train)

In [None]:
# Test-set predictions of the tuned final model.
y_pr = svc_final.predict(x_test)


In [None]:
# Test-set evaluation of the tuned final model.
# `metrics` comes from the `from sklearn import metrics` executed in earlier cells.
print(
    metrics.accuracy_score(y_test, y_pr),
    metrics.confusion_matrix(y_test, y_pr),
    metrics.classification_report(y_test, y_pr),
    sep='\n',
)

In [None]:
# Training accuracy of the tuned final model, to compare with its test accuracy.
svc_final.score(x_train,y_train)