In [1]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler
from sklearn.metrics import accuracy_score, log_loss
import matplotlib.pyplot as plt
import os

In [2]:
# Load the Glass Identification data and integer-encode the target column.
#
# The CSV location can be overridden with the GLASS_CSV environment variable so
# the notebook is runnable on machines other than the author's; when the
# variable is unset the original path is used unchanged.
glass_csv = os.environ.get(
    "GLASS_CSV",
    r"/home/sarthakredasani/Documents/CDAC_ML/Cases/Cases/Glass Identification/Glass.csv",
)
glass = pd.read_csv(glass_csv)

# Replace the 'Type' column with integer codes. `le` is kept at module level so
# the original labels stay recoverable via le.inverse_transform / le.classes_.
le = LabelEncoder()
glass['Type'] = le.fit_transform(glass['Type'])

In [3]:
# Separate the predictors from the encoded target, then hold out 30% of the
# rows for evaluation. Stratifying on y keeps the class mix similar in both
# parts, and the fixed seed makes the split repeatable.
y = glass['Type']
X = glass.drop(columns='Type')
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.3,
    stratify=y,
    random_state=25,
)

In [4]:
# Baseline: linear-kernel SVC with default C on the unscaled features, asking
# for a one-vs-one decision function. probability=True is required for
# predict_proba; the cell prints the log loss on the held-out split.
svm = SVC(
    kernel='linear',
    decision_function_shape='ovo',
    probability=True,
    random_state=25,
).fit(X_train, y_train)  # fit() returns the estimator itself

y_pred_prob = svm.predict_proba(X_test)
print(log_loss(y_test, y_pred_prob))

0.9595681477553203


In [5]:
# Same linear-kernel SVC as the baseline, but requesting a one-vs-rest
# decision function. In the recorded run this prints the same log loss as the
# 'ovo' fit.
svm = SVC(kernel='linear', probability=True, random_state=25,
          decision_function_shape='ovr')
svm.fit(X_train, y_train)

y_pred_prob = svm.predict_proba(X_test)
ovr_loss = log_loss(y_test, y_pred_prob)
print(ovr_loss)

0.9595681477553203


In [6]:
# Grid over the regularisation strength C and decision_function_shape for a
# linear-kernel SVC on the unscaled features, scored by log loss on the
# held-out split (lower is better).
#
# decision_function_shape only changes the layout of decision_function(); it
# is not used when fitting or in predict_proba, which is why the 'ovo' and
# 'ovr' rows always carry the same score. The model is therefore fitted once
# per C and that loss is recorded for every shape. This halves the number of
# fits (each one is slow because probability=True triggers an internal
# cross-validation) while leaving df_scores row-for-row identical.
#
# NOTE(review): C is ranked on the same split that is reported as the test
# score, so the best row is optimistically biased - consider cross-validating
# on X_train instead.
Cs = np.linspace(0.01, 5, 20)
dfn = ['ovo','ovr']
scores = []
for c in Cs:
    # dfn[-1] leaves the leftover `svm` configured exactly as the final
    # iteration of a full (C, shape) loop would have left it.
    svm = SVC(kernel='linear', C=c, decision_function_shape=dfn[-1],
              probability=True, random_state=25)
    svm.fit(X_train, y_train)
    y_pred_prob = svm.predict_proba(X_test)
    loss = log_loss(y_test, y_pred_prob)
    # One row per shape, in the same (C, shape) order as a nested loop.
    scores.extend([c, f, loss] for f in dfn)
df_scores = pd.DataFrame(scores, columns=['C','Decision func','score'])
df_scores.sort_values('score', ascending=True)

Unnamed: 0,C,Decision func,score
30,3.949474,ovo,0.950273
31,3.949474,ovr,0.950273
33,4.212105,ovr,0.950342
32,4.212105,ovo,0.950342
35,4.474737,ovr,0.950974
34,4.474737,ovo,0.950974
29,3.686842,ovr,0.951301
28,3.686842,ovo,0.951301
37,4.737368,ovr,0.953127
36,4.737368,ovo,0.953127


In [7]:
# Using scaling: repeat the hyperparameter search on standardised features

In [8]:
# Standardise the features. The scaler learns its mean and variance from the
# training rows only and the same transform is then applied to both splits,
# so no statistics from the held-out rows leak into the fit.
scaler = StandardScaler().fit(X_train)
X_trn_scl = scaler.transform(X_train)
X_tst_scl = scaler.transform(X_test)

In [9]:
# Grid over the regularisation strength C and decision_function_shape for a
# linear-kernel SVC on the standardised features, scored by log loss on the
# held-out split (lower is better).
#
# decision_function_shape only changes the layout of decision_function(); it
# is not used when fitting or in predict_proba, which is why the 'ovo' and
# 'ovr' rows always carry the same score. The model is therefore fitted once
# per C and that loss is recorded for every shape. This halves the number of
# fits (each one is slow because probability=True triggers an internal
# cross-validation) while leaving df_scores row-for-row identical.
#
# NOTE(review): C is ranked on the same split that is reported as the test
# score, so the best row is optimistically biased - consider cross-validating
# on the training split instead.
Cs = np.linspace(0.01, 5, 20)
dfn = ['ovo','ovr']
scores = []
for c in Cs:
    # dfn[-1] leaves the leftover `svm` configured exactly as the final
    # iteration of a full (C, shape) loop would have left it.
    svm = SVC(kernel='linear', C=c, decision_function_shape=dfn[-1],
              probability=True, random_state=25)
    svm.fit(X_trn_scl, y_train)
    y_pred_prob = svm.predict_proba(X_tst_scl)
    loss = log_loss(y_test, y_pred_prob)
    # One row per shape, in the same (C, shape) order as a nested loop.
    scores.extend([c, f, loss] for f in dfn)
df_scores = pd.DataFrame(scores, columns=['C','Decision func','score'])
df_scores.sort_values('score', ascending=True)

Unnamed: 0,C,Decision func,score
8,1.060526,ovo,0.983908
9,1.060526,ovr,0.983908
10,1.323158,ovo,0.984643
11,1.323158,ovr,0.984643
13,1.585789,ovr,0.988364
12,1.585789,ovo,0.988364
27,3.424211,ovr,0.992184
26,3.424211,ovo,0.992184
14,1.848421,ovo,0.992205
15,1.848421,ovr,0.992205


In [10]:
# Grid over C, gamma and decision_function_shape for an RBF-kernel SVC on the
# standardised features, scored by log loss on the held-out split (lower is
# better).
#
# decision_function_shape only changes the layout of decision_function(); it
# is not used when fitting or in predict_proba, so the 'ovo' and 'ovr' rows for
# a given (C, gamma) always carry the same score. Each (C, gamma) pair is
# therefore fitted once (400 fits instead of 800) and its loss is recorded for
# every shape. Rows are emitted in the same C -> shape -> gamma order as a
# full triple loop, so df_scores and its index are unchanged.
#
# NOTE(review): C and gamma are ranked on the same split that is reported as
# the test score, so the best row is optimistically biased - consider
# cross-validating on the training split instead.
Cs = np.linspace(0.01, 5, 20)
dfn = ['ovo','ovr']
Gs = np.linspace(0.01, 5, 20)
scores = []
for c in Cs:
    # Log loss for every gamma at this C, in Gs order.
    losses = []
    for g in Gs:
        # dfn[-1] leaves the leftover `svm` configured exactly as the final
        # iteration of a full (C, shape, gamma) loop would have left it.
        svm = SVC(kernel='rbf', C=c, decision_function_shape=dfn[-1], gamma=g,
                  probability=True, random_state=25)
        svm.fit(X_trn_scl, y_train)
        y_pred_prob = svm.predict_proba(X_tst_scl)
        losses.append(log_loss(y_test, y_pred_prob))
    # Replay the per-gamma losses once per shape to keep the original layout.
    for f in dfn:
        scores.extend([c, f, gamma, loss] for gamma, loss in zip(Gs, losses))
df_scores = pd.DataFrame(scores, columns=['C','Decision func','Gamma','score'])
df_scores.sort_values('score', ascending=True)

Unnamed: 0,C,Decision func,Gamma,score
781,5.000000,ovr,0.272632,0.793241
761,5.000000,ovo,0.272632,0.793241
741,4.737368,ovr,0.272632,0.793247
721,4.737368,ovo,0.272632,0.793247
701,4.474737,ovr,0.272632,0.793626
...,...,...,...,...
38,0.010000,ovr,4.737368,1.378129
59,0.272632,ovo,5.000000,1.378613
79,0.272632,ovr,5.000000,1.378613
39,0.010000,ovr,5.000000,1.386614
