In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score, log_loss
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC

In [2]:
# Location of the Sonar CSV. The default is the original author's local path;
# set the SONAR_CSV environment variable to run the notebook on another machine
# without editing this cell.
SONAR_CSV = os.environ.get(
    "SONAR_CSV",
    r"/home/sarthakredasani/Documents/CDAC_ML/Cases/Cases/Sonar/Sonar.csv",
)
sonar = pd.read_csv(SONAR_CSV)

# Preview the first five rows.
sonar.head(5)


Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V52,V53,V54,V55,V56,V57,V58,V59,V60,Class
0,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032,R
1,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0084,0.0089,0.0048,0.0094,0.0191,0.014,0.0049,0.0052,0.0044,R
2,0.0262,0.0582,0.1099,0.1083,0.0974,0.228,0.2431,0.3771,0.5598,0.6194,...,0.0232,0.0166,0.0095,0.018,0.0244,0.0316,0.0164,0.0095,0.0078,R
3,0.01,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0121,0.0036,0.015,0.0085,0.0073,0.005,0.0044,0.004,0.0117,R
4,0.0762,0.0666,0.0481,0.0394,0.059,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0031,0.0054,0.0105,0.011,0.0015,0.0072,0.0048,0.0107,0.0094,R


In [3]:
# Label-encode the target column and split into train/test sets

In [5]:
# Replace the string class labels with integer codes (the fitted encoder is
# kept in `le` so the codes can be mapped back to the original labels).
le = LabelEncoder()
sonar['Class'] = le.fit_transform(sonar['Class'])

# Target is the encoded class; features are every remaining column.
y = sonar['Class']
X = sonar.drop(columns='Class')

# 70/30 split, stratified on the target so both parts keep the class ratio;
# the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=25,
)

In [6]:
# Baseline: polynomial-kernel SVM with library defaults, scored by accuracy
# on the held-out test split.
svm = SVC(kernel='poly').fit(X_train, y_train)
y_pred = svm.predict(X_test)
baseline_accuracy = accuracy_score(y_test, y_pred)
print(baseline_accuracy)

0.7936507936507936


In [7]:
# Tuning SVM hyperparameters: polynomial degree and regularization strength C

In [8]:
# Manual grid search over polynomial degree and C, scored by test accuracy.
# NOTE(review): every candidate is scored on the test split and the best is
# picked from those scores, so the top value is an optimistic estimate.
Ds = [2, 3, 4]
Cs = np.linspace(0.01, 5, 20)

# Degree-major ordering, identical to nesting the C loop inside the degree loop.
grid = [(d, c) for d in Ds for c in Cs]

scores = []
for d, c in grid:
    svm = SVC(kernel='poly', degree=d, C=c).fit(X_train, y_train)
    y_pred = svm.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    scores.append([c, d, acc])

# One row per (C, degree) pair; best accuracy first.
df_scores = pd.DataFrame(scores, columns=['C', 'Degree', 'score'])
df_scores.sort_values('score', ascending=False)

Unnamed: 0,C,Degree,score
28,2.111053,3,0.809524
41,0.272632,4,0.809524
42,0.535263,4,0.793651
24,1.060526,3,0.793651
23,0.797895,3,0.793651
22,0.535263,3,0.793651
21,0.272632,3,0.793651
27,1.848421,3,0.793651
29,2.373684,3,0.793651
26,1.585789,3,0.793651


In [9]:
# Evaluating with ROC AUC / log loss
# SVC(probability=True, ...) must be set so that predict_proba() is available

In [10]:
# Linear-kernel SVM with probability estimates enabled, scored by ROC AUC.
svm = SVC(kernel='linear', probability=True, random_state=25).fit(X_train, y_train)
y_pred_prob = svm.predict_proba(X_test)

# Column 1 is the estimated probability of the class encoded as 1.
linear_auc = roc_auc_score(y_test, y_pred_prob[:, 1])
print(linear_auc)

0.8052738336713996


In [12]:
# ROC AUC

In [13]:
# Same degree/C grid as before, now ranked by ROC AUC (higher is better).
# NOTE(review): candidates are scored on the test split, so the best AUC
# reported here is an optimistic estimate.
Ds = [2, 3, 4]
Cs = np.linspace(0.01, 5, 20)

# Degree-major ordering, identical to nesting the C loop inside the degree loop.
grid = [(d, c) for d in Ds for c in Cs]

scores = []
for d, c in grid:
    svm = SVC(kernel='poly', degree=d, C=c, probability=True, random_state=25)
    svm.fit(X_train, y_train)
    y_pred_prob = svm.predict_proba(X_test)
    # Column 1 is the estimated probability of the class encoded as 1.
    auc = roc_auc_score(y_test, y_pred_prob[:, 1])
    scores.append([c, d, auc])

# One row per (C, degree) pair; best AUC first.
df_scores = pd.DataFrame(scores, columns=['C', 'Degree', 'score'])
df_scores.sort_values('score', ascending=False)

Unnamed: 0,C,Degree,score
42,0.535263,4,0.882353
43,0.797895,4,0.880325
44,1.060526,4,0.87931
45,1.323158,4,0.877282
46,1.585789,4,0.871197
41,0.272632,4,0.870183
25,1.323158,3,0.868154
26,1.585789,3,0.86714
24,1.060526,3,0.866126
29,2.373684,3,0.865619


In [14]:
# Log Loss

In [15]:
# Same degree/C grid, now ranked by log loss (lower is better).
# NOTE(review): candidates are scored on the test split, so the best loss
# reported here is an optimistic estimate.
Ds = [2, 3, 4]
Cs = np.linspace(0.01, 5, 20)

# Degree-major ordering, identical to nesting the C loop inside the degree loop.
grid = [(d, c) for d in Ds for c in Cs]

scores = []
for d, c in grid:
    svm = SVC(kernel='poly', degree=d, C=c, probability=True, random_state=25)
    svm.fit(X_train, y_train)
    y_pred_prob = svm.predict_proba(X_test)
    # log_loss takes the full per-class probability matrix, not a single column.
    loss = log_loss(y_test, y_pred_prob)
    scores.append([c, d, loss])

# One row per (C, degree) pair; smallest loss first.
df_scores = pd.DataFrame(scores, columns=['C', 'Degree', 'score'])
df_scores.sort_values('score', ascending=True)

Unnamed: 0,C,Degree,score
43,0.797895,4,0.42741
42,0.535263,4,0.431545
44,1.060526,4,0.433653
45,1.323158,4,0.437697
46,1.585789,4,0.442098
47,1.848421,4,0.451597
41,0.272632,4,0.45585
28,2.111053,3,0.459368
35,3.949474,3,0.459767
34,3.686842,3,0.459816
