In [150]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, r2_score, f1_score

In [125]:
df = pd.read_csv('Kyphosis.csv')

In [126]:
df

Unnamed: 0,Kyphosis,Age,Number,Start
0,absent,71,3,5
1,absent,158,3,14
2,present,128,4,5
3,absent,2,5,1
4,absent,1,4,15
...,...,...,...,...
76,present,157,3,13
77,absent,26,7,13
78,absent,120,2,13
79,present,42,7,6


In [127]:
X = df.drop('Kyphosis', axis = 1)
y = df['Kyphosis']

In [128]:
from tqdm import tqdm
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=24, stratify=y)

In [129]:
C = [0.001,0.5,1,2,3]
scores = []
for i in tqdm(range(len(C))):
    svm = SVC(kernel='linear', C=C[i])
    svm.fit(X_train,y_train)
    y_pred = svm.predict(X_test)
    scores.append(accuracy_score(y_test,y_pred))


i_max = np.argmax(scores)
print('Max score is ', scores[i_max])
print('K value is ', C[i_max])
print(scores)   
    




100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 69.64it/s]

Max score is  0.7647058823529411
K value is  0.001
[0.7647058823529411, 0.7647058823529411, 0.7647058823529411, 0.7647058823529411, 0.7647058823529411]





In [168]:
# USing MinMaxScaler and SVM 

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, StandardScaler

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=24, stratify=y)
scaler =StandardScaler()


In [170]:

C = [0.001,0.5,1,2,3]
scores1 = []
for i in tqdm(range(len(C))):
    svm = SVC(kernel='linear', C=C[i])
    pipe = Pipeline([('MInMaxScaler', scaler), ('SVM',svm)])
    pipe.fit(X_train, y_train)
    y_pred = pipe.predict(X_test)
    scores1.append(accuracy_score(y_test,y_pred))


i_max = np.argmax(scores1)
print('Max score is ', scores1[i_max])
print('K value is ', C[i_max])
print(scores)   
    





100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 227.60it/s]

Max score is  0.8
K value is  0.001
[]





# Radial Basis Function  

In [196]:
Cs = [0.001,0.5,1,2,3]
Gs = [0.001,0.5,1,2,3]

scores = []

scaler = MinMaxScaler()

for i in tqdm(range(len(Cs))):
    for j in Gs:
        svm = SVC(kernel='rbf', C = Cs[i] , gamma= j)
        pipe = Pipeline([('MInMaxScaler', scaler), ('SVM',svm)])
        pipe.fit(X_train,y_train)
        y_pred = pipe.predict(X_test)
        scores.append([Cs[i],j,f1_score(y_test,y_pred, pos_label='present')])

df_data = pd.DataFrame(data = scores, columns = ['Cs', 'gamma', 'f1'])
df_data.sort_values('f1' , ascending = False).iloc[0]



100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 45.71it/s]


Cs       3.000000
gamma    3.000000
f1       0.615385
Name: 24, dtype: float64

In [198]:
df_data

Unnamed: 0,Cs,gamma,f1
0,0.001,0.001,0.0
1,0.001,0.5,0.0
2,0.001,1.0,0.0
3,0.001,2.0,0.0
4,0.001,3.0,0.0
5,0.5,0.001,0.0
6,0.5,0.5,0.0
7,0.5,1.0,0.0
8,0.5,2.0,0.0
9,0.5,3.0,0.0


In [200]:
y.value_counts()

Kyphosis
absent     64
present    17
Name: count, dtype: int64