In [1]:
import pandas as pd
import numpy as np 
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.metrics import classification_report,f1_score 
from sklearn.compose import make_column_transformer,make_column_selector

In [6]:
kyph = pd.read_csv('../Cases/Kyphosis/Kyphosis.csv')

X = kyph.drop('Kyphosis', axis=1)
y = kyph['Kyphosis']

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=25, test_size=0.3, stratify=y)

svc = SVC(kernel='linear', C=1)
svc.fit(X_train, y_train)

y_pred = svc.predict(X_test)

print(f1_score(y_test, y_pred, pos_label='present'))
print(classification_report(y_test, y_pred))

0.5714285714285714
              precision    recall  f1-score   support

      absent       0.87      1.00      0.93        20
     present       1.00      0.40      0.57         5

    accuracy                           0.88        25
   macro avg       0.93      0.70      0.75        25
weighted avg       0.90      0.88      0.86        25



In [12]:
# different values of C

scores = []
for c in np.linspace(0.001, 5, 20):
    svc = SVC(kernel='linear', C=c)
    svc.fit(X_train, y_train)
    y_pred = svc.predict(X_test)
    scores.append([c, f1_score(y_test, y_pred, pos_label='present')])

scores = pd.DataFrame(scores, columns=['C', 'f1 score'])
scores = scores.sort_values('f1 score',ascending=False)
scores

Unnamed: 0,C,f1 score
1,0.264105,0.571429
2,0.527211,0.571429
3,0.790316,0.571429
4,1.053421,0.571429
12,3.158263,0.571429
5,1.316526,0.571429
6,1.579632,0.571429
7,1.842737,0.571429
8,2.105842,0.571429
9,2.368947,0.571429


In [13]:
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

scores = []
for c in np.linspace(0.001, 5, 20):
    svc = SVC(kernel='linear', C=c)
    svc.fit(X_train_scaled, y_train)
    y_pred = svc.predict(X_test_scaled)
    scores.append([c, f1_score(y_test, y_pred, pos_label='present')])

scores = pd.DataFrame(scores, columns=['C', 'f1 score'])
scores = scores.sort_values('f1 score',ascending=False)
scores

Unnamed: 0,C,f1 score
3,0.790316,0.571429
2,0.527211,0.571429
5,1.316526,0.571429
4,1.053421,0.571429
12,3.158263,0.571429
13,3.421368,0.571429
6,1.579632,0.571429
7,1.842737,0.571429
8,2.105842,0.571429
9,2.368947,0.571429
