In [32]:
import pandas as pd
import numpy as np 
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder,MinMaxScaler
from sklearn.metrics import classification_report,f1_score,accuracy_score
from sklearn.compose import make_column_transformer,make_column_selector

<h1 style = 'color:orange'>Support Vector Machine</h1>

In [2]:
kyph = pd.read_csv('../Cases/Kyphosis/Kyphosis.csv')
y = kyph['Kyphosis']
X = kyph.drop('Kyphosis',axis=1)

ohe = OneHotEncoder().set_output(transform='pandas')

column_tranformer = make_column_transformer((ohe,make_column_selector(dtype_include=object)),remainder='passthrough',verbose_feature_names_out=False)

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=25,stratify=y)

svm = SVC(kernel='linear',C=1) #svm model
svm.fit(X_train,y_train) 
y_pred = svm.predict(X_test)
print(classification_report(y_test,y_pred))
print(f1_score(y_test,y_pred,pos_label='absent'))

              precision    recall  f1-score   support

      absent       0.87      1.00      0.93        20
     present       1.00      0.40      0.57         5

    accuracy                           0.88        25
   macro avg       0.93      0.70      0.75        25
weighted avg       0.90      0.88      0.86        25

0.9302325581395349


In [3]:
#different values of c

sc = StandardScaler()

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=25,stratify=y)

X_train_scaled = sc.fit_transform(X_train)
X_test_scaled =sc.transform(X_test)

c_value = np.linspace(0.0001,5,20)
scores = []
for c in c_value:
    svm = SVC(kernel='linear',C = c)
    svm.fit(X_train_scaled,y_train)
    y_pred = svm.predict(X_test_scaled)
    scores.append([c,f1_score(y_test,y_pred,pos_label='present')])
scores = pd.DataFrame(scores,columns=['C value','F1 score'])
scores.sort_values('F1 score',ascending = False)

Unnamed: 0,C value,F1 score
1,0.263253,0.571429
2,0.526405,0.571429
3,0.789558,0.571429
4,1.052711,0.571429
12,3.157932,0.571429
5,1.315863,0.571429
6,1.579016,0.571429
7,1.842168,0.571429
8,2.105321,0.571429
9,2.368474,0.571429


<h1 style = 'color:orange'>SVM on Wisconsin Dataset</h1>

In [10]:
wisconsin = pd.read_csv('../Cases/Wisconsin/BreastCancer.csv')


X = wisconsin.drop(['Class','Code'],axis = 1)
y = wisconsin['Class']

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=25,stratify=y)


c_value = np.linspace(0.0001,5,20)
scores = []
for c in c_value:
    svm = SVC(kernel='linear',C = c)
    svm.fit(X_train,y_train)
    y_pred = svm.predict(X_test)
    scores.append([c,f1_score(y_test,y_pred,pos_label='Malignant')])
scores = pd.DataFrame(scores,columns=['C value','F1 score'])
scores.sort_values('F1 score',ascending = False)

Unnamed: 0,C value,F1 score
1,0.263253,0.957746
2,0.526405,0.957746
3,0.789558,0.957746
4,1.052711,0.957746
12,3.157932,0.957746
5,1.315863,0.957746
6,1.579016,0.957746
7,1.842168,0.957746
8,2.105321,0.957746
9,2.368474,0.957746


<h1 style = 'color:ORange'>Radial Basis Function</h1>

In [12]:
c_value = np.linspace(0.0001,5,20)
g_value = np.linspace(0.0001,5,20)
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=25,stratify=y)
scores = []
for g in g_value:
    for c in c_value:
        svm = SVC(kernel='rbf',C = c,gamma=g)
        svm.fit(X_train,y_train)
        y_pred = svm.predict(X_test)
        scores.append([g,c,f1_score(y_test,y_pred,pos_label='Malignant')])
    
scores = pd.DataFrame(scores,columns=['Gamma','C value','F1 score'])
scores.sort_values('F1 score',ascending = False)

Unnamed: 0,Gamma,C value,F1 score
15,0.000100,3.947389,0.957143
13,0.000100,3.421084,0.957143
12,0.000100,3.157932,0.957143
10,0.000100,2.631626,0.957143
11,0.000100,2.894779,0.957143
...,...,...,...
281,3.684237,0.263253,0.000000
320,4.210542,0.000100,0.000000
321,4.210542,0.263253,0.000000
341,4.473695,0.263253,0.000000


<h1 style = 'color:orange'>Polynomial Kernel</h1>

In [13]:
wisconsin = pd.read_csv('../Cases/Wisconsin/BreastCancer.csv')

X = wisconsin.drop(['Class','Code'],axis = 1)
y = wisconsin['Class']

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=25,stratify=y)

svm = SVC(kernel='poly',C = c,degree=4)
svm.fit(X_train,y_train)

y_pred = svm.predict(X_test)

print(classification_report(y_test,y_pred))


              precision    recall  f1-score   support

      Benign       0.93      0.99      0.96       138
   Malignant       0.98      0.86      0.92        72

    accuracy                           0.95       210
   macro avg       0.96      0.93      0.94       210
weighted avg       0.95      0.95      0.95       210



<h1 style = 'color:orange'>Multiclass Svm</h1>
<h1>OVO</h1>

In [27]:
glass = pd.read_csv('../Cases/Glass_Identification/Glass.csv')

y = glass['Type']
X  = glass.drop('Type',axis=1)

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=25,stratify=y)

sc = StandardScaler()
X_train_scaled =sc.fit_transform(X_train)
X_test_scaled = sc.transform(X_test)

svm = SVC(kernel = 'linear',C=5,decision_function_shape='ovo')

svm.fit(X_train_scaled,y_train)
y_pred = svm.predict(X_test_scaled)
print(classification_report(y_test,y_pred))

                                      precision    recall  f1-score   support

    building_windows_float_processed       0.60      0.71      0.65        21
building_windows_non_float_processed       0.56      0.61      0.58        23
                          containers       0.50      0.50      0.50         4
                           headlamps       1.00      0.78      0.88         9
                           tableware       0.67      0.67      0.67         3
     vehicle_windows_float_processed       0.00      0.00      0.00         5

                            accuracy                           0.62        65
                           macro avg       0.55      0.54      0.55        65
                        weighted avg       0.59      0.62      0.60        65



<h1 style = 'color:orange'>OYR</h1>

In [36]:
glass = pd.read_csv('../Cases/Glass_Identification/Glass.csv')

y = glass['Type']
X  = glass.drop('Type',axis=1)

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=25,stratify=y)

sc = StandardScaler()
X_train_scaled =sc.fit_transform(X_train)
X_test_scaled = sc.transform(X_test)

svm = SVC(kernel = 'linear',C=5,decision_function_shape='ovr')

svm.fit(X_train_scaled,y_train)
y_pred = svm.predict(X_test_scaled)
print(classification_report(y_test,y_pred))

                                      precision    recall  f1-score   support

    building_windows_float_processed       0.60      0.71      0.65        21
building_windows_non_float_processed       0.56      0.61      0.58        23
                          containers       0.50      0.50      0.50         4
                           headlamps       1.00      0.78      0.88         9
                           tableware       0.67      0.67      0.67         3
     vehicle_windows_float_processed       0.00      0.00      0.00         5

                            accuracy                           0.62        65
                           macro avg       0.55      0.54      0.55        65
                        weighted avg       0.59      0.62      0.60        65



In [37]:
#without scaling
glass = pd.read_csv('../Cases/Glass_Identification/Glass.csv')

y = glass['Type']
X  = glass.drop('Type',axis=1)

c_values = np.linspace(0.001,5,20)
scores = []

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=25,stratify=y)



for c in c_values: 
    svm = SVC(kernel = 'linear',C=c,decision_function_shape='ovr')
    svm.fit(X_train,y_train)
    y_pred = svm.predict(X_test)
    scores.append([c,accuracy_score(y_pred,y_test)])
scores = pd.DataFrame(scores,columns=['C values','Score'])
scores.sort_values('Score',ascending=False)

Unnamed: 0,C values,Score
5,1.316526,0.661538
7,1.842737,0.646154
4,1.053421,0.646154
6,1.579632,0.646154
8,2.105842,0.646154
18,4.736895,0.646154
19,5.0,0.646154
13,3.421368,0.630769
11,2.895158,0.630769
10,2.632053,0.630769


In [30]:
#with standard scaling
glass = pd.read_csv('../Cases/Glass_Identification/Glass.csv')

y = glass['Type']
X  = glass.drop('Type',axis=1)

c_values = np.linspace(0.001,5,20)
scores = []

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=25,stratify=y)

sc = StandardScaler()
X_train_scaled =sc.fit_transform(X_train)
X_test_scaled = sc.transform(X_test)

for c in c_values: 
    svm = SVC(kernel = 'linear',C=c,decision_function_shape='ovr')
    svm.fit(X_train_scaled,y_train)
    y_pred = svm.predict(X_test_scaled)
    scores.append([c,accuracy_score(y_pred,y_test)])
scores = pd.DataFrame(scores,columns=['C values','Score'])
scores.sort_values('Score',ascending=False)

Unnamed: 0,C values,Score
1,0.264105,0.615385
19,5.0,0.615385
18,4.736895,0.615385
3,0.790316,0.6
15,3.947579,0.6
14,3.684474,0.6
7,1.842737,0.6
6,1.579632,0.6
13,3.421368,0.6
12,3.158263,0.6


In [34]:
#with minmax scaling
glass = pd.read_csv('../Cases/Glass_Identification/Glass.csv')

y = glass['Type']
X  = glass.drop('Type',axis=1)

c_values = np.linspace(0.001,5,20)
scores = []

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=25,stratify=y)

sc = MinMaxScaler()
X_train_scaled =sc.fit_transform(X_train)
X_test_scaled = sc.transform(X_test)

for c in c_values: 
    svm = SVC(kernel = 'linear',C=c,decision_function_shape='ovr')
    svm.fit(X_train_scaled,y_train)
    y_pred = svm.predict(X_test_scaled)
    scores.append([c,accuracy_score(y_pred,y_test)])
scores = pd.DataFrame(scores,columns=['C values','Score'])
scores.sort_values('Score',ascending=False)

Unnamed: 0,C values,Score
13,3.421368,0.584615
10,2.632053,0.569231
6,1.579632,0.569231
12,3.158263,0.569231
16,4.210684,0.569231
14,3.684474,0.569231
17,4.473789,0.553846
15,3.947579,0.553846
18,4.736895,0.553846
19,5.0,0.553846


In [None]:
#with radial basic function