# SVM Classifier

In [1]:
import pandas as pd

from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [2]:
data = pd.read_csv('datasets/magic04.data', 
                   names=['fLength', 'fWidth', 'fSize', 'fConc', 'fConc1', 'fAsym', 'fM3Long', 'fM3Trans', 'fAlpha', 'fDist', 'class'])

data.head()

Unnamed: 0,fLength,fWidth,fSize,fConc,fConc1,fAsym,fM3Long,fM3Trans,fAlpha,fDist,class
0,28.7967,16.0021,2.6449,0.3918,0.1982,27.7004,22.011,-8.2027,40.092,81.8828,g
1,31.6036,11.7235,2.5185,0.5303,0.3773,26.2722,23.8238,-9.9574,6.3609,205.261,g
2,162.052,136.031,4.0612,0.0374,0.0187,116.741,-64.858,-45.216,76.96,256.788,g
3,23.8172,9.5728,2.3385,0.6147,0.3922,27.2107,-6.4633,-7.1513,10.449,116.737,g
4,75.1362,30.9205,3.1611,0.3168,0.1832,-5.5277,28.5525,21.8393,4.648,356.462,g


In [3]:
data['class'] = (data['class'] == 'g').astype(int)
data['class'].unique()

array([1, 0])

In [4]:
X = data.drop('class', axis=1)
y = data['class']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X = X_scaled
X.shape

(19020, 10)

##### Finding best hyperparamters

In [5]:
def train_eval(X, y, c, kernel, gamma, iternum):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=iternum)
    
    sv_model = SVC(C=c, kernel=kernel, gamma=gamma)
    sv_model.fit(X_train, y_train)
    
    y_pred = sv_model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    
    return accuracy
    

In [None]:
best_acc = -1
best_model = None

for C in [0.1, 1, 10]:
    for kerneltype in ['linear', 'rbf']:
        for gammaval in [0.1, 1, 'scale']:
            for state in range(0, 100):
                acc = train_eval(X, y, c=C, kernel=kerneltype, gamma=gammaval, iternum=state)
                
                model = f'Accuracy {round(acc, 5)} C {C} Kernel {kerneltype} Gamma {gammaval} RandomState {state}'
                
                if acc > best_acc:
                    print(model)
                    best_acc = acc
                    best_model = model
                    
                if state%10 == 0:
                    print(f'-----------------------------------------------------------------------> RandomState {state}')

Accuracy 0.7918 C 0.1 Kernel linear Gamma 0.1 RandomState 0
-----------------------------------------------------------------------> RandomState 0
Accuracy 0.7939 C 0.1 Kernel linear Gamma 0.1 RandomState 3
Accuracy 0.79784 C 0.1 Kernel linear Gamma 0.1 RandomState 6
-----------------------------------------------------------------------> RandomState 10
Accuracy 0.79995 C 0.1 Kernel linear Gamma 0.1 RandomState 19
-----------------------------------------------------------------------> RandomState 20
