# SVM Classifier

#### Best Params
Split Size - 0.2  
C - 10  
Kernel - rbf  
Gamma - 0.1  
Random State - 95  

Accuracy - 88.93%  

In [1]:
import pandas as pd

from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [2]:
data = pd.read_csv('datasets/magic04.data', 
                   names=['fLength', 'fWidth', 'fSize', 'fConc', 'fConc1', 'fAsym', 'fM3Long', 'fM3Trans', 'fAlpha', 'fDist', 'class'])

data.head()

Unnamed: 0,fLength,fWidth,fSize,fConc,fConc1,fAsym,fM3Long,fM3Trans,fAlpha,fDist,class
0,28.7967,16.0021,2.6449,0.3918,0.1982,27.7004,22.011,-8.2027,40.092,81.8828,g
1,31.6036,11.7235,2.5185,0.5303,0.3773,26.2722,23.8238,-9.9574,6.3609,205.261,g
2,162.052,136.031,4.0612,0.0374,0.0187,116.741,-64.858,-45.216,76.96,256.788,g
3,23.8172,9.5728,2.3385,0.6147,0.3922,27.2107,-6.4633,-7.1513,10.449,116.737,g
4,75.1362,30.9205,3.1611,0.3168,0.1832,-5.5277,28.5525,21.8393,4.648,356.462,g


In [3]:
data['class'] = (data['class'] == 'g').astype(int)
data['class'].unique()

array([1, 0])

In [4]:
X = data.drop('class', axis=1)
y = data['class']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X = X_scaled
X.shape

(19020, 10)

##### Finding best hyperparamters

In [5]:
def train_eval(X, y, c, kernel, gamma, iternum):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=iternum)
    
    sv_model = SVC(C=c, kernel=kernel, gamma=gamma)
    sv_model.fit(X_train, y_train)
    
    y_pred = sv_model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    
    return accuracy
    

In [6]:
best_acc = -1
best_model = None

for C in [0.1, 1, 10]:
    for kerneltype in ['linear', 'rbf']:
        for gammaval in [0.1, 1, 'scale']:
            for state in range(0, 1):
                acc = train_eval(X, y, c=C, kernel=kerneltype, gamma=gammaval, iternum=state)
                
                model = f'Accuracy {round(acc, 5)} C {C} Kernel {kerneltype} Gamma {gammaval} RandomState {state}'
                
                if acc > best_acc:
                    print(model)
                    best_acc = acc
                    best_model = model
                    print('\n', best_model, '\n')
                    
                else:
                    print(model)

Accuracy 0.7918 C 0.1 Kernel linear Gamma 0.1 RandomState 0

 Accuracy 0.7918 C 0.1 Kernel linear Gamma 0.1 RandomState 0 

Accuracy 0.7918 C 0.1 Kernel linear Gamma 1 RandomState 0
Accuracy 0.7918 C 0.1 Kernel linear Gamma scale RandomState 0
Accuracy 0.85331 C 0.1 Kernel rbf Gamma 0.1 RandomState 0

 Accuracy 0.85331 C 0.1 Kernel rbf Gamma 0.1 RandomState 0 

Accuracy 0.84306 C 0.1 Kernel rbf Gamma 1 RandomState 0
Accuracy 0.85331 C 0.1 Kernel rbf Gamma scale RandomState 0
Accuracy 0.79259 C 1 Kernel linear Gamma 0.1 RandomState 0
Accuracy 0.79259 C 1 Kernel linear Gamma 1 RandomState 0
Accuracy 0.79259 C 1 Kernel linear Gamma scale RandomState 0
Accuracy 0.87014 C 1 Kernel rbf Gamma 0.1 RandomState 0

 Accuracy 0.87014 C 1 Kernel rbf Gamma 0.1 RandomState 0 

Accuracy 0.86278 C 1 Kernel rbf Gamma 1 RandomState 0
Accuracy 0.87014 C 1 Kernel rbf Gamma scale RandomState 0
Accuracy 0.79259 C 10 Kernel linear Gamma 0.1 RandomState 0
Accuracy 0.79259 C 10 Kernel linear Gamma 1 RandomState

In [7]:
best_model

'Accuracy 0.87382 C 10 Kernel rbf Gamma 0.1 RandomState 0'

In [10]:
best_acc = -1
best_model = None
for i in range(1, 100):
    acc = train_eval(X, y, c=10, kernel='rbf', gamma=0.1, iternum=i)          
    model = f'Accuracy {round(acc, 5)} C 10 Kernel rbf Gamma 0.1 RandomState {i}'

    if acc > best_acc:
        best_acc = acc
        best_model = model
        print(best_model)

    if i%10 == 0:
        print(f'-------------------------------------------------------------------------------------> RandomState {i}')

Accuracy 0.86987 C 10 Kernel rbf Gamma 0.1 RandomState 1
Accuracy 0.87198 C 10 Kernel rbf Gamma 0.1 RandomState 2
Accuracy 0.8725 C 10 Kernel rbf Gamma 0.1 RandomState 4
Accuracy 0.87802 C 10 Kernel rbf Gamma 0.1 RandomState 6
Accuracy 0.8796 C 10 Kernel rbf Gamma 0.1 RandomState 9
-------------------------------------------------------------------------------------> RandomState 10
-------------------------------------------------------------------------------------> RandomState 20
-------------------------------------------------------------------------------------> RandomState 30
Accuracy 0.88433 C 10 Kernel rbf Gamma 0.1 RandomState 37
-------------------------------------------------------------------------------------> RandomState 40
Accuracy 0.88696 C 10 Kernel rbf Gamma 0.1 RandomState 46
-------------------------------------------------------------------------------------> RandomState 50
-------------------------------------------------------------------------------------> Rand

In [11]:
best_model

'Accuracy 0.88933 C 10 Kernel rbf Gamma 0.1 RandomState 95'