In [28]:
import numpy as np

import pandas as pd 

import sklearn.metrics as metrics 

from sklearn.svm import SVC 

In [29]:
# Read both feature tables from CSV: non-segmented first, then segmented.
df_n, df_s = (
    pd.read_csv(csv_name)
    for csv_name in ('Non_Segmented_Features.csv', 'Segmented_Features.csv')
)

# Partition each table into train / valid / test rows using its 'split' column.
df_nonseg_train, df_nonseg_valid, df_nonseg_test = (
    df_n.loc[df_n['split'] == part] for part in ('train', 'valid', 'test')
)

df_seg_train, df_seg_valid, df_seg_test = (
    df_s.loc[df_s['split'] == part] for part in ('train', 'valid', 'test')
)

In [30]:
# Turn every split DataFrame into a NumPy array (all columns are kept).
np_nonseg_train, np_nonseg_valid, np_nonseg_test = (
    split_df.to_numpy()
    for split_df in (df_nonseg_train, df_nonseg_valid, df_nonseg_test)
)

np_seg_train, np_seg_valid, np_seg_test = (
    split_df.to_numpy()
    for split_df in (df_seg_train, df_seg_valid, df_seg_test)
)

In [39]:
## Non Segmented Train, Validate Data
# Targets are read from column index 2; inputs are every column from index 6 onward.

y_train_ns, x_train_ns = np_nonseg_train[:, 2], np_nonseg_train[:, 6:]
y_val_ns, x_val_ns = np_nonseg_valid[:, 2], np_nonseg_valid[:, 6:]

In [40]:
## Segmented Train, Validate Data
# Targets are read from column index 2; inputs are every column from index 6 onward.

y_train_s, x_train_s = np_seg_train[:, 2], np_seg_train[:, 6:]
y_val_s, x_val_s = np_seg_valid[:, 2], np_seg_valid[:, 6:]

## Okay, Ready to use 

In [45]:
def Train_SVM(X_train, Y_train, X_val, Y_val, C, kernel, verbose = False, random_state = 0):
    """Fit a support-vector classifier and print its validation metrics.

    Parameters
    ----------
    X_train, X_val : array-like
        Feature matrices for fitting and for evaluation; cast to float32.
    Y_train, Y_val : array-like
        Class labels for fitting and for evaluation; cast to int.
    C : float
        Penalty applied to the slack variables. A large C pushes the slack
        towards 0; a low C allows more slack in the classifier boundary.
    kernel : str
        Kernel handed to ``SVC``. Options: 'linear', 'poly', 'rbf',
        'sigmoid', 'precomputed'.
    verbose : bool, default False
        When True, also scatter-plot the first two training features,
        coloured by training label.
    random_state : int or None, default 0
        Seed forwarded to ``SVC``; it only influences the probability
        estimates (and therefore the AUC), not the hard predictions.

    Returns
    -------
    None
        Micro / macro / weighted F1, accuracy and ROC AUC on the validation
        data are printed rather than returned.
    """
    Y_train = np.asarray(Y_train).astype('int')
    Y_val = np.asarray(Y_val).astype('int')

    X_train = np.asarray(X_train).astype('float32')
    X_val = np.asarray(X_val).astype('float32')

    # One model serves both purposes: predict() relies on the decision
    # function, so enabling probability estimates does not change the hard
    # predictions and a second, identical fit is unnecessary.
    clf = SVC(C = C, kernel = kernel, probability = True, random_state = random_state)
    clf.fit(X_train, Y_train)

    Y_val_pred = clf.predict(X_val)
    Y_val_pred_p = clf.predict_proba(X_val)

    # metrics
    f1_micro = metrics.f1_score(Y_val, Y_val_pred, average = 'micro')
    f1_macro = metrics.f1_score(Y_val, Y_val_pred, average = 'macro')
    f1_weighted = metrics.f1_score(Y_val, Y_val_pred, average = 'weighted')

    acc = metrics.accuracy_score(Y_val, Y_val_pred)

    if Y_val_pred_p.shape[1] == 2:
        # Binary problems need the 1-D score of the positive (second) class.
        AUC = metrics.roc_auc_score(Y_val, Y_val_pred_p[:, 1])
    else:
        # Probability columns follow clf.classes_; pass that order explicitly.
        AUC = metrics.roc_auc_score(Y_val, Y_val_pred_p, multi_class = 'ovo',
                                    labels = clf.classes_)

    print('f1 scores:', f1_micro, f1_macro, f1_weighted)
    print('accuracy: ', acc)
    print('AUC: ', AUC)

    if verbose:
        # Imported lazily so the optional plot is the only path needing matplotlib.
        import matplotlib.pyplot as plt

        plt.scatter(X_train[:, 0], X_train[:, 1], s = 30, c = Y_train,
                    cmap = plt.cm.Paired)
        plt.show()

In [47]:
# Fit on the non-segmented training arrays and print validation metrics (C = 10, 'rbf' kernel).
Train_SVM(x_train_ns, y_train_ns, x_val_ns, y_val_ns, 10, 'rbf')

f1 scores: 0.565 0.49221426893942466 0.4922142689394247
accuracy:  0.565
AUC:  0.7961


In [49]:
# Fit on the segmented training arrays and print validation metrics (C = 10, 'rbf' kernel).
Train_SVM(x_train_s, y_train_s, x_val_s, y_val_s, 10, 'rbf')

f1 scores: 0.775 0.7682178932178931 0.7682178932178931
accuracy:  0.775
AUC:  0.9429333333333334
