# Support Vector Machine

## Preliminaries

Import preprocessing helper functions

In [6]:
import sys
# Extend the module search path so the import below can be resolved
# (presumably preprocessing_utils lives in this directory — confirm).
# The path is relative, so it depends on the kernel's working directory.
sys.path.append("../../helper-modules")
from preprocessing_utils import read_in_data, preprocess

Read the data into df_train, df_val and df_test

In [7]:
# Load the three data splits; the helper returns them in train / val / test order
# as far as the unpacking here is concerned.
(df_train, df_val, df_test) = read_in_data()

Create X_train, y_train, X_val, y_val, X_test, y_test

In [None]:
# Turn each raw split into a (features, labels) pair for the SVM pipeline.
# NOTE(review): the dataframes are passed as (train, test, val) while the results
# are unpacked as (train, val, test) — confirm this matches preprocess()'s signature.
train_xy, val_xy, test_xy = preprocess("SVM", df_train, df_test, df_val)
X_train, y_train = train_xy
X_val, y_val = val_xy
X_test, y_test = test_xy

## Model Building

In [None]:
from sklearn import svm
from sklearn.metrics import accuracy_score

Set a seed for reproducibility

In [None]:
# Fixed seed handed to the classifier's random_state so repeated runs are comparable.
SEED: int = 42

Create a Linear SVM classifier

In [None]:
# Solve the primal problem (dual=False): recommended here because the number of
# training examples is much larger than the number of features.
svc_options = {"dual": False, "random_state": SEED}
clf = svm.LinearSVC(**svc_options)
# Kept as the final bare expression so the cell still displays the fitted estimator.
clf.fit(X_train, y_train)

LinearSVC(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, loss='squared_hinge', max_iter=1000,
          multi_class='ovr', penalty='l2', random_state=42, tol=0.0001,
          verbose=0)

Predict on the validation set

In [None]:
# Score the fitted classifier on the validation split.
y_pred_val = clf.predict(X_val)
val_acc = accuracy_score(y_true=y_val, y_pred=y_pred_val)
print("Validation Accuracy:", val_acc)

Validation Accuracy: 0.6408125


Write the SVM experiment results to file

In [None]:
# Identifiers used for the experiment log row and the pickled model's filename.
model_type = "SVM"
model_number = 1
# Count the parameters the fitted model actually holds: every learned weight plus
# every intercept. For a binary problem this equals n_features + 1; for a
# one-vs-rest multi-class fit there is one weight vector and intercept per class,
# which "n_features + 1" would undercount.
num_params = int(clf.coef_.size + clf.intercept_.size)

In [None]:
# Start a fresh semicolon-delimited log for the SVM experiments ("w" truncates any
# existing file) and record this model's validation accuracy and parameter count.
with open("../training-results/ExperimentLogs_SVM.csv", "w") as file:
    header = "ModelNumber;ValAccuracy;NumParams\n"  # Header row
    file.write(header)
    # Terminate the record with a newline so any row appended later starts on its
    # own line instead of being glued onto this one.
    row = f"{model_number};{val_acc};{num_params}\n"
    file.write(row)

Save SVM model to file

In [None]:
import pickle

In [None]:
# Directory prefix for pickled models. The trailing slash matters: the filename is
# appended with plain string concatenation when the model is saved.
MODELS_PATH: str = "../trained-models/"

In [None]:
# Persist the fitted classifier as "<model_type>_<model_number>.pkl" under MODELS_PATH.
pkl_filename = f"{model_type}_{model_number}.pkl"
model_path = MODELS_PATH + pkl_filename
with open(model_path, 'wb') as model_file:
    pickle.dump(clf, model_file)