
# Support Vector Machine Classification


## Importing the libraries

In [None]:

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


## Start stopwatch

In [None]:

from time import process_time
# Take the starting process CPU-time reading; the evaluation cell subtracts it
# from a later reading to report the CPU time used by the run.
time_start = process_time()


## Import the dataset

In [None]:

# Load the features into X and the labels into y from two separate CSV files.
# NOTE(review): both paths are absolute and point into one user's home
# directory, so this cell only runs where that exact layout exists.
X = pd.read_csv('/Users/rafavarela/Projects/MLhub/data/classification/features.csv')
y = pd.read_csv('/Users/rafavarela/Projects/MLhub/data/classification/labels.csv')


### Categorize dataset

In [None]:
def categorize_column(dataframe, category_threshold, column):
    """Integer-encode one low-cardinality column of ``dataframe`` in place.

    Args:
        dataframe: Frame holding the column; it is modified in place.
        category_threshold: Largest number of distinct values a column may
            have and still be encoded.
        column: Label of the column to inspect.

    Returns:
        None. When the column has more distinct values than the threshold it
        is left untouched; otherwise it is replaced by its category codes.
    """
    series = dataframe[column]
    distinct_count = len(series.unique())

    # Too many distinct values to be treated as categorical: leave it alone.
    if distinct_count > category_threshold:
        return

    dataframe[column] = series.astype('category').cat.codes
        

def categorize_dataframe(dataframe, category_threshold):
    """Apply ``categorize_column`` to every column of ``dataframe`` in place.

    Args:
        dataframe: Frame whose columns are inspected one by one.
        category_threshold: Passed through unchanged as the distinct-value
            limit for each column.
    """
    # Iterating a DataFrame yields its column labels.
    for column_label in dataframe:
        categorize_column(dataframe, category_threshold, column_label)
        
# Integer-encode every column with at most 10 distinct values, for both the
# features and the labels, before handing the data to scikit-learn.
categorize_dataframe(X, 10)
categorize_dataframe(y, 10)

# From here on X and y are plain NumPy arrays rather than DataFrames.
X, y = X.to_numpy(), y.to_numpy()


## Splitting the dataset into the Training set and Test set

In [None]:

from sklearn.model_selection import train_test_split
# Hold out 10% of the rows as the test set. shuffle=0 is falsy, so shuffling
# is switched off for this split.
# NOTE(review): with shuffling off, random_state presumably has no effect and
# the split follows file order; confirm against the scikit-learn documentation.
X_train, X_test, y_train, y_test = train_test_split(X, y,  test_size=0.1,
                        train_size=None,
                        random_state=0,
                        shuffle=0,
                        # Both comparisons are between unequal string literals, so this
                        # expression always evaluates to None (no stratification).
                        stratify=X if "None" == "features" else y if "None" == "labels" else None)


## Training the Support Vector Machine model on the Training set

In [None]:

from sklearn.svm import SVC
# Build the support vector classifier with every hyperparameter spelled out,
# then fit it on the training split.
classifier = SVC(C = 1.0,
                kernel = "rbf",
                degree = 3,
                # "scale".isnumeric() is False, so the string "scale" is what gets
                # passed; the bare name `scale` in the first branch is never
                # evaluated and therefore never raises NameError.
                gamma= scale if "scale".isnumeric() else "scale",
                coef0 = 0.0,
                shrinking = True,
                probability = False,
                tol = 0.001,
                max_iter = -1,
                decision_function_shape = "ovr",
                break_ties = False)
classifier.fit(X_train, y_train)


## Predicting the Test set results

In [None]:

# Predict labels for the held-out test rows; these predictions are scored in
# the evaluation cell and written to the statistics file.
y_pred = classifier.predict(X_test)


## Evaluating the Model Performance

In [None]:

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, plot_confusion_matrix
import seaborn as sns

# Score the predictions against the held-out labels.
acc = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
# Keep recall exactly as recall_score returns it. Applying `** 0.5` would
# inflate every value strictly between 0 and 1, and this same variable is both
# printed here and written to the saved statistics.
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
print(f'Accuracy Score: {acc}')
print(f'Precision Score: {precision}')
print(f'Recall Score: {recall}')
print(f'F1 Score: {f1}')

# Draw the confusion matrix as an annotated heatmap; fmt='g' keeps the cell
# counts out of scientific notation.
cm = confusion_matrix(y_test, y_pred)
cm_df = pd.DataFrame(cm)
ax = plt.subplot()
sns.heatmap(cm, annot=True, fmt='g', ax=ax)
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title('Confusion Matrix')
plt.show()

# Stop the stopwatch: CPU time consumed since time_start, rounded to 2 decimals.
time_stop = process_time()
cpu_time = round(time_stop - time_start, 2)
print(f'Elapsed CPU Time: {cpu_time} seconds')


## Saving Model Statistics

In [None]:

import os
import json

# Directory (relative to the current working directory) that receives the
# statistics file.
path = 'statistics'
# exist_ok=True creates the directory when it is missing and does nothing when
# it is already there, which avoids the gap between a separate existence check
# and the creation call.
os.makedirs(path, exist_ok=True)

# Everything the run reports: the four scores, the CPU time, and the flattened
# predicted / true labels as plain lists so they are JSON-serializable.
stats = {
    "Accuracy Score": acc,
    "Precision Score": precision,
    "Recall Score": recall,
    "F1 Score": f1,
    "cpu time": cpu_time,
    "predicted": y_pred.flatten().tolist(),
    "real": y_test.flatten().tolist()
}

# ensure_ascii=False lets json.dump emit non-ASCII characters verbatim, so the
# file encoding is pinned to UTF-8 instead of relying on the platform default.
with open(os.path.join(path, "svm classifier.json"), "w", encoding="utf-8") as f:
    json.dump(stats, f, ensure_ascii=False, indent=4)
