# SVM

In [36]:
import pandas as pd
from sklearn import svm
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn import metrics

## Loading Dataset

In [37]:
input_header = {"PROC_TRACEINFO" : "id",
                "OP070_V_1_angle_value": "angle_1",
                "OP090_SnapRingPeakForce_value" : "snap_ring_peak_force",
                "OP070_V_2_angle_value" : "angle_2",
                "OP120_Rodage_I_mesure_value" : "rodage_i",
                "OP090_SnapRingFinalStroke_value" : "snap_ring_final_stroke",
                "OP110_Vissage_M8_torque_value" : "vissage_m8_torque",
                "OP100_Capuchon_insertion_mesure" : "capuchon_insertion",
                "OP120_Rodage_U_mesure_value" : "rodage_u",
                "OP070_V_1_torque_value" : "torque_1",
                "OP090_StartLinePeakForce_value" : "start_line_peak_force",
                "OP110_Vissage_M8_angle_value" : "vissage_m8_angle",
                "OP090_SnapRingMidPointForce_val" : "snap_ring_midpoint_force",
                "OP070_V_2_torque_value" : "torque_2"}
output_header = {"PROC_TRACEINFO" : "id",
                 "Binar OP130_Resultat_Global_v" : "result"}

train_input = pd.read_csv("../data/train_inputs.csv", header=0).rename(columns=input_header)
train_output = pd.read_csv("../data/train_output.csv", header=0).rename(columns=output_header)

## Preparing Dataset

In [32]:
train_input_ = train_input[train_input.columns[~train_input.columns.isin(["id", "capuchon_insertion"])]]
X_train, X_test, y_train, y_test = train_test_split(train_input_, train_output["result"], test_size = 0.3, random_state = 123)

## Model Generation

In [33]:
svm = svm.SVC()
svm.fit(X_train, y_train)

SVC()

## Model Evaluation

In [34]:
y_pred = svm.predict(X_test)

metrics.accuracy_score(y_test, y_pred)

0.9911154031868662

We observe the same issue, we had with the Naive Bayesian Classifier...  
Overfitting !!

## Balancing Classes

In [35]:
svm = svm.SVC(class_weight="balanced")
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)
metrics.accuracy_score(y_test, y_pred)

AttributeError: 'SVC' object has no attribute 'SVC'

More realistic.

## Cross Validation

In [38]:
train_input_ = train_input[train_input.columns[~train_input.columns.isin(["id", "capuchon_insertion"])]]

svm_cross = svm.SVC(class_weight="balanced")

scores = cross_val_score(svm_cross, train_input_, train_output["result"], cv = 5)
scores

array([0.50818485, 0.52846588, 0.52238157, 0.5116616 , 0.53136318])

## Tests