# Support Vector Machines

### Imports

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from tqdm import tqdm
from sklearn.model_selection import GridSearchCV
from sklearn.feature_selection import SelectPercentile, f_classif

### Model 1
#### Read CSV

In [2]:
train_m1 = pd.read_csv('../../../../Data_AA2/train_m1.csv', sep = ',')
train_m1.head()

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,...,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,y
0,0.37521,-0.756929,-1.925779,0.580938,-0.515445,0.939376,-0.453565,-0.763196,1.192581,0.001762,...,-0.561357,0.196894,-0.348973,0.188003,-0.118566,-0.651221,-0.324357,0.259352,0.395979,0
1,0.37521,1.471189,-0.283258,-0.824024,-0.515445,0.939376,-0.453565,1.310279,-0.098471,-0.713715,...,0.148956,0.196894,-0.348973,0.188003,0.837519,1.529013,-0.281151,0.769787,0.843536,0
2,0.37521,0.078615,-0.283258,1.049258,-0.515445,0.939376,-0.453565,-0.763196,-1.389523,1.432715,...,-0.561357,0.196894,-0.348973,0.188003,0.837519,-0.230686,0.95022,0.773829,0.843536,0
3,0.37521,-0.756929,-0.283258,-0.824024,-0.515445,-1.090085,-0.453565,1.310279,1.192581,1.432715,...,-0.2062,0.196894,-0.348973,0.188003,-0.118566,-0.651221,-0.324357,0.284758,0.395979,0
4,0.37521,-1.035444,-1.925779,1.049258,1.938451,0.939376,-0.453565,1.310279,0.76223,-0.713715,...,0.859269,0.196894,-0.348973,0.188003,0.646302,0.717242,0.885411,0.710313,0.32947,0


In [3]:
test_m1 = pd.read_csv('../../../../Data_AA2/test_m1.csv', sep = ',')
test_m1.head()

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,...,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,y
0,0.37521,1.471189,-0.283258,0.580938,-0.515445,0.939376,-0.453565,1.310279,0.76223,0.717238,...,-0.561357,0.196894,-0.348973,0.188003,0.646302,0.717242,0.885411,0.710313,0.32947,0
1,0.37521,0.914159,-1.925779,-0.355703,-0.515445,-1.090085,-0.453565,1.310279,0.76223,1.432715,...,-0.561357,0.196894,-0.348973,0.188003,0.646302,0.717242,0.885411,0.709736,0.32947,0
2,0.37521,1.471189,-0.283258,0.580938,1.938451,0.939376,-0.453565,1.310279,0.76223,-1.429191,...,-0.561357,0.196894,-0.348973,0.188003,0.646302,0.717242,0.885411,0.711468,0.32947,0
3,0.37521,-0.756929,1.359263,-1.760665,-0.515445,-1.090085,-0.453565,-0.763196,0.76223,1.432715,...,-0.561357,0.196894,1.662067,-2.562036,-1.20213,-1.180338,-1.231682,-1.354522,-0.943918,0
4,0.37521,-1.035444,-0.283258,1.049258,1.938451,0.939376,-0.453565,-0.763196,-1.389523,0.717238,...,0.148956,0.196894,-0.348973,0.188003,0.837519,-0.230686,0.95022,0.771519,0.843536,0


In [4]:
#get target
train_m1_target = train_m1['y']
train_m1 = train_m1.drop(columns=['y'])

In [5]:
#get target
test_m1_target = test_m1['y']
test_m1 = test_m1.drop(columns=['y'])

#### Hyperparameter tuning using GridSearchCV

In [None]:
# Create the parameter grid based on the results of random search 
parameters = {'C': [0.1, 1, 10, 100, 1000], 
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
              'kernel': ['rbf', 'poly']} 

# Create the model
svc_model = SVC()

# Best model
opt_model_svc = GridSearchCV(svc_model, parameters,  refit = True, scoring='accuracy', verbose=10)

# Fit the model
opt_model_svc.fit(train_m1, train_m1_target)

print (opt_model_svc.best_estimator_)

Fitting 5 folds for each of 50 candidates, totalling 250 fits
[CV 1/5; 1/50] START C=0.1, gamma=1, kernel=rbf.................................


#### Create and fit the model

In [19]:
# Create model with best parameters
svm_best_model = SVC(C=0.01, gamma=1,kernel="rbf",verbose=True)

# Fit the best model
svm_best_model.fit(train_m1, train_m1_target)
svm_best_model

RandomForestClassifier(max_depth=13, max_features=3, min_samples_leaf=3,
                       min_samples_split=8)

In [20]:
predictions = svm_best_model.predict(test_m1)

# Confusion matrix
confusion = pd.crosstab(test_m1_target,predictions, rownames=['Actual'], colnames=['Predicted'], margins=True)
print(confusion )

Predicted      0    1    All
Actual                      
0          10733  235  10968
1            831  558   1389
All        11564  793  12357


In [21]:
# Accuracy
score = accuracy_score(test_m1_target,predictions)
print('Accuracy:{0:f}'.format(score))

Accuracy:0.913733


In [22]:
# Report
print(classification_report(test_m1_target,predictions))

              precision    recall  f1-score   support

           0       0.93      0.98      0.95     10968
           1       0.70      0.40      0.51      1389

    accuracy                           0.91     12357
   macro avg       0.82      0.69      0.73     12357
weighted avg       0.90      0.91      0.90     12357



In [None]:
# Validate model using best model and cross validation
pecc_rf = cross_val_score(svm_best_model, train_m1, train_m1_target, cv = 5).mean()
pecc_rf

### Model 2
#### Read CSV

In [None]:
train_m2 = pd.read_csv('../../../../Data_AA2/train_m2.csv', sep = ',')
train_m2.head()

In [None]:
test_m2 = pd.read_csv('../../../../Data_AA2/test_m2.csv', sep = ',')
test_m2.head()

In [None]:
#get target
train_m2_target = train_m2['y']
train_m2 = train_m2.drop(columns=['y'])

In [None]:
#get target
test_m2_target = test_m2['y']
test_m2 = test_m2.drop(columns=['y'])

#### Hyperparameter tuning using GridSearchCV

In [None]:
# Create the parameter grid based on the results of random search 
parameters = {'C': [0.1, 1, 10, 100, 1000], 
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
              'kernel': ['rbf', 'poly']} 

# Create the model
svc_model = SVC()

# Best model
opt_model_svc = GridSearchCV(svc_model, parameters,  refit = True, scoring='accuracy', verbose=10)

# Fit the model
opt_model_svc.fit(train_m2, train_m2_target)

print (opt_model_svc.best_estimator_)

#### Create and fit the model

In [None]:
# Create model with best parameters
svm_best_model = SVC(C=0.01, gamma=1,kernel="rbf",verbose=True)

# Fit the best model
svm_best_model.fit(train_m2, train_m2_target)
svm_best_model

In [None]:
predictions = svm_best_model.predict(test_m2)

# Confusion matrix
confusion = pd.crosstab(test_m2_target,predictions, rownames=['Actual'], colnames=['Predicted'], margins=True)
print(confusion )

In [None]:
# Accuracy
score = accuracy_score(test_m2_target,predictions)
print('Accuracy:{0:f}'.format(score))

In [None]:
# Report
print(classification_report(test_m2_target,predictions))

In [None]:
# Validate model using best model and cross validation
pecc_rf = cross_val_score(svm_best_model, train_m2, train_m2_target, cv = 5).mean()
pecc_rf

### Model 3
#### Read CSV

In [None]:
train_m3 = pd.read_csv('../../../../Data_AA2/train_m3.csv', sep = ',')
train_m3.head()

In [None]:
test_m3 = pd.read_csv('../../../../Data_AA2/test_m3.csv', sep = ',')
test_m3.head()

In [None]:
#get target
train_m3_target = train_m3['y']
train_m3 = train_m3.drop(columns=['y'])

In [None]:
#get target
test_m3_target = test_m3['y']
test_m3 = test_m3.drop(columns=['y'])

#### Hyperparameter tuning using GridSearchCV

In [None]:
# Create the parameter grid based on the results of random search 
parameters = {'C': [0.1, 1, 10, 100, 1000], 
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
              'kernel': ['rbf', 'poly']} 

# Create the model
svc_model = SVC()

# Best model
opt_model_svc = GridSearchCV(svc_model, parameters,  refit = True, scoring='accuracy', verbose=10)

# Fit the model
opt_model_svc.fit(train_m3, train_m3_target)

print (opt_model_svc.best_estimator_)

#### Create and fit the model

In [None]:
# Create model with best parameters
svm_best_model = SVC(C=0.01, gamma=1,kernel="rbf",verbose=True)

# Fit the best model
svm_best_model.fit(train_m3, train_m3_target)
svm_best_model

In [None]:
predictions = svm_best_model.predict(test_m3)

# Confusion matrix
confusion = pd.crosstab(test_m3_target,predictions, rownames=['Actual'], colnames=['Predicted'], margins=True)
print(confusion )

In [None]:
# Accuracy
score = accuracy_score(test_m3_target,predictions)
print('Accuracy:{0:f}'.format(score))

In [None]:
# Report
print(classification_report(test_m3_target,predictions))

In [None]:
# Validate model using best model and cross validation
pecc_rf = cross_val_score(svm_best_model, train_m3, train_m3_target, cv = 5).mean()
pecc_rf

### Model 4
#### Read CSV

In [None]:
train_m4 = pd.read_csv('../../../../Data_AA2/train_m4.csv', sep = ',')
train_m4.head()

In [None]:
test_m4 = pd.read_csv('../../../../Data_AA2/test_m4.csv', sep = ',')
test_m4.head()

In [None]:
#get target
train_m4_target = train_m4['y']
train_m4 = train_m4.drop(columns=['y'])

In [None]:
#get target
test_m4_target = test_m4['y']
test_m4 = test_m4.drop(columns=['y'])

#### Hyperparameter tuning using GridSearchCV

In [None]:
# Create the parameter grid based on the results of random search 
parameters = {'C': [0.1, 1, 10, 100, 1000], 
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
              'kernel': ['rbf', 'poly']} 

# Create the model
svc_model = SVC()

# Best model
opt_model_svc = GridSearchCV(svc_model, parameters,  refit = True, scoring='accuracy', verbose=10)

# Fit the model
opt_model_svc.fit(train_m4, train_m4_target)

print (opt_model_svc.best_estimator_)

#### Create and fit the model

In [None]:
# Create model with best parameters
svm_best_model = SVC(C=0.01, gamma=1,kernel="rbf",verbose=True)

# Fit the best model
svm_best_model.fit(train_m4, train_m4_target)
svm_best_model

In [None]:
predictions = svm_best_model.predict(test_m4)

# Confusion matrix
confusion = pd.crosstab(test_m4_target,predictions, rownames=['Actual'], colnames=['Predicted'], margins=True)
print(confusion )

In [None]:
# Accuracy
score = accuracy_score(test_m4_target,predictions)
print('Accuracy:{0:f}'.format(score))

In [None]:
# Report
print(classification_report(test_m4_target,predictions))

In [None]:
# Validate model using best model and cross validation
pecc_rf = cross_val_score(svm_best_model, train_m4, train_m4_target, cv = 5).mean()
pecc_rf