### Support Vector Machine

In [1]:
# This code appears in every demonstration Notebook.
# By default, when you run each cell, only the last output of the codes will show.
# This code makes all outputs of a cell show.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

1. Import libraries

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import sklearn.model_selection as skm # This is for cross-validation grid search
from sklearn.svm import SVC # SVM module
from sklearn.metrics import accuracy_score, confusion_matrix

2. Import Carseats dataset

In [3]:
Carseats = pd.read_csv('Carseats.csv')

In [4]:
Carseats.info()
Carseats.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Sales        400 non-null    float64
 1   CompPrice    400 non-null    int64  
 2   Income       400 non-null    int64  
 3   Advertising  400 non-null    int64  
 4   Population   400 non-null    int64  
 5   Price        400 non-null    int64  
 6   ShelveLoc    400 non-null    object 
 7   Age          400 non-null    int64  
 8   Education    400 non-null    int64  
 9   Urban        400 non-null    object 
 10  US           400 non-null    object 
dtypes: float64(1), int64(7), object(3)
memory usage: 34.5+ KB


Unnamed: 0,Sales,CompPrice,Income,Advertising,Population,Price,ShelveLoc,Age,Education,Urban,US
0,9.5,138,73,11,276,120,Bad,42,17,Yes,Yes
1,11.22,111,48,16,260,83,Good,65,10,Yes,Yes
2,10.06,113,35,10,269,80,Medium,59,12,Yes,Yes
3,7.4,117,100,4,466,97,Medium,55,14,Yes,Yes
4,4.15,141,64,3,340,128,Bad,38,13,Yes,No


3. We intend to classify the sales of carseats into high or low categories.<br>
First, we need to transform the numeric 'Sales' into high/low categories.

In [5]:
# The variable we create will be our y.
Carseats['Sales_c'] = pd.cut(Carseats['Sales'], bins = [-1, Carseats['Sales'].mean(), float('inf')], labels = ['low', 'high'])

In [6]:
Carseats['Sales_c'].value_counts()

low     201
high    199
Name: Sales_c, dtype: int64

4. Prepare X and y

In [7]:
# Drop the dependent variable from Carseats to get X
X = Carseats.drop(['Sales', 'Sales_c', 'CompPrice'], axis = 1)
y = Carseats['Sales_c']

In [8]:
X.head()

Unnamed: 0,Income,Advertising,Population,Price,ShelveLoc,Age,Education,Urban,US
0,73,11,276,120,Bad,42,17,Yes,Yes
1,48,16,260,83,Good,65,10,Yes,Yes
2,35,10,269,80,Medium,59,12,Yes,Yes
3,100,4,466,97,Medium,55,14,Yes,Yes
4,64,3,340,128,Bad,38,13,Yes,No


In [9]:
X = pd.get_dummies(X, columns = ['ShelveLoc', 'Urban', 'US'], drop_first=True)

5. Split training and test datasets

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=30)

In [11]:
y_train.value_counts()

low     147
high    133
Name: Sales_c, dtype: int64

6. Fit a support vector classifier.

#### Attention: the C argument mentioned here is different from the Tuning C parameter mentioned in the textbook and slides. They lead to opposite directions.

In [12]:
# Initiating the classifier
# The C argument allows us to specify the cost of a violation
# to the margin. When the cost argument is small, then the margins will be
# wide and many support vectors will be on the margin or will violate the
# margin. When the C argument is large, then the margins will be narrow and
# there will be few support vectors on the margin or violating the margin.
svm_linear = SVC(C = 10, kernel = 'linear')
svm_linear.fit(X, y)

In [13]:
# Make predictions and check performance
svm_linear_pred = svm_linear.predict(X_test)
accuracy_score(y_test, svm_linear_pred)
confusion_matrix(y_test, svm_linear_pred)

0.7833333333333333

array([[52, 14],
       [12, 42]])

7. Cross validation to select the best C parameter using grid search.<br>
Note: It is computationally expensive.

In [None]:
# First define the grid as a dictionary. Here we only have one parameter C.
param_grid = {'C':[0.001,0.01,0.1,1,5,10,100]}
# Grid search: cv gives the K for K fold cross-validation, scoring gives what metric to evaluate
svm_grid = skm.GridSearchCV(svm_linear ,
{'C':[0.001,0.01,0.1,1,5,10,100]}, refit=True, cv=5, scoring = 'accuracy')
svm_grid.fit(X, y)
svm_grid.best_params_

In [14]:
import sklearn.model_selection as skm
from sklearn.svm import SVC  # Assuming you're using an SVM classifier

# Initialize the SVM classifier
svm_linear = SVC(kernel='linear')

# Define the grid as a dictionary with the parameter 'C' you want to tune
param_grid = {'C': [0.001, 0.01, 0.1, 1, 5, 10, 100]}

# Initialize GridSearchCV with the SVM classifier, the parameter grid, and the settings for cross-validation
svm_grid = skm.GridSearchCV(svm_linear, param_grid, refit=True, cv=5, scoring='accuracy')

# Fit the grid search model
svm_grid.fit(X, y)  # Make sure X and y are defined as your features and target variable

# After fitting, you can find the best parameters
best_params = svm_grid.best_params_
print("Best parameters:", best_params)


Best parameters: {'C': 1}


In [None]:
# Fit the grid
svm_grid.fit(X, y)

# Access grid search results by calling the attributes
# Best C identified
svm_grid.best_params_
# Best resulting accuracy
svm_grid.best_score_
# All CV results. We pay most attention to mean score results
svm_grid.cv_results_

In [None]:
# Then we can use the best selected model to make predictions and examine performance
best_svm = grid.best_estimator_
best_svm_pred = best_svm.predict(X_test)
accuracy_score(y_test, best_svm_pred)
confusion_matrix(y_test, best_svm_pred)

8. Support Vector Machine: using a non-linear kernel.

In [None]:
# Kernel: radial
# Small gamma: Results in a smoother decision boundary, as the influence of a single training
# example extends farther. This can lead to underfitting, especially if the data is complex.
# Large gamma: Results in a more complex and tightly fit decision boundary, as the influence 
# of a single training example is more localized. This can lead to overfitting, 
# especially if the data is noisy.
svm_rbf = SVC(kernel="rbf", gamma = 1, C = 1)

In [None]:
svm_rbf.fit(X_train, y_train)

In [None]:
svm_rbf_pred = svm_rbf.predict(X_test)
accuracy_score(y_test, svm_rbf_pred)
confusion_matrix(y_test, svm_rbf_pred)

9. Change to polynomial kernel

In [None]:
# We initialize the SVC with the polynomial kernel by setting kernel='poly'.
# The degree parameter specifies the degree of the polynomial kernel (default is 3).
# The C parameter controls the regularization strength (default is 1.0).
# The gamma parameter controls the kernel coefficient (default is 'scale').
svc_poly = SVC(kernel='poly', degree=3, C=1.0, gamma='scale')

In [None]:
svc_poly.fit(X_train, y_train)
svc_poly_pred = svc_poly.predict(X_test)
accuracy_score(y_test, svc_poly_pred)
confusion_matrix(y_test, svc_poly_pred)