# **Import Libraries**

In [32]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score

# **Import Dataset**

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
df = pd.read_csv('/content/drive/MyDrive/Machine Learning Basic/Support Vector Machine (SVM)/Data/Social_Network_Ads.csv')

# **Exploratory Data Analysis (EDA)**

In [4]:
df.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   User ID          400 non-null    int64 
 1   Gender           400 non-null    object
 2   Age              400 non-null    int64 
 3   EstimatedSalary  400 non-null    int64 
 4   Purchased        400 non-null    int64 
dtypes: int64(4), object(1)
memory usage: 15.8+ KB


In [6]:
df['Gender'].unique()

array(['Male', 'Female'], dtype=object)

In [11]:
df = df.join(pd.get_dummies(df['Gender']))

In [13]:
df.drop('Gender', axis=1, inplace=True)

In [14]:
df.head()

Unnamed: 0,User ID,Age,EstimatedSalary,Purchased,Female,Male
0,15624510,19,19000,0,False,True
1,15810944,35,20000,0,False,True
2,15668575,26,43000,0,True,False
3,15603246,27,57000,0,True,False
4,15804002,19,76000,0,False,True


**Note**

>- 1 for Male
>- 0 for Female

In [15]:
df['Male'] = df['Male'].astype(int)
df['Female'] = df['Female'].astype(int)

In [16]:
df.head()

Unnamed: 0,User ID,Age,EstimatedSalary,Purchased,Female,Male
0,15624510,19,19000,0,0,1
1,15810944,35,20000,0,0,1
2,15668575,26,43000,0,1,0
3,15603246,27,57000,0,1,0
4,15804002,19,76000,0,0,1


# **Define Dependent & Independent Features**

In [17]:
X = df.drop('Purchased', axis=1)
y = df['Purchased']

# **Data Splitting**

In [19]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((320, 5), (80, 5), (320,), (80,))

# **Feature Scaling**

In [20]:
scaler = StandardScaler()

In [21]:
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [27]:
X_train[5], X_test[5]

(array([-1.46650253, -0.57615058,  1.44629156, -1.        ,  1.        ]),
 array([ 1.5250491 , -0.77239133, -0.24089709,  1.        , -1.        ]))

# **Training**

## **Linear Kernel**

In [29]:
svc_linear = SVC(kernel='linear', random_state=42)
svc_linear.fit(X_train, y_train)

### **Predict**

In [30]:
y_pred  = svc_linear.predict(X_test)
y_pred

array([0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0,
       0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0])

In [33]:
cm1 = confusion_matrix(y_test, y_pred)
acc1 = accuracy_score(y_test, y_pred)
f1_score1 = f1_score(y_test, y_pred)

In [34]:
print(cm1)
print(acc1)
print(f1_score1)

[[50  2]
 [ 9 19]]
0.8625
0.7755102040816326


## **Sigmoid Kernel**

In [35]:
svc_sigmoid = SVC(kernel='sigmoid', random_state=42)
svc_sigmoid.fit(X_train, y_train)

### **Predict**

In [36]:
y_pred1  = svc_sigmoid.predict(X_test)
y_pred1

array([0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0])

In [37]:
cm2 = confusion_matrix(y_test, y_pred1)
acc2 = accuracy_score(y_test, y_pred1)
f1_score2 = f1_score(y_test, y_pred1)

In [38]:
print(cm2)
print(acc2)
print(f1_score2)

[[48  4]
 [ 8 20]]
0.85
0.7692307692307693


## **RBF Kernel**

In [39]:
svm_rbf = SVC(kernel='rbf', random_state=42)
svm_rbf.fit(X_train, y_train)

### **Predict**

In [40]:
y_pred2  = svm_rbf.predict(X_test)
y_pred2

array([1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
       1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1,
       0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0])

In [41]:
cm3 = confusion_matrix(y_test, y_pred2)
acc3 = accuracy_score(y_test, y_pred2)
f1_score3 = f1_score(y_test, y_pred2)

In [42]:
print(cm3)
print(acc3)
print(f1_score3)

[[48  4]
 [ 2 26]]
0.925
0.896551724137931
