In [1]:
import numpy as np
import pandas as pd

In [2]:
df = pd.read_csv('Advertising_data.csv')
df.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19.0,19000.0,0
1,15810944,Male,35.0,20000.0,0
2,15668575,Female,26.0,43000.0,0
3,15603246,Female,27.0,57000.0,0
4,15804002,Male,19.0,76000.0,0


In [3]:
gender = pd.get_dummies(df['Gender'], drop_first= True)
gender

Unnamed: 0,Male
0,1
1,1
2,0
3,0
4,1
...,...
395,0
396,1
397,0
398,1


In [4]:
df1 = pd.concat([df, gender], axis= 1)
df1

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased,Male
0,15624510,Male,19.0,19000.0,0,1
1,15810944,Male,35.0,20000.0,0,1
2,15668575,Female,26.0,43000.0,0,0
3,15603246,Female,27.0,57000.0,0,0
4,15804002,Male,19.0,76000.0,0,1
...,...,...,...,...,...,...
395,15691863,Female,46.0,41000.0,1,0
396,15706071,Male,51.0,23000.0,1,1
397,15654296,Female,50.0,20000.0,1,0
398,15755018,Male,36.0,33000.0,0,1


In [5]:
df1 = df1.drop(['User ID','Gender'], axis= 1)
df1

Unnamed: 0,Age,EstimatedSalary,Purchased,Male
0,19.0,19000.0,0,1
1,35.0,20000.0,0,1
2,26.0,43000.0,0,0
3,27.0,57000.0,0,0
4,19.0,76000.0,0,1
...,...,...,...,...
395,46.0,41000.0,1,0
396,51.0,23000.0,1,1
397,50.0,20000.0,1,0
398,36.0,33000.0,0,1


In [6]:
x = df1[['Age','EstimatedSalary','Male']]

x.head()

Unnamed: 0,Age,EstimatedSalary,Male
0,19.0,19000.0,1
1,35.0,20000.0,1
2,26.0,43000.0,0
3,27.0,57000.0,0
4,19.0,76000.0,1


In [7]:
y = df1['Purchased']

y.head()

0    0
1    0
2    0
3    0
4    0
Name: Purchased, dtype: int64

In [8]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size= 0.3, random_state= 0)

In [9]:
print(x_train.shape, x_test.shape)

(280, 3) (120, 3)


In [10]:
print(y_train.shape, y_test.shape)

(280,) (120,)


### Feature scalling.... Standardscaler

In [11]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

In [12]:
x_train = scaler.fit_transform(x_train)

x_test = scaler.transform(x_test)

### SVM model.... classification problem...... Use Default parameters for this problem

In [13]:
from sklearn.svm import SVC

In [14]:
classification = SVC(kernel= 'linear', random_state= 0)

classification.fit(x_train, y_train)

SVC(kernel='linear', random_state=0)

In [15]:
## predict the x_test value

y_pred = classification.predict(x_test)

In [16]:
from sklearn.metrics import confusion_matrix, accuracy_score

In [17]:
metrix = confusion_matrix(y_test, y_pred)
metrix

array([[76,  3],
       [13, 28]], dtype=int64)

In [18]:
score = accuracy_score(y_test, y_pred)
score

0.8666666666666667

# Use GridSearchCV .... HyperParameter
# Use GridSearchCV to find the best parameters for the model

In [19]:
from sklearn.model_selection import GridSearchCV

In [22]:
parameters = [{'C' : [1,5,10,12,50,100,1000], 'kernel' : ['linear']},
             
             {'C': [1,5,10,12,50,100,1000], 'kernel': ['rbf'], 'gamma':[0.01,0.05,0.1,0.2,0.3,0.5,0.6,0.7,0.8,0.9]}]

In [23]:
grid_search = GridSearchCV(estimator= classification, param_grid= parameters, scoring= 'accuracy', cv = 10, n_jobs= -1)

grid_search.fit(x_train, y_train)

GridSearchCV(cv=10, estimator=SVC(kernel='linear', random_state=0), n_jobs=-1,
             param_grid=[{'C': [1, 5, 10, 12, 50, 100, 1000],
                          'kernel': ['linear']},
                         {'C': [1, 5, 10, 12, 50, 100, 1000],
                          'gamma': [0.01, 0.05, 0.1, 0.2, 0.3, 0.5, 0.6, 0.7,
                                    0.8, 0.9],
                          'kernel': ['rbf']}],
             scoring='accuracy')

In [24]:
accuracy = grid_search.best_score_

accuracy

0.9071428571428569

In [25]:
grid_search.best_params_

{'C': 1, 'gamma': 0.8, 'kernel': 'rbf'}

# Use this best parameters in the SVM model

In [27]:
from sklearn.svm import SVC

classification = SVC(C = 1, kernel= 'rbf', gamma= 0.8)

In [28]:
classification.fit(x_train, y_train)

SVC(C=1, gamma=0.8)

In [29]:
y_pred2 = classification.predict(x_test)

In [30]:
cm = confusion_matrix(y_pred2, y_test)

cm

array([[73,  4],
       [ 6, 37]], dtype=int64)

In [31]:
sc = accuracy_score(y_test, y_pred2)

sc

0.9166666666666666