In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

In [2]:
df = pd.read_csv('Social_Network_Ads.csv')
df

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0
...,...,...,...,...,...
395,15691863,Female,46,41000,1
396,15706071,Male,51,23000,1
397,15654296,Female,50,20000,1
398,15755018,Male,36,33000,0


In [3]:
df = pd.get_dummies(data=df, columns=['Gender'], drop_first=True)
df

Unnamed: 0,User ID,Age,EstimatedSalary,Purchased,Gender_Male
0,15624510,19,19000,0,1
1,15810944,35,20000,0,1
2,15668575,26,43000,0,0
3,15603246,27,57000,0,0
4,15804002,19,76000,0,1
...,...,...,...,...,...
395,15691863,46,41000,1,0
396,15706071,51,23000,1,1
397,15654296,50,20000,1,0
398,15755018,36,33000,0,1


In [4]:
x = df[['Age', 'EstimatedSalary', 'Gender_Male']]
y = df['Purchased']

In [5]:
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=42, test_size=0.2)

In [6]:
scaler = StandardScaler()

scaler.fit(x_train)

x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

In [7]:
model = SVC(random_state=42)
model.fit(x_train, y_train)

SVC(random_state=42)

In [8]:
y_pred = model.predict(x_test)
print(confusion_matrix(y_test, y_pred))
print(model.score(x_test, y_test))

[[48  4]
 [ 2 26]]
0.925


In [9]:
from sklearn.model_selection import cross_val_score

accuracies = cross_val_score(estimator=model, X=x_train, y=y_train, cv=10)
print(accuracies.mean())

0.90625


In [10]:
from sklearn.model_selection import GridSearchCV

In [11]:

params = [
        {'C':[1, 10, 100], 'kernel':['linear', 'sigmoid', 'poly']},
        {'C':[1, 10, 100], 'kernel':['rbf'], 'gamma':[0.5, 0.6, 0.7, 0.1, 0.01, 0.01]}
         ]

grid_search = GridSearchCV(estimator=model,
                           param_grid=params,
                           scoring='accuracy',
                           cv=10)
grid_search.fit(x_train, y_train)

GridSearchCV(cv=10, estimator=SVC(random_state=42),
             param_grid=[{'C': [1, 10, 100],
                          'kernel': ['linear', 'sigmoid', 'poly']},
                         {'C': [1, 10, 100],
                          'gamma': [0.5, 0.6, 0.7, 0.1, 0.01, 0.01],
                          'kernel': ['rbf']}],
             scoring='accuracy')

In [12]:
grid_search.best_score_

0.9125

In [13]:
grid_search.best_params_

{'C': 1, 'gamma': 0.5, 'kernel': 'rbf'}

In [14]:
grid_search.best_estimator_

SVC(C=1, gamma=0.5, random_state=42)

**another example**

In [15]:
from sklearn.ensemble import RandomForestClassifier

In [16]:
model = RandomForestClassifier(random_state=42)
model.fit(x_train, y_train)

RandomForestClassifier(random_state=42)

In [17]:
y_pred = model.predict(x_test)
print(confusion_matrix(y_test, y_pred))
print(model.score(x_test, y_test))

[[47  5]
 [ 5 23]]
0.875


In [18]:

params = [
        {'criterion':['gini', 'entropy'], 'n_estimators':[20, 50, 100, 150, 200, 500]}
         ]

grid_search = GridSearchCV(estimator=model,
                           param_grid=params,
                           scoring='accuracy',
                           cv=10, verbose=1, n_jobs=-1)
grid_search.fit(x_train, y_train)

Fitting 10 folds for each of 12 candidates, totalling 120 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed:    2.4s
[Parallel(n_jobs=-1)]: Done 120 out of 120 | elapsed:    7.5s finished


GridSearchCV(cv=10, estimator=RandomForestClassifier(random_state=42),
             n_jobs=-1,
             param_grid=[{'criterion': ['gini', 'entropy'],
                          'n_estimators': [20, 50, 100, 150, 200, 500]}],
             scoring='accuracy', verbose=1)

In [19]:
grid_search.best_score_

0.890625

In [20]:
grid_search.best_params_

{'criterion': 'gini', 'n_estimators': 20}

In [21]:
grid_search.best_estimator_

RandomForestClassifier(n_estimators=20, random_state=42)

# Great Work!