In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

In [2]:
df = pd.read_csv('Social_Network_Ads.csv')
df

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0
...,...,...,...,...,...
395,15691863,Female,46,41000,1
396,15706071,Male,51,23000,1
397,15654296,Female,50,20000,1
398,15755018,Male,36,33000,0


In [3]:
df = pd.get_dummies(data=df, columns=['Gender'], drop_first=True)
df

Unnamed: 0,User ID,Age,EstimatedSalary,Purchased,Gender_Male
0,15624510,19,19000,0,1
1,15810944,35,20000,0,1
2,15668575,26,43000,0,0
3,15603246,27,57000,0,0
4,15804002,19,76000,0,1
...,...,...,...,...,...
395,15691863,46,41000,1,0
396,15706071,51,23000,1,1
397,15654296,50,20000,1,0
398,15755018,36,33000,0,1


In [4]:
x = df[['Age', 'EstimatedSalary', 'Gender_Male']]
y = df['Purchased']

In [5]:
x_train, x_test, y_train, y_test = train_test_split(x, y)

In [6]:
scaler = StandardScaler()

scaler.fit(x_train)

x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

In [7]:
model = SVC(random_state=42)
model.fit(x_train, y_train)

SVC(random_state=42)

In [8]:
y_pred = model.predict(x_test)
print(confusion_matrix(y_test, y_pred))
print(model.score(x_test, y_test))

[[53  5]
 [ 3 39]]
0.92


In [9]:
from sklearn.model_selection import cross_val_score

accuracies = cross_val_score(estimator=model, X=x_train, y=y_train, cv=10)
print(accuracies.mean())

0.9033333333333335


In [10]:
from sklearn.model_selection import RandomizedSearchCV

In [11]:

params = [
        {'C':np.linspace(1, 200), 'kernel':['linear', 'sigmoid', 'poly']},
        {'C':np.linspace(1, 200), 'kernel':['rbf'], 'gamma':np.linspace(0.01, 0.5)}
         ]

randomized_search = RandomizedSearchCV(estimator=model,
                           param_distributions=params,
                           scoring='accuracy',
                           cv=10)
randomized_search.fit(x_train, y_train)

RandomizedSearchCV(cv=10, estimator=SVC(random_state=42),
                   param_distributions=[{'C': array([  1.        ,   5.06122449,   9.12244898,  13.18367347,
        17.24489796,  21.30612245,  25.36734694,  29.42857143,
        33.48979592,  37.55102041,  41.6122449 ,  45.67346939,
        49.73469388,  53.79591837,  57.85714286,  61.91836735,
        65.97959184,  70.04081633,  74.10204082,  78.16326531,
        82.2244898 ,  86.28571429,  90.3469387...
       179.69387755, 183.75510204, 187.81632653, 191.87755102,
       195.93877551, 200.        ]),
                                         'gamma': array([0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1 , 0.11,
       0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2 , 0.21, 0.22,
       0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3 , 0.31, 0.32, 0.33,
       0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4 , 0.41, 0.42, 0.43, 0.44,
       0.45, 0.46, 0.47, 0.48, 0.49, 0.5 ]),
                                         'ke

In [12]:
randomized_search.best_score_

0.9100000000000001

In [13]:
randomized_search.best_params_

{'kernel': 'rbf', 'gamma': 0.04, 'C': 94.40816326530613}

In [14]:
randomized_search.best_estimator_

SVC(C=94.40816326530613, gamma=0.04, random_state=42)

**another example**

In [15]:
from sklearn.ensemble import RandomForestClassifier

In [16]:
model = RandomForestClassifier(random_state=42)
model.fit(x_train, y_train)

RandomForestClassifier(random_state=42)

In [17]:
y_pred = model.predict(x_test)
print(confusion_matrix(y_test, y_pred))
print(model.score(x_test, y_test))

[[53  5]
 [ 6 36]]
0.89


In [18]:

params = [
        {'criterion':['gini', 'entropy'], 'n_estimators':range(20, 500)}
         ]

randomized_search = RandomizedSearchCV(estimator=model,
                           param_distributions=params,
                           scoring='accuracy',
                           cv=10, verbose=1, n_jobs=-1)
randomized_search.fit(x_train, y_train)

Fitting 10 folds for each of 10 candidates, totalling 100 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed:    4.4s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:   12.8s finished


RandomizedSearchCV(cv=10, estimator=RandomForestClassifier(random_state=42),
                   n_jobs=-1,
                   param_distributions=[{'criterion': ['gini', 'entropy'],
                                         'n_estimators': range(20, 500)}],
                   scoring='accuracy', verbose=1)

In [19]:
randomized_search.best_score_

0.9066666666666668

In [20]:
randomized_search.best_params_

{'n_estimators': 51, 'criterion': 'gini'}

In [21]:
randomized_search.best_estimator_

RandomForestClassifier(n_estimators=51, random_state=42)

# Great Work!