In [1]:
import pandas as pd
import numpy as ny
import matplotlib.pyplot as plt

In [2]:
# Load the social-network ads data from the CSV file next to the notebook.
dataset = pd.read_csv(filepath_or_buffer="Social_Network_Ads.csv")

In [3]:
# Show the freshly loaded frame to check its columns and row count.
dataset

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0
...,...,...,...,...,...
395,15691863,Female,46,41000,1
396,15706071,Male,51,23000,1
397,15654296,Female,50,20000,1
398,15755018,Male,36,33000,0


In [4]:
# One-hot encode the non-numeric columns as 0/1 integers; drop_first keeps a
# single indicator per category (Gender -> Gender_Male).
dataset = pd.get_dummies(data=dataset, dtype=int, drop_first=True)

In [5]:
# Class balance of the target column.
dataset["Purchased"].value_counts()

Purchased
0    257
1    143
Name: count, dtype: int64

In [6]:
# List the column names available after encoding.
dataset.columns

Index(['User ID', 'Age', 'EstimatedSalary', 'Purchased', 'Gender_Male'], dtype='object')

In [7]:
# Feature matrix: the three predictor columns ('User ID' is left out).
Independent = dataset.loc[:, ["Age", "EstimatedSalary", "Gender_Male"]]

In [8]:
# (rows, columns) of the feature matrix.
Independent.shape

(400, 3)

In [9]:
# Show the selected feature columns.
Independent

Unnamed: 0,Age,EstimatedSalary,Gender_Male
0,19,19000,1
1,35,20000,1
2,26,43000,0
3,27,57000,0
4,19,76000,1
...,...,...,...
395,46,41000,0
396,51,23000,1
397,50,20000,0
398,36,33000,1


In [10]:
# Target kept as a single-column DataFrame (not a Series).
Dependent = dataset.loc[:, ["Purchased"]]

In [11]:
# Show the target column.
Dependent

Unnamed: 0,Purchased
0,0
1,0
2,0
3,0
4,0
...,...
395,1
396,1
397,1
398,0


In [12]:
from sklearn.model_selection import GridSearchCV, train_test_split

# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    Independent,
    Dependent,
    test_size=0.30,
    random_state=0,
)

In [13]:
from sklearn.preprocessing import StandardScaler

# Standardise the features. The scaler must learn its mean/variance from the
# training split only and then apply those same parameters to the test split;
# fitting on the test split would leak test-set statistics into preprocessing.
sc=StandardScaler()
x_train=sc.fit_transform(x_train)
x_test=sc.transform(x_test)

In [14]:
from sklearn.tree import DecisionTreeClassifier

# Hyper-parameter grid searched with 5-fold cross-validation.
# NOTE(review): with the 3 input features used here, 'sqrt' and 'log2' both
# appear to resolve to one feature per split, and 'entropy' / 'log_loss' are
# documented as the same criterion, so several candidates are duplicates --
# consider trimming the grid.
param_grid={'criterion':['gini','entropy','log_loss'],'splitter':['best','random'],'max_features':['sqrt','log2']}

# Seed the tree: splitter='random' and max_features subsampling both draw from
# the estimator's RNG, so without a fixed random_state the CV ranking and
# best_params_ change from run to run. The seed matches the train/test split.
grid=GridSearchCV(DecisionTreeClassifier(random_state=0),param_grid,verbose=3,refit=True,n_jobs=-1,cv=5)

In [15]:
# Run the cross-validated search; refit=True retrains the best candidate on
# the full training split afterwards.
grid.fit(X=x_train, y=y_train)

Fitting 5 folds for each of 12 candidates, totalling 60 fits


In [16]:
# Predict test-set labels with the refitted best estimator.
y_pred = grid.predict(X=x_test)

In [17]:
from sklearn.metrics import confusion_matrix,classification_report,roc_auc_score

In [18]:
# Confusion matrix on the held-out split (rows: true class, columns: predicted).
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)

In [19]:
# Print the raw confusion-matrix counts.
print(cm)

[[70  9]
 [ 5 36]]


In [20]:
# Per-class precision / recall / F1 on the held-out split, as formatted text.
clf_rpt = classification_report(y_true=y_test, y_pred=y_pred)

In [21]:
# print() is needed here so the report's embedded newlines render as a table.
print(clf_rpt)

              precision    recall  f1-score   support

           0       0.93      0.89      0.91        79
           1       0.80      0.88      0.84        41

    accuracy                           0.88       120
   macro avg       0.87      0.88      0.87       120
weighted avg       0.89      0.88      0.88       120



In [22]:
# ROC AUC from the predicted probability of the positive class
# (column 1 of predict_proba).
roc_score = roc_auc_score(
    y_true=y_test,
    y_score=grid.predict_proba(x_test)[:, 1],
)

In [23]:
# Show the ROC AUC value.
roc_score

0.8820623649274467

In [24]:
# Keep the per-candidate cross-validation results from the search.
# NOTE(review): `re` is also the name of the standard-library regex module;
# this assignment shadows it if that module is imported in this kernel.
re=grid.cv_results_

In [25]:
# Report the hyper-parameter combination the search selected.
print("The best parameters are:", grid.best_params_)

The best parameters are: {'criterion': 'gini', 'max_features': 'sqrt', 'splitter': 'random'}


In [26]:
# Turn the cv_results_ mapping into a DataFrame, one row per grid candidate.
table = pd.DataFrame.from_dict(data=re)

In [27]:
# Show timings, per-fold scores and rank for every candidate.
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_splitter,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.009147,0.001993,0.008661,0.001796,gini,sqrt,best,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.821429,0.821429,0.839286,0.803571,0.892857,0.835714,0.030723,7
1,0.005847,0.001853,0.007926,0.002759,gini,sqrt,random,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.875,0.821429,0.875,0.875,0.910714,0.871429,0.028571,1
2,0.006901,0.000531,0.009237,0.000992,gini,log2,best,"{'criterion': 'gini', 'max_features': 'log2', ...",0.857143,0.821429,0.803571,0.857143,0.892857,0.846429,0.031135,4
3,0.006135,0.001756,0.008164,0.001471,gini,log2,random,"{'criterion': 'gini', 'max_features': 'log2', ...",0.839286,0.857143,0.821429,0.875,0.821429,0.842857,0.020825,5
4,0.006641,0.002814,0.007619,0.00194,entropy,sqrt,best,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.821429,0.875,0.821429,0.821429,0.892857,0.846429,0.031135,3
5,0.005151,0.001734,0.007395,0.00179,entropy,sqrt,random,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.839286,0.803571,0.839286,0.803571,0.821429,0.821429,0.015972,12
6,0.00653,0.002577,0.006909,0.001328,entropy,log2,best,"{'criterion': 'entropy', 'max_features': 'log2...",0.839286,0.821429,0.839286,0.821429,0.875,0.839286,0.019562,6
7,0.007149,0.001311,0.007781,0.001259,entropy,log2,random,"{'criterion': 'entropy', 'max_features': 'log2...",0.839286,0.803571,0.803571,0.821429,0.875,0.828571,0.026726,10
8,0.005982,0.001423,0.008461,0.000781,log_loss,sqrt,best,"{'criterion': 'log_loss', 'max_features': 'sqr...",0.875,0.821429,0.821429,0.839286,0.910714,0.853571,0.034626,2
9,0.005621,0.001017,0.007604,0.001343,log_loss,sqrt,random,"{'criterion': 'log_loss', 'max_features': 'sqr...",0.839286,0.875,0.803571,0.857143,0.785714,0.832143,0.03312,8
