## GridsearchCV

In [1]:
import pandas as pd
import numpy as np
import sklearn
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score,confusion_matrix,roc_auc_score,roc_curve
from pandas_profiling import ProfileReport
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
df=pd.read_csv("https://raw.githubusercontent.com/shrikant-temburwar/Wine-Quality-Dataset/master/winequality-red.csv",sep = ';')

In [3]:
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


#### Seperate independent and dependent features

In [4]:
x = df.iloc[:,:-1]
y = df['quality']

#### Train Test Split

In [5]:
X_train , X_test, y_train  , y_test = train_test_split(x,y,test_size=0.25,random_state=300)

In [6]:
X_train.shape,y_train.shape

((1199, 11), (1199,))

In [7]:
X_test.shape,y_test.shape

((400, 11), (400,))

#### Take sample of 500 records and seperate independent and dependent features

In [8]:
df1 = df.head(500)
x1 = df1.drop(columns='quality')
y1 = df1['quality']

In [9]:
dt_model_ccp = DecisionTreeClassifier(random_state=0,ccp_alpha = 0.02)
dt_model_ccp.fit(x1,y1)

In [10]:
# Accuracy of training model after hyperparameter tuning
dt_model_ccp.score(x1,y1)

0.596

In [11]:
# Accuracy of test data after hyperparameter tunning
dt_model_ccp.score(X_test,y_test)

0.54

## GridsearchCV

In [14]:
grid_param = {"criterion" : ['gini','entropy'],
              "splitter" : ['best','random'],
              "max_depth" : range(2,40,1),
              "min_samples_split" : range(2,10,1),
              "min_samples_leaf" : range(1,10,1)
              }

In [15]:
grid_ccp = GridSearchCV(estimator=dt_model_ccp,param_grid = grid_param, cv = 5, n_jobs=-1)
# n_jobs = -1 means engage all the processors

In [16]:
grid_ccp.fit(x1,y1)



![](https://i.imgur.com/RnRUzj5.jpg)

#### Find the best possible parameter for hyperparameter tuning

In [22]:
grid_ccp.best_params_

{'criterion': 'entropy',
 'max_depth': 7,
 'min_samples_leaf': 2,
 'min_samples_split': 6,
 'splitter': 'random'}

- These are our best possible parameters
- Now agian train the model based on these parameters

In [23]:
model_on_para = DecisionTreeClassifier(criterion="entropy", max_depth=7 , min_samples_leaf = 2, min_samples_split= 6, splitter = "random")

In [24]:
model_on_para.fit(x1,y1)

![](https://i.imgur.com/W3qcll6.jpg)

In [25]:
## Training accuracy
model_on_para.score(x1,y1)

0.702

In [26]:
## Test data accuracy
model_on_para.score(X_test,y_test)

0.54

#### Check the accuracy by changing parameters of gridsearchCV

In [27]:
grid_ccp = GridSearchCV(estimator=dt_model_ccp,param_grid = grid_param, cv = 10, n_jobs=-1)

In [28]:
grid_ccp.fit(x1,y1)



In [29]:
grid_ccp.best_params_

{'criterion': 'entropy',
 'max_depth': 15,
 'min_samples_leaf': 2,
 'min_samples_split': 2,
 'splitter': 'random'}

In [30]:
model_on_para = DecisionTreeClassifier(criterion="entropy", max_depth=15 , min_samples_leaf = 2, min_samples_split= 2, splitter = "random",ccp_alpha = 0.014)
model_on_para.fit(x1,y1)
## Training accuracy
print(model_on_para.score(x1,y1))
## Test data accuracy
print(model_on_para.score(X_test,y_test))

0.672
0.5375


### Confusion_Matrix on multiclass classification

In [35]:
pred = model_on_para.predict(X_train)

In [36]:
confusion_matrix(y_train,pred)

array([[  0,   0,   4,   1,   0,   0],
       [  0,   1,  26,  13,   1,   0],
       [  0,   6, 342, 142,  22,   0],
       [  0,   2, 145, 251,  89,   0],
       [  0,   0,  15,  45,  79,   0],
       [  0,   0,   0,   6,   9,   0]], dtype=int64)

- We have 6 classes, therefore we get 6 X 6 matrix