In [7]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score

In [2]:
x, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=8,
    n_redundant=2,
    n_repeated=0,
    n_classes=2,
    random_state=42
)

x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42)

In [3]:
tree = DecisionTreeClassifier(criterion='entropy',max_depth=10)
tree.fit(x_train,y_train)

y_pred = tree.predict(x_test)

cr = classification_report(y_test,y_pred)

print(cr)

              precision    recall  f1-score   support

           0       0.79      0.79      0.79       106
           1       0.76      0.76      0.76        94

    accuracy                           0.78       200
   macro avg       0.77      0.77      0.77       200
weighted avg       0.77      0.78      0.77       200



In [8]:
# Raw Methode

criterion = ['gini','entropy']
max_depth = [5,10,15]

avg_score = {}

for c in criterion:
    for d in max_depth:
        clf = DecisionTreeClassifier(criterion=c,max_depth=d)
        score_list = cross_val_score(clf,x,y,cv=5)
        avg_score[c+"_"+str(d)] = np.average(score_list)
avg_score

{'gini_5': np.float64(0.777),
 'gini_10': np.float64(0.7849999999999999),
 'gini_15': np.float64(0.8029999999999999),
 'entropy_5': np.float64(0.779),
 'entropy_10': np.float64(0.794),
 'entropy_15': np.float64(0.8039999999999999)}

In [9]:
from sklearn.model_selection import GridSearchCV

clf = GridSearchCV(
    DecisionTreeClassifier(),
    {'criterion':['gini','entropy'],
     'max_depth':[5,10,15]},
    cv=5,
    return_train_score=False
)
clf.fit(x,y)
clf.cv_results_

{'mean_fit_time': array([0.00379167, 0.00496168, 0.00337391, 0.00252409, 0.0034905 ,
        0.00372276]),
 'std_fit_time': array([3.84742248e-04, 2.35973259e-03, 2.12347713e-04, 4.69434982e-05,
        7.58997449e-05, 1.02699323e-04]),
 'mean_score_time': array([0.00097618, 0.00031028, 0.00017982, 0.00014105, 0.00014319,
        0.00018382]),
 'std_score_time': array([1.19629757e-03, 2.06148544e-04, 3.89991583e-05, 2.68303011e-06,
        6.33063062e-06, 3.85362905e-05]),
 'param_criterion': masked_array(data=['gini', 'gini', 'gini', 'entropy', 'entropy',
                    'entropy'],
              mask=[False, False, False, False, False, False],
        fill_value=np.str_('?'),
             dtype=object),
 'param_max_depth': masked_array(data=[5, 10, 15, 5, 10, 15],
              mask=[False, False, False, False, False, False],
        fill_value=999999),
 'params': [{'criterion': 'gini', 'max_depth': 5},
  {'criterion': 'gini', 'max_depth': 10},
  {'criterion': 'gini', 'max_depth'

In [10]:
import pandas as pd
df = pd.DataFrame(clf.cv_results_)

In [11]:
df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_depth,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.003792,0.000385,0.000976,0.001196,gini,5,"{'criterion': 'gini', 'max_depth': 5}",0.775,0.795,0.735,0.795,0.775,0.775,0.021909,6
1,0.004962,0.00236,0.00031,0.000206,gini,10,"{'criterion': 'gini', 'max_depth': 10}",0.79,0.725,0.81,0.785,0.81,0.784,0.031209,4
2,0.003374,0.000212,0.00018,3.9e-05,gini,15,"{'criterion': 'gini', 'max_depth': 15}",0.815,0.715,0.825,0.78,0.815,0.79,0.040497,3
3,0.002524,4.7e-05,0.000141,3e-06,entropy,5,"{'criterion': 'entropy', 'max_depth': 5}",0.765,0.785,0.765,0.815,0.78,0.782,0.01833,5
4,0.00349,7.6e-05,0.000143,6e-06,entropy,10,"{'criterion': 'entropy', 'max_depth': 10}",0.77,0.79,0.82,0.765,0.81,0.791,0.021541,2
5,0.003723,0.000103,0.000184,3.9e-05,entropy,15,"{'criterion': 'entropy', 'max_depth': 15}",0.78,0.81,0.85,0.8,0.85,0.818,0.027857,1


In [12]:
df[['param_criterion','param_max_depth','mean_test_score']]

Unnamed: 0,param_criterion,param_max_depth,mean_test_score
0,gini,5,0.775
1,gini,10,0.784
2,gini,15,0.79
3,entropy,5,0.782
4,entropy,10,0.791
5,entropy,15,0.818


In [13]:
clf.best_params_

{'criterion': 'entropy', 'max_depth': 15}

In [14]:
clf.best_score_

np.float64(0.818)

In [15]:
clf.best_estimator_

In [16]:
from sklearn import svm

In [18]:
model_param = {
    'DecisionTreeClassifier':{
        'model':DecisionTreeClassifier(),
        'params':{
            'criterion':['gini','entropy'],
            'max_depth':[5,10,15]
        }
    },
    'svm':{
        'model':svm.SVC(),
        'params':{
            'gamma':['scale','auto'],
            'C':[1,10,20],
            'kernel':['rbf','linear']
        }
    }
}

scores = []

for key,val in model_param.items():
    clf = GridSearchCV(val['model'],val['params'],cv=5,return_train_score=False)
    clf.fit(x_train,y_train)
    scores.append({
        'model':key,
        'best_params':clf.best_params_,
        'best_score':clf.best_score_
    })
    
pd.DataFrame(scores)

Unnamed: 0,model,best_params,best_score
0,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 10}",0.8275
1,svm,"{'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}",0.915
