In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score,KFold
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt
%matplotlib inline

In [6]:
iris_data = load_iris()

In [7]:
features = iris_data.data
target = iris_data.target

In [12]:
pd.Series(target).value_counts()

2    50
1    50
0    50
dtype: int64

In [8]:
dt = DecisionTreeClassifier()

In [16]:
# Getting the reliable or confident accuracy which may not cause overfitting
test_score = cross_val_score(dt,features,target,cv=5,scoring='accuracy')

In [17]:
test_score

array([0.96666667, 0.96666667, 0.9       , 0.93333333, 1.        ])

In [18]:
np.mean(test_score)

0.9533333333333334

In [21]:
# Parameter Tuning
# Best Max Depth
mean_score = {}
for depth in range(2,5):
    dt1 = DecisionTreeClassifier(max_depth=depth)
    test_score = cross_val_score(dt1,features,target,cv=5,scoring='accuracy')
    mean_score[depth]= np.mean(test_score)

In [22]:
mean_score

{2: 0.9333333333333332, 3: 0.9733333333333334, 4: 0.9533333333333334}

In [23]:
# Parameter Tuning
# Best Max Depth
mean_score = {}
for depth in range(2,5):
    for min_split in range(4,10):
        dt1 = DecisionTreeClassifier(max_depth=depth,min_samples_split=min_split)
        test_score = cross_val_score(dt1,features,target,cv=5,scoring='accuracy')
        mean_score[depth,min_split]= np.mean(test_score)

In [24]:
mean_score

{(2, 4): 0.9333333333333332,
 (2, 5): 0.9333333333333332,
 (2, 6): 0.9333333333333332,
 (2, 7): 0.9333333333333332,
 (2, 8): 0.9333333333333332,
 (2, 9): 0.9333333333333332,
 (3, 4): 0.96,
 (3, 5): 0.9733333333333334,
 (3, 6): 0.9733333333333334,
 (3, 7): 0.96,
 (3, 8): 0.96,
 (3, 9): 0.9733333333333334,
 (4, 4): 0.9666666666666668,
 (4, 5): 0.9666666666666668,
 (4, 6): 0.9666666666666668,
 (4, 7): 0.9666666666666668,
 (4, 8): 0.9666666666666668,
 (4, 9): 0.9666666666666668}

In [None]:
# Cross Vaidation for model Selection

In [26]:
# Getting the reliable or confident accuracy which may not cause overfitting
test_score = cross_val_score(dt,features,target,cv=5,scoring='accuracy')
score = test_score.mean()
print(score)

0.9666666666666668


In [27]:
from sklearn.linear_model import LogisticRegression

In [28]:
logreg = LogisticRegression()

In [29]:
# Getting the reliable or confident accuracy which may not cause overfitting
test_score_log = cross_val_score(logreg,features,target,cv=5,scoring='accuracy')
score = test_score_log.mean()
print(score)

0.9600000000000002




In [31]:
iris_data.feature_names

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [32]:
features

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3

In [35]:
feature_2 = features[:,0:2]

In [37]:
# Cross Vaidation for feature selection
# Getting the reliable or confident accuracy which may not cause overfitting
test_score = cross_val_score(dt,feature_2,target,cv=5,scoring='accuracy')
score = test_score.mean()
print(score)

0.72


In [39]:
feature3 = features[:,0:3]

In [40]:
# Cross Vaidation for feature selection
# Getting the reliable or confident accuracy which may not cause overfitting
test_score = cross_val_score(dt,feature3,target,cv=5,scoring='accuracy')
score = test_score.mean()
print(score)

0.9400000000000001
