# Q3.

### Importing libraries

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

### Importing the dataset

In [3]:
# Load the bundled Iris data and wrap features / labels in DataFrames.
iris = load_iris()
X = pd.DataFrame(data=iris.data, columns=iris.feature_names)
# Labels are kept as a single-column frame named 'target'.
y = pd.DataFrame({'target': iris.target})

In [4]:
# Show the feature frame; as the cell's last bare expression it is rendered by the notebook.
X

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [5]:
# Show the single-column 'target' frame; as the cell's last bare expression it is rendered by the notebook.
y

Unnamed: 0,target
0,0
1,0
2,0
3,0
4,0
...,...
145,2
146,2
147,2
148,2


### Evaluating the decision tree classifier at different training-set sizes

In [6]:
# Fractions of the data handed to train_test_split as the training portion.
train_sizes = [0.6, 0.7, 0.8, 0.9]

for size in train_sizes:
    print("\n==============================")
    # Use the percent format spec rather than int(size*100): int() truncates,
    # so a fraction whose product lands just below an integer
    # (e.g. 0.29*100 == 28.999999999999996) would be reported one percent low.
    print(f"Training Size: {size:.0%}")

    # Fixed random_state so every run produces the same split for a given size.
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=size, random_state=42)

    # A fresh entropy-criterion tree is fitted for each training size.
    model = DecisionTreeClassifier(criterion='entropy', random_state=42)

    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    # Score the held-out portion of this split.
    acc = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)
    cr = classification_report(y_test, y_pred)

    print("Accuracy:", acc)
    print("Confusion Matrix:\n", cm)
    print("Classification Report:\n", cr)


Training Size: 60%
Accuracy: 0.9833333333333333
Confusion Matrix:
 [[23  0  0]
 [ 0 19  0]
 [ 0  1 17]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        23
           1       0.95      1.00      0.97        19
           2       1.00      0.94      0.97        18

    accuracy                           0.98        60
   macro avg       0.98      0.98      0.98        60
weighted avg       0.98      0.98      0.98        60


Training Size: 70%
Accuracy: 0.9777777777777777
Confusion Matrix:
 [[19  0  0]
 [ 0 13  0]
 [ 0  1 12]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       0.93      1.00      0.96        13
           2       1.00      0.92      0.96        13

    accuracy                           0.98        45
   macro avg       0.98      0.97      0.97        45
weighted avg       0.98      0.98  

In [7]:
# Emit a plain-text heading ahead of the hyperparameter experiments below.
print("Effect of Different Parameters:")

Effect of Different Parameters:


#### max_depth = 3

In [8]:
# 80/20 split with a fixed seed; the later parameter cells reuse these
# X_train / X_test / y_train / y_test variables.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, random_state=42
)

# Entropy tree with its depth limited to 3, fitted on the training portion.
model_depth = DecisionTreeClassifier(
    criterion='entropy',
    max_depth=3,
    random_state=42,
).fit(X_train, y_train)

y_pred_depth = model_depth.predict(X_test)
acc_depth = accuracy_score(y_test, y_pred_depth)
print("\nAccuracy with max_depth=3:", acc_depth)


Accuracy with max_depth=3: 1.0


#### min_samples_split = 5

In [9]:
# Entropy tree with min_samples_split=5; relies on the X_train / X_test split
# already present in the notebook namespace.
model_min_split = DecisionTreeClassifier(
    criterion='entropy',
    min_samples_split=5,
    random_state=42,
).fit(X_train, y_train)

y_pred_min_split = model_min_split.predict(X_test)
acc_min_split = accuracy_score(y_test, y_pred_min_split)
print("\nAccuracy with min_samples_split=5:", acc_min_split)


Accuracy with min_samples_split=5: 1.0


#### max_depth=3 and min_samples_split=5 together

In [10]:
# Entropy tree with both constraints applied at once (max_depth=3 and
# min_samples_split=5); relies on the X_train / X_test split already present
# in the notebook namespace.
model_both = DecisionTreeClassifier(
    criterion='entropy',
    max_depth=3,
    min_samples_split=5,
    random_state=42,
).fit(X_train, y_train)

y_pred_both = model_both.predict(X_test)
acc_both = accuracy_score(y_test, y_pred_both)
print("\nAccuracy with max_depth=3 and min_samples_split=5:", acc_both)


Accuracy with max_depth=3 and min_samples_split=5: 1.0
