In [None]:
# 🔹 Step 1: Import Required Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import warnings
warnings.filterwarnings('ignore')  # To ignore warnings while training
# ✅ These are the libraries needed for:
# •	Data handling (pandas, numpy)
# •	Plotting graphs (matplotlib)
# •	Machine learning (sklearn)
# •	Avoiding warnings

# 🔹 Step 2: Load and Understand the Iris Dataset
iris = load_iris()
# Print the feature (column) names first, then the class names:
#   ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
#   ['setosa' 'versicolor' 'virginica']
for description in (iris.feature_names, iris.target_names):
    print(description)
# ✅ The dataset provides:
# •	4 features: sepal/petal length and width
# •	3 classes: setosa, versicolor, virginica

# 🔹 Step 3: Create Features (X) and Labels (y)
# y holds the class labels; X is the measurement table with named columns.
y = iris.target
X = pd.DataFrame(data=iris.data, columns=iris.feature_names)

# ✅ Inputs (X) and outputs (y) are now kept in separate objects.

# 🔹 Step 4: Train-Test Split (80% Train, 20% Test)
# A fixed random_state keeps the same rows in each partition on every run.
split_options = {'test_size': 0.2, 'random_state': 10}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)
# ✅ The model is fitted on the training part and judged on the unseen test part.

# 🔹 Step 5: Define Hyperparameters for Pre-Pruning
# Search space for the grid search: 3 * 2 * 5 * 3 = 90 parameter combinations.
params = {
    # Impurity measure used to score candidate splits.
    'criterion': ['gini', 'entropy', 'log_loss'],
    # Strategy for picking the split at each node.
    'splitter': ['best', 'random'],
    # Maximum tree depth; shallow trees are the pre-pruning control here.
    'max_depth': [1, 2, 3, 4, 5],
    # Number of features examined per split. None means "use every feature".
    # The former 'auto' option was deprecated in scikit-learn 1.1 and removed
    # in 1.3; for classification trees it was an alias of 'sqrt', so it only
    # duplicated candidates on old versions and makes every fit that uses it
    # fail on current ones.
    'max_features': [None, 'sqrt', 'log2'],
}

params
# {'criterion': ['gini', 'entropy', 'log_loss'],
#  'splitter': ['best', 'random'],
#  'max_depth': [1, 2, 3, 4, 5],
#  'max_features': [None, 'sqrt', 'log2']}

# ✅ These are the parameters we want to try different combinations of:
# •	criterion: How splits are chosen
# •	splitter: Strategy for choosing split
# •	max_depth: Tree depth (controls overfitting)
# •	max_features: Number of features to consider at each split

# 🔹 Step 6: Use GridSearchCV for Hyperparameter Tuning
# Seed the estimator: the grid includes splitter='random' and feature
# subsampling, so an unseeded tree can give different scores (and a different
# "best" combination) on every run.
model = DecisionTreeClassifier(random_state=42)
grid = GridSearchCV(estimator=model, param_grid=params, cv=5, scoring='accuracy')
grid.fit(X_train, y_train)
# ✅ GridSearchCV:
# •	Tries all combinations of parameters
# •	Uses cross-validation (5-fold in this case)
# •	Picks the best performing combination

# 🔹 Step 7: View Best Parameters and Score
print("Best Parameters:", grid.best_params_)
print("Best Cross-Validation Accuracy:", grid.best_score_)
# ✅ Example output (from an earlier, unseeded run; the exact winner depends on
# the seed and the scikit-learn version):
# Best Parameters: {'criterion': 'gini', 'max_depth': 5, 'max_features': 'sqrt', 'splitter': 'random'}
# Best Cross-Validation Accuracy: 0.95

# 🔹 Step 8: Test Accuracy with Best Parameters
# Predict the held-out test rows with the fitted grid-search object.
y_pred = grid.predict(X_test)

# Compute each evaluation metric, then print it under its heading.
report_sections = (
    ("Confusion Matrix:\n", confusion_matrix(y_test, y_pred)),
    ("Classification Report:\n", classification_report(y_test, y_pred)),
    ("Test Accuracy:", accuracy_score(y_test, y_pred)),
)
for heading, value in report_sections:
    print(heading, value)
# ✅ Sample output from one recorded run (values can differ between runs):
# Confusion Matrix:
#  [[10  0  0]
#  [ 0 11  2]
#  [ 0  0  7]]
# Classification Report:
#                precision    recall  f1-score   support
#
#            0       1.00      1.00      1.00        10
#            1       1.00      0.85      0.92        13
#            2       0.78      1.00      0.88         7
#
#     accuracy                           0.93        30
#    macro avg       0.93      0.95      0.93        30
# weighted avg       0.95      0.93      0.93        30
#
# Test Accuracy: 0.9333333333333333








['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
['setosa' 'versicolor' 'virginica']
Best Parameters: {'criterion': 'log_loss', 'max_depth': 3, 'max_features': 'log2', 'splitter': 'best'}
Best Cross-Validation Accuracy: 0.95
Confusion Matrix:
 [[10  0  0]
 [ 0 11  2]
 [ 0  0  7]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      0.85      0.92        13
           2       0.78      1.00      0.88         7

    accuracy                           0.93        30
   macro avg       0.93      0.95      0.93        30
weighted avg       0.95      0.93      0.93        30

Test Accuracy: 0.9333333333333333
