In [11]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split, GridSearchCV,StratifiedKFold,cross_val_score
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Load the iris dataset shipped with scikit-learn
data = load_iris()
X, y = data.data, data.target  # feature matrix and class labels

# Hold out 30% of the samples for the final evaluation; the fixed seed makes
# the split repeatable across runs.
# NOTE(review): the split is not stratified, so class proportions in the test
# set may differ from those in the full data — confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Hyper-parameter candidates explored by the grid search.
# NOTE(review): assuming the 4-feature iris data, 'sqrt', 'log2' and 2
# presumably all resolve to two features per split — confirm whether the
# overlapping candidates are intended.
param_grid = dict(
    criterion=['gini', 'entropy'],              # split quality measure
    splitter=['best', 'random'],                # split strategy
    max_depth=[None, 10, 20, 30],               # maximum depth of the tree
    min_samples_split=[2, 5, 10],               # minimum samples required to split a node
    min_samples_leaf=[1, 2, 4],                 # minimum samples required in each leaf
    max_features=[None, 'sqrt', 'log2', 1, 2],  # features to consider for a split
    random_state=[42],                          # single value, kept for reproducibility
)

# Base estimator: a decision tree with class_weight set to "balanced"
dt_model = DecisionTreeClassifier(class_weight="balanced")

# Exhaustive search over every combination in param_grid, each one scored
# with 5-fold cross-validation.
grid_search = GridSearchCV(
    estimator=dt_model,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,            # run the fits in parallel on all available cores
    verbose=1,            # print the "Fitting ... folds" progress line
    error_score='raise',  # surface a failing fit instead of scoring it
)

# Run the search on the training portion only
grid_search.fit(X_train, y_train)

# Report the winning hyper-parameter combination and keep the matching estimator
print(f"Best Parameters: {grid_search.best_params_}")
best_model = grid_search.best_estimator_

# Predict the held-out test samples with that estimator
y_pred = best_model.predict(X_test)

# Compare the predictions with the true test labels
test_accuracy = accuracy_score(y_test, y_pred)
test_confusion = confusion_matrix(y_test, y_pred)
test_report = classification_report(y_test, y_pred)

print(f'Accuracy: {test_accuracy:.2f}')
print("Confusion Matrix:")
print(test_confusion)
print("Classification Report:")
print(test_report)


Fitting 5 folds for each of 720 candidates, totalling 3600 fits
Best Parameters: {'criterion': 'gini', 'max_depth': None, 'max_features': None, 'min_samples_leaf': 1, 'min_samples_split': 10, 'random_state': 42, 'splitter': 'best'}
Accuracy: 1.00
Confusion Matrix:
[[19  0  0]
 [ 0 13  0]
 [ 0  0 13]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        13
           2       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45



In [12]:
# 20-fold stratified cross-validation of the selected tree on the complete
# dataset; the per-fold scores are shown as the cell's result.
# NOTE(review): X also contains the rows used to select best_model's
# hyper-parameters, so these scores are presumably optimistic — confirm.
stra = StratifiedKFold(n_splits=20)
fold_scores = cross_val_score(best_model, X, y, cv=stra)
fold_scores

array([1.        , 0.875     , 0.875     , 0.875     , 1.        ,
       1.        , 0.875     , 1.        , 1.        , 1.        ,
       0.85714286, 0.85714286, 0.85714286, 1.        , 0.85714286,
       1.        , 1.        , 1.        , 1.        , 1.        ])