In [15]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV,StratifiedKFold,cross_val_score,train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# ---------------------------------------------------------------------------
# Data: load the Iris dataset and separate features from the target labels.
# ---------------------------------------------------------------------------
data = load_iris()
X, y = data.data, data.target  # X: feature matrix, y: target variable

# Hold out 30% of the samples for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# ---------------------------------------------------------------------------
# Model: build the decision tree with every hyperparameter spelled out, then
# fit it on the training split (`fit` returns the fitted estimator itself).
# ---------------------------------------------------------------------------
model = DecisionTreeClassifier(
    # Split selection
    criterion='entropy',           # measure split quality by information gain
    splitter='random',             # choose the best among randomly drawn splits
    max_features='sqrt',           # look at sqrt(n_features) features per split
    # Size limits
    max_depth=10,                  # tree may be at most 10 levels deep
    max_leaf_nodes=20,             # tree may have at most 20 leaves
    # Pre-pruning thresholds
    min_samples_split=4,           # a node needs >= 4 samples to be split
    min_samples_leaf=2,            # every leaf keeps >= 2 samples
    min_weight_fraction_leaf=0.0,  # no minimum weighted fraction per leaf
    min_impurity_decrease=0.01,    # split only if impurity drops by >= 0.01
    # Class handling and reproducibility
    class_weight='balanced',       # reweight classes to balance them
    random_state=42,               # fixed seed for reproducible trees
).fit(X_train, y_train)

# ---------------------------------------------------------------------------
# Evaluation: predict on the held-out split and report the usual metrics.
# ---------------------------------------------------------------------------
y_pred = model.predict(X_test)

print(f'Accuracy: {accuracy_score(y_test, y_pred):.2f}')
# `sep="\n"` puts each heading on its own line, directly above its table.
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")


Accuracy: 0.93
Confusion Matrix:
[[19  0  0]
 [ 0 12  1]
 [ 0  2 11]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       0.86      0.92      0.89        13
           2       0.92      0.85      0.88        13

    accuracy                           0.93        45
   macro avg       0.92      0.92      0.92        45
weighted avg       0.93      0.93      0.93        45



In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV,StratifiedKFold,cross_val_score,train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# NOTE(review): this cell repeats the load / split / train / evaluate steps of
# the preceding cell — consider keeping only one of the two.

# Load the Iris dataset
data = load_iris()
X = data.data    # feature matrix
y = data.target  # target variable (class labels)

# 70/30 train/test split with a fixed seed so the partition is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.3
)

# Decision tree hyperparameters, collected in one place for easy reading
tree_params = {
    'criterion': 'entropy',           # information gain as the split measure
    'splitter': 'random',             # random strategy for choosing splits
    'max_depth': 10,                  # cap the depth of the tree at 10
    'min_samples_split': 4,           # at least 4 samples needed to split a node
    'min_samples_leaf': 2,            # at least 2 samples in every leaf
    'min_weight_fraction_leaf': 0.0,  # no minimum weight fraction for leaves
    'max_features': 'sqrt',           # sqrt(number of features) per split
    'random_state': 42,               # reproducibility
    'max_leaf_nodes': 20,             # cap the number of leaves at 20
    'min_impurity_decrease': 0.01,    # split only if impurity decrease >= 0.01
    'class_weight': 'balanced',       # adjust class weights to balance classes
}

# Create the model, then train it on the training split
model = DecisionTreeClassifier(**tree_params)
model.fit(X_train, y_train)

# Predict labels for the held-out test split
y_pred = model.predict(X_test)

# Report accuracy, the confusion matrix and the per-class metrics
print(f'Accuracy: {accuracy_score(y_test, y_pred):.2f}')

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("Classification Report:")
print(classification_report(y_test, y_pred))


Accuracy: 0.93
Confusion Matrix:
[[19  0  0]
 [ 0 12  1]
 [ 0  2 11]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       0.86      0.92      0.89        13
           2       0.92      0.85      0.88        13

    accuracy                           0.93        45
   macro avg       0.92      0.92      0.92        45
weighted avg       0.93      0.93      0.93        45



In [13]:
# 20-fold stratified cross-validation of the decision tree on the full dataset.
# NOTE(review): `model`, `X` and `y` are not defined in this cell; they must
# already exist from an earlier cell before this one is run.
stra = StratifiedKFold(n_splits=20, shuffle=False)  # shuffle=False is the default
# Last expression of the cell: the array of per-fold scores is displayed.
cross_val_score(estimator=model, X=X, y=y, cv=stra)

array([1.        , 0.875     , 0.875     , 1.        , 1.        ,
       1.        , 1.        , 1.        , 0.875     , 1.        ,
       0.85714286, 1.        , 0.85714286, 1.        , 0.71428571,
       1.        , 0.85714286, 1.        , 1.        , 1.        ])