## Problem :  Decision Tree Classification
---
Build a classification model based on Decision Trees to classify the [Iris Dataset](https://archive.ics.uci.edu/ml/datasets/iris). Split the data into a 70% training set and a 30% testing set, and report the training and testing accuracy.

In [11]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

In [18]:
def load_preprocess_data():
    """Load the Iris dataset and return it as a feature table and label series.

    Returns:
        A ``(X, y)`` tuple where ``X`` is a ``pandas.DataFrame`` whose columns
        are the dataset's feature names, and ``y`` is a ``pandas.Series``
        named ``'target'`` holding the label values supplied by ``load_iris``.
    """
    dataset = load_iris()

    # Build the features and labels directly rather than adding a 'target'
    # column to one frame and splitting it off again afterwards.
    features = pd.DataFrame(data=dataset.data, columns=dataset.feature_names)
    labels = pd.Series(dataset.target, name='target')
    return features, labels

In [15]:

def train_evaluate_model(X_train, X_test, y_train, y_test):
    """Tune a decision tree with a grid search and score it on both splits.

    A ``DecisionTreeClassifier`` (fixed ``random_state=42``) is tuned over
    ``max_depth``, ``min_samples_split`` and ``min_samples_leaf`` using
    5-fold ``GridSearchCV`` fitted on the training split only. The search's
    ``best_estimator_`` is then used to predict on both splits.

    Args:
        X_train: Training features.
        X_test: Held-out test features.
        y_train: Training labels.
        y_test: Held-out test labels.

    Returns:
        A ``(training_accuracy, testing_accuracy)`` tuple as computed by
        ``accuracy_score``.
    """
    # Candidate hyperparameter values explored exhaustively by the search.
    search_space = dict(
        max_depth=[None, 5, 10, 15],
        min_samples_split=[2, 5, 10],
        min_samples_leaf=[1, 2, 4],
    )

    tuner = GridSearchCV(DecisionTreeClassifier(random_state=42), search_space, cv=5)
    tuner.fit(X_train, y_train)

    # The test split is never seen by the search; it is only scored here.
    tuned_tree = tuner.best_estimator_
    training_accuracy = accuracy_score(y_train, tuned_tree.predict(X_train))
    testing_accuracy = accuracy_score(y_test, tuned_tree.predict(X_test))

    return training_accuracy, testing_accuracy


In [16]:
def main():
    """Run the full pipeline: load data, split 70/30, tune, and report accuracy.

    Prints the training and testing accuracy to standard output and returns
    nothing.
    """
    features, labels = load_preprocess_data()

    # train_test_split returns [X_train, X_test, y_train, y_test], which is
    # exactly the positional order train_evaluate_model expects.
    partitions = train_test_split(features, labels, test_size=0.3, random_state=42)
    train_acc, test_acc = train_evaluate_model(*partitions)

    print("Training Accuracy:", train_acc)
    print("Testing Accuracy:", test_acc)


In [17]:
# Entry point: run the pipeline only when this module is executed directly
# (as a script or notebook cell), not when it is imported.
if __name__ == "__main__":
    main()

Training Accuracy: 0.9523809523809523
Testing Accuracy: 1.0
