### Import Libraries

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [3]:
# Load the iris bunch and build one DataFrame: a column per feature name,
# plus a 'target' column holding the class labels.
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(target=iris.target)

### Split Dataset

In [4]:
# Features: the first four columns of df. Labels: the 'target' column.
X = df[df.columns[:4]]
Y = df.loc[:, 'target']

In [5]:
# First split: 70% training, 30% held out (XNone / YNone).
XTrain, XNone, YTrain, YNone = train_test_split(
    X,
    Y,
    test_size=0.3,
    shuffle=True,
    random_state=42,
)
# Second split: the held-out 30% is halved into validation and test sets.
XVal, XTest, YVal, YTest = train_test_split(
    XNone,
    YNone,
    test_size=0.5,
    shuffle=True,
    random_state=42,
)

In [6]:
# Report the number of rows in each split.
print(f"Training size: {len(XTrain)}")
print(f"Validation size: {len(XVal)}")
print(f"Test size: {len(XTest)}")

Training size: 105
Validation size: 22
Test size: 23


### Decision Tree

In [7]:
# Hyper-parameter grid for the decision tree (2 * 4 * 3 * 3 = 72 candidates).
DecisionTreeParameters = {
    'criterion': ['entropy','gini'],
    'max_depth': [1, 3, 5, 10],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}
# Exhaustive 3-fold cross-validated search, fitted on the training split only.
DecisionTreeClassification = GridSearchCV(DecisionTreeClassifier(random_state=42), DecisionTreeParameters, cv=3)
DecisionTreeClassification.fit(XTrain, YTrain)
print("Best parameters for Decision Tree is:", DecisionTreeClassification.best_params_)
# best_score_ is the mean cross-validated score of the best candidate,
# not the accuracy on the training set, so it is labelled accordingly.
print("Mean cross-validation accuracy is:", DecisionTreeClassification.best_score_)

Best parameters for Decision Tree is: {'criterion': 'entropy', 'max_depth': 3, 'min_samples_leaf': 1, 'min_samples_split': 2}
Training accuracy is: 0.9333333333333332


In [8]:
# The search fitted in the previous cell is reused here. Building and fitting an
# identical GridSearchCV again (same grid, cv=3, random_state=42) would only
# repeat the work and produce the same result.
print("Best parameters for Decision Tree is:", DecisionTreeClassification.best_params_)
# best_score_ is the mean cross-validated score of the best candidate,
# not the accuracy on the training set, so it is labelled accordingly.
print("Mean cross-validation accuracy is:", DecisionTreeClassification.best_score_)

Best parameters for Decision Tree is: {'criterion': 'entropy', 'max_depth': 3, 'min_samples_leaf': 1, 'min_samples_split': 2}
Training accuracy is: 0.9333333333333332


In [10]:
# Summary of the fitted decision-tree grid search.
print("Best parameters for Decision Tree is:", DecisionTreeClassification.best_params_)
# best_score_ is the mean cross-validated score of the best candidate,
# not the accuracy on the training set, so it is labelled accordingly.
print("Mean cross-validation accuracy is:", DecisionTreeClassification.best_score_)

Best parameters for Decision Tree is: {'criterion': 'entropy', 'max_depth': 3, 'min_samples_leaf': 1, 'min_samples_split': 2}
Training accuracy is: 0.9333333333333332


### Evaluate Decision Tree on Test Set

In [11]:
# Predict the test labels with the fitted decision-tree search (raw, unscaled
# features, matching how it was fitted) and print the per-class metrics.
YTestPrediction = DecisionTreeClassification.predict(XTest)
print(classification_report(y_true=YTest, y_pred=YTestPrediction))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         6
           1       1.00      0.90      0.95        10
           2       0.88      1.00      0.93         7

    accuracy                           0.96        23
   macro avg       0.96      0.97      0.96        23
weighted avg       0.96      0.96      0.96        23



### Standardize Data for Logistic Regression

In [12]:
# Fit the scaler on the training split only, then apply that same fitted
# transform to the training, validation and test splits.
StandardizeData = StandardScaler().fit(XTrain)
XTrainScaled, XValScaled, XTestScaled = (
    StandardizeData.transform(split) for split in (XTrain, XVal, XTest)
)

### Logistic Regression

In [13]:
# Hyper-parameter grid for logistic regression: five regularisation strengths
# crossed with two penalties, all using the liblinear solver.
LogisticRegressionParameters = dict(
    C=[0.01, 0.1, 1, 10, 100],
    penalty=['l1', 'l2'],
    solver=['liblinear'],
)

In [14]:
# 3-fold cross-validated grid search over the logistic-regression grid,
# fitted on the standardized training features.
LogisticRegressionClassificcation = GridSearchCV(
    estimator=LogisticRegression(max_iter=1000, random_state=42),
    param_grid=LogisticRegressionParameters,
    cv=3,
)
LogisticRegressionClassificcation.fit(XTrainScaled, YTrain)

In [15]:
# Summary of the fitted logistic-regression grid search.
print("Best parameters for Logistic Regression is:", LogisticRegressionClassificcation.best_params_)
# best_score_ is the mean cross-validated score of the best candidate,
# not the accuracy on the training set, so it is labelled accordingly.
print("Mean cross-validation accuracy is:", LogisticRegressionClassificcation.best_score_)

Best parameters for Logistic Regression is: {'C': 10, 'penalty': 'l1', 'solver': 'liblinear'}
Training accuracy is: 0.9333333333333332


### Evaluate Logistic Regression on Test Set

In [16]:
# Predict the test labels with the fitted logistic-regression search
# (standardized features, matching how it was fitted) and print the metrics.
YTestPredictionLogisticRegression = LogisticRegressionClassificcation.predict(XTestScaled)
print(classification_report(y_true=YTest, y_pred=YTestPredictionLogisticRegression))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         6
           1       1.00      1.00      1.00        10
           2       1.00      1.00      1.00         7

    accuracy                           1.00        23
   macro avg       1.00      1.00      1.00        23
weighted avg       1.00      1.00      1.00        23



In [17]:
# Compute both sets of test predictions up front: the tree sees the raw
# features, the logistic regression sees the standardized ones.
YTestPredictionDecisionTree = DecisionTreeClassification.predict(XTest)
YTestPredictionLogisticRegression = LogisticRegressionClassificcation.predict(XTestScaled)

# Decision tree: per-class metrics followed by the confusion matrix.
print("Decision Tree:")
print(classification_report(y_true=YTest, y_pred=YTestPredictionDecisionTree))
print(
    "Confusion Matrix for Decision Tree:\n",
    confusion_matrix(y_true=YTest, y_pred=YTestPredictionDecisionTree),
)

# Logistic regression: same two summaries.
print("Logistic Regression:")
print(classification_report(y_true=YTest, y_pred=YTestPredictionLogisticRegression))
print(
    "Confusion Matrix for Logistic Regression:\n",
    confusion_matrix(y_true=YTest, y_pred=YTestPredictionLogisticRegression),
)

Decision Tree:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         6
           1       1.00      0.90      0.95        10
           2       0.88      1.00      0.93         7

    accuracy                           0.96        23
   macro avg       0.96      0.97      0.96        23
weighted avg       0.96      0.96      0.96        23

Confusion Matrix for Decision Tree:
 [[6 0 0]
 [0 9 1]
 [0 0 7]]
Logistic Regression:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         6
           1       1.00      1.00      1.00        10
           2       1.00      1.00      1.00         7

    accuracy                           1.00        23
   macro avg       1.00      1.00      1.00        23
weighted avg       1.00      1.00      1.00        23

Confusion Matrix for Logistic Regression:
 [[ 6  0  0]
 [ 0 10  0]
 [ 0  0  7]]


### Comparison and Analysis

In [18]:
# Overall test-set accuracy of each model, printed to two decimal places.
AccuracyDesicionTree = accuracy_score(
    y_true=YTest, y_pred=YTestPredictionDecisionTree
)
AccuracyLogisticRegression = accuracy_score(
    y_true=YTest, y_pred=YTestPredictionLogisticRegression
)

print(f"Accuracy of Decision Tree: {AccuracyDesicionTree:.2f}")
print(f"Accuracy of Logistic Regression: {AccuracyLogisticRegression:.2f}")

Accuracy of Decision Tree: 0.96
Accuracy of Logistic Regression: 1.00
