In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [2]:
# Fetch the Iris dataset bundled with scikit-learn and pull out the
# feature matrix (X) and the class labels (y) used by the models below.
iris = load_iris()
X, y = iris.data, iris.target

# Tabular copy of the same data for easier inspection: one column per
# feature (named after iris.feature_names) plus the label as 'species'.
df = pd.DataFrame(X, columns=iris.feature_names).assign(species=y)
print(df.head())

   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                5.1               3.5                1.4               0.2   
1                4.9               3.0                1.4               0.2   
2                4.7               3.2                1.3               0.2   
3                4.6               3.1                1.5               0.2   
4                5.0               3.6                1.4               0.2   

   species  
0        0  
1        0  
2        0  
3        0  
4        0  


In [3]:
# Show the whole table. pandas abbreviates a long frame on its own (first and
# last rows with an ellipsis in between), so no explicit row limit is needed;
# the former head(500) asked for more rows than the 150 the frame holds and
# therefore selected every row anyway.
print(df)

     sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                  5.1               3.5                1.4               0.2   
1                  4.9               3.0                1.4               0.2   
2                  4.7               3.2                1.3               0.2   
3                  4.6               3.1                1.5               0.2   
4                  5.0               3.6                1.4               0.2   
..                 ...               ...                ...               ...   
145                6.7               3.0                5.2               2.3   
146                6.3               2.5                5.0               1.9   
147                6.5               3.0                5.2               2.0   
148                6.2               3.4                5.4               2.3   
149                5.9               3.0                5.1               1.8   

     species  
0          0

In [4]:
# Write the feature/label table to a CSV file (path is relative to the
# current working directory). index=False leaves the pandas row index out,
# so the file holds only the data columns.
df.to_csv(path_or_buf='iris_data.csv', index=False)

In [5]:

# Hold out 40% of the samples for testing. random_state pins the shuffle so
# the same rows land in each partition on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=42,
)

# Disabled 3-tree forest variant; the active classifier is created further
# down with n_estimators=100.
# rf_classifier = RandomForestClassifier(n_estimators=3, random_state=42)

In [6]:

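# NOTE: disabled single-model workflow, kept for reference only. Training and
# prediction are done by the cell below, which fits both rf_classifier and
# dt_classifier.
# # Train the model
# rf_classifier.fit(X_train, y_train)

# # Make predictions on the test set
# y_pred = rf_classifier.predict(X_test)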

In [7]:

# Build and fit the two models on the same training split:
#   - a 100-tree random forest
#   - a single decision tree as the baseline to compare against
# Both receive random_state=42 so repeated runs give the same fitted models.
# fit() returns the estimator itself, so construction and training are chained.
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
dt_classifier = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Predicted labels for the held-out samples, one array per model
y_pred_rf = rf_classifier.predict(X_test)
y_pred_dt = dt_classifier.predict(X_test)



In [8]:
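# NOTE: disabled single-model evaluation, kept for reference only. It reads
# `y_pred`, whose assignment is itself commented out in an earlier cell, so
# these lines cannot be re-enabled on their own. The next cell reports the
# same metrics separately for y_pred_rf and y_pred_dt.
# # Evaluate the model
# accuracy = accuracy_score(y_test, y_pred)
# print(f"Accuracy: {accuracy:.2f}")

# # Classification report
# print("\nClassification Report:")
# print(classification_report(y_test, y_pred, target_names=iris.target_names))

# # Confusion matrix
# print("\nConfusion Matrix:")
# print(confusion_matrix(y_test, y_pred))

# # Feature importance
# print("\nFeature Importances:")
# for feature, importance in zip(iris.feature_names, rf_classifier.feature_importances_):
#     print(f"{feature}: {importance:.4f}")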

In [9]:
def _print_metrics(title, y_true, y_pred, class_names):
    """Print accuracy, classification report and confusion matrix for one model.

    Args:
        title: Heading line, printed verbatim before the metrics.
        y_true: Ground-truth labels of the evaluated samples.
        y_pred: Labels predicted by the model for the same samples.
        class_names: Display names forwarded to ``classification_report``
            as ``target_names``.
    """
    print(title)
    print(f"Accuracy: {accuracy_score(y_true, y_pred):.2f}")
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred, target_names=class_names))
    print("\nConfusion Matrix:")
    print(confusion_matrix(y_true, y_pred))


def _print_feature_importances(title, feature_names, importances):
    """Print a heading followed by one ``name: importance`` line per feature.

    Args:
        title: Heading line, printed verbatim.
        feature_names: Feature labels, paired positionally with ``importances``.
        importances: Numeric importance per feature, shown with 4 decimals.
    """
    print(title)
    for feature, importance in zip(feature_names, importances):
        print(f"{feature}: {importance:.4f}")


# Evaluate both models against the same held-out labels. The second heading
# carries a leading newline so a blank line separates the two reports.
_print_metrics("Random Forest Results:", y_test, y_pred_rf, iris.target_names)
_print_metrics("\nDecision Tree Results:", y_test, y_pred_dt, iris.target_names)

# Feature importance comparison, printed after both metric reports
_print_feature_importances(
    "\nRandom Forest Feature Importances:",
    iris.feature_names,
    rf_classifier.feature_importances_,
)
_print_feature_importances(
    "\nDecision Tree Feature Importances:",
    iris.feature_names,
    dt_classifier.feature_importances_,
)

Random Forest Results:
Accuracy: 0.98

Classification Report:
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        23
  versicolor       0.95      1.00      0.97        19
   virginica       1.00      0.94      0.97        18

    accuracy                           0.98        60
   macro avg       0.98      0.98      0.98        60
weighted avg       0.98      0.98      0.98        60


Confusion Matrix:
[[23  0  0]
 [ 0 19  0]
 [ 0  1 17]]

Decision Tree Results:
Accuracy: 0.97

Classification Report:
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        23
  versicolor       0.95      0.95      0.95        19
   virginica       0.94      0.94      0.94        18

    accuracy                           0.97        60
   macro avg       0.96      0.96      0.96        60
weighted avg       0.97      0.97      0.97        60


Confusion Matrix:
[[23  0  0]
 [ 0 18  1]
 [ 0  1 17]]

