**🌸 Iris Flower Classification**

---


(Using Iris.csv + Plotly Figures)

This notebook assumes `Iris.csv` contains the following columns (an optional `Id` column is dropped before modelling):
`SepalLengthCm`, `SepalWidthCm`, `PetalLengthCm`, `PetalWidthCm`, `Species`

In [109]:
import os

# Folder that collects the interactive Plotly figures exported as HTML.
# exist_ok=True turns a re-run of this cell into a no-op once the folder exists.
os.makedirs(name="figures_html", exist_ok=True)

In [110]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

import plotly.express as px
import plotly.graph_objects as go

In [111]:
# Locations to look for the dataset, in priority order: the Colab upload
# folder used originally, then a copy sitting next to the notebook so the
# cell also runs outside Colab.
iris_csv_candidates = ["/content/Iris.csv", "Iris.csv"]

# Pick the first candidate that exists. If none does, fall back to the
# original path so pandas raises the same FileNotFoundError as before.
iris_csv_path = next(
    (path for path in iris_csv_candidates if os.path.exists(path)),
    iris_csv_candidates[0],
)

df = pd.read_csv(iris_csv_path)
df.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [112]:
# Remove the Id column when the CSV provides one, so only the measurement
# columns and Species remain. The drop returns a new frame instead of mutating
# in place, and errors="ignore" makes it a no-op when "Id" is absent, so the
# cell can be re-run safely.
df = df.drop(columns=["Id"], errors="ignore")

# Column dtypes / non-null counts (printed), then summary statistics of the
# numeric columns (displayed as the cell output).
df.info()
df.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   SepalLengthCm  150 non-null    float64
 1   SepalWidthCm   150 non-null    float64
 2   PetalLengthCm  150 non-null    float64
 3   PetalWidthCm   150 non-null    float64
 4   Species        150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
count,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667
std,0.828066,0.433594,1.76442,0.763161
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [113]:
# Feature matrix: every column of df except the target label.
X = df.drop(columns="Species")

# Encode the species names as integer class ids. The fitted encoder is kept
# in `le` so the original names stay available through `le.classes_`.
le = LabelEncoder().fit(df["Species"])
y = le.transform(df["Species"])

In [114]:
# Pairwise scatter plots of the measurement columns, coloured by species.
# The slice takes every column except the last one, i.e. it relies on
# "Species" being the final column of df.
feature_columns = df.columns[:-1]

fig_scatter = px.scatter_matrix(
    data_frame=df,
    dimensions=feature_columns,
    color="Species",
    title="Iris Feature Scatter Matrix",
)

# Render inline, then export a standalone interactive copy.
fig_scatter.show()
fig_scatter.write_html("figures_html/scatter_matrix.html")

In [115]:
# Hold out 20% of the rows for testing; stratifying on y keeps the class
# proportions the same in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Candidate classifiers, keyed by the display name used in the results table.
models = {
    "Logistic Regression": LogisticRegression(max_iter=500),
    "KNN": KNeighborsClassifier(n_neighbors=5),
    "SVM": SVC(kernel="rbf"),
    "Random Forest": RandomForestClassifier(n_estimators=200, random_state=42),
}

# Fit every candidate on the training split and record its test accuracy.
results = []
for model_name, estimator in models.items():
    y_pred = estimator.fit(X_train, y_train).predict(X_test)
    results.append(
        {"Model": model_name, "Accuracy": accuracy_score(y_test, y_pred)}
    )

results_df = pd.DataFrame(results)
results_df

Unnamed: 0,Model,Accuracy
0,Logistic Regression,0.933333
1,KNN,0.933333
2,SVM,0.966667
3,Random Forest,0.9


In [116]:
# Bar chart comparing the test accuracy of every trained model.
fig_accuracy = px.bar(
    results_df,
    x="Model",
    y="Accuracy",
    color="Model",
    title="Model Accuracy Comparison",
    text="Accuracy"
)

fig_accuracy.update_traces(texttemplate="%{text:.2f}", textposition="outside")

# Zoom the y-axis to the interesting region, but derive the lower bound from
# the data: a fixed floor of 0.9 gives a model scoring exactly 0.9 a
# zero-height bar and hides any model scoring below it. The upper bound sits
# slightly above 1.0 so the "outside" text labels are not clipped.
# NOTE: the axis does not start at zero, so bar heights exaggerate the
# differences between models; read the printed values.
accuracy_floor = max(0.0, float(results_df["Accuracy"].min()) - 0.05)
fig_accuracy.update_layout(yaxis_range=[accuracy_floor, 1.05])

fig_accuracy.show()

fig_accuracy.write_html("figures_html/model_accuracy.html")

In [117]:
# Rank the models by test accuracy (highest first) and pick the top entry.
# NOTE: the winner is chosen and then evaluated on the same test split, so the
# reported metrics are optimistic for the selected model.
ranked_results = results_df.sort_values(by="Accuracy", ascending=False)
best_model_name = ranked_results["Model"].iloc[0]
best_model = models[best_model_name]

# Predictions of the selected model on the held-out split.
y_pred = best_model.predict(X_test)

print("Best Model:", best_model_name)
print("\nClassification Report:\n")
# target_names maps the encoded class ids back to the species names.
best_model_report = classification_report(
    y_test,
    y_pred,
    target_names=le.classes_,
)
print(best_model_report)

Best Model: SVM

Classification Report:

                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      0.90      0.95        10
 Iris-virginica       0.91      1.00      0.95        10

       accuracy                           0.97        30
      macro avg       0.97      0.97      0.97        30
   weighted avg       0.97      0.97      0.97        30



In [118]:
# Confusion matrix of the selected model on the held-out split.
# The predictions are recomputed here so the matrix always belongs to
# `best_model`, regardless of what `y_pred` last held. Passing `labels` pins
# the matrix to one row/column per encoded class, in the same order as
# `le.classes_`, even if a class were missing from the test split.
class_ids = np.arange(len(le.classes_))
cm = confusion_matrix(y_test, best_model.predict(X_test), labels=class_ids)

# scikit-learn puts the true classes on the rows and the predicted classes on
# the columns, hence y = "Actual" and x = "Predicted" below.
fig_cm = px.imshow(
    cm,
    text_auto=True,
    color_continuous_scale="Blues",
    x=le.classes_,
    y=le.classes_,
    title=f"Confusion Matrix ({best_model_name})"
)

fig_cm.update_layout(
    xaxis_title="Predicted",
    yaxis_title="Actual"
)

fig_cm.show()

fig_cm.write_html("figures_html/confusion_matrix.html")

In [119]:
from sklearn.inspection import permutation_importance

# Permutation importance of each feature for the selected model, measured on
# the held-out split with 10 shuffles per feature.
perm_result = permutation_importance(
    estimator=best_model,
    X=X_test,
    y=y_test,
    n_repeats=10,
    random_state=42,
    n_jobs=-1,
)

# One row per feature with its mean importance over the repeats, most
# important first. Feature names come from the unscaled feature frame X.
feat_df = pd.DataFrame(
    {"Feature": X.columns, "Importance": perm_result.importances_mean}
)
feat_df = feat_df.sort_values(by="Importance", ascending=False)

fig_perm = px.bar(
    data_frame=feat_df,
    x="Importance",
    y="Feature",
    orientation="h",
    title=f"Permutation Feature Importance ({best_model_name})",
)

# Render inline, then export a standalone interactive copy.
fig_perm.show()
fig_perm.write_html("figures_html/permutation_feature_importance.html")

## Conclusion
- Iris flower species were classified using a custom CSV dataset.
- Multiple classification models were trained and compared.
- Plotly was used for interactive visualization and evaluation.
- The best-performing model (SVM) achieved about 97% accuracy on the 30-sample held-out test set.
