In [72]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler 
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


In [41]:
df = pd.read_csv("iris.csv")

In [42]:
df

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...,...
145,146,6.7,3.0,5.2,2.3,Iris-virginica
146,147,6.3,2.5,5.0,1.9,Iris-virginica
147,148,6.5,3.0,5.2,2.0,Iris-virginica
148,149,6.2,3.4,5.4,2.3,Iris-virginica


In [43]:
df.describe()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
count,150.0,150.0,150.0,150.0,150.0
mean,75.5,5.843333,3.054,3.758667,1.198667
std,43.445368,0.828066,0.433594,1.76442,0.763161
min,1.0,4.3,2.0,1.0,0.1
25%,38.25,5.1,2.8,1.6,0.3
50%,75.5,5.8,3.0,4.35,1.3
75%,112.75,6.4,3.3,5.1,1.8
max,150.0,7.9,4.4,6.9,2.5


In [44]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             150 non-null    int64  
 1   SepalLengthCm  150 non-null    float64
 2   SepalWidthCm   150 non-null    float64
 3   PetalLengthCm  150 non-null    float64
 4   PetalWidthCm   150 non-null    float64
 5   Species        150 non-null    object 
dtypes: float64(4), int64(1), object(1)
memory usage: 7.2+ KB


In [45]:
X = df.drop(["Species" ], axis = 1)
y = df["Species"]

In [46]:
y.unique()

array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)

# One hot encoding

In [47]:
one_hot = pd.get_dummies(df["Species"], prefix = "is")
data = pd.concat([df, one_hot], axis = 1)
data = data.drop("Species", axis = 1)

In [48]:
data

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,is_Iris-setosa,is_Iris-versicolor,is_Iris-virginica
0,1,5.1,3.5,1.4,0.2,1,0,0
1,2,4.9,3.0,1.4,0.2,1,0,0
2,3,4.7,3.2,1.3,0.2,1,0,0
3,4,4.6,3.1,1.5,0.2,1,0,0
4,5,5.0,3.6,1.4,0.2,1,0,0
...,...,...,...,...,...,...,...,...
145,146,6.7,3.0,5.2,2.3,0,0,1
146,147,6.3,2.5,5.0,1.9,0,0,1
147,148,6.5,3.0,5.2,2.0,0,0,1
148,149,6.2,3.4,5.4,2.3,0,0,1


In [52]:
X = data.drop(["is_Iris-setosa", "is_Iris-versicolor", "is_Iris-virginica"], axis = 1)
y_versicolor = data['is_Iris-versicolor']
y_setosa = data['is_Iris-setosa']
y_virginica = data['is_Iris-virginica']


In [56]:
X_train, X_test, y_versicolor_train, y_versicolor_test = train_test_split(X, y_versicolor, test_size=0.2, random_state=42)
_, _, y_setosa_train, y_setosa_test = train_test_split(X, y_setosa, test_size=0.2, random_state=42)
_, _, y_virginica_train, y_virginica_test = train_test_split(X, y_virginica, test_size=0.2, random_state=42)


# Train Logistic Regression (Built-In)

In [66]:
model_versicolor = LogisticRegression()
model_setosa = LogisticRegression()
model_virginica = LogisticRegression()

In [68]:
model_versicolor.fit(X_train, y_versicolor_train) 
model_setosa.fit(X_train, y_setosa_train) 
model_virginica.fit(X_train, y_virginica_train) 

In [69]:
y_versicolor_pred = model_versicolor.predict(X_test)
y_setosa_pred = model_setosa.predict(X_test)
y_virginica_pred = model_virginica.predict(X_test)

In [70]:
def evaluate_model(y_true, y_pred, class_name):
    print(f"Evaluation Metrics for {class_name}:")
    print(f"Accuracy: {accuracy_score(y_true, y_pred):.2f}")
    print(f"Precision: {precision_score(y_true, y_pred):.2f}")
    print(f"Recall: {recall_score(y_true, y_pred):.2f}")
    print(f"F1-Score: {f1_score(y_true, y_pred):.2f}")
    print("-" * 30)

    

In [73]:
evaluate_model(y_versicolor_test, y_versicolor_pred, "Versicolor")
evaluate_model(y_setosa_test, y_setosa_pred, "Setosa")
evaluate_model(y_virginica_test, y_virginica_pred, "Virginica")

Evaluation Metrics for Versicolor:
Accuracy: 0.83
Precision: 0.70
Recall: 0.78
F1-Score: 0.74
------------------------------
Evaluation Metrics for Setosa:
Accuracy: 1.00
Precision: 1.00
Recall: 1.00
F1-Score: 1.00
------------------------------
Evaluation Metrics for Virginica:
Accuracy: 1.00
Precision: 1.00
Recall: 1.00
F1-Score: 1.00
------------------------------


In [74]:
probs_versicolor = model_versicolor.predict_proba(X_test)[:, 1]
probs_setosa = model_setosa.predict_proba(X_test)[:, 1]
probs_virginica = model_virginica.predict_proba(X_test)[:, 1]


In [75]:
combined_probs = pd.DataFrame({
    'versicolor': probs_versicolor,
    'setosa': probs_setosa,
    'virginica': probs_virginica
}) 

In [76]:
multi_class_predictions = combined_probs.idxmax(axis=1)

print("\nMulti-class Predictions:")
print(multi_class_predictions.head())


Multi-class Predictions:
0    versicolor
1        setosa
2     virginica
3    versicolor
4    versicolor
dtype: object
