In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler 
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


In [2]:
# Load the Iris measurements from a CSV file expected in the working directory.
df = pd.read_csv("iris.csv")

In [9]:
# Remove the "Id" column so it does not end up among the features.
# Rebinding the result (instead of inplace=True) together with errors="ignore"
# keeps this cell idempotent: re-running it after the column is already gone
# no longer raises a KeyError.
df = df.drop(columns="Id", errors="ignore")

In [10]:
# Summary statistics (count, mean, std, quartiles) for the numeric columns.
df.describe()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
count,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667
std,0.828066,0.433594,1.76442,0.763161
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [11]:
# Column dtypes and non-null counts, to check for missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   SepalLengthCm  150 non-null    float64
 1   SepalWidthCm   150 non-null    float64
 2   PetalLengthCm  150 non-null    float64
 3   PetalWidthCm   150 non-null    float64
 4   Species        150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [12]:
# Separate the measurement columns (features) from the species label.
X = df.drop(columns=["Species"])
y = df.loc[:, "Species"]

In [13]:
# Distinct class labels present in the target column.
y.unique()

array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)

# One-hot encoding

In [14]:
# Build one indicator column per species, named "is_<species>".
# dtype=int pins the indicators to 0/1 integers. Without it the dtype depends
# on the installed pandas version (boolean from pandas 2.0 onward), which
# would not match the 0/1 values displayed for `data`.
one_hot = pd.get_dummies(df["Species"], prefix="is", dtype=int)

# Append the indicators to the measurements and drop the original text label.
data = pd.concat([df, one_hot], axis=1).drop(columns="Species")

In [15]:
# Inspect the frame after one-hot encoding the species label.
data

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,is_Iris-setosa,is_Iris-versicolor,is_Iris-virginica
0,5.1,3.5,1.4,0.2,1,0,0
1,4.9,3.0,1.4,0.2,1,0,0
2,4.7,3.2,1.3,0.2,1,0,0
3,4.6,3.1,1.5,0.2,1,0,0
4,5.0,3.6,1.4,0.2,1,0,0
...,...,...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,0,0,1
146,6.3,2.5,5.0,1.9,0,0,1
147,6.5,3.0,5.2,2.0,0,0,1
148,6.2,3.4,5.4,2.3,0,0,1


In [None]:
# Features: every column that is not a one-hot species indicator.
X = data.drop(columns=["is_Iris-setosa", "is_Iris-versicolor", "is_Iris-virginica"])

# One binary target per species, used for the one-vs-rest classifiers.
y_versicolor = data.loc[:, "is_Iris-versicolor"]
y_setosa = data.loc[:, "is_Iris-setosa"]
y_virginica = data.loc[:, "is_Iris-virginica"]


In [None]:
# Split the features and all three binary targets in ONE call, so every target
# is guaranteed to share exactly the same train/test rows as X (instead of
# relying on three separate calls happening to use the same random_state).
# This also avoids assigning to `_`, which in IPython/Jupyter shadows the
# built-in "last output" variable.
(
    X_train, X_test,
    y_versicolor_train, y_versicolor_test,
    y_setosa_train, y_setosa_test,
    y_virginica_train, y_virginica_test,
) = train_test_split(
    X, y_versicolor, y_setosa, y_virginica, test_size=0.2, random_state=42
)


# Train Logistic Regression (Built-In)

In [None]:
# Three independent classifiers, one per species, all with default settings.
model_versicolor, model_setosa, model_virginica = (
    LogisticRegression() for _ in range(3)
)

In [None]:
# Fit each classifier on the shared training features against its own
# binary species target.
model_versicolor.fit(X_train, y_versicolor_train) 
model_setosa.fit(X_train, y_setosa_train) 
model_virginica.fit(X_train, y_virginica_train) 

In [None]:
# Predicted labels on the held-out features, one array per classifier.
y_versicolor_pred, y_setosa_pred, y_virginica_pred = (
    classifier.predict(X_test)
    for classifier in (model_versicolor, model_setosa, model_virginica)
)

In [None]:
def evaluate_model(y_true, y_pred, class_name):
    """Print accuracy, precision, recall and F1 for one set of predictions.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, passed to the scorers alongside ``y_true``.
        class_name: Name shown in the report header.

    Returns:
        None. The report is written to standard output, with every score
        formatted to two decimal places and followed by a 30-dash rule.
    """
    print(f"Evaluation Metrics for {class_name}:")

    # Each score is computed right before its line is printed, so the report
    # is emitted line by line.
    accuracy = accuracy_score(y_true, y_pred)
    print(f"Accuracy: {accuracy:.2f}")

    precision = precision_score(y_true, y_pred)
    print(f"Precision: {precision:.2f}")

    recall = recall_score(y_true, y_pred)
    print(f"Recall: {recall:.2f}")

    f1 = f1_score(y_true, y_pred)
    print(f"F1-Score: {f1:.2f}")

    separator = "-" * 30
    print(separator)

    

In [None]:
# Print the metrics of each per-species classifier on the held-out split.
evaluate_model(
    y_true=y_versicolor_test, y_pred=y_versicolor_pred, class_name="Versicolor"
)
evaluate_model(
    y_true=y_setosa_test, y_pred=y_setosa_pred, class_name="Setosa"
)
evaluate_model(
    y_true=y_virginica_test, y_pred=y_virginica_pred, class_name="Virginica"
)

In [None]:
# Keep column 1 of each classifier's predict_proba output (for the 0/1 targets
# used here this should be the probability of "is this species" — confirm
# against each model's classes_ if the targets change).
probs_versicolor, probs_setosa, probs_virginica = (
    classifier.predict_proba(X_test)[:, 1]
    for classifier in (model_versicolor, model_setosa, model_virginica)
)


In [None]:
# One column of probabilities per species, one row per test sample.
combined_probs = pd.DataFrame(
    dict(
        zip(
            ("versicolor", "setosa", "virginica"),
            (probs_versicolor, probs_setosa, probs_virginica),
        )
    )
)

In [None]:
# For every test row, pick the species whose classifier gave the highest probability.
multi_class_predictions = combined_probs.idxmax(axis="columns")

# Preview the first five combined predictions.
print("\nMulti-class Predictions:")
print(multi_class_predictions.iloc[:5])

# Manual Build of the Logistic Regression Model

In [None]:
# Add a constant column of ones named "bias" (theta below is sized to include it).
# assign() builds a new frame instead of writing into the object returned by
# train_test_split, which avoids pandas' SettingWithCopyWarning; the cell stays
# safe to re-run because assigning an existing column simply overwrites it.
X_train = X_train.assign(bias=1)


In [None]:
# Start with one zero-valued parameter per column of X_train.
theta = np.zeros(len(X_train.columns))

In [None]:
# Display the initial parameter vector.
theta

In [None]:
# Display the training features, which now include the "bias" column.
X_train