In [1]:
# Import libraries
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier

In [2]:
# Read the Iris CSV file into a DataFrame
df = pd.read_csv(filepath_or_buffer='Iris.csv')

# Display the first five rows as a quick check of the columns and values
df.head(n=5)

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [3]:
# Prepare the data for binary classification
# Features: the four sepal/petal measurement columns
X = df[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']]

# Target: Setosa -> 0; Versicolor & Virginica -> 1
# The boolean "is not setosa" mask is cast to integers to obtain the 0/1 labels.
y = (df['Species'] != 'Iris-setosa').astype('int64')

In [4]:
# Hold out 30% of the rows as a test set; the fixed random_state keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [5]:
# Train the Logistic Regression model
# fit() returns the estimator itself, so construction and training are chained
model = LogisticRegression().fit(X_train, y_train)

# Predict class labels for the held-out test features
y_pred = model.predict(X_test)

In [6]:
# Confusion matrix
# Rows are the true labels and columns the predicted labels.
# labels=[0, 1] pins the result to a 2x2 matrix even if one class is absent
# from the test split, so it can always be unpacked into TN, FP, FN, TP.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
print('Confusion Matrix:')
print(cm)

Confusion Matrix:
[[19  0]
 [ 0 26]]



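Here the positive class is `not-Iris-setosa` (label 1) and the negative class is `Iris-setosa` (label 0).

- **True Negatives (TN)** = 19 → `Iris-setosa` flowers correctly predicted as `Iris-setosa`
- **False Positives (FP)** = 0 → no `Iris-setosa` flowers were misclassified as `not-Iris-setosa`
- **False Negatives (FN)** = 0 → no `not-Iris-setosa` flowers were misclassified as `Iris-setosa`
- **True Positives (TP)** = 26 → `not-Iris-setosa` flowers correctly predicted as `not-Iris-setosa`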

This means the model made **no classification errors** on the 45 test samples.

### Prediction Summary

- **Precision** = 1.00 (no false positives)
- **Recall** = 1.00 (no false negatives)
- **Accuracy** = 1.00

Since precision and recall are both exactly 1.00, we can conclude:

> The model has **identical precision and recall**, and performs perfectly on this test set.


In [7]:
# Unpack the flattened 2x2 confusion matrix into its four counts
tn, fp, fn, tp = cm.ravel()

# Accuracy: correct predictions divided by all predictions
accuracy = (tn + tp) / (tn + fp + fn + tp)

# Precision: tp / (tp + fp), reported as 0 when nothing was predicted positive
if (tp + fp) != 0:
    precision = tp / (tp + fp)
else:
    precision = 0

# Recall: tp / (tp + fn), reported as 0 when there are no actual positives
if (tp + fn) != 0:
    recall = tp / (tp + fn)
else:
    recall = 0

# Report each metric rounded to two decimals
print(f'Accuracy: {accuracy:.2f}')
print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')

Accuracy: 1.00
Precision: 1.00
Recall: 1.00


This indicates the model correctly classified every sample in the test set, with no false positives or false negatives, demonstrating excellent predictive ability for this binary classification task.

### Optional task

In [8]:
# Encode the dependent variable for multi-class classification
# Encode Species as 0,1,2 for setosa, versicolor, virginica
label_mapping = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}
species_encoded = df['Species'].map(label_mapping)

# Series.map leaves NaN for any value missing from the mapping; fail early with
# the offending labels instead of passing NaN targets on to model fitting.
unmapped_labels = df.loc[species_encoded.isna(), 'Species'].unique()
if len(unmapped_labels) > 0:
    raise ValueError(f'Unmapped Species labels: {list(unmapped_labels)}')

df['Species_encoded'] = species_encoded

# Features are the same four measurement columns; the target is the encoded species
X = df[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']]
y = df['Species_encoded']

In [12]:
# Split the multi-class data: 30% held out for testing, seeded for a reproducible split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [13]:
# Fit a one-vs-rest Logistic Regression model for multi-class classification.
# The `multi_class` argument of LogisticRegression is deprecated in recent
# scikit-learn releases, so the one-vs-rest scheme is expressed with the
# OneVsRestClassifier wrapper instead. max_iter=200 applies to each binary fit.
model = OneVsRestClassifier(LogisticRegression(max_iter=200))
model.fit(X_train, y_train)



In [14]:
# Predict the encoded species for the test features
y_pred = model.predict(X_test)

# Tabulate true labels against predicted labels and print the matrix under its heading
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print('Confusion Matrix:', cm, sep='\n')

Confusion Matrix:
[[19  0  0]
 [ 0 11  2]
 [ 0  0 13]]


### Multi-class Confusion Matrix Analysis
- All **Iris-setosa** samples were correctly classified.
- Two of the 13 **Iris-versicolor** samples were misclassified as **Iris-virginica**.
- All **Iris-virginica** samples were correctly classified.

This indicates the model struggles slightly more with distinguishing **Iris-versicolor**