Step 1: Import libraries and load dataset

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix

# Read the Iris CSV (relative path, resolved against the kernel's working directory)
df = pd.read_csv('Iris.csv')

# Show which class labels are present before any encoding is applied
print("Raw Species values:", df.loc[:, 'Species'].unique())

# Preview the first five rows as the cell's rendered output
df.head(5)

Raw Species values: ['Iris-setosa' 'Iris-versicolor' 'Iris-virginica']


Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [3]:
# Re-check the distinct class labels held in the Species column
print(df.Species.unique())

['Iris-setosa' 'Iris-versicolor' 'Iris-virginica']


Step 2: Prepare the data

In [4]:
# Drop the Id column. errors='ignore' keeps the cell safe to re-run after
# Id has already been removed (a plain drop would raise KeyError).
df = df.drop(columns='Id', errors='ignore')

# Encode the target variable: 0 = Iris-setosa, 1 = any other species.
# Only encode while the column still holds the raw string labels: on a
# re-run the values are already 0/1, and comparing them with 'Iris-setosa'
# would relabel every row as 1.
if not pd.api.types.is_numeric_dtype(df['Species']):
    df['Species'] = (df['Species'] != 'Iris-setosa').astype('int64')

# Independent variables (features): every remaining column except the target
x = df.drop(columns='Species')

# Dependent variable (target)
y = df['Species']

Step 3: Train-test-split

In [5]:
# Hold out 20% of the rows for testing and train on the remaining 80%;
# the fixed random_state makes the shuffle reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

Step 4: Fit Logistic Regression Model

In [6]:
# Build a logistic regression classifier with default settings and train it
# on the training split (fit() returns the fitted estimator itself).
model = LogisticRegression().fit(x_train, y_train)

# Predict class labels for the held-out test rows
y_pred = model.predict(x_test)

Step 5: Generate Confusion Matrix

In [7]:
# Confusion matrix (rows = actual class, columns = predicted class).
# labels=[0, 1] pins a 2x2 layout ordered [setosa, not-setosa] even when one
# class is absent from the test split or the predictions, so code that
# unpacks the four cells (tn, fp, fn, tp) always receives exactly four values.
conf_matrix = confusion_matrix(y_test, y_pred, labels=[0, 1])
print("Confusion Matrix:")
print(conf_matrix)

Confusion Matrix:
[[10  0]
 [ 0 20]]


# Comment:
The model correctly classified all 10 Iris-setosa samples (True Negatives = 10) and all 20 not-setosa samples (True Positives = 20),
with 0 False Positives (setosa predicted as not-setosa) and 0 False Negatives (not-setosa predicted as setosa).
With no errors of either kind, precision and recall for the not-setosa class should both equal 1.00.

Step 6: Manually Calculate Accuracy, Precision, Recall

In [8]:
# Extract values: flattening the 2x2 matrix row by row gives
# tn, fp, fn, tp (class 1 = not-setosa is treated as the positive class).
tn, fp, fn, tp = conf_matrix.ravel()

# Manual calculations.
# Each ratio falls back to 0.0 when its denominator is zero (e.g. the model
# never predicts the positive class), instead of producing NaN and a
# RuntimeWarning from a 0/0 division.
total = tp + tn + fp + fn
accuracy = (tp + tn) / total if total else 0.0
precision = tp / (tp + fp) if (tp + fp) else 0.0
recall = tp / (tp + fn) if (tp + fn) else 0.0

print(f'Accuracy: {accuracy:.2f}')
print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')

Accuracy: 1.00
Precision: 1.00
Recall: 1.00


Optional Multiclass Classification (3-Class)

In [9]:
# Reload dataset from fresh start so the binary encoding applied earlier
# does not leak into the 3-class experiment
df = pd.read_csv('Iris.csv')

# Drop the Id column when it is present
df = df.drop(columns='Id', errors='ignore')

# Standardize Species values: cast to str, trim whitespace, lowercase
df['Species'] = df['Species'].astype(str).str.strip().str.lower()

# Label map (keys are the normalized, lowercase species names)
label_map = {'iris-setosa': 0,
             'iris-versicolor': 1,
             'iris-virginica': 2}

# Map to numeric labels; any value missing from label_map becomes NaN
species_codes = df['Species'].map(label_map)

# Check for unmapped entries
missing = species_codes.isna().sum()
if missing > 0:
    # Show the offending rows with their original (normalized) text, then
    # stop: continuing would only fail later inside model.fit with a less
    # helpful "y contains NaN" error.
    print(f'Error: {missing} unmapped species values')
    print(df[species_codes.isna()])
    raise ValueError(f'{missing} unmapped species values')

# Every label was mapped, so the codes can be stored as plain integers
df['Species'] = species_codes.astype('int64')
print("Species mapped successfully.")

# Separate the encoded target from the feature columns
y = df['Species']
x = df.drop(columns=['Species'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Fit model.
# No multi_class argument is passed: with the default lbfgs solver,
# scikit-learn (0.22+) already fits a multinomial model when the target has
# three or more classes, and the multi_class parameter itself is deprecated
# since scikit-learn 1.5. max_iter is raised above the default of 100 to
# give the solver more iterations to converge.
model = LogisticRegression(max_iter=200)
model.fit(x_train, y_train)

# Predictions
y_pred = model.predict(x_test)

# Confusion matrix (rows = actual class, columns = predicted class).
# confusion_matrix is already imported in the notebook's first code cell,
# so it is not re-imported here.
# labels=[0, 1, 2] pins a 3x3 layout in setosa / versicolor / virginica
# order even if a class happens to be absent from the test split.
print("Multiclass Confusion Matrix:")
print(confusion_matrix(y_test, y_pred, labels=[0, 1, 2]))

Species mapped successfully.
Multiclass Confusion Matrix:
[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]


# Interpretation of the Confusion Matrix:
The model correctly predicted all test samples:
- All 10 Iris-setosa were classified correctly.
- All 9 Iris-versicolor were classified correctly.
- All 11 Iris-virginica were classified correctly.
There are zero false positives or false negatives.

This indicates the model achieved 100% accuracy, and 100% precision and recall for every class, on the 30-sample test set.