**Iris Logistic Regression**

In [28]:
# importing libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import preprocessing
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.metrics import precision_score, recall_score

# Load the Iris dataset from the CSV file located in the notebook's
# working directory.
df = pd.read_csv(filepath_or_buffer='Iris.csv')

# Preview the first five rows to sanity-check the load.
df.head(n=5)

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


Identify variables and encode the target

In [29]:
# Independent variables (features): select the four measurement columns by
# name rather than by position, so the selection does not depend on column
# order or on columns that get added to `df` later.
FEATURE_COLUMNS = ['SepalLengthCm', 'SepalWidthCm',
                   'PetalLengthCm', 'PetalWidthCm']
X = df[FEATURE_COLUMNS]

# Keep the full three-class integer encoding in df['Target'] for reference.
# Reading 'Species' by name (instead of "the last column") keeps this cell
# idempotent: once 'Target' has been appended, the last column is no longer
# 'Species', so a positional lookup would silently read the wrong column on
# a re-run.
le = LabelEncoder()
df['Target'] = le.fit_transform(df['Species'])

# Dependent variable (target): binary label, 0 = 'Iris-setosa' and
# 1 = any other species. Comparing against the species name directly avoids
# relying on LabelEncoder's sorted class order to put 'Iris-setosa' at 0,
# and replaces the row-wise lambda with a vectorised comparison.
y = (df['Species'] != 'Iris-setosa').astype('int64').rename('Target')

# Display the first few rows of the modified dataset
df.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species,Target
0,1,5.1,3.5,1.4,0.2,Iris-setosa,0
1,2,4.9,3.0,1.4,0.2,Iris-setosa,0
2,3,4.7,3.2,1.3,0.2,Iris-setosa,0
3,4,4.6,3.1,1.5,0.2,Iris-setosa,0
4,5,5.0,3.6,1.4,0.2,Iris-setosa,0


Split data

In [30]:
# Hold out 20% of the rows as a test set and train on the remaining 80%.
# The fixed random_state makes the shuffle, and therefore the split,
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Show the shape of each partition (train/test features, train/test labels)
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((120, 4), (30, 4), (120,), (30,))

In [31]:
# Initialize and train the logistic regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Human-readable class labels in encoded order
# (0 = 'Iris-setosa', 1 = 'not-Iris-setosa')
classes = ['Iris-setosa', 'not-Iris-setosa']

# Confusion matrix: rows are the actual classes, columns are the predicted
# classes. Passing labels=[0, 1] pins the matrix to 2x2 in that order; without
# it, sklearn infers the label set from the data, so a split in which one
# class never appears would produce a smaller matrix and the labeled
# DataFrame below would fail to build.
conf_mat = confusion_matrix(y_test, y_pred, labels=[0, 1])

# Convert the confusion matrix to a DataFrame with labels
cm_df = pd.DataFrame(conf_mat, columns=classes, index=classes)

# Display the labeled confusion matrix
cm_df

Unnamed: 0,Iris-setosa,not-Iris-setosa
Iris-setosa,10,0
not-Iris-setosa,0,20


In [32]:
# Recompute the raw confusion matrix for the test-set predictions
cm = confusion_matrix(y_test, y_pred)

# Summary metrics. precision_score and recall_score are called with their
# default positive label (1), so they describe the 'not-Iris-setosa' class.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

# Report each metric on its own line, rounded to two decimals
print(
    f"Accuracy: {accuracy:.2f}",
    f"Precision: {precision:.2f}",
    f"Recall: {recall:.2f}",
    sep="\n",
)

Accuracy: 1.00
Precision: 1.00
Recall: 1.00


In [33]:
# Unpack the 2x2 confusion matrix (rows = actual class, columns = predicted
# class, classes in the order [0, 1]) treating class 0 ('Iris-setosa') as the
# positive class:
#   [0, 0] -> true positives     [0, 1] -> false negatives
#   [1, 0] -> false positives    [1, 1] -> true negatives
# NOTE: precision_score / recall_score default to pos_label=1, i.e. they
# treat 'not-Iris-setosa' as positive, so their TP/TN roles are swapped
# relative to the counts printed here.
tp, fn, fp, tn = conf_mat.ravel()

# Precision = TP / (TP + FP) and recall = TP / (TP + FN), so recall exceeds
# precision only when there are more false positives than false negatives.
# Read the printed counts to compare them; when FP and FN are both zero,
# precision and recall are both 1.0 and neither is higher.

print(f"True Positives: {tp}, True Negatives: {tn}, "
      f"False Positives: {fp}, False Negatives: {fn}")

True Positives: 10, True Negatives: 20, False Positives: 0, False Negatives: 0


The confusion matrix shows 0 false positives and 0 false negatives, so every prediction on the 30-sample test set was correct.