In [8]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [9]:
# Step 1: Load the Iris dataset
iris = load_iris()
# Pull the feature matrix and the class labels out of the loaded dataset object
X, y = iris.data, iris.target

In [10]:
# Step 2: Preprocess the data
# Convert to binary classification: Iris Setosa (original class 0) becomes the
# positive class 1, and every other species becomes class 0
y_binary = np.equal(y, 0).astype(int)

# Step 3: Split the dataset into training and testing sets
# (20% of the samples are held out; the seed is fixed so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_binary,
    test_size=0.2,
    random_state=42,
)

In [11]:
# Step 4: Define the logistic loss function (cost function)
def logistic_loss(y_true, y_pred):
    """Return the mean binary cross-entropy between labels and probabilities.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels (0 or 1).
    y_pred : array-like
        Predicted probabilities of the positive class.

    Returns
    -------
    float
        ``-mean(y_true * log(p) + (1 - y_true) * log(1 - p))`` where ``p`` is
        ``y_pred`` clipped to ``[epsilon, 1 - epsilon]``.
    """
    # Work in float64: plain lists become arrays, and lower-precision inputs
    # (e.g. float32) are widened so that ``1 - epsilon`` does not round to 1.0,
    # which would make the clip below a no-op and let log(0) through.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    epsilon = 1e-15  # Small constant to prevent log(0)
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)  # Clip values to avoid log(0)
    return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))

# Step 5: Define the training function using gradient descent
def train_logistic_regression(X_train, y_train, learning_rate, num_iterations, regularization_param):
    """Fit a logistic regression model with batch gradient descent.

    Parameters
    ----------
    X_train : array-like of shape (m, n)
        Training features, one row per sample.
    y_train : array-like of shape (m,)
        Binary training labels (0 or 1).
    learning_rate : float
        Step size applied to each gradient update.
    num_iterations : int
        Number of full-batch gradient descent steps.
    regularization_param : float
        L2 regularization strength; applied to the weights only, not the bias.

    Returns
    -------
    tuple
        ``(W, b)``: the weight vector of shape ``(n,)`` and the scalar bias.
    """
    # Accept lists as well as arrays, and do all arithmetic in float64
    X_train = np.asarray(X_train, dtype=float)
    y_train = np.asarray(y_train, dtype=float)

    m, n = X_train.shape
    W = np.zeros(n)  # Initialize weights to zeros
    b = 0.0  # Initialize bias to zero

    for _ in range(num_iterations):
        z = np.dot(X_train, W) + b

        # Numerically stable sigmoid. With e = exp(-|z|) <= 1 the exponential can
        # never overflow:  z >= 0 -> 1 / (1 + e),  z < 0 -> e / (1 + e).
        e = np.exp(-np.abs(z))
        y_pred = np.where(z >= 0, 1.0, e) / (1.0 + e)

        # Compute gradients for weights and bias
        error = y_pred - y_train
        dw = (1 / m) * np.dot(X_train.T, error)
        db = (1 / m) * np.sum(error)

        # Regularization term for weights (L2 regularization)
        dw += (regularization_param / m) * W

        # Update weights and bias
        W -= learning_rate * dw
        b -= learning_rate * db

    return W, b

In [12]:
# Step 6: Train the logistic regression model
# Hyperparameters: gradient step size, number of descent steps, L2 strength
learning_rate, num_iterations, regularization_param = 0.01, 1000, 0.1
W, b = train_logistic_regression(
    X_train,
    y_train,
    learning_rate=learning_rate,
    num_iterations=num_iterations,
    regularization_param=regularization_param,
)

# Step 7: Define the prediction function
def predict_logistic_regression(X, W, b, threshold=0.5):
    """Predict binary class labels from a fitted logistic regression model.

    Parameters
    ----------
    X : array-like of shape (m, n)
        Feature rows to classify.
    W : array-like of shape (n,)
        Weight vector.
    b : float
        Bias term.
    threshold : float, optional
        Probability at or above which a sample is labelled 1 (default 0.5).

    Returns
    -------
    numpy.ndarray
        Integer labels (0 or 1), one per row of ``X``.
    """
    z = np.dot(X, W) + b

    # Numerically stable sigmoid. With e = exp(-|z|) <= 1 the exponential can
    # never overflow:  z >= 0 -> 1 / (1 + e),  z < 0 -> e / (1 + e).
    e = np.exp(-np.abs(z))
    y_prob = np.where(z >= 0, 1.0, e) / (1.0 + e)

    return (y_prob >= threshold).astype(int)


In [13]:
# Step 8: Predict on the test set
y_pred = predict_logistic_regression(X_test, W, b)

# Step 9: Evaluate the model's performance, reporting each metric right after
# it is computed

# Overall fraction of correct predictions
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Counts of true vs. predicted labels
confusion = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", confusion)

# Per-class precision / recall / F1 summary
classification_rep = classification_report(y_test, y_pred)
print("Classification Report:\n", classification_rep)

Accuracy: 1.0
Confusion Matrix:
 [[20  0]
 [ 0 10]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        20
           1       1.00      1.00      1.00        10

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

