This notebook uses the Iris dataset to demonstrate logistic regression with a single feature.

An interesting article on the Iris dataset:
https://academic.oup.com/jrssig/article/18/6/26/7038520?login=false

- The Iris dataset has four features:

    - sepal length (cm)  
    - sepal width (cm)  
    - petal length (cm)  
    - petal width (cm) 

- It has three classes:
    - Class 0: Iris-setosa
    - Class 1: Iris-versicolor
    - Class 2: Iris-virginica

We construct a new dataset with a single feature and two classes:

- Feature    
    - petal width (cm)

- Classes
    - Class 1: Iris-virginica 
    - Class 0: Others (Iris-versicolor and Iris-setosa)    



In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn import datasets
import seaborn as sns
from matplotlib.animation import FuncAnimation
from IPython.display import HTML
#from sklearn.preprocessing import StandardScaler
#from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
np.random.seed(42)

In [None]:
from pathlib import Path

from IPython.display import Image, display

# File name of the illustration, resolved relative to the current working directory
image_file = 'iris.jpg'

# The picture is purely illustrative. If the file did not ship alongside the
# notebook, report it instead of raising, so that "Restart & Run All" still
# reaches the analysis cells below.
if Path(image_file).is_file():
    print('Iris virginica, versicolor, and setosa')
    # Use the Image class to load and display the image
    display(Image(filename=image_file))
else:
    print(f"Image file '{image_file}' not found; skipping the illustration.")

# Loading the Iris dataset

In [None]:
import pandas as pd

# Fetch the Iris data via scikit-learn's loader and mirror it as a DataFrame:
# one column per entry of `iris.feature_names`, plus a `species` column that
# holds the class labels from `iris.target`.
iris = load_iris()
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(species=iris.target)

# Exploring the Iris dataset

In [None]:
# Peek at the first rows of the table, then list the feature names and the
# mapping from integer label to species name.
print(iris_df.head())
print(f"Feature names in iris dataset: {iris.feature_names}")
for label, species_name in enumerate(iris.target_names):
    print(f"Label {label}: {species_name}")

# Building a single-feature (petal width), binary-class dataset with Iris-virginica as class 1 and the others as class 0

In [None]:
# Single feature: petal width (column 3). The `3:` slice keeps the array 2-D.
x_1 = iris["data"][:, 3:]  # Petal Width
# Binary target: 1 if Iris-Virginica (original label 2), else 0
y_virginica = (iris["target"] == 2).astype(int)

# Add bias term (intercept term) to the feature matrix: the leading column of
# ones makes w[0] the intercept and w[1] the petal-width coefficient.
X = np.hstack((np.ones((x_1.shape[0], 1)), x_1))

# Both classes are drawn on the line y = 0; colour and marker identify the class.
# The feature is plotted in its raw units (no scaling is applied anywhere in this
# notebook), so the labels read "cm" rather than "Standardized".
is_virginica = y_virginica == 1
plt.figure(figsize=(10, 6))
plt.scatter(x_1[~is_virginica], np.zeros_like(x_1[~is_virginica]), s=50, c="b", marker="s", label="Not Iris-Virginica")
plt.scatter(x_1[is_virginica], np.zeros_like(x_1[is_virginica]), s=50, c="g", marker="^", label="Iris-Virginica")
plt.xlabel("Petal Width (cm)")
plt.ylabel("Class")
plt.title("Petal Width vs. Class")
plt.legend()
plt.grid(True)
plt.show()


In [None]:
# Functions for binary classification using logistic regression
def logistic_function(x):
    """Numerically stable logistic (sigmoid) function ``1 / (1 + exp(-x))``.

    The naive formula evaluates ``exp(-x)``, which overflows (and emits a
    RuntimeWarning) for large negative ``x``. Here the exponential is only
    ever taken of a non-positive number, so it cannot overflow.

    Parameters
    ----------
    x : scalar or array-like
        Input value(s); converted to float.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Sigmoid of ``x`` with the same shape as the input (a scalar for
        scalar input).
    """
    x = np.asarray(x, dtype=float)
    # exp(-|x|) lies in (0, 1] for every finite x
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- algebraically identical
    result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d array back into a scalar and leaves n-d arrays as they are
    return result[()]

def binary_cross_entropy_loss(y_true, y_pred, eps=1e-15):
    """Mean binary cross-entropy between labels and predicted probabilities.

    Predictions are clipped to ``[eps, 1 - eps]`` before taking logarithms so
    that a saturated prediction (exactly 0 or 1) yields a large finite loss
    instead of ``inf`` or ``nan``.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels (0 or 1). Must have the same shape as ``y_pred``;
        mixing a 1-D vector with a column vector would broadcast to a matrix.
    y_pred : array-like
        Predicted probabilities of class 1.
    eps : float, optional
        Clipping margin applied to ``y_pred``.

    Returns
    -------
    float
        The summed cross-entropy divided by ``len(y_true)``.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.clip(np.asarray(y_pred, dtype=float), eps, 1 - eps)
    m = len(y_true)
    return -(1 / m) * np.sum(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))

In [None]:
# Mini-Batch Gradient Descent function
def mini_batch_gradient_descent(X, y, learning_rate=0.01, n_iterations=1000, batch_size=10):
    """Fit logistic-regression weights with mini-batch gradient descent.

    Each pass reshuffles the samples, walks through them in chunks of
    ``batch_size`` and updates the weights after every chunk. The loss on the
    full data set is recorded once per pass.

    Parameters
    ----------
    X : numpy.ndarray, shape (m, n)
        Feature matrix; a bias column, if wanted, must already be included.
    y : array-like, length m
        Binary labels (0 or 1).
    learning_rate : float
        Step size applied to every gradient update.
    n_iterations : int
        Number of passes over the data.
    batch_size : int
        Samples per update: ``m`` gives full-batch GD, ``1`` gives SGD.

    Returns
    -------
    w : numpy.ndarray, shape (n, 1)
        Final weights.
    loss_history : list of float
        Full-data loss after each pass.
    frames : list of (numpy.ndarray, list of float)
        One ``(weights copy, loss-history copy)`` snapshot per pass.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        # A negative step would make the batch loop empty and silently skip training
        raise ValueError("batch_size must be a positive integer")

    m, n = X.shape
    y_column = np.asarray(y).reshape(-1, 1)
    w = np.random.randn(n, 1)  # Random initialization
    print(w)
    loss_history = []
    frames = []

    for _ in range(n_iterations):
        shuffled_indices = np.random.permutation(m)
        X_shuffled = X[shuffled_indices]
        y_shuffled = y_column[shuffled_indices]

        for start in range(0, m, batch_size):
            xi = X_shuffled[start:start + batch_size]
            yi = y_shuffled[start:start + batch_size]
            y_pred = logistic_function(xi.dot(w))
            # Average over the samples actually in this batch: the last batch is
            # shorter than batch_size whenever m is not a multiple of it.
            gradients = (1 / len(xi)) * xi.T.dot(y_pred - yi)
            w = w - learning_rate * gradients

        y_pred_full = logistic_function(X.dot(w))
        loss = binary_cross_entropy_loss(y_column, y_pred_full)
        loss_history.append(loss)
        # Copies, so later updates cannot alter an already-stored snapshot
        frames.append((w.copy(), loss_history.copy()))
    return w, loss_history, frames

# Train the model using mini-batch gradient descent
learning_rate = 0.1
n_iterations = 300
# batch_size selects the variant:
#   int(X.shape[0])      -> full-batch GD
#   int(X.shape[0] / 10) -> 10 mini-batches per pass
#   1                    -> SGD
batch_size = int(X.shape[0])
w, loss_history, frames = mini_batch_gradient_descent(X, y_virginica, learning_rate, n_iterations, batch_size)
print(w)

# Print only both ends of the loss history; the full curve is plotted right below
print(f'First losses: {loss_history[:5]}')
print(f'Last losses: {loss_history[-5:]}')

# Plot the convergence of the loss function
fig_loss, ax_loss = plt.subplots(figsize=(12, 6))
ax_loss.plot(range(len(loss_history)), loss_history, label='Training Loss', color='blue')
ax_loss.set_xlabel('Iterations')
ax_loss.set_ylabel('Cross-Entropy Loss')
ax_loss.set_title('Convergence of Mini-Batch Gradient Descent')
ax_loss.legend()
ax_loss.grid(True)
plt.show()
print()
print(f'Final loss:{loss_history[-1]}')
print(f'Final weights:{w.ravel()}')


In [None]:
# Increase the animation embed limit
import matplotlib
matplotlib.rcParams['animation.embed_limit'] = 300  # Limit is in MB: allow up to 300 MB
# Plotting setup: decision boundary | loss curve | descent path on the loss contours
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(18, 5))
# Create error surface: the loss evaluated on a 100 x 100 grid of (w0, w1) values
w0_vals = np.linspace(-4, 4, 100)
w1_vals = np.linspace(-4, 4, 100)
W0, W1 = np.meshgrid(w0_vals, w1_vals)
loss_surface = np.zeros(W0.shape)
y_column = y_virginica.reshape(-1, 1)
# The loop uses its own variable names on purpose: this is module-level code, and
# reusing `w` here would overwrite the trained weights from the previous cell.
for row in range(W0.shape[0]):
    for col in range(W0.shape[1]):
        w_grid = np.array([[W0[row, col]], [W1[row, col]]])
        grid_pred = logistic_function(X.dot(w_grid))
        loss_surface[row, col] = binary_cross_entropy_loss(y_column, grid_pred)

# Animation function
def update(frame_idx):
    """Redraw the three animation panels for frame ``frame_idx``.

    Reads the module-level ``frames``, ``x_1``, ``y_virginica``, ``W0``, ``W1``,
    ``loss_surface`` and the axes ``ax1``, ``ax2``, ``ax3``.

    Parameters
    ----------
    frame_idx : int
        Index into ``frames``; each entry is a ``(weights, loss_history)`` pair.
    """
    w, loss_history = frames[frame_idx]
    w0, w1 = float(w[0, 0]), float(w[1, 0])

    # Decision boundary plot
    ax1.clear()
    ax1.scatter(x_1[y_virginica == 0], np.zeros_like(x_1[y_virginica == 0]), s=50, c="b", marker="s", label="Not Iris-Virginica")
    ax1.scatter(x_1[y_virginica == 1], np.zeros_like(x_1[y_virginica == 1]), s=50, c="g", marker="^", label="Iris-Virginica")
    # The boundary is where w0 + w1 * x = 0. It does not exist when w1 == 0,
    # so the line is skipped in that case rather than dividing by zero.
    if w1 != 0:
        threshold = -w0 / w1
        ax1.axvline(x=threshold, color='r', linestyle='--', label='Decision Boundary')
    ax1.set_xlabel("Petal Width (cm)")
    ax1.set_title(r"Decision Boundary = $-\frac{w_0}{w_1}$")
    ax1.legend()
    ax1.grid(True)

    # Loss convergence plot
    ax2.clear()
    ax2.plot(range(len(loss_history)), loss_history, label='Training Loss', color='blue')
    ax2.set_xlabel('Iterations')
    ax2.set_ylabel('Cross-Entropy Loss')
    ax2.set_title('Convergence of Mini-Batch Gradient Descent')
    ax2.legend()
    ax2.grid(True)

    # Gradient descent path plot with contours
    ax3.clear()
    ax3.contour(W0, W1, loss_surface, levels=30, cmap='viridis')
    # Include the current frame (hence + 1) so the path stays in step with the
    # weights shown in the other two panels and is not empty at frame 0.
    visited = frames[:frame_idx + 1]
    ax3.plot([f[0][0, 0] for f in visited], [f[0][1, 0] for f in visited], 'ro-', markersize=2)
    ax3.set_xlabel('w0')
    ax3.set_ylabel('w1')
    ax3.set_title('Gradient Descent Path with Contours')
    ax3.grid(True)

# Create animation: one frame per recorded training snapshot, played once
anim = FuncAnimation(
    fig,
    update,
    frames=len(frames),
    repeat=False,
    blit=False,
    cache_frame_data=False
)

# Render all frames into a self-contained HTML/JS player
animation_html = anim.to_jshtml()
# Close the figure once rendering is done; otherwise the inline backend also
# shows a static copy of it underneath the player.
plt.close(fig)

# Display the animation in the notebook
HTML(animation_html)