In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from utils import *
from sklearn.metrics import confusion_matrix
import seaborn as sns

In [None]:
# Read the raw Iris CSV; the file carries no header row, so column names are supplied here
column_names = [
    'sepal_length',
    'sepal_width',
    'petal_length',
    'petal_width',
    'species',
]
iris_data = pd.read_csv(
    'iris/iris.data',
    names=column_names,
    header=None,
)

In [None]:
# Preview the first rows. Leaving the frame as the cell's last expression lets the
# notebook render it with its rich table display instead of print()'s plain text.
iris_data.head()

In [None]:
# Number of rows per species label
species_counts = iris_data['species'].value_counts()
print(species_counts)

In [None]:
# Feature matrix laid out as (n_features, n_samples): every column is one flower.
X = iris_data.drop('species', axis=1).values.T
# One-hot encoded targets laid out as (n_classes, n_samples).
# dtype=float is requested explicitly: pandas >= 2.0 returns bool dummy columns by
# default, and the training code downstream does arithmetic on these labels.
y = pd.get_dummies(iris_data['species'], dtype=float).values.T

In [None]:
# train_test_split expects samples along axis 0, so split the transposed arrays and
# flip each resulting piece back to the (rows = features/classes, columns = samples) layout.
split_pieces = train_test_split(X.T, y.T, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = (piece.T for piece in split_pieces)

y_train.shape

In [None]:
# Standardise features: statistics are fitted on the training samples only and then
# reused for the test samples. The scaler works on (samples, features), hence the transposes.
scaler = StandardScaler()
train_samples_scaled = scaler.fit_transform(X_train.T)
test_samples_scaled = scaler.transform(X_test.T)
X_train_scaled, X_test_scaled = train_samples_scaled.T, test_samples_scaled.T

In [None]:
# Hyperparameters gathered in one place, then handed to the project's trainer
training_options = {
    'num_iterations': 3000,
    'learning_rate': 0.1,
    'print_cost': True,
}
params = logistic_regression(X_train_scaled, y_train, **training_options)

In [None]:
# Predict on the held-out samples
y_pred = predict(X_test_scaled, params)
# Turn the one-hot test targets back into class indices (argmax over the class axis)
y_true = np.argmax(y_test, axis=0)
# Share of predictions that agree with the true class index
is_correct = y_pred == y_true
accuracy = np.mean(is_correct)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

In [None]:
# Confusion matrix of true vs. predicted class indices, drawn as an annotated heatmap
cm = confusion_matrix(y_true, y_pred)
plt.figure(figsize=(10, 8))
heatmap_ax = sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
heatmap_ax.set_title('Confusion Matrix')
heatmap_ax.set_ylabel('True Label')
heatmap_ax.set_xlabel('Predicted Label')
plt.show()

In [None]:
# Visualize decision boundaries
def plot_decision_boundary(X, y, params, resolution=100):
    """Plot the model's predicted regions over the first two features.

    A ``resolution`` x ``resolution`` grid is laid over feature rows 0 and 1,
    extended by 1 unit beyond the data range on each side. Every remaining
    feature row is held constant at its mean over ``X``, so the model is
    evaluated on a 2-D slice of the feature space. The samples in ``X`` are
    then scattered on top, coloured by ``y``.

    Parameters
    ----------
    X : array with features along axis 0 and samples along axis 1
        Must have at least two feature rows. Any number of further rows is
        supported; they are frozen at their mean.
    y : array-like
        One value per sample, used to colour the scatter points.
    params
        Model parameters, passed through unchanged to ``predict``.
    resolution : int, default 100
        Number of grid points along each plotted axis.

    Raises
    ------
    ValueError
        If ``X`` has fewer than two feature rows.
    """
    n_features = len(X)
    if n_features < 2:
        raise ValueError(
            f"plot_decision_boundary needs at least 2 feature rows, got {n_features}"
        )

    # Grid over the two plotted features, padded by 1 on each side
    x1 = np.linspace(X[0].min() - 1, X[0].max() + 1, resolution)
    x2 = np.linspace(X[1].min() - 1, X[1].max() + 1, resolution)
    xx1, xx2 = np.meshgrid(x1, x2)
    flat1, flat2 = xx1.ravel(), xx2.ravel()

    # Freeze every non-plotted feature at its mean, however many there are
    held_constant = [
        np.mean(X[i]) * np.ones_like(flat1) for i in range(2, n_features)
    ]
    X_grid = np.array([flat1, flat2, *held_constant])

    # Assumes predict returns one label per grid column; the reshape fails otherwise
    Z = predict(X_grid, params)
    Z = Z.reshape(xx1.shape)

    plt.figure(figsize=(12, 8))
    plt.contourf(xx1, xx2, Z, alpha=0.8, cmap=plt.cm.RdYlBu)
    scatter = plt.scatter(X[0], X[1], c=y, cmap=plt.cm.RdYlBu, edgecolor='black')
    # NOTE(review): labels are hard-coded and presume rows 0/1 are sepal length/width
    plt.xlabel('Sepal Length')
    plt.ylabel('Sepal Width')
    plt.title('Decision Boundaries of Logistic Regression on Iris Dataset')
    plt.legend(*scatter.legend_elements(), title="Classes")
    plt.show()

# Class index per training sample (argmax over the one-hot class axis), used to colour the points
train_labels = np.argmax(y_train, axis=0)
plot_decision_boundary(X_train_scaled, train_labels, params)