In [None]:
import numpy as np
import matplotlib.pyplot as plt
from models.logistic_regression import LogisticRegression


def create_features_for_poly(X, degree):
    """
    Expands input features to the specified polynomial degree.

    Every column of X is raised element-wise to the powers 1..degree and the
    resulting blocks are placed side by side: [X, X**2, ..., X**degree].
    No cross terms (such as x1 * x2) and no bias column are produced.

    Parameters:
    X (numpy.ndarray): Input matrix of shape (n_samples, n_features).
    degree (int): Degree of the polynomial features.

    Returns:
    numpy.ndarray: Expanded features of shape (n_samples, n_features * degree).
        The result is always a new array, so writing to it never alters X.

    Raises:
    ValueError: If degree is smaller than 1 or X is not two-dimensional.
    """
    if degree < 1:
        raise ValueError("Degree must be at least 1.")

    X = np.asarray(X)
    if X.ndim != 2:
        raise ValueError(
            f"X must be 2-dimensional (n_samples, n_features), got shape {X.shape}."
        )

    # Collect every power block first and concatenate once; concatenating
    # inside the loop would re-copy the growing matrix on each iteration.
    power_blocks = [X] + [X**d for d in range(2, degree + 1)]
    return np.concatenate(power_blocks, axis=1)

def mean_logloss(y_real, y_pred):
    """
    Computes the mean log loss for a binary classification problem.

    Both inputs are flattened before the loss is evaluated, so a column
    vector of predictions (n_samples, 1) can be paired with a flat label
    vector (n_samples,) without NumPy broadcasting them into an
    (n_samples, n_samples) matrix.

    Parameters:
    y_real (numpy.ndarray): True labels (0 or 1), n_samples elements.
    y_pred (numpy.ndarray): Predicted probabilities, n_samples elements.

    Returns:
    float: Mean log loss.

    Raises:
    ValueError: If the inputs hold a different number of elements or are empty.
    """
    y_real = np.asarray(y_real, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()

    if y_real.shape != y_pred.shape:
        raise ValueError(
            f"y_real and y_pred must have the same number of elements, "
            f"got {y_real.size} and {y_pred.size}."
        )
    if y_real.size == 0:
        raise ValueError("Cannot compute the log loss of empty inputs.")

    epsilon = 1e-15  # Keeps probabilities away from 0 and 1 to avoid log(0)
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
    return -np.mean(y_real * np.log(y_pred) + (1 - y_real) * np.log(1 - y_pred))

def classif_error(y_real, y_pred):
    """
    Computes classification error percentage.

    Both inputs are flattened before comparison, so a column vector of
    predictions (n_samples, 1) can be compared with a flat label vector
    (n_samples,) without NumPy broadcasting the comparison into an
    (n_samples, n_samples) matrix, which could report more than 100% error.

    Parameters:
    y_real (numpy.ndarray): True labels, n_samples elements.
    y_pred (numpy.ndarray): Predicted labels, n_samples elements.

    Returns:
    float: Classification error percentage, between 0 and 100.

    Raises:
    ValueError: If the inputs hold a different number of elements or are empty.
    """
    y_real = np.asarray(y_real).ravel()
    y_pred = np.asarray(y_pred).ravel()

    if y_real.shape != y_pred.shape:
        raise ValueError(
            f"y_real and y_pred must have the same number of elements, "
            f"got {y_real.size} and {y_pred.size}."
        )
    if y_real.size == 0:
        raise ValueError("Cannot compute the classification error of empty inputs.")

    # The mean of the boolean mismatch mask is the fraction of wrong labels.
    return np.mean(y_real != y_pred) * 100


def plot_decision_boundary(X, y, theta, degree):
    """
    Draws the decision regions of a polynomial logistic regression model
    together with the samples of both classes.

    A 100 x 100 grid covering the first two columns of X (padded by one unit
    on every side) is expanded with create_features_for_poly, extended with a
    trailing column of ones, and passed to LogisticRegression together with
    theta. The output is contoured at the levels 0, 0.5 and 1.

    Parameters:
    X (numpy.ndarray): Input matrix of shape (n_samples, n_features); only the
        first two columns are plotted.
    y (numpy.ndarray): True labels, shape (n_samples,). Samples with label 1
        are drawn in red and samples with label 0 in blue.
    theta (numpy.ndarray): Optimized parameters.
    degree (int): Degree of polynomial features.
    """
    # Axis ranges: data extent widened by one unit in every direction.
    first_col, second_col = X[:, 0], X[:, 1]
    x_axis = np.linspace(first_col.min() - 1, first_col.max() + 1, 100)
    y_axis = np.linspace(second_col.min() - 1, second_col.max() + 1, 100)
    xx, yy = np.meshgrid(x_axis, y_axis)

    # One row per grid point, expanded the same way as the model inputs,
    # with the bias column appended last.
    grid_points = np.column_stack((xx.ravel(), yy.ravel()))
    expanded = create_features_for_poly(grid_points, degree)
    bias_column = np.ones((expanded.shape[0], 1))
    design_matrix = np.concatenate((expanded, bias_column), axis=1)

    # Model output per grid point, folded back into the grid layout.
    probs = LogisticRegression(design_matrix, theta).reshape(xx.shape)

    plt.contourf(xx, yy, probs, levels=[0, 0.5, 1], cmap="coolwarm", alpha=0.6)
    for label_value, colour, legend_text in (
        (1, 'red', "Class 1"),
        (0, 'blue', "Class 0"),
    ):
        members = X[y == label_value]
        plt.scatter(members[:, 0], members[:, 1], color=colour, label=legend_text)
    plt.title("Decision Boundary")
    plt.legend()
    plt.show()

def shuffle_data(X, y):
    """
    Randomly shuffles the dataset.

    The same random permutation is applied to X and y, so every sample keeps
    its label. The permutation is drawn from NumPy's global random state; call
    np.random.seed beforehand for a reproducible order. The inputs are not
    modified.

    Parameters:
    X (numpy.ndarray): Features matrix, shape (n_samples, n_features).
    y (numpy.ndarray): Labels vector, shape (n_samples,).

    Returns:
    tuple: Shuffled X and y.

    Raises:
    ValueError: If X and y do not contain the same number of samples.
    """
    n_samples = len(y)
    # Without this check a longer X would silently lose its surplus rows.
    if len(X) != n_samples:
        raise ValueError(
            f"X and y must contain the same number of samples, "
            f"got {len(X)} and {n_samples}."
        )

    permutation = np.random.permutation(n_samples)
    return X[permutation], y[permutation]


In [None]:
###############################################
# Main Orchestration Script for 4 Task
# Author: AbdoAllah Mohammad
# Description: Combines all modules to complete tasks from pages 7-10 of the B1 project.
###############################################

# Import dependencies
import numpy as np

from data.create_data import create_data
from models.gradient_descent import GradientDescent
from main_project_skeleton import create_features_for_poly, evaluate_loss, classif_error, plot_data, plot_decision_boundary
from models.logistic_regression import LogisticRegression
from models.logistic_regression import train_logistic_regression, predict_logistic_regression
from utils.utilities import (
    evaluate_loss,
    evaluate_classification_error,
    plot_data,
)

# Hyperparameters
learning_rate = 0.001
num_iterations = 1000
degree_poly = 2  # Polynomial degree for feature expansion
n_samples_train = 400
n_samples_val = 4000

# ---- Main Workflow ----
if __name__ == "__main__":
    print("Starting main pipeline...")

    # Step 1: Generate Data
    print("Generating training and validation data...")
    X_train, class_labels_train = create_data(n_samples_train)
    # Map the generated class ids {1, 2} onto binary targets {0, 1}.
    y_train = (class_labels_train == 2) * 1

    X_val, class_labels_val = create_data(n_samples_val)
    y_val = (class_labels_val == 2) * 1

    # Step 2: Polynomial Feature Expansion
    # The bias column goes last, matching the layout plot_decision_boundary
    # builds for its grid. Its height comes from the actual matrix rather
    # than from the requested sample count, so the two can never disagree.
    print(f"Expanding features to degree {degree_poly}...")
    X_train_poly = create_features_for_poly(X_train, degree_poly)
    X_train_poly = np.concatenate(
        (X_train_poly, np.ones((X_train_poly.shape[0], 1))), axis=1
    )

    X_val_poly = create_features_for_poly(X_val, degree_poly)
    X_val_poly = np.concatenate(
        (X_val_poly, np.ones((X_val_poly.shape[0], 1))), axis=1
    )

    # Step 3: Train Logistic Regression Model
    print("Training logistic regression model using gradient descent...")
    theta_opt = train_logistic_regression(
        X_train_poly, y_train, learning_rate, num_iterations
    )

    # Step 4: Evaluate the Model
    print("Evaluating model performance...")
    # NOTE(review): LogisticRegression is assumed to return class-1
    # probabilities, as plot_decision_boundary treats its output - confirm.
    prob_train = LogisticRegression(X_train_poly, theta_opt)
    prob_val = LogisticRegression(X_val_poly, theta_opt)

    # Threshold at 0.5 so the error metric compares labels with labels
    # instead of labels with raw probabilities.
    y_pred_train = (prob_train >= 0.5) * 1
    y_pred_val = (prob_val >= 0.5) * 1

    loss_train = evaluate_loss(X_train_poly, y_train, theta_opt)
    loss_val = evaluate_loss(X_val_poly, y_val, theta_opt)

    error_train = evaluate_classification_error(y_train, y_pred_train)
    error_val = evaluate_classification_error(y_val, y_pred_val)

    print(f"Training Loss: {loss_train:.4f}, Validation Loss: {loss_val:.4f}")
    print(f"Training Error: {error_train:.2f}%, Validation Error: {error_val:.2f}%")

    # Step 5: Plot Data and Decision Boundary
    print("Plotting data and decision boundaries...")
    plot_data(X_train, class_labels_train)
    # plot_decision_boundary selects samples with y == 1 and y == 0, so it
    # needs the binary targets rather than the raw class ids {1, 2}.
    plot_decision_boundary(X_train, y_train, theta_opt, degree_poly)

    plot_data(X_val, class_labels_val)
    plot_decision_boundary(X_val, y_val, theta_opt, degree_poly)

    print("Pipeline complete!")


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from models.logistic_regression import LogisticRegression

def create_features_for_poly(X, degree):
    """
    Expands input features to the specified polynomial degree.

    Every column of X is raised element-wise to the powers 1..degree and the
    resulting blocks are placed side by side: [X, X**2, ..., X**degree].
    No cross terms (such as x1 * x2) and no bias column are produced.

    Parameters:
    X (numpy.ndarray): Input matrix of shape (n_samples, n_features).
    degree (int): Degree of the polynomial features.

    Returns:
    numpy.ndarray: Expanded features of shape (n_samples, n_features * degree).
        The result is always a new array, so writing to it never alters X.

    Raises:
    ValueError: If degree is smaller than 1 or X is not two-dimensional.
    """
    if degree < 1:
        raise ValueError("Degree must be at least 1.")

    X = np.asarray(X)
    if X.ndim != 2:
        raise ValueError(
            f"X must be 2-dimensional (n_samples, n_features), got shape {X.shape}."
        )

    # Collect every power block first and concatenate once; concatenating
    # inside the loop would re-copy the growing matrix on each iteration.
    power_blocks = [X] + [X**d for d in range(2, degree + 1)]
    return np.concatenate(power_blocks, axis=1)

def mean_logloss(y_real, y_pred):
    """
    Computes the mean log loss for a binary classification problem.

    Both inputs are flattened before the loss is evaluated, so a column
    vector of predictions (n_samples, 1) can be paired with a flat label
    vector (n_samples,) without NumPy broadcasting them into an
    (n_samples, n_samples) matrix.

    Parameters:
    y_real (numpy.ndarray): True labels (0 or 1), n_samples elements.
    y_pred (numpy.ndarray): Predicted probabilities, n_samples elements.

    Returns:
    float: Mean log loss.

    Raises:
    ValueError: If the inputs hold a different number of elements or are empty.
    """
    y_real = np.asarray(y_real, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()

    if y_real.shape != y_pred.shape:
        raise ValueError(
            f"y_real and y_pred must have the same number of elements, "
            f"got {y_real.size} and {y_pred.size}."
        )
    if y_real.size == 0:
        raise ValueError("Cannot compute the log loss of empty inputs.")

    epsilon = 1e-15  # Keeps probabilities away from 0 and 1 to avoid log(0)
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
    return -np.mean(y_real * np.log(y_pred) + (1 - y_real) * np.log(1 - y_pred))

def classif_error(y_real, y_pred):
    """
    Computes classification error percentage.

    Both inputs are flattened before comparison, so a column vector of
    predictions (n_samples, 1) can be compared with a flat label vector
    (n_samples,) without NumPy broadcasting the comparison into an
    (n_samples, n_samples) matrix, which could report more than 100% error.

    Parameters:
    y_real (numpy.ndarray): True labels, n_samples elements.
    y_pred (numpy.ndarray): Predicted labels, n_samples elements.

    Returns:
    float: Classification error percentage, between 0 and 100.

    Raises:
    ValueError: If the inputs hold a different number of elements or are empty.
    """
    y_real = np.asarray(y_real).ravel()
    y_pred = np.asarray(y_pred).ravel()

    if y_real.shape != y_pred.shape:
        raise ValueError(
            f"y_real and y_pred must have the same number of elements, "
            f"got {y_real.size} and {y_pred.size}."
        )
    if y_real.size == 0:
        raise ValueError("Cannot compute the classification error of empty inputs.")

    # The mean of the boolean mismatch mask is the fraction of wrong labels.
    return np.mean(y_real != y_pred) * 100


def plot_decision_boundary(X, y, theta, degree):
    """
    Draws the decision regions of a polynomial logistic regression model
    together with the samples of both classes.

    A 100 x 100 grid covering the first two columns of X (padded by one unit
    on every side) is expanded with create_features_for_poly, extended with a
    trailing column of ones, and passed to LogisticRegression together with
    theta. The output is contoured at the levels 0, 0.5 and 1.

    Parameters:
    X (numpy.ndarray): Input matrix of shape (n_samples, n_features); only the
        first two columns are plotted.
    y (numpy.ndarray): True labels, shape (n_samples,). Samples with label 1
        are drawn in red and samples with label 0 in blue.
    theta (numpy.ndarray): Optimized parameters.
    degree (int): Degree of polynomial features.
    """
    # Axis ranges: data extent widened by one unit in every direction.
    first_col, second_col = X[:, 0], X[:, 1]
    x_axis = np.linspace(first_col.min() - 1, first_col.max() + 1, 100)
    y_axis = np.linspace(second_col.min() - 1, second_col.max() + 1, 100)
    xx, yy = np.meshgrid(x_axis, y_axis)

    # One row per grid point, expanded the same way as the model inputs,
    # with the bias column appended last.
    grid_points = np.column_stack((xx.ravel(), yy.ravel()))
    expanded = create_features_for_poly(grid_points, degree)
    bias_column = np.ones((expanded.shape[0], 1))
    design_matrix = np.concatenate((expanded, bias_column), axis=1)

    # Model output per grid point, folded back into the grid layout.
    probs = LogisticRegression(design_matrix, theta).reshape(xx.shape)

    plt.contourf(xx, yy, probs, levels=[0, 0.5, 1], cmap="coolwarm", alpha=0.6)
    for label_value, colour, legend_text in (
        (1, 'red', "Class 1"),
        (0, 'blue', "Class 0"),
    ):
        members = X[y == label_value]
        plt.scatter(members[:, 0], members[:, 1], color=colour, label=legend_text)
    plt.title("Decision Boundary")
    plt.legend()
    plt.show()

def shuffle_data(X, y):
    """
    Randomly shuffles the dataset.

    The same random permutation is applied to X and y, so every sample keeps
    its label. The permutation is drawn from NumPy's global random state; call
    np.random.seed beforehand for a reproducible order. The inputs are not
    modified.

    Parameters:
    X (numpy.ndarray): Features matrix, shape (n_samples, n_features).
    y (numpy.ndarray): Labels vector, shape (n_samples,).

    Returns:
    tuple: Shuffled X and y.

    Raises:
    ValueError: If X and y do not contain the same number of samples.
    """
    n_samples = len(y)
    # Without this check a longer X would silently lose its surplus rows.
    if len(X) != n_samples:
        raise ValueError(
            f"X and y must contain the same number of samples, "
            f"got {len(X)} and {n_samples}."
        )

    permutation = np.random.permutation(n_samples)
    return X[permutation], y[permutation]

