# In [None]:
# main_experiments.py
import numpy as np
import matplotlib.pyplot as plt
from models import LinearRegression, LogisticRegression, GradientDescent, StochasticGradientDescent

def test_linear_regression_analytical():
    """Fit a 1-D linear model with the closed-form solver and report the result.

    Synthetic targets are ``2.0 * x + 1.0`` plus Gaussian noise (sigma 0.5)
    on 100 evenly spaced points in [0, 10]; the global NumPy seed is set to
    42 before the noise is drawn.  The data and the fitted line are drawn in
    the left panel of a new 1x2 figure.  No ``plt.show()`` is issued here:
    ``test_linear_regression_gradient_descent`` draws into the right panel of
    the same figure and shows it.

    Returns:
        Tuple ``(model, X, y, y_pred)``: the fitted model, the inputs, the
        noisy targets, and the model's predictions on the inputs.
    """
    print("=== Testing Linear Regression (Analytical) ===")

    # Ground-truth line the model is expected to recover
    true_slope = 2.0
    true_intercept = 1.0
    n_points = 100

    # Noisy samples along that line (seed fixed for reproducibility)
    inputs = np.linspace(0, 10, n_points)
    np.random.seed(42)
    noise = np.random.normal(0, 0.5, n_points)
    targets = true_slope * inputs + true_intercept + noise

    # Closed-form fit, then predictions on the training inputs
    model = LinearRegression(add_bias=True)
    model.fit_analytical(inputs, targets)
    fitted = model.predict(inputs)

    # Report recovered parameters and mean squared error
    residuals = targets - fitted
    mse = np.mean(residuals ** 2)
    # NOTE(review): assumes weights[0] is the slope and weights[1] the bias
    # term -- confirm against LinearRegression's bias handling.
    slope_hat = model.weights[0]
    intercept_hat = model.weights[1]
    print(f"True slope: {true_slope:.3f}, Estimated: {slope_hat:.3f}")
    print(f"True intercept: {true_intercept:.3f}, Estimated: {intercept_hat:.3f}")
    print(f"MSE: {mse:.4f}")

    # Left panel: data cloud with the fitted line on top
    plt.figure(figsize=(10, 4))
    plt.subplot(1, 2, 1)
    plt.scatter(inputs, targets, alpha=0.7, label='Data')
    plt.plot(inputs, fitted, color='red', linewidth=2, label='Fit')
    plt.title('Linear Regression (Analytical)')
    plt.xlabel('X')
    plt.ylabel('y')
    plt.legend()

    return model, inputs, targets, fitted

def test_linear_regression_gradient_descent():
    """Fit a 1-D linear model with batch gradient descent and plot convergence.

    Uses the same synthetic data as the analytical test: ``2.0 * x + 1.0``
    plus Gaussian noise (sigma 0.5) on 100 points in [0, 10], seed 42.  The
    optimizer is ``GradientDescent(lr=0.01, max_iters=1000, epsilon=1e-6)``
    with history recording enabled so the MSE at every recorded weight
    vector can be plotted on a log scale.

    The convergence curve is drawn into subplot (1, 2, 2) of the current
    figure, so that it sits next to the panel drawn by
    ``test_linear_regression_analytical`` when that test ran first.  If no
    figure is open (the function is called on its own), a 10x4 figure is
    created so the panel is not squeezed into a default-size figure.

    Returns:
        Tuple ``(model, X, y, y_pred)``: the fitted model, the inputs, the
        noisy targets, and the model's predictions on the inputs.
    """
    print("\n=== Testing Linear Regression (Gradient Descent) ===")

    # Generate data
    np.random.seed(42)
    X = np.linspace(0, 10, 100)
    true_slope = 2.0
    true_intercept = 1.0
    y = true_slope * X + true_intercept + np.random.normal(0, 0.5, 100)

    # Create optimizer (history is needed for the convergence plot below)
    optimizer = GradientDescent(lr=0.01, max_iters=1000, epsilon=1e-6, record_history=True)

    # Fit model
    model = LinearRegression(add_bias=True)
    model.fit_gradient_descent(X, y, optimizer)

    # Predictions
    y_pred = model.predict(X)

    # Calculate metrics
    mse = np.mean((y - y_pred) ** 2)
    print(f"True slope: {true_slope:.3f}, Estimated: {model.weights[0]:.3f}")
    print(f"True intercept: {true_intercept:.3f}, Estimated: {model.weights[1]:.3f}")
    print(f"MSE: {mse:.4f}")
    print(f"Number of iterations: {len(optimizer.w_history)}")

    # Plot convergence.  Reuse the figure left open by the analytical test
    # when there is one; otherwise open a figure of the same size.
    if not plt.get_fignums():
        plt.figure(figsize=(10, 4))
    plt.subplot(1, 2, 2)

    # The preprocessed inputs do not depend on the weights, so build them
    # once instead of once per recorded iterate.
    # NOTE(review): assumes _preprocess_X is a pure transform of X -- confirm.
    X_design = model._preprocess_X(X)
    errors = [np.mean((y - X_design @ w) ** 2) for w in optimizer.w_history]
    plt.plot(errors)
    plt.xlabel('Iteration')
    plt.ylabel('MSE')
    plt.title('Gradient Descent Convergence')
    plt.yscale('log')

    plt.tight_layout()
    plt.show()

    return model, X, y, y_pred

def test_logistic_regression():
    """Train logistic regression on two 1-D Gaussian classes and plot the results.

    Class 1 is drawn from N(2, 1) and class 0 from N(-2, 1), 200 samples
    each (seed 42), then shuffled together.  The model is trained with
    ``GradientDescent(lr=0.1, max_iters=2000, epsilon=1e-6)`` with history
    recording enabled.  A 1x3 figure shows the labelled data, the predicted
    probabilities, and the log loss at every recorded weight vector.

    Returns:
        Tuple ``(model, X, y, y_pred)``: the fitted model, the shuffled
        inputs, the matching labels, and the predicted labels.
    """
    print("\n=== Testing Logistic Regression ===")

    # Generate binary classification data
    np.random.seed(42)
    n_samples = 200

    # Create two classes
    X1 = np.random.normal(2, 1, n_samples)
    X2 = np.random.normal(-2, 1, n_samples)
    X = np.concatenate([X1, X2])
    y = np.concatenate([np.ones(n_samples), np.zeros(n_samples)])

    # Shuffle data (same permutation for inputs and labels)
    indices = np.random.permutation(2 * n_samples)
    X, y = X[indices], y[indices]

    # Create optimizer (history is needed for the loss plot below)
    optimizer = GradientDescent(lr=0.1, max_iters=2000, epsilon=1e-6, record_history=True)

    # Fit model
    model = LogisticRegression(add_bias=True)
    model.fit(X, y, optimizer)

    # Predictions
    y_pred = model.predict(X)
    y_proba = model.predict_proba(X)

    # Calculate accuracy
    accuracy = np.mean(y_pred == y)
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Final weights: {model.weights}")

    # Plot results
    plt.figure(figsize=(12, 4))

    # Panel 1: raw data, label on the vertical axis
    plt.subplot(1, 3, 1)
    plt.scatter(X[y==0], y[y==0], alpha=0.7, label='Class 0')
    plt.scatter(X[y==1], y[y==1], alpha=0.7, label='Class 1')
    plt.xlabel('X')
    plt.ylabel('y')
    plt.title('Original Data')
    plt.legend()

    # Panel 2: predicted probability per sample, coloured by true label
    plt.subplot(1, 3, 2)
    plt.scatter(X, y_proba, alpha=0.7, c=y, cmap='coolwarm')
    plt.xlabel('X')
    plt.ylabel('P(y=1)')
    plt.title('Predicted Probabilities')

    # Panel 3: training loss replayed from the recorded weight history
    plt.subplot(1, 3, 3)
    # The preprocessed inputs do not depend on the weights, so build them
    # once instead of once per recorded iterate.
    # NOTE(review): assumes _preprocess_X is a pure transform of X -- confirm.
    X_design = model._preprocess_X(X)
    losses = []
    for w in optimizer.w_history:
        yh = model._sigmoid(X_design @ w)
        # 1e-8 keeps log() finite when a probability saturates at 0 or 1
        loss = -np.mean(y * np.log(yh + 1e-8) + (1-y) * np.log(1-yh + 1e-8))
        losses.append(loss)
    plt.plot(losses)
    plt.xlabel('Iteration')
    plt.ylabel('Log Loss')
    plt.title('Training Loss')
    plt.yscale('log')

    plt.tight_layout()
    plt.show()

    return model, X, y, y_pred

def test_stochastic_gradient_descent():
    """Fit a 1-D linear model with mini-batch SGD and plot convergence and fit.

    Synthetic targets are ``2.0 * x + 1.0`` plus Gaussian noise (sigma 0.5)
    on 1000 evenly spaced points in [0, 10], seed 42.  The optimizer is
    ``StochasticGradientDescent(lr=0.01, max_iters=500, batch_size=32)``
    with history recording enabled.  A 1x2 figure shows the full-data MSE
    at every recorded weight vector (log scale) and the fitted line over
    every tenth data point.

    Returns:
        Tuple ``(model, X, y, y_pred)``: the fitted model, the inputs, the
        noisy targets, and the model's predictions on the inputs.
    """
    print("\n=== Testing Stochastic Gradient Descent ===")

    # Generate data
    np.random.seed(42)
    X = np.linspace(0, 10, 1000)  # Larger dataset
    true_slope = 2.0
    true_intercept = 1.0
    y = true_slope * X + true_intercept + np.random.normal(0, 0.5, 1000)

    # Create optimizer (history is needed for the convergence plot below)
    optimizer = StochasticGradientDescent(lr=0.01, max_iters=500, batch_size=32, record_history=True)

    # Fit model
    model = LinearRegression(add_bias=True)
    model.fit_gradient_descent(X, y, optimizer)

    # Predictions
    y_pred = model.predict(X)

    # Calculate metrics
    mse = np.mean((y - y_pred) ** 2)
    print(f"True slope: {true_slope:.3f}, Estimated: {model.weights[0]:.3f}")
    print(f"True intercept: {true_intercept:.3f}, Estimated: {model.weights[1]:.3f}")
    print(f"MSE: {mse:.4f}")

    # Plot convergence
    plt.figure(figsize=(10, 4))

    plt.subplot(1, 2, 1)
    # The preprocessed inputs do not depend on the weights, so build them
    # once instead of once per recorded iterate.
    # NOTE(review): assumes _preprocess_X is a pure transform of X -- confirm.
    X_design = model._preprocess_X(X)
    errors = [np.mean((y - X_design @ w) ** 2) for w in optimizer.w_history]
    plt.plot(errors)
    plt.xlabel('Iteration')
    plt.ylabel('MSE')
    plt.title('SGD Convergence')
    plt.yscale('log')

    plt.subplot(1, 2, 2)
    # Scatter only every tenth point so the fitted line stays visible
    plt.scatter(X[::10], y[::10], alpha=0.7, label='Data')
    plt.plot(X, y_pred, color='red', linewidth=2, label='SGD Fit')
    plt.xlabel('X')
    plt.ylabel('y')
    plt.title('Linear Regression with SGD')
    plt.legend()

    plt.tight_layout()
    plt.show()

    return model, X, y, y_pred

if __name__ == "__main__":
    # Script entry point: run the four experiments in order.  Each call
    # returns (model, X, y, y_pred), kept here as module-level names.
    print("Running comprehensive tests...")

    # Test 1: closed-form linear regression.  Opens a 1x2 figure and fills
    # the left panel without calling plt.show().
    model1, X1, y1, y_pred1 = test_linear_regression_analytical()

    # Test 2: linear regression via gradient descent.  Must run right after
    # Test 1: it draws into subplot (1, 2, 2) of the current figure and then
    # calls plt.show().
    model2, X2, y2, y_pred2 = test_linear_regression_gradient_descent()

    # Test 3: logistic regression on two 1-D Gaussian classes (own figure).
    model3, X3, y3, y_pred3 = test_logistic_regression()

    # Test 4: linear regression via stochastic gradient descent (own figure).
    model4, X4, y4, y_pred4 = test_stochastic_gradient_descent()

    print("\n=== All tests completed ===")