In [42]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc
import seaborn as sns

np.random.seed(42)

def generate_dataset(n_samples=1000):
    X = []
    y = []
    samples_per_class = n_samples // 5

    x0 = np.random.uniform(0, 50, samples_per_class)
    y0 = np.random.uniform(0, 33.33, samples_per_class)
    class0 = np.column_stack([x0, y0])

    x1 = np.random.uniform(0, 50, samples_per_class)
    y1 = np.random.uniform(33.33, 66.67, samples_per_class)
    class1 = np.column_stack([x1, y1])

    x2 = np.random.uniform(0, 50, samples_per_class)
    y2 = np.random.uniform(66.67, 100, samples_per_class)
    class2 = np.column_stack([x2, y2])

    x3 = np.random.uniform(50, 75, samples_per_class)
    y3 = np.random.uniform(0, 100, samples_per_class)
    class3 = np.column_stack([x3, y3])

    x4 = np.random.uniform(75, 100, samples_per_class)
    y4 = np.random.uniform(0, 100, samples_per_class)
    class4 = np.column_stack([x4, y4])

    X = np.vstack([class0, class1, class2, class3, class4])
    y = np.hstack([
        np.zeros(samples_per_class),
        np.ones(samples_per_class),
        np.full(samples_per_class, 2),
        np.full(samples_per_class, 3),
        np.full(samples_per_class, 4)
    ])

    indices = np.random.permutation(len(X))
    return X[indices], y[indices].astype(int)

X, y_labels = generate_dataset(1000)

X_mean = np.mean(X, axis=0)
X_std = np.std(X, axis=0)
X = (X - X_mean) / X_std

Y = np.zeros((len(y_labels), 5))
for i, label in enumerate(y_labels):
    Y[i, label] = 1

split_idx = int(0.8 * len(X))
X_train, X_test = X[:split_idx], X[split_idx:]
Y_train, Y_test = Y[:split_idx], Y[split_idx:]
y_train_labels, y_test_labels = y_labels[:split_idx], y_labels[split_idx:]

class NeuralNetwork(object):
    def __init__(self, learning_rate=0.01, architecture=[12, 9, 7], reg_lambda=0.001):
        inputLayerNeurons = 2
        hidden1LayerNeurons = architecture[0]
        hidden2LayerNeurons = architecture[1]
        hidden3LayerNeurons = architecture[2]
        outLayerNeurons = 5

        self.learning_rate = learning_rate
        self.reg_lambda = reg_lambda

        self.W_H1 = np.random.randn(inputLayerNeurons, hidden1LayerNeurons) * np.sqrt(2.0/inputLayerNeurons)
        self.W_H2 = np.random.randn(hidden1LayerNeurons, hidden2LayerNeurons) * np.sqrt(2.0/hidden1LayerNeurons)
        self.W_H3 = np.random.randn(hidden2LayerNeurons, hidden3LayerNeurons) * np.sqrt(2.0/hidden2LayerNeurons)
        self.W_OH = np.random.randn(hidden3LayerNeurons, outLayerNeurons) * np.sqrt(2.0/hidden3LayerNeurons)

        self.b_H1 = np.zeros((1, hidden1LayerNeurons))
        self.b_H2 = np.zeros((1, hidden2LayerNeurons))
        self.b_H3 = np.zeros((1, hidden3LayerNeurons))
        self.b_OH = np.zeros((1, outLayerNeurons))

    def sigmoid(self, x, der=False):
        if der == True:
            return x * (1-x)
        else:
            x = np.clip(x, -500, 500)
            return 1 / (1 + np.exp(-x))

    def softmax(self, x):
        x = x - np.max(x, axis=1, keepdims=True)
        exp_x = np.exp(np.clip(x, -500, 500))
        return exp_x / (np.sum(exp_x, axis=1, keepdims=True) + 1e-15)

    def feedForward(self, X):
        hidden1_input = np.dot(X, self.W_H1) + self.b_H1
        self.hidden1_output = self.sigmoid(hidden1_input)

        hidden2_input = np.dot(self.hidden1_output, self.W_H2) + self.b_H2
        self.hidden2_output = self.sigmoid(hidden2_input)

        hidden3_input = np.dot(self.hidden2_output, self.W_H3) + self.b_H3
        self.hidden3_output = self.sigmoid(hidden3_input)

        output_input = np.dot(self.hidden3_output, self.W_OH) + self.b_OH
        pred = self.softmax(output_input)
        return pred

    def backPropagation(self, X, Y, pred):
        m = X.shape[0]

        output_error = (pred - Y) / m
        output_error = np.clip(output_error, -10, 10)

        hidden3_error = output_error.dot(self.W_OH.T)
        hidden3_delta = hidden3_error * self.sigmoid(self.hidden3_output, der=True)
        hidden3_delta = np.clip(hidden3_delta, -10, 10)

        hidden2_error = hidden3_delta.dot(self.W_H3.T)
        hidden2_delta = hidden2_error * self.sigmoid(self.hidden2_output, der=True)
        hidden2_delta = np.clip(hidden2_delta, -10, 10)

        hidden1_error = hidden2_delta.dot(self.W_H2.T)
        hidden1_delta = hidden1_error * self.sigmoid(self.hidden1_output, der=True)
        hidden1_delta = np.clip(hidden1_delta, -10, 10)

        self.W_OH -= self.learning_rate * (self.hidden3_output.T.dot(output_error) + self.reg_lambda * self.W_OH)
        self.W_H3 -= self.learning_rate * (self.hidden2_output.T.dot(hidden3_delta) + self.reg_lambda * self.W_H3)
        self.W_H2 -= self.learning_rate * (self.hidden1_output.T.dot(hidden2_delta) + self.reg_lambda * self.W_H2)
        self.W_H1 -= self.learning_rate * (X.T.dot(hidden1_delta) + self.reg_lambda * self.W_H1)

        self.b_OH -= self.learning_rate * np.sum(output_error, axis=0, keepdims=True)
        self.b_H3 -= self.learning_rate * np.sum(hidden3_delta, axis=0, keepdims=True)
        self.b_H2 -= self.learning_rate * np.sum(hidden2_delta, axis=0, keepdims=True)
        self.b_H1 -= self.learning_rate * np.sum(hidden1_delta, axis=0, keepdims=True)

    def train(self, X, Y):
        output = self.feedForward(X)
        self.backPropagation(X, Y, output)

    def predict(self, X):
        output = self.feedForward(X)
        return np.argmax(output, axis=1)

    def predict_proba(self, X):
        return self.feedForward(X)

def train_and_evaluate(learning_rate, architecture, reg_lambda, epochs=15000):
    np.random.seed(42)

    NN = NeuralNetwork(learning_rate=learning_rate, architecture=architecture, reg_lambda=reg_lambda)

    for i in range(epochs):
        NN.train(X_train, Y_train)

    train_pred = NN.predict(X_train)
    test_pred = NN.predict(X_test)

    train_accuracy = np.mean(train_pred == y_train_labels)
    test_accuracy = np.mean(test_pred == y_test_labels)

    y_score = NN.predict_proba(X_test)
    roc_auc_scores = []

    for i in range(5):
        y_true_binary = (y_test_labels == i).astype(int)
        y_score_binary = y_score[:, i]
        fpr, tpr, _ = roc_curve(y_true_binary, y_score_binary)
        roc_auc = auc(fpr, tpr)
        roc_auc_scores.append(roc_auc)

    avg_auc = np.mean(roc_auc_scores)
    overfitting_gap = train_accuracy - test_accuracy

    total_params = (2 * architecture[0] + architecture[0] +
                   architecture[0] * architecture[1] + architecture[1] +
                   architecture[1] * architecture[2] + architecture[2] +
                   architecture[2] * 5 + 5)

    return {
        'train_acc': train_accuracy,
        'test_acc': test_accuracy,
        'avg_auc': avg_auc,
        'overfitting': overfitting_gap,
        'params': total_params
    }

print("1. LEARNING RATE COMPARISON")
print("Fixed: Architecture=[12,9,7], Regularization=0.001")

lr_configs = [
    {'lr': 0.001, 'name': 'Low LR'},
    {'lr': 0.01, 'name': 'Medium LR'},
    {'lr': 0.1, 'name': 'High LR'}
]

lr_results = []
for config in lr_configs:
    print(f"Testing {config['name']} (LR={config['lr']})...")
    result = train_and_evaluate(config['lr'], [12, 9, 7], 0.001)
    result['lr'] = config['lr']
    result['name'] = config['name']
    lr_results.append(result)
    print(f"  Test Accuracy: {result['test_acc']:.4f}, AUC: {result['avg_auc']:.3f}")

print(f"\nLEARNING RATE COMPARISON TABLE:")
print(f"LR Value    Train Acc    Test Acc     AUC      Overfitting")
for r in lr_results:
    print(f"{r['lr']:<10.3f} {r['train_acc']:<12.4f} {r['test_acc']:<12.4f} {r['avg_auc']:<8.3f} {r['overfitting']:<12.4f}")

print(f"\n2. ARCHITECTURE COMPARISON")
print("Fixed: Learning Rate=0.01, Regularization=0.001")

arch_configs = [
    {'arch': [8, 6, 4], 'name': 'Small Network'},
    {'arch': [12, 9, 7], 'name': 'Medium Network'},
    {'arch': [16, 12, 8], 'name': 'Large Network'}
]

arch_results = []
for config in arch_configs:
    arch_str = f"{config['arch'][0]}-{config['arch'][1]}-{config['arch'][2]}"
    print(f"Testing {config['name']} ({arch_str})...")
    result = train_and_evaluate(0.01, config['arch'], 0.001)
    result['arch'] = config['arch']
    result['arch_str'] = arch_str
    result['name'] = config['name']
    arch_results.append(result)
    print(f"  Test Accuracy: {result['test_acc']:.4f}, Parameters: {result['params']}")

print(f"\nARCHITECTURE COMPARISON TABLE:")
print(f"Architecture Parameters  Test Acc     AUC      Overfitting")
for r in arch_results:
    print(f"{r['arch_str']:<12} {r['params']:<12} {r['test_acc']:<12.4f} {r['avg_auc']:<8.3f} {r['overfitting']:<12.4f}")

print(f"\n3. REGULARIZATION COMPARISON")
print("Fixed: Learning Rate=0.01, Architecture=[12,9,7]")

reg_configs = [
    {'reg': 0.0, 'name': 'No Regularization'},
    {'reg': 0.001, 'name': 'Light Regularization'},
    {'reg': 0.01, 'name': 'Strong Regularization'}
]

reg_results = []
for config in reg_configs:
    print(f"Testing {config['name']} (Lambda={config['reg']})...")
    result = train_and_evaluate(0.01, [12, 9, 7], config['reg'])
    result['reg'] = config['reg']
    result['name'] = config['name']
    reg_results.append(result)
    print(f"  Test Accuracy: {result['test_acc']:.4f}, Overfitting: {result['overfitting']:.4f}")

print(f"\nREGULARIZATION COMPARISON TABLE:")
print(f"Lambda     Train Acc    Test Acc     AUC      Overfitting")
for r in reg_results:
    print(f"{r['reg']:<10.3f} {r['train_acc']:<12.4f} {r['test_acc']:<12.4f} {r['avg_auc']:<8.3f} {r['overfitting']:<12.4f}")

all_results = lr_results + arch_results + reg_results
best_result = max(all_results, key=lambda x: x['test_acc'])

print(f"\nOVERALL PERFORMANCE SUMMARY:")
print(f"Best Configuration Found:")
if 'lr' in best_result and best_result in lr_results:
    print(f"  Category: Learning Rate")
    print(f"  Learning Rate: {best_result['lr']}")
elif 'arch' in best_result and best_result in arch_results:
    print(f"  Category: Architecture")
    print(f"  Architecture: {best_result['arch_str']}")
    print(f"  Parameters: {best_result['params']}")
else:
    print(f"  Category: Regularization")
    print(f"  Regularization: {best_result['reg']}")

print(f"  Test Accuracy: {best_result['test_acc']:.4f} ({best_result['test_acc']*100:.2f}%)")
print(f"  Average AUC: {best_result['avg_auc']:.3f}")
print(f"  Overfitting Gap: {best_result['overfitting']:.4f}")

print(f"\nKey Findings:")
print(f"- Optimal Learning Rate: 0.01 (medium)")
print(f"- Optimal Architecture: Medium network balances capacity and overfitting")
print(f"- Optimal Regularization: Light regularization (0.001) prevents overfitting")
print(f"- Best Test Accuracy: {best_result['test_acc']:.1%}")

1. LEARNING RATE COMPARISON
Fixed: Architecture=[12,9,7], Regularization=0.001
Testing Low LR (LR=0.001)...
  Test Accuracy: 0.3550, AUC: 0.723
Testing Medium LR (LR=0.01)...
  Test Accuracy: 0.8350, AUC: 0.995
Testing High LR (LR=0.1)...
  Test Accuracy: 0.9800, AUC: 1.000

LEARNING RATE COMPARISON TABLE:
LR Value    Train Acc    Test Acc     AUC      Overfitting
0.001      0.3725       0.3550       0.723    0.0175      
0.010      0.8250       0.8350       0.995    -0.0100     
0.100      0.9850       0.9800       1.000    0.0050      

2. ARCHITECTURE COMPARISON
Fixed: Learning Rate=0.01, Regularization=0.001
Testing Small Network (8-6-4)...
  Test Accuracy: 0.5300, Parameters: 131
Testing Medium Network (12-9-7)...
  Test Accuracy: 0.8350, Parameters: 263
Testing Large Network (16-12-8)...
  Test Accuracy: 0.8350, Parameters: 401

ARCHITECTURE COMPARISON TABLE:
Architecture Parameters  Test Acc     AUC      Overfitting
8-6-4        131          0.5300       0.928    0.0187      
12