In [3]:
# Decision Tree Regressor - Correctness Tests

import numpy as np
import sys
sys.path.append('..')
from src.decision_tree import DecisionTreeRegressor
from sklearn.tree import DecisionTreeRegressor as SklearnTree

# Seed NumPy's global RNG so every synthetic dataset in this cell is reproducible.
np.random.seed(42)
n = 500  # number of samples in each synthetic dataset below

# TEST 1: Numerical features only
# Target = 2*x1 + noise, tree should split on x1
print("Test 1: Numerical features")
X_num = np.random.randn(n, 3)
y_num = 2 * X_num[:, 0] + np.random.randn(n) * 0.3

tree_num = DecisionTreeRegressor(max_depth=4, min_samples_leaf=10)
tree_num.fit(X_num, y_num, feature_names=['x1', 'x2', 'x3'])
y_pred_num = tree_num.predict(X_num)
# In-sample R^2 = 1 - SS_res / SS_tot.
# X_num, y_num, tree_num, y_pred_num and r2_num are reused by Tests 4 and 5.
r2_num = 1 - np.sum((y_num - y_pred_num)**2) / np.sum((y_num - np.mean(y_num))**2)

# Read the summary once so the value that is printed is the value that is asserted on.
splits_by_feature_num = tree_num.get_split_summary()['by_feature']
print(f"  R2: {r2_num:.4f}, Splits: {splits_by_feature_num}")
# One assert per condition, each with a message, so a failure names what broke.
assert r2_num > 0.85, f"Expected R2 > 0.85, got {r2_num:.4f}"
assert 'x1' in splits_by_feature_num, f"Expected a split on 'x1', got splits on {splits_by_feature_num}"


# TEST 2: Categorical features only
# Target depends on category: Low=1, Medium=5, High=10
print("Test 2: Categorical features")
# Built once up front instead of once per sample inside the comprehension.
category_effect = {'Low': 1.0, 'Medium': 5.0, 'High': 10.0}
categories = np.random.choice(['Low', 'Medium', 'High'], size=n)
y_cat = np.array([category_effect[c] for c in categories])
y_cat += np.random.randn(n) * 0.5
X_cat = categories.reshape(-1, 1)  # single-column feature matrix of category labels

tree_cat = DecisionTreeRegressor(max_depth=3, min_samples_leaf=10)
tree_cat.fit(X_cat, y_cat, feature_names=['category'], categorical_features=['category'])
y_pred_cat = tree_cat.predict(X_cat)
# In-sample R^2 = 1 - SS_res / SS_tot.
r2_cat = 1 - np.sum((y_cat - y_pred_cat)**2) / np.sum((y_cat - np.mean(y_cat))**2)

# Read the summary once so the value that is printed is the value that is asserted on.
n_categorical_splits = tree_cat.get_split_summary()['categorical_splits']
print(f"  R2: {r2_cat:.4f}, Categorical splits: {n_categorical_splits}")
# One assert per condition, each with a message, so a failure names what broke.
assert r2_cat > 0.9, f"Expected R2 > 0.9, got {r2_cat:.4f}"
assert n_categorical_splits > 0, "Expected at least one categorical split"


# TEST 3: Mixed numerical + categorical features
# Target = category_effect + 0.5*numerical + noise
print("Test 3: Mixed features")
# Built once up front instead of once per sample inside the comprehension.
mixed_effect = {'A': 0.0, 'B': 3.0, 'C': 6.0}
num_feat = np.random.randn(n)
cat_feat = np.random.choice(['A', 'B', 'C'], size=n)
y_mixed = np.array([mixed_effect[c] for c in cat_feat])
y_mixed += 0.5 * num_feat + np.random.randn(n) * 0.3
# NOTE(review): stacking a float array with a string array gives one string-dtype
# matrix, so the numerical column reaches fit() as text. Presumably
# DecisionTreeRegressor converts non-categorical columns back to float - confirm.
X_mixed = np.column_stack([num_feat, cat_feat])

tree_mixed = DecisionTreeRegressor(max_depth=5, min_samples_leaf=10)
tree_mixed.fit(X_mixed, y_mixed, feature_names=['numerical', 'category'], categorical_features=['category'])
y_pred_mixed = tree_mixed.predict(X_mixed)
# In-sample R^2 = 1 - SS_res / SS_tot.
r2_mixed = 1 - np.sum((y_mixed - y_pred_mixed)**2) / np.sum((y_mixed - np.mean(y_mixed))**2)
summary = tree_mixed.get_split_summary()

print(f"  R2: {r2_mixed:.4f}, Num splits: {summary['numerical_splits']}, Cat splits: {summary['categorical_splits']}")
# One assert per condition, each with a message, so a failure names what broke.
assert r2_mixed > 0.9, f"Expected R2 > 0.9, got {r2_mixed:.4f}"
assert summary['numerical_splits'] > 0, "Expected at least one numerical split"
assert summary['categorical_splits'] > 0, "Expected at least one categorical split"


# TEST 4: Comparison with sklearn
# Fits sklearn's tree with the same max_depth / min_samples_leaf on the Test 1 data
# (X_num, y_num) and compares its in-sample R^2 with r2_num from Test 1.
print("Test 4: Comparison with sklearn")
tree_sklearn = SklearnTree(max_depth=4, min_samples_leaf=10, random_state=42)
tree_sklearn.fit(X_num, y_num)
y_pred_sklearn = tree_sklearn.predict(X_num)
r2_sklearn = 1 - np.sum((y_num - y_pred_sklearn)**2) / np.sum((y_num - np.mean(y_num))**2)

# Compute the gap once so the printed value and the asserted value cannot diverge.
r2_gap = abs(r2_num - r2_sklearn)
print(f"  Scratch R2: {r2_num:.4f}, sklearn R2: {r2_sklearn:.4f}, Diff: {r2_gap:.4f}")
assert r2_gap < 0.05, f"Scratch and sklearn R2 differ by {r2_gap:.4f} (limit 0.05)"


# TEST 5: Number of distinct predictions equals number of leaves
# Assumes each leaf predicts a single constant, so the distinct in-sample predictions
# from Test 1 should match the leaf count. Two leaves holding exactly the same value
# would break this equality.
print("Test 5: Prediction count equals leaf count")
n_unique = len(np.unique(y_pred_num))
n_leaves = tree_num.get_n_leaves()
print(f"  Unique predictions: {n_unique}, Leaves: {n_leaves}")
assert n_unique == n_leaves, f"Expected {n_leaves} distinct predictions (one per leaf), got {n_unique}"





Test 1: Numerical features
Building tree with 500 samples and 3 features...
Categorical features: []
Tree built successfully!
  R2: 0.9722, Splits: {'x1': 15}
Test 2: Categorical features
Building tree with 500 samples and 1 features...
Categorical features: ['category']
Tree built successfully!
  R2: 0.9828, Categorical splits: 2
Test 3: Mixed features
Building tree with 500 samples and 2 features...
Categorical features: ['category']
Tree built successfully!
  R2: 0.9883, Num splits: 22, Cat splits: 2
Test 4: Comparison with sklearn
  Scratch R2: 0.9722, sklearn R2: 0.9722, Diff: 0.0000
Test 5: Prediction count equals leaf count
  Unique predictions: 16, Leaves: 16


In [1]:
# Neural Network - Correctness Tests

import numpy as np
import sys
import warnings
import io
from contextlib import redirect_stdout
sys.path.append('..')
from src.neural_network import NeuralNetwork, DenseLayer, mse_loss, mse_loss_derivative
from sklearn.neural_network import MLPRegressor

# Suppress sklearn warnings only (e.g. the convergence warning from MLPRegressor).
# `module` is a regex matched against the start of the name of the module that
# issues the warning, so UserWarnings raised by any other code stay visible
# instead of being silenced process-wide.
warnings.filterwarnings('ignore', category=UserWarning, module=r'sklearn\.')

# Seed NumPy's global RNG so the synthetic datasets below are reproducible.
np.random.seed(42)
n = 500  # sample count used by the larger synthetic datasets below

# Helper to suppress NeuralNetwork verbose output
def silent_nn(*args, **kwargs):
    """Build a NeuralNetwork while discarding anything written to stdout.

    All positional and keyword arguments are forwarded unchanged to the
    NeuralNetwork constructor; the constructed instance is returned.
    """
    discard = io.StringIO()
    with redirect_stdout(discard):
        return NeuralNetwork(*args, **kwargs)

def silent_fit(nn, X, y):
    """Call ``nn.fit(X, y, verbose=0)`` with stdout discarded and return ``nn``.

    Returning the same object lets the call be used inline or chained.
    """
    discard = io.StringIO()
    with redirect_stdout(discard):
        nn.fit(X, y, verbose=0)
    return nn

# TEST 1: Forward pass produces valid output shape
# A single-epoch fit is enough here: only the shape of predict() is checked.
print("Test 1: Forward pass output shape")
n_rows_test1, n_cols_test1 = 100, 5
X_test1 = np.random.randn(n_rows_test1, n_cols_test1)
nn1 = silent_nn(
    layer_sizes=[n_cols_test1, 10, 1],
    learning_rate=0.01,
    epochs=1,
    random_seed=42,
)
# Targets are drawn after the network is constructed, keeping the RNG call order.
y_test1 = np.random.randn(n_rows_test1)
silent_fit(nn1, X_test1, y_test1)
y_pred1 = nn1.predict(X_test1)
print(f"  Input shape: {X_test1.shape}, Output shape: {y_pred1.shape}")
expected_shape1 = (n_rows_test1,)
assert y_pred1.shape == expected_shape1, f"Expected (100,), got {y_pred1.shape}"

# TEST 2: Loss decreases during training
# Linear target with small noise; compares the first and last recorded losses.
print("Test 2: Loss decreases during training")
X_train2 = np.random.randn(200, 4)
noise_train2 = np.random.randn(200) * 0.1
y_train2 = 2 * X_train2[:, 0] + 0.5 * X_train2[:, 1] + noise_train2
nn2 = silent_nn(
    layer_sizes=[4, 16, 8, 1],
    learning_rate=0.01,
    epochs=50,
    random_seed=42,
)
silent_fit(nn2, X_train2, y_train2)
initial_loss, final_loss = nn2.loss_history[0], nn2.loss_history[-1]
print(f"  Initial loss: {initial_loss:.4f}, Final loss: {final_loss:.4f}")
assert final_loss < initial_loss, "Loss should decrease during training"

# TEST 3: Network learns simple linear relationship
# Noise-free target 3*x0 - 2*x1, scored with in-sample R^2.
print("Test 3: Network learns linear relationship")
X_linear = np.random.randn(n, 2)
y_linear = 3 * X_linear[:, 0] - 2 * X_linear[:, 1]
nn3 = silent_nn(
    layer_sizes=[2, 32, 16, 1],
    learning_rate=0.01,
    epochs=200,
    random_seed=42,
)
silent_fit(nn3, X_linear, y_linear)
y_pred3 = nn3.predict(X_linear)
# R^2 = 1 - SS_res / SS_tot
ss_res_linear = np.sum((y_linear - y_pred3)**2)
ss_tot_linear = np.sum((y_linear - np.mean(y_linear))**2)
r2_linear = 1 - ss_res_linear / ss_tot_linear
print(f"  R2 on linear function: {r2_linear:.4f}")
assert r2_linear > 0.95, f"Expected R2 > 0.95, got {r2_linear:.4f}"

# TEST 4: Network learns nonlinear relationship
# Noise-free target sin(x0) + cos(x1), scored with in-sample R^2.
print("Test 4: Network learns nonlinear relationship")
X_nonlin = np.random.randn(n, 2)
sin_part_nonlin = np.sin(X_nonlin[:, 0])
cos_part_nonlin = np.cos(X_nonlin[:, 1])
y_nonlin = sin_part_nonlin + cos_part_nonlin
nn4 = silent_nn(
    layer_sizes=[2, 64, 32, 1],
    learning_rate=0.01,
    epochs=300,
    random_seed=42,
)
silent_fit(nn4, X_nonlin, y_nonlin)
y_pred4 = nn4.predict(X_nonlin)
# R^2 = 1 - SS_res / SS_tot
ss_res_nonlin = np.sum((y_nonlin - y_pred4)**2)
ss_tot_nonlin = np.sum((y_nonlin - np.mean(y_nonlin))**2)
r2_nonlin = 1 - ss_res_nonlin / ss_tot_nonlin
print(f"  R2 on sin+cos function: {r2_nonlin:.4f}")
assert r2_nonlin > 0.85, f"Expected R2 > 0.85, got {r2_nonlin:.4f}"

# TEST 5: Comparison with sklearn MLPRegressor
# Both models use hidden layers (32, 16), learning rate 0.01 and 100 epochs/iterations
# on the same data; X_comp and y_comp are reused by Test 7.
print("Test 5: Comparison with sklearn")
X_comp = np.random.randn(n, 3)
y_comp = X_comp[:, 0]**2 + X_comp[:, 1] + np.random.randn(n) * 0.1
# Total sum of squares, shared by both R^2 computations below.
ss_tot_comp = np.sum((y_comp - np.mean(y_comp))**2)

nn_scratch = silent_nn(
    layer_sizes=[3, 32, 16, 1],
    learning_rate=0.01,
    epochs=100,
    random_seed=42,
)
silent_fit(nn_scratch, X_comp, y_comp)
y_pred_scratch = nn_scratch.predict(X_comp)
r2_scratch = 1 - np.sum((y_comp - y_pred_scratch)**2) / ss_tot_comp

sklearn_nn_params = dict(
    hidden_layer_sizes=(32, 16),
    activation='relu',
    solver='sgd',
    learning_rate_init=0.01,
    max_iter=100,
    random_state=42,
)
sklearn_nn = MLPRegressor(**sklearn_nn_params)
sklearn_nn.fit(X_comp, y_comp)
y_pred_sklearn = sklearn_nn.predict(X_comp)
r2_sklearn = 1 - np.sum((y_comp - y_pred_sklearn)**2) / ss_tot_comp

print(f"  Scratch R2: {r2_scratch:.4f}, sklearn R2: {r2_sklearn:.4f}")
assert abs(r2_scratch - r2_sklearn) < 0.2, "Performance should be comparable to sklearn"

# TEST 6: MSE loss and derivative correctness
# Checks mse_loss and mse_loss_derivative against closed forms on a 3x1 example.
print("Test 6: MSE loss and derivative")
y_true = np.array([1.0, 2.0, 3.0]).reshape(-1, 1)
y_pred = np.array([1.1, 2.2, 2.8]).reshape(-1, 1)
loss = mse_loss(y_true, y_pred)
squared_error = (y_true - y_pred)**2
expected_loss = np.mean(squared_error)
print(f"  Computed loss: {loss:.6f}, Expected: {expected_loss:.6f}")
assert np.isclose(loss, expected_loss), "MSE loss incorrect"
# NOTE(review): the expected gradient carries no 1/N factor although the loss is a
# mean - presumably mse_loss_derivative is per-sample and averaging happens
# elsewhere; confirm against the implementation.
grad = mse_loss_derivative(y_true, y_pred)
expected_grad = 2.0 * (y_pred - y_true)
assert np.allclose(grad, expected_grad), "MSE derivative incorrect"

# TEST 7: Different activations work
# Only the sigmoid hidden activation is exercised, on the Test 5 data (X_comp, y_comp).
print("Test 7: Sigmoid activation")
sigmoid_nn_config = dict(
    layer_sizes=[3, 16, 1],
    learning_rate=0.1,
    epochs=100,
    hidden_activation='sigmoid',
    random_seed=42,
)
nn_sig = silent_nn(**sigmoid_nn_config)
silent_fit(nn_sig, X_comp, y_comp)
y_pred_sig = nn_sig.predict(X_comp)
# R^2 = 1 - SS_res / SS_tot
ss_res_sig = np.sum((y_comp - y_pred_sig)**2)
ss_tot_sig = np.sum((y_comp - np.mean(y_comp))**2)
r2_sig = 1 - ss_res_sig / ss_tot_sig
print(f"  R2 with sigmoid: {r2_sig:.4f}")
assert r2_sig > 0.3, "Sigmoid network should learn something"




Test 1: Forward pass output shape
  Input shape: (100, 5), Output shape: (100,)
Test 2: Loss decreases during training
  Initial loss: 4.6462, Final loss: 0.0471
Test 3: Network learns linear relationship
  R2 on linear function: 1.0000
Test 4: Network learns nonlinear relationship
  R2 on sin+cos function: 0.9886
Test 5: Comparison with sklearn
  Scratch R2: 0.9825, sklearn R2: 0.9908
Test 6: MSE loss and derivative
  Computed loss: 0.030000, Expected: 0.030000
Test 7: Sigmoid activation
  R2 with sigmoid: 0.9882
