# Neural Network on Residuals

## Objective
Train a Neural Network on the residuals from Linear Regression to capture non-linear patterns.

## Strategy
- Input: Original 8 features (6 numeric + 2 one-hot Sex)
- Target: Residuals from Linear Regression (alpha=0.1)
- Architecture: MLP with (64, 32) hidden layers
- Regularization: alpha=0.001, early stopping
- Goal: Beat direct MLP baseline (0.202906)
- Success criterion: CV RMSLE < 0.202906

This is the second step in the residual modeling pipeline.

In [None]:
import os
import warnings

import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_log_error
from sklearn.model_selection import KFold
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
warnings.filterwarnings('ignore')

# Fixed seed shared by NumPy, the CV splitter and the MLP for reproducibility
SEED = 42
np.random.seed(SEED)

print("Loading data and residuals...")
# Load training data
train_df = pd.read_csv('/home/code/data/train.csv')
test_df = pd.read_csv('/home/code/data/test.csv')

# Load residuals from Linear Regression
residuals_df = pd.read_csv('/home/code/experiments/005_linear_regression/residuals_lr.csv')
residuals = residuals_df['residual'].values

# The residuals become the training target and are indexed by row position,
# so there must be exactly one residual per training row. Fail fast here: a
# longer file would otherwise be silently misaligned with the features.
if len(residuals) != len(train_df):
    raise ValueError(
        f"residuals_lr.csv has {len(residuals)} rows but train.csv has "
        f"{len(train_df)}; residuals cannot be aligned with the training rows"
    )

print(f"Train: {train_df.shape}, Test: {test_df.shape}")
print(f"Residuals: {residuals.shape}, range: [{residuals.min():.2f}, {residuals.max():.2f}]")

## Feature Engineering

Use the same features as Linear Regression:
- Original numerical features (6 features)
- Sex: one-hot encoded (2 features)
- Total: 8 features

In [None]:
def create_features(df):
    """Append one-hot ``Sex`` columns to a copy of ``df`` and list the model features.

    Returns a ``(frame, feature_cols)`` pair: ``frame`` is a new DataFrame
    holding every original column plus the ``Sex_*`` indicator columns, and
    ``feature_cols`` names the six raw numeric columns followed by those
    indicator columns. The input frame is not modified.
    """
    numeric_cols = ['Age', 'Height', 'Weight', 'Duration', 'Heart_Rate', 'Body_Temp']

    augmented = df.copy()
    sex_dummies = pd.get_dummies(augmented['Sex'], prefix='Sex')

    # Numeric columns first, then the indicators in the order get_dummies emits them
    feature_cols = [*numeric_cols, *sex_dummies.columns]

    return pd.concat([augmented, sex_dummies], axis=1), feature_cols

# Engineer both frames; the feature list is taken from the training frame and
# reused to select the same columns from the test frame
train_feat, feature_cols = create_features(train_df)
test_feat, _ = create_features(test_df)

print(f"Feature columns ({len(feature_cols)}): {feature_cols}")

# Model inputs (train and test) and the regression target, which is the
# Linear Regression residual rather than the raw label
X, X_test = train_feat.loc[:, feature_cols], test_feat.loc[:, feature_cols]
y_residuals = residuals

print(f"X shape: {X.shape}, y_residuals shape: {y_residuals.shape}, X_test shape: {X_test.shape}")

## Cross-Validation Setup

Use 5-fold CV with seed 42 (SAME splits as Linear Regression to prevent leakage)

In [None]:
# Shuffled K-fold splitter, seeded so the partition is reproducible
n_folds = 5
kf = KFold(n_splits=n_folds, shuffle=True, random_state=SEED)

# Accumulators: one out-of-fold residual prediction per training row, and a
# fold-averaged residual prediction per test row
oof_residual_predictions = np.zeros(train_df.shape[0])
test_residual_predictions = np.zeros(test_df.shape[0])

# Standardize features: statistics are learned from the training matrix and
# applied unchanged to the test matrix.
# NOTE(review): the scaler is fit on all training rows before the fold split,
# so each validation fold contributes to the mean/scale used on it.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)
X_test_scaled = scaler.transform(X_test)

print(f"Features standardized. X_scaled shape: {X_scaled.shape}")

## Load Linear Regression OOF Predictions

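The Linear Regression out-of-fold (OOF) predictions are needed to reconstruct the final predictions (Linear Regression + NN residual) and to evaluate RMSLE against the original target.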

In [None]:
# Load OOF predictions from Linear Regression
lr_oof_df = pd.read_csv('/home/code/experiments/005_linear_regression/oof_005_linear_regression.csv')
lr_oof_predictions = lr_oof_df['oof_prediction'].values

# These predictions are indexed by training-row position when the final
# prediction is rebuilt, so there must be one per training row. Checking here
# surfaces a mismatch before the (slow) training loop rather than after it.
if len(lr_oof_predictions) != len(train_df):
    raise ValueError(
        f"Linear Regression OOF file has {len(lr_oof_predictions)} rows but "
        f"train.csv has {len(train_df)}; predictions cannot be aligned"
    )

print(f"Loaded Linear Regression OOF predictions: {lr_oof_predictions.shape}")
print(f"LR OOF range: [{lr_oof_predictions.min():.2f}, {lr_oof_predictions.max():.2f}]")

## Train Neural Network on Residuals

MLP architecture: (64, 32) hidden layers with early stopping

In [None]:
# Per-fold RMSLE of the combined (Linear + NN residual) prediction
fold_scores = []

# Loop invariants, computed once: the true target as an array and the
# clipping bounds (observed range of the training target)
calories_true = train_df['Calories'].values
calories_min, calories_max = calories_true.min(), calories_true.max()

print("Training Neural Network on residuals...")
print("=" * 60)

for fold, (train_idx, val_idx) in enumerate(kf.split(X_scaled), 1):
    print(f"\nFold {fold}/{n_folds}")

    # Split data; the target is the Linear Regression residual
    X_train, X_val = X_scaled[train_idx], X_scaled[val_idx]
    y_train = y_residuals[train_idx]

    # Train model. Early stopping holds out 20% of the fold's training rows
    # internally; it does not use the CV validation fold.
    model = MLPRegressor(
        hidden_layer_sizes=(64, 32),
        alpha=0.001,
        random_state=SEED,
        max_iter=500,
        early_stopping=True,
        validation_fraction=0.2,
        n_iter_no_change=20,
        verbose=False
    )
    model.fit(X_train, y_train)

    # Predict residuals
    pred_residuals_val = model.predict(X_val)
    pred_residuals_test = model.predict(X_test_scaled)

    # Final prediction = Linear OOF prediction + predicted residual, clipped
    # to the training target range so the log in RMSLE stays well defined
    final_pred_val = np.clip(
        lr_oof_predictions[val_idx] + pred_residuals_val,
        calories_min,
        calories_max,
    )

    # Calculate RMSLE on final predictions
    rmsle = np.sqrt(mean_squared_log_error(calories_true[val_idx], final_pred_val))
    fold_scores.append(rmsle)

    # Store OOF residual predictions; test predictions are averaged over folds
    oof_residual_predictions[val_idx] = pred_residuals_val
    test_residual_predictions += pred_residuals_test / n_folds

    print(f"  Fold {fold} RMSLE: {rmsle:.6f}")
    print(f"  Residual prediction range: [{pred_residuals_val.min():.2f}, {pred_residuals_val.max():.2f}]")

# Overall CV score is the mean of the per-fold RMSLE values
cv_score = np.mean(fold_scores)
cv_std = np.std(fold_scores)

print("\n" + "=" * 60)
print("NEURAL NETWORK ON RESIDUALS RESULTS")
print("=" * 60)
print(f"CV RMSLE: {cv_score:.6f} ± {cv_std:.6f}")
print(f"Individual folds: {fold_scores}")
print(f"OOF residual predictions range: [{oof_residual_predictions.min():.2f}, {oof_residual_predictions.max():.2f}]")

# Compare to direct MLP baseline (lower RMSLE is better)
direct_mlp_score = 0.202906
print(f"\nDirect MLP baseline: {direct_mlp_score:.6f}")
if cv_score < direct_mlp_score:
    print(f"✓ IMPROVEMENT: {- (cv_score - direct_mlp_score):.6f}")
else:
    print(f"✗ WORSE: +{cv_score - direct_mlp_score:.6f}")

## Analyze Residual Predictions

Check if we're capturing meaningful patterns

In [None]:
# True target as a NumPy array, reused for clipping bounds and residuals
calories_true = train_df['Calories'].values

# Calculate final OOF predictions (Linear + NN on residuals), clipped to the
# observed range of the training target
final_oof_predictions = np.clip(
    lr_oof_predictions + oof_residual_predictions,
    calories_true.min(),
    calories_true.max(),
)

print("\n" + "=" * 60)
print("FINAL PREDICTIONS ANALYSIS")
print("=" * 60)
print(f"Final OOF predictions range: [{final_oof_predictions.min():.2f}, {final_oof_predictions.max():.2f}]")

# Calculate residuals after NN
residuals_after_nn = calories_true - final_oof_predictions
print(f"Residuals after NN - mean: {residuals_after_nn.mean():.6f}, std: {residuals_after_nn.std():.6f}")

# Variance explained = 1 - Var(residual) / Var(target). Both variances come
# from NumPy (same ddof), and variances rather than standard deviations are
# compared so the figure matches its label.
variance_explained = 1 - residuals_after_nn.var() / calories_true.var()
print(f"Variance explained by Linear+NN: {variance_explained*100:.2f}%")

# Save residuals for next step (XGBoost on residuals)
residuals_nn_df = pd.DataFrame({
    'id': train_df['id'],
    'residual': residuals_after_nn
})
residuals_nn_path = '/home/code/experiments/006_neural_network_residuals/residuals_after_nn.csv'
# Create the experiment directory if needed so the save cannot fail after training
os.makedirs(os.path.dirname(residuals_nn_path), exist_ok=True)
residuals_nn_df.to_csv(residuals_nn_path, index=False)
print(f"\nResiduals after NN saved for next step")

## Create Submission

Combine Linear Regression predictions with NN residual predictions

In [None]:
# Load Linear Regression test predictions
lr_test_df = pd.read_csv('/home/submission/submission_005_linear_regression.csv')

# The Linear Regression predictions are added to the NN residual predictions
# by row position, so the file must describe the same test rows in the same
# order. Verify this instead of silently producing a misaligned submission.
if len(lr_test_df) != len(test_df):
    raise ValueError(
        f"Linear Regression submission has {len(lr_test_df)} rows but "
        f"test.csv has {len(test_df)}"
    )
if 'id' in lr_test_df.columns and not np.array_equal(lr_test_df['id'].values, test_df['id'].values):
    raise ValueError(
        "Linear Regression submission ids do not match test.csv ids row by row"
    )

lr_test_predictions = lr_test_df['Calories'].values

# Calculate final test predictions
final_test_predictions = lr_test_predictions + test_residual_predictions

# Clip predictions to the observed range of the training target
final_test_predictions = np.clip(final_test_predictions, train_df['Calories'].min(), train_df['Calories'].max())

# Create submission
submission = pd.DataFrame({
    'id': test_df['id'],
    'Calories': final_test_predictions
})

submission_path = '/home/submission/submission_006_neural_network_residuals.csv'
submission.to_csv(submission_path, index=False)

print(f"\nSubmission saved: {submission_path}")
print(f"Submission predictions range: [{submission['Calories'].min():.2f}, {submission['Calories'].max():.2f}]")

# Save OOF predictions: the combined prediction and the NN residual component
oof_df = pd.DataFrame({
    'id': train_df['id'],
    'oof_prediction': final_oof_predictions,
    'residual_prediction': oof_residual_predictions
})
oof_path = '/home/code/experiments/006_neural_network_residuals/oof_006_neural_network_residuals.csv'
oof_df.to_csv(oof_path, index=False)

print(f"OOF predictions saved: {oof_path}")

## Summary

This Neural Network on residuals:
- Captures non-linear patterns in Linear Regression residuals
- Combines with Linear predictions for final output
- Is evaluated against the direct MLP baseline (CV RMSLE 0.202906); the stage succeeds only if its CV RMSLE is lower
- Generates residuals for the next step (XGBoost on residuals)
- Completes the second stage of the residual modeling pipeline