# In [None]:
"""
Cross-Validation Model Comparison for Blood Pressure Estimation
Author: Jan Reifferscheidt
Project: Blood Pressure Estimation with PAT (Pulse Arrival Time)
"""

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm.notebook import tqdm
from sklearn.model_selection import LeaveOneOut
from statsmodels.formula.api import ols
import warnings
warnings.filterwarnings('ignore')

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Input table; main() reads it as a tab-separated file.
DATA_FILE = "Results1.txt"

# Predictor columns, in the order they are fed to both models.
# NOTE(review): the gender_* / activity_* names look like one-hot encoded
# categories -- confirm against the data file.
FEATURE_COLS = [
    'PTTp', 'PTTs', 'PTTv',
    'HR',
    'height', 'weight', 'age',
    'gender_female', 'gender_male',
    'activity_run', 'activity_sit', 'activity_walk',
]

# Column both models are trained to predict.
TARGET_COL = 'BP_dia'

# Neural-network hyperparameters: hidden layer width, Adam learning rate,
# Adam weight decay, and number of training epochs per fold.
HIDDEN_DIM = 50
LEARNING_RATE = 1e-3
WEIGHT_DECAY = 5e-4
EPOCHS = 1_000

class BloodPressureNN(nn.Module):
    """Fully connected regression network with a single output unit.

    The stack consists of seven ``nn.Linear`` layers: one input projection,
    five hidden-to-hidden layers and a one-unit output layer. Every linear
    layer except the last is followed by a ReLU.
    """

    def __init__(self, input_size, hidden_dim):
        """Build the layer stack.

        Args:
            input_size: Number of input features per sample.
            hidden_dim: Width shared by all hidden layers.
        """
        super().__init__()

        # Consecutive pairs of this list give (in_features, out_features) for
        # each Linear+ReLU stage: input->hidden, then five hidden->hidden.
        widths = [input_size] + [hidden_dim] * 6

        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.ReLU())

        # Output head: no activation after it.
        stack.append(nn.Linear(hidden_dim, 1))

        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        return self.layers(x)

def prepare_data(df, feature_cols, target_col):
    """Convert the selected DataFrame columns into float32 torch tensors.

    Args:
        df: DataFrame holding the feature and target columns.
        feature_cols: List of column names used as model inputs.
        target_col: Name of the column used as the regression target.

    Returns:
        A ``(features, targets)`` tuple of float32 tensors, built from
        ``df[feature_cols]`` and ``df[target_col]`` respectively.
    """
    def _as_float_tensor(selection):
        # Cast on the NumPy side so the resulting tensor is float32.
        return torch.tensor(selection.to_numpy().astype(np.float32))

    features = _as_float_tensor(df[feature_cols])
    targets = _as_float_tensor(df[target_col])
    return features, targets

def train_neural_network(model, X, y, lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY, epochs=EPOCHS):
    """Fit ``model`` to ``(X, y)`` with Adam and a mean-squared-error loss.

    Each epoch is one optimisation step over the whole of ``X`` (no
    mini-batching). A tqdm progress bar tracks the epochs.

    Args:
        model: Network to train; it is updated in place.
        X: Input tensor passed to ``model``.
        y: Target tensor compared against the flattened model output.
        lr: Adam learning rate.
        weight_decay: Adam weight decay.
        epochs: Number of optimisation steps.

    Returns:
        The same ``model`` object, after training.
    """
    loss_fn = nn.MSELoss()
    adam = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    model.train()
    progress = tqdm(range(epochs), desc='Training NN')
    for _ in progress:
        adam.zero_grad()

        # Flatten the model output so it lines up with the target tensor.
        predictions = model(X).reshape(-1)
        loss_fn(predictions, y).backward()

        adam.step()

    return model

def train_linear_model(train_data, feature_cols, target_col):
    """Fit an ordinary-least-squares model via a statsmodels formula.

    Args:
        train_data: DataFrame containing the feature and target columns.
        feature_cols: Column names joined with ``+`` on the right-hand side
            of the formula.
        target_col: Column name used on the left-hand side of the formula.

    Returns:
        The fitted results object returned by ``ols(...).fit()``.
    """
    predictors = ' + '.join(feature_cols)
    formula = f"{target_col} ~ {predictors}"
    regression = ols(formula, data=train_data)
    return regression.fit()

def evaluate_models(nn_model, lm_model, X_test, y_test, test_data):
    """Compute the residuals (prediction minus truth) of both models.

    Args:
        nn_model: Trained torch module; it is switched to eval mode here.
        lm_model: Fitted linear model exposing ``predict(test_data)`` whose
            result has a ``.values`` attribute.
        X_test: Feature tensor for the neural network.
        y_test: Tensor of true target values.
        test_data: Data passed to ``lm_model.predict``.

    Returns:
        A ``(nn_residuals, lm_residuals)`` tuple of NumPy arrays.
    """
    # Neural network: inference without gradient tracking.
    nn_model.eval()
    with torch.no_grad():
        raw_output = nn_model(X_test)
    nn_pred = raw_output.detach().numpy().flatten()

    # Linear model: predict from the tabular test data.
    lm_pred = lm_model.predict(test_data).values

    # Residual = prediction - ground truth, for each model.
    targets = y_test.numpy()
    return nn_pred - targets, lm_pred - targets

def main():
    """Compare the neural network and the linear model with leave-one-out CV.

    Reads ``DATA_FILE`` as a tab-separated table. For every row, a fresh
    neural network and a fresh OLS model are trained on all other rows and
    evaluated on the held-out row. Progress and a MAE/RMSE summary for both
    models are printed.

    Returns:
        A ``(nn_results, lm_results)`` tuple of lists holding one residual
        (prediction minus truth) per held-out sample for each model.
    """
    def _mae_rmse(residuals):
        # Mean absolute error and root-mean-square error of the residuals.
        mae = np.mean(np.abs(residuals))
        rmse = np.sqrt(np.mean(np.square(residuals)))
        return mae, rmse

    df = pd.read_csv(DATA_FILE, sep='\t')
    n_samples = len(df)

    # One residual per held-out sample, per model.
    nn_results, lm_results = [], []

    splitter = LeaveOneOut()
    print("Starting Leave-One-Out Cross-Validation...")
    print(f"Total iterations: {n_samples}")

    for fold_no, (train_idx, test_idx) in enumerate(splitter.split(df), start=1):
        # "\r" rewrites the same console line on every fold.
        print(f"\rFold {fold_no}/{n_samples} - Testing sample {test_idx[0]}", end="")

        train_data, test_data = df.iloc[train_idx], df.iloc[test_idx]

        # Tensor views of the same split for the neural network.
        X_train, y_train = prepare_data(train_data, FEATURE_COLS, TARGET_COL)
        X_test, y_test = prepare_data(test_data, FEATURE_COLS, TARGET_COL)

        # A new, freshly initialised network is trained for every fold.
        nn_model = train_neural_network(
            BloodPressureNN(input_size=len(FEATURE_COLS), hidden_dim=HIDDEN_DIM),
            X_train,
            y_train,
        )
        lm_model = train_linear_model(train_data, FEATURE_COLS, TARGET_COL)

        nn_residuals, lm_residuals = evaluate_models(
            nn_model, lm_model, X_test, y_test, test_data
        )

        # Each fold holds out exactly one sample, so keep the single residual.
        nn_results.append(nn_residuals[0])
        lm_results.append(lm_residuals[0])

    print("\nCross-validation completed!")

    nn_mae, nn_rmse = _mae_rmse(nn_results)
    lm_mae, lm_rmse = _mae_rmse(lm_results)

    print("\nPerformance Summary:")
    print(f"Neural Network - MAE: {nn_mae:.3f}, RMSE: {nn_rmse:.3f}")
    print(f"Linear Model - MAE: {lm_mae:.3f}, RMSE: {lm_rmse:.3f}")

    return nn_results, lm_results

# Run the cross-validation only when this file is executed as a script (not
# when it is imported); the two residual lists returned by main() are bound
# to module-level names.
if __name__ == "__main__":
    nn_results, lm_results = main()