# Test Logistic Regression Baseline Model

This notebook tests the logistic regression baseline model for predicting insufficient pain management.

In [None]:
import sys
import os
sys.path.append('/Users/jk1/icu_research/PreHosp')

from analgesia.prediction_of_insufficient_pain_management.data_preprocessing import load_and_preprocess_data
from analgesia.prediction_of_insufficient_pain_management.logistic_regression_baseline import LogisticRegressionBaseline
import pandas as pd
import numpy as np

In [None]:
# Load and preprocess the data.
# The spreadsheet location can be overridden with the PAIN_STUDY_DATA_PATH
# environment variable so the notebook is runnable on machines other than the
# original author's. When the variable is unset (or empty) the original
# absolute path is used, so existing behavior is unchanged.
DEFAULT_DATA_PATH = '/Users/jk1/Library/CloudStorage/OneDrive-unige.ch/icu_research/prehospital/analgesia/data/trauma_categories_Rega Pain Study15.09.2025_v2.xlsx'
data_path = os.environ.get('PAIN_STUDY_DATA_PATH') or DEFAULT_DATA_PATH

print("Loading and preprocessing data...")
# Returns the processed table together with the processor object that the
# following cells use to build the modeling splits.
processed_data, processor = load_and_preprocess_data(data_path)

print(f"\nProcessed data shape: {processed_data.shape}")
print(f"Target distribution: {processed_data['insufficient_pain_mgmt'].value_counts()}")

In [None]:
# Ask the processor for the train/test feature matrices and target vectors
X_train, X_test, y_train, y_test = processor.prepare_modeling_data()

# Report the size of each split (rows = samples, columns = features)
for split_name, split_X in (("Training set", X_train), ("Test set", X_test)):
    print(f"{split_name}: {split_X.shape[0]} samples, {split_X.shape[1]} features")

# Report the class counts of each target vector as a plain dict
for split_name, split_y in (("Training", y_train), ("Test", y_test)):
    print(f"{split_name} target distribution: {split_y.value_counts().to_dict()}")

In [None]:
# Construct the baseline with its default settings, then fit it on the
# training split produced by the previous cell.
print("\nInitializing Logistic Regression Baseline...")
baseline = LogisticRegressionBaseline()

print("Training baseline model...")
baseline.fit_final_model(X_train, y_train)  # return value intentionally unused
print("Baseline model training complete!")

In [None]:
# Evaluate the fitted baseline on the held-out test split and print each
# returned metric.
print("\nEvaluating baseline model...")
baseline_scores = baseline.evaluate_model(X_test, y_test)

print("\nBaseline Model Performance:")
for metric, score in baseline_scores.items():
    # Numeric scores are shown with four decimals. np.floating is included
    # because NumPy floating scalars other than float64 (e.g. np.float32) are
    # not subclasses of Python's float and would otherwise be printed
    # unformatted; anything non-numeric is printed as-is.
    if isinstance(score, (int, float, np.floating)):
        print(f"  {metric}: {score:.4f}")
    else:
        print(f"  {metric}: {score}")

In [None]:
# Fetch the feature-importance table from the fitted baseline and show its
# first ten rows (presumably already sorted by importance — confirm in
# LogisticRegressionBaseline.get_feature_importance).
print("\nTop 10 Most Important Features:")
feature_importance = baseline.get_feature_importance()
top_features = feature_importance.head(10)
print(top_features)

In [None]:
# Run 5-fold cross-validation on the training split and summarize each metric
print("\nRunning cross-validation...")
cv_scores = baseline.cross_validate_model(X_train, y_train, cv_folds=5)

print("\nCross-Validation Results:")
for metric_name, fold_scores in cv_scores.items():
    # Entries without a .mean() method are printed verbatim
    if not hasattr(fold_scores, 'mean'):
        print(f"  {metric_name}: {fold_scores}")
        continue
    # Array-like entries: mean with a +/- two-standard-deviation spread
    print(f"  {metric_name}: {fold_scores.mean():.4f} (+/- {fold_scores.std() * 2:.4f})")