In [None]:
import pandas as pd
import os
from scipy.stats import pearsonr
from statsmodels.miscmodels.ordinal_model import OrderedModel
import seaborn as sns
import matplotlib.pyplot as plt
from utils import create_registry_case_identification_column, create_ehr_case_identification_column, patient_selection
from utils import load_data_from_main_dir
from lab_preprocessing import preprocess_labs
from outcome_preprocessing import preprocess_outcomes


In [None]:
# Input locations: the EDS CSV, the directory of the EHR extraction, and the registry workbook.
# NOTE(review): these are absolute, machine-specific paths; the notebook will only run
# where they exist. Consider deriving them from a single configurable data directory.
eds_path = '/Users/jk1/stroke_datasets/stroke_unit_dataset/per_value/Extraction_20221117/eds_j1.csv'
ehr_data_path = '/Users/jk1/stroke_datasets/stroke_unit_dataset/per_value/Extraction_20221117/'
registry_path = '/Users/jk1/Library/CloudStorage/OneDrive-unige.ch/stroke_research/geneva_stroke_unit_dataset/data/stroke_registry/post_hoc_modified/stroke_registry_post_hoc_modified.xlsx'

In [None]:
# Read the EDS file (semicolon-separated, UTF-8) and the registry workbook.
# dtype=str keeps every column as text, so nothing is type-coerced at load time.
eds_df = pd.read_csv(eds_path, delimiter=';', encoding='utf-8',
                         dtype=str)
registry_df = pd.read_excel(registry_path, dtype=str)


In [None]:
# Add a `case_admission_id` column to both tables using the project helpers.
# This column is the key used below for cohort filtering and for all merges.
registry_df['case_admission_id'] = create_registry_case_identification_column(registry_df)
eds_df['case_admission_id'] = create_ehr_case_identification_column(eds_df)

In [None]:
eds_df.head()

In [None]:
# Run the project's patient selection with the under-18, non-ischemic-stroke and
# non-acute-stroke exclusions switched on. The helper reads both files itself (paths
# are passed, not the frames loaded above).
# NOTE(review): the two results are presumably the included registry rows and the
# excluded patients, as their names suggest — see utils.patient_selection to confirm.
inclusion_registry_df, excluded_patients_df = patient_selection(
    registry_path=registry_path,
    eds_path=eds_path,
    exclude_patients_under_18=True,
    exclude_non_ischemic_stroke=True,
    exclude_non_acute_stroke=True,
    verbose=True
)

In [None]:
# Keep only the EDS rows whose case belongs to the included cohort, so that the
# count printed below really is an "after selection" count.
# Previously only the id column was overwritten (NaN for excluded rows) while all
# rows were kept, so patient_id.nunique() still counted every patient in the file.
eds_df = eds_df[eds_df['case_admission_id'].isin(inclusion_registry_df['case_admission_id'])].copy()
print(f'Number of patients in EDS after selection: {eds_df.patient_id.nunique()}')

In [None]:
registry_df.case_admission_id.nunique()

In [None]:
# Load the laboratory data from the EHR extraction directory.
# NOTE(review): 'labo' is presumably the file-name prefix the loader matches on —
# confirm against utils.load_data_from_main_dir.
lab_file_start = 'labo'
lab_df = load_data_from_main_dir(ehr_data_path, lab_file_start)

In [None]:
# Same case key as on the EDS table, so lab rows can be matched to the cohort.
lab_df['case_admission_id'] = create_ehr_case_identification_column(lab_df)

In [None]:
# Run the project's lab preprocessing with "lactate" as the only requested lab.
# NOTE(review): the exact cleaning steps live in lab_preprocessing.preprocess_labs.
preprocessed_lactate_df = preprocess_labs(lab_df, ["lactate"])

In [None]:
# Restrict the lactate measurements to cases of the included cohort.
included_case_ids = inclusion_registry_df['case_admission_id']
in_cohort = preprocessed_lactate_df['case_admission_id'].isin(included_case_ids)
preprocessed_lactate_df = preprocessed_lactate_df[in_cohort]

In [None]:
preprocessed_lactate_df.head()

In [None]:
preprocessed_lactate_df.unit_of_measure.unique()

In [None]:
preprocessed_lactate_df.value.hist(bins=100)

In [None]:
inclusion_registry_df.head()

In [None]:
inclusion_registry_df['stroke_dt'].isna().sum(), inclusion_registry_df['arrival_dt'].isna().sum()

In [None]:
# Reference time T0 per case: `stroke_dt` where it is present, `arrival_dt` otherwise.
stroke_time = inclusion_registry_df['stroke_dt']
arrival_time = inclusion_registry_df['arrival_dt']
inclusion_registry_df['T0'] = stroke_time.fillna(arrival_time)

# Left join keeps every lactate measurement, with or without a matching T0.
t0_by_case = inclusion_registry_df[['case_admission_id', 'T0']]
preprocessed_lactate_df = pd.merge(
    preprocessed_lactate_df,
    t0_by_case,
    how='left',
    on='case_admission_id',
)

In [None]:
# Hours elapsed between T0 and each lactate sample (negative = sampled before T0).
dt_format = '%d.%m.%Y %H:%M'
sample_time = pd.to_datetime(preprocessed_lactate_df['sample_date'], format=dt_format)
reference_time = pd.to_datetime(preprocessed_lactate_df['T0'], format=dt_format)
elapsed_seconds = (sample_time - reference_time).dt.total_seconds()
preprocessed_lactate_df['relative_sample_date'] = elapsed_seconds / 3600  # convert to hours


In [None]:
preprocessed_lactate_df['sample_date'].values

In [None]:
import numpy as np

# Whole-hour bucket of each sample relative to T0 (floor, so -0.5 h falls into bucket -1).
hours_from_t0 = preprocessed_lactate_df['relative_sample_date']
preprocessed_lactate_df['relative_sample_date_hcat'] = np.floor(hours_from_t0)

In [None]:
preprocessed_lactate_df[['T0', 'sample_date', 'relative_sample_date']]

In [None]:

# sns.set(style="whitegrid")
# plt.figure(figsize=(10, 6))
# ax = sns.lineplot(x='relative_sample_date_hcat', y='value', data=preprocessed_lactate_df)

# ax.set_xlim(-24, 7*24)

In [None]:
preprocessed_lactate_df.case_admission_id.nunique()

In [None]:
# Number of distinct cases (case_admission_id) with at least one lactate sample per window.
# Bounds are exclusive; the "first 24h" window also accepts samples up to 12 h before T0.
hours_from_t0 = preprocessed_lactate_df.relative_sample_date
in_first_24h = (hours_from_t0 > -12) & (hours_from_t0 < 24)
in_24_to_72h = (hours_from_t0 > 24) & (hours_from_t0 < 3*24)

n_patients_with_lactate_in_first_24h = preprocessed_lactate_df[in_first_24h].case_admission_id.nunique()
n_patients_with_lactate_in_24_to_72h = preprocessed_lactate_df[in_24_to_72h].case_admission_id.nunique()

print(f'Number of patients with lactate in first 24h: {n_patients_with_lactate_in_first_24h}')
print(f'Number of patients with lactate in 24 to 72h: {n_patients_with_lactate_in_24_to_72h}')

In [None]:
# Outcomes of the included cohort, one row per case (first occurrence kept).
# The filter and the de-duplication are chained and reassigned instead of calling
# drop_duplicates(inplace=True) on a filtered slice, which mutates a possible view
# and triggers pandas' SettingWithCopyWarning.
outcome_df = preprocess_outcomes(registry_path)
included_case_ids = inclusion_registry_df.case_admission_id.unique()
outcome_df = (
    outcome_df[outcome_df.case_admission_id.isin(included_case_ids)]
    .drop_duplicates(subset='case_admission_id', keep='first')
)

In [None]:
# Attach the '3M mRS' outcome to every lactate measurement of the same case.
# Left join: measurements without an outcome row are kept with a missing mRS.
mrs_by_case = outcome_df[['case_admission_id', '3M mRS']]
preprocessed_lactate_df = pd.merge(
    preprocessed_lactate_df,
    mrs_by_case,
    how='left',
    on='case_admission_id',
)

In [None]:
preprocessed_lactate_df.head()

In [None]:
# Time windows in hours relative to T0 (exclusive bounds).
# The upper bounds used to be written as 2*72 and 3*72 (144 h and 216 h), so the
# "day 2", "day 3" and "day 2-3" frames reached far beyond 72 h. They are now
# 2*24 and 3*24, matching the 24-72 h window used for the counts and figure titles.
# .copy() makes each window an independent frame, so columns can be added to it
# later without writing into a slice of preprocessed_lactate_df.
hours_from_t0 = preprocessed_lactate_df.relative_sample_date
early_lactate_df = preprocessed_lactate_df[(hours_from_t0 > -12) & (hours_from_t0 < 24)].copy()
lactate_d2_df = preprocessed_lactate_df[(hours_from_t0 > 24) & (hours_from_t0 < 2*24)].copy()
lactate_d3_df = preprocessed_lactate_df[(hours_from_t0 > 2*24) & (hours_from_t0 < 3*24)].copy()
lactate_d_2_3_df = preprocessed_lactate_df[(hours_from_t0 > 1*24) & (hours_from_t0 < 3*24)].copy()

In [None]:
# Pearson correlation between early-window lactate and 3M mRS,
# computed on rows where both values are present.
complete_rows = early_lactate_df[['value', '3M mRS']].notna().all(axis=1)
temp_df = early_lactate_df[complete_rows]
lactate_values, mrs_values = temp_df['value'], temp_df['3M mRS']
corr, p_value = pearsonr(lactate_values, mrs_values)
print(f'Pearson correlation: {corr}, p-value: {p_value}')

In [None]:
# Pearson correlation between lactate in the day-2 frame and 3M mRS,
# computed on rows where both values are present.
complete_rows = lactate_d2_df[['value', '3M mRS']].notna().all(axis=1)
temp_df = lactate_d2_df[complete_rows]
lactate_values, mrs_values = temp_df['value'], temp_df['3M mRS']
corr, p_value = pearsonr(lactate_values, mrs_values)
print(f'Pearson correlation: {corr}, p-value: {p_value}')

In [None]:
# Pearson correlation between lactate in the day-3 frame and 3M mRS,
# computed on rows where both values are present.
complete_rows = lactate_d3_df[['value', '3M mRS']].notna().all(axis=1)
temp_df = lactate_d3_df[complete_rows]
lactate_values, mrs_values = temp_df['value'], temp_df['3M mRS']
corr, p_value = pearsonr(lactate_values, mrs_values)
print(f'Pearson correlation: {corr}, p-value: {p_value}')

In [None]:
# Pearson correlation between lactate in the combined day 2-3 frame and 3M mRS,
# computed on rows where both values are present.
complete_rows = lactate_d_2_3_df[['value', '3M mRS']].notna().all(axis=1)
temp_df = lactate_d_2_3_df[complete_rows]
lactate_values, mrs_values = temp_df['value'], temp_df['3M mRS']
corr, p_value = pearsonr(lactate_values, mrs_values)
print(f'Pearson correlation: {corr}, p-value: {p_value}')

In [None]:

# Unadjusted ordered-logit model of 3M mRS on lactate for the day 2-3 frame.
# The model input is built here explicitly. The cell used to read whatever `temp_df`
# the most recently executed cell had left behind, which is the day 2-3 frame when
# the notebook is run top to bottom but something else after out-of-order execution.
temp_df = lactate_d_2_3_df.dropna(subset=['value', '3M mRS'])

mod_log = OrderedModel(
    temp_df['3M mRS'],
    temp_df[['value']],
    distr='logit',
)

res_log = mod_log.fit()
res_log.summary()

In [None]:
early_lactate_df.describe()

Adjust for age, NIHSS, mRS

In [None]:
inclusion_registry_df.head()

In [None]:
# Expose the registry column 'Age (calc.)' under the short name 'age', then join the
# adjustment covariates onto the lactate measurements by case (left join).
inclusion_registry_df['age'] = inclusion_registry_df['Age (calc.)']
# covariates = ['age', 'Prestroke disability (Rankin)', 'NIH on admission']
covariates = ['age', 'NIH on admission']
covariate_columns = ['case_admission_id', *covariates]
preprocessed_lactate_df = pd.merge(
    preprocessed_lactate_df,
    inclusion_registry_df[covariate_columns],
    how='left',
    on='case_admission_id',
)

In [None]:
# Rebuild the time windows so that they carry the covariate columns merged above.
# Bounds are in hours relative to T0 and exclusive. The upper bounds used to be
# written as 2*72 and 3*72 (144 h and 216 h); they are now 2*24 and 3*24, matching
# the 24-72 h window used for the counts and figure titles.
# .copy() makes each window an independent frame, so columns can be added to it
# later without writing into a slice of preprocessed_lactate_df.
hours_from_t0 = preprocessed_lactate_df.relative_sample_date
early_lactate_df = preprocessed_lactate_df[(hours_from_t0 > -12) & (hours_from_t0 < 24)].copy()
lactate_d2_df = preprocessed_lactate_df[(hours_from_t0 > 24) & (hours_from_t0 < 2*24)].copy()
lactate_d3_df = preprocessed_lactate_df[(hours_from_t0 > 2*24) & (hours_from_t0 < 3*24)].copy()
lactate_d_2_3_df = preprocessed_lactate_df[(hours_from_t0 > 1*24) & (hours_from_t0 < 3*24)].copy()

In [None]:
# overall lactate vs mrs adjusted for covariates
# Complete cases only, every column cast to float for the model. The steps are chained
# into a new frame instead of calling dropna(inplace=True) on a column subset, which
# triggers pandas' SettingWithCopyWarning.
# NOTE(review): each lactate measurement is one row, so a case with several samples
# contributes several observations to this model — confirm that this is intended.
temp_df = (
    preprocessed_lactate_df[['value', '3M mRS'] + covariates]
    .dropna()
    .astype(float)
)

all_data_mod_log = OrderedModel(
    temp_df['3M mRS'],
    temp_df[['value'] + covariates],
    distr='logit',
)

all_data_res_log = all_data_mod_log.fit()
all_data_res_log.summary()

In [None]:
# day 1
# Complete cases of the early window only, every column cast to float for the model.
# The steps are chained into a new frame instead of calling dropna(inplace=True) on a
# slice of the window frame, which triggers pandas' SettingWithCopyWarning.
temp_df = (
    early_lactate_df[['value', '3M mRS'] + covariates]
    .dropna()
    .astype(float)
)

# NOTE(review): the result names still say "all_data" although this fit uses the
# early window only; the names are kept unchanged in case other cells read them.
all_data_mod_log = OrderedModel(
    temp_df['3M mRS'],
    temp_df[['value'] + covariates],
    distr='logit',
)

all_data_res_log = all_data_mod_log.fit()
all_data_res_log.summary()

In [None]:
# day 2-3
# Complete cases of the day 2-3 window only, every column cast to float for the model.
# The steps are chained into a new frame instead of calling dropna(inplace=True) on a
# slice of the window frame, which triggers pandas' SettingWithCopyWarning.
temp_df = (
    lactate_d_2_3_df[['value', '3M mRS'] + covariates]
    .dropna()
    .astype(float)
)

# NOTE(review): the result names still say "all_data" although this fit uses the
# day 2-3 window only; the names are kept unchanged in case other cells read them.
all_data_mod_log = OrderedModel(
    temp_df['3M mRS'],
    temp_df[['value'] + covariates],
    distr='logit',
)

all_data_res_log = all_data_mod_log.fit()
all_data_res_log.summary()

In [None]:
# day 2-3 (mrs 0-2 vs 3-6)


In [None]:
# VISUALIZATION: Binary density plots only (0-5 range)
from scipy.stats import gaussian_kde

# Complete cases only, no outlier removal. The copies are independent frames, so adding
# the outcome column below does not write into a slice of the lactate table.
early_clean = early_lactate_df.dropna(subset=['value', '3M mRS']).copy()
d23_clean = lactate_d_2_3_df.dropna(subset=['value', '3M mRS']).copy()

# Binary mRS outcome: 1 = mRS 0-2, 0 = mRS 3-6. It is derived after the rows with a
# missing mRS are gone, because `NaN <= 2` is False and would otherwise be coded as 0.
early_clean['mRS_binary'] = (early_clean['3M mRS'] <= 2).astype(int)
d23_clean['mRS_binary'] = (d23_clean['3M mRS'] <= 2).astype(int)

fig, axes = plt.subplots(1, 2, figsize=(14, 6))

# Binary outcomes setup
binary_outcomes = [0, 1]
binary_labels = ['Poor (mRS 3-6)', 'Good (mRS 0-2)']
colors_binary = ['red', 'green']

# Evaluation grid shared by every curve, concentrated on the 0-5 range
xs = np.linspace(0, 5, 200)

# One panel per time window; both panels are drawn by the same loop.
panels = [
    (axes[0], early_clean, 'Early Lactate Density by Binary Outcome'),
    (axes[1], d23_clean, 'Day 2-3 Lactate Density by Binary Outcome'),
]
for ax, clean_df, panel_title in panels:
    for outcome, outcome_name, color in zip(binary_outcomes, binary_labels, colors_binary):
        subset = clean_df.loc[clean_df['mRS_binary'] == outcome, 'value']
        # A kernel density estimate needs at least two observations.
        if len(subset) > 1:
            density_values = gaussian_kde(subset)(xs)
            ax.plot(xs, density_values, color=color, linewidth=2, label=outcome_name)
            ax.fill_between(xs, 0, density_values, alpha=0.3, color=color)

    ax.set_title(panel_title, fontsize=14, fontweight='bold')
    ax.set_xlabel('Lactate Level (mmol/L)')
    ax.set_ylabel('Density')
    ax.set_xlim(0, 5)
    ax.legend()

plt.tight_layout()
plt.show()

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

x_max = 2.5  # Upper x-axis limit shared by both ridge plots

# Set the style for ridge plots (transparent axes background so the ridges can overlap)
sns.set_theme(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

# Prepare data for ridge plots: complete cases, as independent copies so that adding
# a column does not write into a slice of the window frames.
early_clean = early_lactate_df.dropna(subset=['value', '3M mRS']).copy()
d23_clean = lactate_d_2_3_df.dropna(subset=['value', '3M mRS']).copy()

# mRS as an ordered categorical ('0' ... '6') so the rows are drawn in score order
mrs_categories = ['0', '1', '2', '3', '4', '5', '6']
for clean_df in (early_clean, d23_clean):
    clean_df['mRS_str'] = pd.Categorical(
        clean_df['3M mRS'].astype(int).astype(str),
        categories=mrs_categories,
        ordered=True,
    )


# Define function to label the plot
def label(x, color, label):
    """Write "mRS <label>" at the left edge of the current facet, in the facet's colour."""
    ax = plt.gca()
    ax.text(0, .2, f"mRS {label}", fontweight="bold", color=color,
            ha="left", va="center", transform=ax.transAxes)


def _ridge_plot_by_mrs(data, title):
    """Draw and show one ridge plot of 'value' with one row per mRS category; return the grid."""
    palette = sns.cubehelix_palette(len(mrs_categories), rot=-.25, light=.7)
    grid = sns.FacetGrid(data, row="mRS_str", hue="mRS_str", aspect=20, height=.8,
                         palette=palette, row_order=mrs_categories)

    # Filled density first, then a white outline on top to separate overlapping ridges
    grid.map(sns.kdeplot, "value",
             bw_adjust=.5, clip_on=True,
             fill=True, alpha=1, linewidth=1.5)
    grid.map(sns.kdeplot, "value", clip_on=True, color="w", lw=2, bw_adjust=.5)

    # Baseline of every ridge
    grid.refline(y=0, linewidth=2, linestyle="-", color=None, clip_on=False)

    grid.map(label, "value")

    # Negative spacing makes the subplots overlap
    grid.figure.subplots_adjust(hspace=-.25)

    # Remove axes details
    grid.set_titles("")
    grid.set(yticks=[], ylabel="")
    grid.despine(bottom=True, left=True)

    # Focus the x-axis on 0 .. x_max
    grid.set(xlim=(0, x_max))

    grid.figure.suptitle(title, fontsize=18, fontweight='bold', y=0.98)

    plt.show()
    return grid


# Ridge plot for Early Lactate
g1 = _ridge_plot_by_mrs(early_clean, 'Early Lactate Distribution by mRS Score (0-24h)')

# Ridge plot for Day 2-3 Lactate
g2 = _ridge_plot_by_mrs(d23_clean, 'Day 2-3 Lactate Distribution by mRS Score (24-72h)')

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

def create_dichotomized_ridge_plots(early_lactate_df, lactate_d_2_3_df, threshold=2, x_max=2.5):
    """
    Create dichotomized ridge plots for mRS outcomes with variable threshold.

    For each of the two lactate frames, rows with a missing 'value' or '3M mRS' are
    dropped, the outcome is split into good (mRS <= threshold) and poor
    (mRS > threshold), one density ridge per group is drawn, and the group sizes with
    the median and interquartile range of lactate are printed. The input frames are
    not modified.

    Parameters:
    - early_lactate_df: DataFrame with early lactate data ('value' and '3M mRS' columns)
    - lactate_d_2_3_df: DataFrame with day 2-3 lactate data (same columns)
    - threshold: mRS threshold for dichotomization (default=2, creates 0-2 vs 3-6)
    - x_max: Maximum x-axis limit for plots

    Returns:
    - None. The two figures are shown and the summary is printed.
    """
    aspect_ratio = 10

    good_label = f'Good (mRS 0-{threshold})'
    poor_label = f'Poor (mRS {threshold+1}-6)'
    # Good outcome first, then poor: this is the row order of the ridges
    binary_categories = [good_label, poor_label]
    colors = ['#27ae60', '#e74c3c']  # Green for good, red for poor

    # Set the style for ridge plots
    sns.set_theme(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

    def _prepare(df):
        """Return the complete cases of df with the binary outcome and its ordered label."""
        clean = df.dropna(subset=['value', '3M mRS']).copy()
        clean['mRS_binary'] = (clean['3M mRS'] <= threshold).astype(int)
        clean['mRS_binary_str'] = pd.Categorical(
            clean['mRS_binary'].map({0: poor_label, 1: good_label}),
            categories=binary_categories,
            ordered=True,
        )
        return clean

    def label_binary(x, color, label):
        """Write the group label at the left edge of the current facet."""
        ax = plt.gca()
        ax.text(0, .2, label, fontweight="bold", color=color,
                ha="left", va="center", transform=ax.transAxes)

    def _ridge_plot(clean, title):
        """Draw and show one two-row ridge plot of 'value' by binary outcome."""
        grid = sns.FacetGrid(clean, row="mRS_binary_str", hue="mRS_binary_str",
                             aspect=aspect_ratio, height=1.2, palette=colors,
                             row_order=binary_categories)

        # Filled density first, then a white outline on top
        grid.map(sns.kdeplot, "value",
                 bw_adjust=.5, clip_on=True,
                 fill=True, alpha=1, linewidth=1.5)
        grid.map(sns.kdeplot, "value", clip_on=True, color="w", lw=2, bw_adjust=.5)

        # Baseline of every ridge
        grid.refline(y=0, linewidth=2, linestyle="-", color=None, clip_on=False)

        grid.map(label_binary, "value")

        # Negative spacing makes the subplots overlap
        grid.figure.subplots_adjust(hspace=-.25)

        # Remove axes details
        grid.set_titles("")
        grid.set(yticks=[], ylabel="")
        grid.despine(bottom=True, left=True)

        grid.set(xlim=(0, x_max))

        grid.figure.suptitle(title, fontsize=18, fontweight='bold', y=0.98)

        plt.show()

    def _print_group_summary(heading, clean):
        """Print N, median and IQR of lactate per outcome group, plus the good-outcome rate."""
        print(f"\n{heading}:")
        groups = (
            (f"Good outcome (mRS 0-{threshold})", clean[clean['mRS_binary'] == 1]),
            (f"Poor outcome (mRS {threshold+1}-6)", clean[clean['mRS_binary'] == 0]),
        )
        for group_name, group in groups:
            values = group['value']
            # The median is reported with the interquartile range. The previous
            # "median ± std" mixed a median with a standard deviation.
            print(f"{group_name}: N={len(group)}, median={values.median():.2f} "
                  f"(IQR {values.quantile(0.25):.2f}-{values.quantile(0.75):.2f})")
        # NOTE(review): N and the rate count rows of the frame (measurements), not cases.
        print(f"Good outcome rate: {(clean['mRS_binary'] == 1).mean()*100:.1f}%")

    early_clean = _prepare(early_lactate_df)
    d23_clean = _prepare(lactate_d_2_3_df)

    _ridge_plot(
        early_clean,
        f'Early Lactate Distribution by Binary mRS (threshold={threshold}) - (0-24h)',
    )
    _ridge_plot(
        d23_clean,
        f'Day 2-3 Lactate Distribution by Binary mRS (threshold={threshold}) - (24-72h)',
    )

    # Print summary statistics
    print(f"\nSUMMARY STATISTICS (threshold={threshold}):")
    print("="*50)
    _print_group_summary("EARLY LACTATE", early_clean)
    _print_group_summary("DAY 2-3 LACTATE", d23_clean)

# Usage examples:
# Default threshold of 2 (mRS 0-2 vs 3-6)
create_dichotomized_ridge_plots(early_lactate_df, lactate_d_2_3_df, threshold=2)

# Alternative threshold of 1 (mRS 0-1 vs 2-6)
# create_dichotomized_ridge_plots(early_lactate_df, lactate_d_2_3_df, threshold=1)

# Alternative threshold of 3 (mRS 0-3 vs 4-6)
# create_dichotomized_ridge_plots(early_lactate_df, lactate_d_2_3_df, threshold=3)

In [None]:
# Build one labelled frame per time window and stack them for the boxplot.
# Rows without a 3M mRS are removed before the outcome is dichotomised: `NaN <= 2`
# is False, so they used to be coded 0 and plotted as "Poor (mRS 3-6)".
# The window frames themselves are left untouched; the labels are added to copies.
outcome_names = {0: 'Poor (mRS 3-6)', 1: 'Good (mRS 0-2)'}
windows = [
    (early_lactate_df, 'Early (0-24h)'),
    (lactate_d_2_3_df, 'Day 2-3 (24-72h)'),
]
labelled_frames = []
for window_df, period_name in windows:
    labelled = window_df.dropna(subset=['3M mRS']).copy()
    # Binary mRS outcome (1 = mRS 0-2, 0 = mRS 3-6) and its display label
    labelled['mRS_binary'] = (labelled['3M mRS'] <= 2).astype(int)
    labelled['outcome_label'] = labelled['mRS_binary'].map(outcome_names)
    labelled['time_period'] = period_name
    labelled_frames.append(labelled)

# Combine datasets
combined_df = pd.concat(labelled_frames, ignore_index=True)

# Create the combined plot
fig, ax = plt.subplots(1, 1, figsize=(12, 8))

# Define vibrant colors for outcomes
outcome_colors = {
    'Poor (mRS 3-6)': '#e74c3c',    # Red for poor outcomes
    'Good (mRS 0-2)': '#27ae60'     # Green for good outcomes
}

# Create boxplot with time periods on x-axis and outcomes as hue (outliers hidden)
sns.boxplot(x='time_period', y='value', hue='outcome_label', data=combined_df,
            ax=ax, showfliers=False, palette=outcome_colors,
            boxprops=dict(alpha=0.8), whiskerprops=dict(alpha=0.8))

# Style improvements
ax.set_title('Lactate Levels by Time Period and Outcome',
             fontsize=16, fontweight='bold', pad=20)
ax.set_xlabel('Time Period', fontsize=14, fontweight='bold')
ax.set_ylabel('Lactate Level (mmol/L)', fontsize=14, fontweight='bold')
ax.set_ylim(0, 3.2)

# Improve legend with better styling
legend = ax.legend(title='Outcome', title_fontsize=13, fontsize=12,
                  loc='upper right', frameon=True, fancybox=True, shadow=True)
legend.get_frame().set_facecolor('#f8f9fa')
legend.get_frame().set_alpha(0.9)

# Add subtle grid for better readability
ax.grid(True, alpha=0.3, linestyle='--')

# Make the plot background slightly off-white
ax.set_facecolor('#fafafa')

# Style the spines
for spine in ax.spines.values():
    spine.set_color('#cccccc')
    spine.set_linewidth(0.8)

plt.tight_layout()
plt.show()

In [None]:
# pymer4 calls R; R_HOME is set before the import so that this R installation is used.
# NOTE(review): absolute, machine-specific path.
os.environ["R_HOME"] = "/Library/Frameworks/R.framework/Versions/4.1/Resources"
from pymer4.models import Lmer

# overall lactate vs mrs adjusted for covariates
# .copy() gives an independent frame, so renaming and adding columns below does not
# write into a slice of preprocessed_lactate_df (SettingWithCopyWarning).
temp_df = preprocessed_lactate_df[['value', '3M mRS', 'case_admission_id'] + covariates].copy()
# Make the column names usable in a model formula: spaces -> '_', parentheses removed.
# regex=False treats '(' and ')' as plain characters regardless of the pandas version's
# default for the `regex` argument.
temp_df.columns = (
    temp_df.columns
    .str.replace(' ', '_', regex=False)
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
)
covariates_with_underscore = [covar_name.replace(' ', '_').replace('(', '').replace(')', '') for covar_name in covariates]
# Alias for the outcome, since '3M_mRS' starts with a digit
temp_df['mrs'] = temp_df['3M_mRS']
# NOTE(review): unlike the ordered-model cells, nothing is cast to float here —
# confirm the covariates reach R as numbers and not as strings.

# Linear mixed model with a random intercept per case
model = Lmer(f'mrs ~ value + {" + ".join(covariates_with_underscore)} + (1|case_admission_id)',
             data=temp_df)
# model.fit(control='optimizer="bobyqa", optCtrl=list(maxfun=100000)')
model.fit()
print(model.summary())



In [None]:
# early-window lactate vs mrs adjusted for covariates
# .copy() gives an independent frame, so renaming and adding columns below does not
# write into a slice of early_lactate_df (SettingWithCopyWarning).
temp_df = early_lactate_df[['value', '3M mRS', 'case_admission_id'] + covariates].copy()
# Make the column names usable in a model formula: spaces -> '_', parentheses removed.
# regex=False treats '(' and ')' as plain characters regardless of the pandas version's
# default for the `regex` argument.
temp_df.columns = (
    temp_df.columns
    .str.replace(' ', '_', regex=False)
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
)
covariates_with_underscore = [covar_name.replace(' ', '_').replace('(', '').replace(')', '') for covar_name in covariates]
# Alias for the outcome, since '3M_mRS' starts with a digit
temp_df['mrs'] = temp_df['3M_mRS']
# NOTE(review): unlike the ordered-model cells, nothing is cast to float here —
# confirm the covariates reach R as numbers and not as strings.

# Linear mixed model with a random intercept per case, fitted with the bobyqa optimizer
model = Lmer(f'mrs ~ value + {" + ".join(covariates_with_underscore)} + (1|case_admission_id)',
             data=temp_df)
model.fit(control='optimizer="bobyqa", optCtrl=list(maxfun=100000)')
# model.fit()
print(model.summary())

In [None]:
# day 2-3 lactate vs mrs adjusted for covariates
# .copy() gives an independent frame, so renaming and adding columns below does not
# write into a slice of lactate_d_2_3_df (SettingWithCopyWarning).
temp_df = lactate_d_2_3_df[['value', '3M mRS', 'case_admission_id'] + covariates].copy()
# Make the column names usable in a model formula: spaces -> '_', parentheses removed.
# regex=False treats '(' and ')' as plain characters regardless of the pandas version's
# default for the `regex` argument.
temp_df.columns = (
    temp_df.columns
    .str.replace(' ', '_', regex=False)
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
)
covariates_with_underscore = [covar_name.replace(' ', '_').replace('(', '').replace(')', '') for covar_name in covariates]
# Alias for the outcome, since '3M_mRS' starts with a digit
temp_df['mrs'] = temp_df['3M_mRS']
# Complete cases only; reassigned instead of dropna(inplace=True)
temp_df = temp_df.dropna(subset=['value', 'mrs'] + covariates_with_underscore)
# NOTE(review): unlike the ordered-model cells, nothing is cast to float here —
# confirm the covariates reach R as numbers and not as strings.

# Linear mixed model with a random intercept per case
model = Lmer(f'mrs ~ value + {" + ".join(covariates_with_underscore)} + (1|case_admission_id)',
             data=temp_df)
# model.fit(control='optimizer="bobyqa", optCtrl=list(maxfun=100000)')
# nlminbwrap optimizer with explicit absolute tolerances passed via optCtrl
model.fit(control='optimizer="nlminbwrap", optCtrl=list(maxfun=100000, xtol_abs = 1e-12, ftol_abs = 1e-12)')

print(model.summary())