In [None]:
import os

# Mount Google Drive only when running inside Google Colab. Outside Colab the
# import fails and the mount/chdir steps are skipped, so this cell never has to
# be commented out by hand (doing that would also drop `import os`, which a
# later cell needs for os.path.join).
try:
    from google.colab import drive
except ImportError:
    drive = None

if drive is not None:
    drive.mount('/content/drive')
    os.chdir('/content/drive/My Drive/Colab Notebooks')

In [None]:
import pandas as pd
import numpy as np

# turn into 6 hour time intervals
def process_patient_data(file_path, total_hours=24, window_hours=6):
    """Collapse each patient's leading rows into a single row of summary features.

    The CSV at ``file_path`` is read and, for every ``patientunitstayid``, only
    the first ``total_hours`` rows (in file order) are kept. This assumes the
    file holds one row per hour, sorted by time within each stay -- TODO
    confirm against the code that produced the file.

    For ``heartrate``, ``temperature`` and ``BP`` the kept rows are cut into
    consecutive windows of ``window_hours`` rows, and the mean, max, min and
    population variance (``ddof=0``) of each window are stored as
    ``{col}_{stat}_{window_index}``. The remaining lab/score columns are
    averaged over all kept rows. Windows with no rows yield NaN.

    Args:
        file_path: Path (or anything ``pd.read_csv`` accepts) of the input CSV.
        total_hours: Number of leading rows kept per patient. Defaults to 24.
        window_hours: Number of rows per window. Defaults to 6.

    Returns:
        A DataFrame with one row per patient, indexed by ``patientunitstayid``
        and also carrying it as the first column.
    """
    df = pd.read_csv(file_path)

    windowed_cols = ['heartrate', 'temperature', 'BP']
    averaged_cols = ['paO2', 'FiO2_x', 'FiO2_y', 'Glasgow score', 'BUN', 'WBC x 1000',
                     'bicarbonate', 'sodium', 'potassium', 'total bilirubin']

    # Keep only the first `total_hours` rows of every stay.
    df_filtered = df.groupby('patientunitstayid').head(total_hours)

    # Collect one dict per patient and build the frame once at the end;
    # concatenating inside the loop re-copies the whole result every iteration.
    records = []
    pids = []
    for pid, group in df_filtered.groupby('patientunitstayid'):
        patient_data = {'patientunitstayid': pid}

        # Per-window statistics for the selected vital-sign columns.
        for col in windowed_cols:
            series = group[col]
            for window_idx, start in enumerate(range(0, total_hours, window_hours)):
                window = series.iloc[start:start + window_hours]
                patient_data[f'{col}_mean_{window_idx}'] = window.mean()
                patient_data[f'{col}_max_{window_idx}'] = window.max()
                patient_data[f'{col}_min_{window_idx}'] = window.min()
                patient_data[f'{col}_var_{window_idx}'] = window.var(ddof=0)

        # Single average over all kept rows for the remaining columns.
        for col in averaged_cols:
            patient_data[col] = group[col].mean()

        records.append(patient_data)
        pids.append(pid)

    # Index by patient id, as the row-by-row construction did.
    return pd.DataFrame(records, index=pids)

# Build the per-patient feature table from the raw feature CSV.
file_path = "13features.csv"
processed_data = process_patient_data(file_path)
# Preview the first rows as the cell's rich output rather than a plain-text print.
processed_data.head()

In [None]:
# Attach each patient's age to the feature table (left join keeps every
# row of processed_data even when no matching patient record exists).
patient_df = pd.read_csv('patient.csv')
merged_df = pd.merge(
    processed_data,
    patient_df[['patientunitstayid', 'age']],
    on='patientunitstayid',
    how='left',
)
# Ages recorded as the string '> 89' are mapped to 90, then the column is
# converted to a numeric dtype so it is not left as text. Any value that still
# cannot be parsed becomes NaN.
merged_df['age'] = pd.to_numeric(merged_df['age'].replace('> 89', '90'), errors='coerce')

folder_path = '/content/drive/My Drive/Stream/Models'
file_name = 'merged_with_age.csv'
# to_csv does not create missing directories, so make sure the target exists.
os.makedirs(folder_path, exist_ok=True)
merged_df.to_csv(os.path.join(folder_path, file_name), index=False)