In [1]:
import os
import pandas as pd
import numpy as np


In [2]:
import os
import pandas as pd

# Discover the full feature space: the union of column names over every
# patient file in the folder, so all patients can later be aligned to one
# common schema.
FEATURE_SOURCE_DIR = "processed_training_Ay"

all_columns = set()

for f in os.listdir(FEATURE_SOURCE_DIR):
    if f.endswith(".psv"):
        # Only the header line is needed; nrows=0 avoids parsing data rows,
        # which matters when the folder holds many files.
        header = pd.read_csv(os.path.join(FEATURE_SOURCE_DIR, f), sep="|", nrows=0)
        all_columns.update(header.columns)

# "SepsisLabel" is the prediction target, not an input feature. Sorting gives
# a deterministic column order regardless of the order files were listed in.
FEATURE_COLUMNS = sorted(c for c in all_columns if c != "SepsisLabel")

print("Final feature count:", len(FEATURE_COLUMNS))
print(FEATURE_COLUMNS)


KeyboardInterrupt: 

In [None]:
def load_patient_sequences(folder, feature_columns, early_hours=6):
    """Load one feature sequence and one binary label per patient file.

    Every ``.psv`` file in ``folder`` is read as a pipe-separated table and
    aligned to ``feature_columns``: columns missing from a file are created,
    and all missing values are replaced by 0.0.

    * If the file has at least one row with ``SepsisLabel == 1``, the sequence
      is cut ``early_hours`` rows before the first such row and labelled 1.
      Patients with no rows left after the cut are dropped.
    * Otherwise the whole sequence is kept and labelled 0.

    Files are visited in sorted name order so the returned sample order is
    reproducible (``os.listdir`` order is arbitrary). Files without the
    ``.psv`` extension and tables with no data rows are skipped.

    Args:
        folder: Directory containing the patient files.
        feature_columns: Ordered feature column names; defines the column
            order of every returned array.
        early_hours: Number of rows to hold back before the first positive
            label (presumably one row per hour -- confirm against the data).

    Returns:
        Tuple ``(X, y)``: ``X`` is a list of 2-D arrays of shape
        ``(n_rows, len(feature_columns))`` and ``y`` is a NumPy array of 0/1
        labels in the same order.
    """
    feature_columns = list(feature_columns)
    X, y = [], []

    for f in sorted(os.listdir(folder)):
        # Skip anything that is not a patient file (OS metadata, notes, ...).
        if not f.endswith(".psv"):
            continue

        df = pd.read_csv(os.path.join(folder, f), sep="|")
        if df.empty:
            # A header-only file carries no time steps to learn from.
            continue

        # Force the same feature space for every patient.
        df = df.reindex(columns=feature_columns + ["SepsisLabel"])
        features = df[feature_columns].fillna(0.0).values
        labels = df["SepsisLabel"].values

        # Row indices of positive labels; robust to NaN labels, which would
        # make labels.max() return NaN and hide a real onset.
        sepsis_rows = np.flatnonzero(labels == 1)

        if sepsis_rows.size:
            cutoff = sepsis_rows[0] - early_hours
            if cutoff <= 0:
                # Onset is too early: nothing is left to predict from.
                continue
            seq = features[:cutoff]
            label = 1
        else:
            seq = features
            label = 0

        X.append(seq)
        y.append(label)

    return X, np.array(y)

In [None]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
def pad_data(X, max_len=None):
    """Pad or truncate a list of sequences to one common length.

    Padding and truncation are both applied at the start of a sequence
    ("pre"), so the most recent time steps stay at the end of the array.

    Args:
        X: List of per-patient sequences.
        max_len: Target length. When None, the length of the longest
            sequence in ``X`` is used.

    Returns:
        Tuple ``(X_pad, max_len)``: the float32 padded array and the length
        that was applied, so another split can reuse it.
    """
    target_len = max(len(seq) for seq in X) if max_len is None else max_len

    padded = pad_sequences(
        X,
        maxlen=target_len,
        dtype="float32",
        padding="pre",
        truncating="pre",
    )
    return padded, target_len

In [None]:
# Training split (Set A). The padded length is taken from the longest
# training sequence.
# NOTE(review): FEATURE_COLUMNS was derived from "processed_training_Ay" in an
# earlier cell, while training data is read from "processed_training" here --
# confirm both folders are intended.
X_train_raw, y_train = load_patient_sequences(
    folder="processed_training",
    feature_columns=FEATURE_COLUMNS,
)
X_train, max_len = pad_data(X_train_raw)

# Validation / test split (Set B), padded or truncated to the training length
# so both arrays share the same time dimension.
X_val_raw, y_val = load_patient_sequences(
    folder="processed_training_setB",
    feature_columns=FEATURE_COLUMNS,
)
X_val, _ = pad_data(X_val_raw, max_len=max_len)

In [None]:
# Report the padded array shape of each split.
for caption, padded in (("X_train shape:", X_train), ("X_val shape:", X_val)):
    print(caption, padded.shape)

# Model input dimensions, taken from the training array: axis 1 is the padded
# sequence length, axis 2 the number of features per time step.
TIMESTEPS, N_FEATURES = X_train.shape[1], X_train.shape[2]

print("TIMESTEPS:", TIMESTEPS)
print("N_FEATURES:", N_FEATURES)

# NOTE(review): 32 is presumably the expected size of FEATURE_COLUMNS --
# confirm, or compare against len(FEATURE_COLUMNS) instead.
assert N_FEATURES == 32

In [None]:
from sklearn.utils.class_weight import compute_class_weight

# Per-class weights computed from the training labels with scikit-learn's
# "balanced" mode; the array follows the order of `classes` (0, then 1).
class_weights = compute_class_weight(
    class_weight="balanced",
    classes=np.array([0, 1]),
    y=y_train,
)

# Keras expects a {class_index: weight} mapping.
class_weight = dict(zip((0, 1), class_weights))
print(class_weight)


In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input

def build_lstm(units=64, dropout=0.3):
    """Build and compile a single-layer LSTM binary classifier.

    The input shape is read from the module-level ``TIMESTEPS`` and
    ``N_FEATURES`` at call time, so those must be defined first.

    Args:
        units: Number of units in the LSTM layer.
        dropout: Rate of the Dropout layer placed after the LSTM.

    Returns:
        A compiled ``Sequential`` model with one sigmoid output, trained with
        Adam on binary cross-entropy and reporting an AUC metric named "auc".
    """
    stack = [
        Input(shape=(TIMESTEPS, N_FEATURES)),
        # Only the final hidden state feeds the classifier head.
        LSTM(units, return_sequences=False),
        Dropout(dropout),
        Dense(1, activation="sigmoid"),
    ]
    model = Sequential(stack)

    auc_metric = tf.keras.metrics.AUC(name="auc")
    model.compile(
        optimizer="adam",
        loss="binary_crossentropy",
        metrics=[auc_metric],
    )
    return model


In [None]:
from sklearn.model_selection import ParameterGrid

# Hyper-parameter grid: every combination of LSTM width and dropout rate.
param_grid = {
    "units": [32, 64],
    "dropout": [0.3, 0.5]
}

# Start below any attainable AUC so the first trained model is always kept.
# With 0.0 and a strict ">", runs that all score exactly 0.0 would leave
# best_model as None and break the cells that follow.
best_auc = float("-inf")
best_model = None
best_params = None

for params in ParameterGrid(param_grid):
    print("Training with:", params)

    model = build_lstm(
        units=params["units"],
        dropout=params["dropout"]
    )

    model.fit(
        X_train,
        y_train,
        epochs=10,
        batch_size=32,
        class_weight=class_weight,
        validation_data=(X_val, y_val),
        verbose=1
    )

    # Look the metric up by the name given at compile time ("auc") rather
    # than by position, so adding a metric cannot silently change the score.
    val_auc = model.evaluate(X_val, y_val, verbose=0, return_dict=True)["auc"]

    if val_auc > best_auc:
        best_auc = val_auc
        best_model = model
        best_params = params

# Record which configuration won, so the result can be reproduced.
print("Best params:", best_params, "validation AUC:", best_auc)

In [None]:
from sklearn.metrics import f1_score

# Candidate decision thresholds: 50 evenly spaced values from 0.1 to 0.9.
thresholds = np.linspace(0.1, 0.9, 50)

# Predicted positive-class probability per validation patient, flattened to 1-D.
y_prob = best_model.predict(X_val).reshape(-1)

# Keep the first threshold that reaches the highest F1 (strict ">" means
# later ties do not replace it).
best_t, best_f1 = 0, 0
for t in thresholds:
    y_pred = (y_prob > t).astype(int)
    f1 = f1_score(y_val, y_pred)
    if f1 > best_f1:
        best_t, best_f1 = t, f1

print("Best threshold:", best_t)
print("Best F1:", best_f1)


In [None]:
from sklearn.metrics import classification_report, roc_auc_score
# Hard 0/1 predictions at the tuned threshold.
# NOTE(review): best_t was tuned on this same validation set, so the report
# below is likely optimistic -- confirm whether a held-out set is available.
y_pred = np.where(y_prob > best_t, 1, 0)

# ROC-AUC is threshold-free and is computed from the raw probabilities.
val_roc_auc = roc_auc_score(y_val, y_prob)
print("ROC-AUC:", val_roc_auc)
print(classification_report(y_val, y_pred))


In [None]:
# NOTE(review): this cell uses PyTorch names (`torch`, `nn`) and
# `scale_pos_weight`, none of which is imported or defined in the cells
# visible above, while the rest of the notebook uses Keras -- confirm whether
# this cell belongs here or is a leftover.

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Wrap the positive-class weight in a one-element tensor on the chosen device.
pos_weight = torch.tensor([scale_pos_weight]).to(device)
# Loss taking a positive-class weight; `nn` is presumably torch.nn -- confirm.
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)