In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import numpy as np
import pandas as pd
from scipy import stats

In [25]:
# Load datasets
DEFAULT_PATH = "/content/drive/MyDrive/MA 591/Dataset/"
BATCH_SIZE = 32

df = pd.read_csv(DEFAULT_PATH + "merged_data.csv")

df.rename(columns={"Temperature (°F)": "Temperature"}, inplace=True)

features = ['subject_id', 'ABP Diastolic', 'ABP Systolic', 'Glucose', 'Heart Rate', 'Respiratory Rate', 'Temperature', 'hospital_expire_flag']
df = df[features]

for feature in features:
  if feature not in ['subject_id', 'hospital_expire_flag']:
    q_low = df[feature].quantile(0.01)
    q_hi  = df[feature].quantile(0.99)
    df = df[(df[feature] < q_hi) & (df[feature] > q_low)]


df.dropna(inplace=True)
df.reset_index(drop=True, inplace=True)

count_total = df.shape[0]
count_0 = df[df['hospital_expire_flag'] == 0].shape[0]
count_1 = df[df['hospital_expire_flag'] == 1].shape[0]

print(f"~{round(100*count_0/count_total)}% of the data has hospital_expire_flag = 0")
print(f"~{round(100*count_1/count_total)}% of the data has hospital_expire_flag = 1")

# # We will make this distribution equal

difference = count_0 - count_1

df_majority = df[df['hospital_expire_flag'] == 0]
df_minority = df[df['hospital_expire_flag'] == 1]

df_majority_downsampled = df_majority.sample(n=count_1, random_state=42)

df = pd.concat([df_majority_downsampled, df_minority])

df = df.drop(df.index[:22])

# Shuffle the dataset
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

print("\nBalanced data:")

count_total = df.shape[0]
count_0 = df[df['hospital_expire_flag'] == 0].shape[0]
count_1 = df[df['hospital_expire_flag'] == 1].shape[0]
print(f"~{round(100*count_0/count_total)}% of the data has hospital_expire_flag = 0")
print(f"~{round(100*count_1/count_total)}% of the data has hospital_expire_flag = 1")

print()

print(df)

       subject_id  ABP Diastolic  ABP Systolic  Glucose  Heart Rate  \
9        10002013          61.48        110.62    98.00       94.21   
10       10002114          66.72        101.30    99.86       80.85   
11       10002155          57.40        113.40   125.64       89.75   
12       10002155          57.40        113.40   125.64       89.75   
14       10002428          59.01        110.99   109.23       96.97   
...           ...            ...           ...      ...         ...   
71004    19997448          60.90        105.42   116.86       96.89   
71007    19997752          48.44        122.72   102.33       76.33   
71013    19998770          58.76        113.47   133.50       96.35   
71017    19999287          54.54        114.69   113.00       88.57   
71019    19999442          82.81        127.47   120.12       59.45   

       Respiratory Rate  Temperature  hospital_expire_flag  
9                 15.19        97.50                     0  
10                16.75  

In [18]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import pandas as pd

# Load DataFrame (assuming df is already loaded)

# Prepare features and target
X = df.drop(columns=["subject_id", "hospital_expire_flag"])
y = df["hospital_expire_flag"]

# Normalize features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Convert to tensors
X_tensor = torch.tensor(X, dtype=torch.float32)
y_tensor = torch.tensor(y.values, dtype=torch.float32).view(-1, 1)

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_tensor, y_tensor, test_size=0.2, random_state=42)

# Create DataLoaders
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)
trainloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
testloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Check for GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define a deeper and improved Neural Network Model
class MortalityNN(nn.Module):
    def __init__(self, input_dim):
        super(MortalityNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 8)  # Reduced neurons
        self.fc2 = nn.Linear(8, 1)  # Directly output the prediction
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.2)  # Lower dropout to prevent underfitting

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        return self.fc2(x)  # No activation (handled by BCEWithLogitsLoss)

# Initialize model
input_dim = X.shape[1]
model = MortalityNN(input_dim).to(device)

# Loss function and optimizer
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=1e-5)

# Learning rate scheduler
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=20)

# Training function with Early Stopping
def train(net, trainloader, epochs: int, patience=7):
    net.train()
    best_loss = float("inf")
    patience_counter = 0

    for epoch in range(epochs):
        total_loss = 0
        for X_batch, y_batch in trainloader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)

            optimizer.zero_grad()
            y_pred = net(X_batch)
            loss = criterion(y_pred, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        avg_loss = total_loss / len(trainloader)
        scheduler.step()  # Adjust learning rate

        print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}")

        # Early Stopping Check
        if avg_loss < best_loss:
            best_loss = avg_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping triggered.")
                break

# Testing function
def test(net, testloader):
    net.eval()
    correct, total, loss = 0, 0, 0.0

    with torch.no_grad():
        for X_batch, y_batch in testloader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)

            y_pred = net(X_batch)
            loss += criterion(y_pred, y_batch).item()
            predicted = (torch.sigmoid(y_pred) > 0.5).float()
            correct += (predicted == y_batch).sum().item()
            total += y_batch.size(0)

    accuracy = correct / total
    print(f"Test Loss: {loss/len(testloader):.4f}, Accuracy: {accuracy:.4f}")
    return loss / len(testloader), accuracy


# Train and evaluate model
train(model, trainloader, epochs=100)
test_loss, test_accuracy = test(model, testloader)


Epoch 1/100, Loss: 0.6683
Epoch 2/100, Loss: 0.6417
Epoch 3/100, Loss: 0.6254
Epoch 4/100, Loss: 0.6190
Epoch 5/100, Loss: 0.6148
Epoch 6/100, Loss: 0.6124
Epoch 7/100, Loss: 0.6120
Epoch 8/100, Loss: 0.6130
Epoch 9/100, Loss: 0.6118
Epoch 10/100, Loss: 0.6107
Epoch 11/100, Loss: 0.6115
Epoch 12/100, Loss: 0.6101
Epoch 13/100, Loss: 0.6105
Epoch 14/100, Loss: 0.6083
Epoch 15/100, Loss: 0.6079
Epoch 16/100, Loss: 0.6074
Epoch 17/100, Loss: 0.6087
Epoch 18/100, Loss: 0.6088
Epoch 19/100, Loss: 0.6080
Epoch 20/100, Loss: 0.6057
Epoch 21/100, Loss: 0.6075
Epoch 22/100, Loss: 0.6088
Epoch 23/100, Loss: 0.6086
Epoch 24/100, Loss: 0.6078
Epoch 25/100, Loss: 0.6083
Epoch 26/100, Loss: 0.6066
Epoch 27/100, Loss: 0.6096
Early stopping triggered.
Test Loss: 0.5977, Accuracy: 0.6856
