# setup

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.optim as optim
from torch import nn
from torch.utils.data import Dataset, DataLoader

from sklearn.metrics import roc_auc_score, confusion_matrix, classification_report, mean_squared_error
from sklearn.model_selection import train_test_split

# Load the input CSV into a DataFrame (the path is relative to the current working directory).
data = pd.read_csv("../clean_data/nafl/combined.large.nafl.csv")

In [None]:
# create the X and Y datasets

# Columns not used by the binary-outcome model.
# (To model DaysUntilFirstProgression instead, keep that column here,
#  drop 'Outcome', and build Y from ['StudyID', 'DaysUntilFirstProgression'].)
data = data.drop(columns=['DaysUntilFirstProgression', 'Censored'])

# Y holds only the label, keyed by StudyID.
Y = data[['StudyID', 'Outcome']]

# Build X with a single drop call. Previously a second `data.drop(...)`
# overwrote the result of the first one, so 'Outcome' stayed inside X and
# the label leaked into the feature matrix.
X = data.drop(columns=['Outcome', 'mean_BMI_category', 'last_BMI_category'])

# Index both frames by StudyID so the ID is not treated as a feature/label.
X = X.set_index('StudyID')
Y = Y.set_index('StudyID')

In [None]:
# check if GPU is enabled
# Prefer CUDA when PyTorch can see a GPU; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

print(f"Using {device} device")
print(f"Shape of X: {X.shape}. Shape of Y: {Y.shape}.")

In [None]:
# convert data to tensors
from sklearn.preprocessing import StandardScaler

# Cast features to float32 rather than int64: an integer cast truncates any
# fractional feature values before they are standardized, and float32 is the
# dtype used for the features everywhere else in this notebook.
X_numpy = X.values.astype(np.float32)

# standardize our features (per-column zero mean / unit variance)
scaler = StandardScaler()
X_numpy = scaler.fit_transform(X_numpy)

X_torch = torch.from_numpy(X_numpy)

# Labels are kept as integers here; the array keeps the shape of Y.values.
Y_numpy = Y.values.astype(np.int64)
Y_torch = torch.from_numpy(Y_numpy)

### trying smote

In [None]:
!pip install imbalanced-learn

In [None]:
# trying smote
# !pip install imbalanced-learn

from collections import Counter
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler
import torch

# NOTE(review): scaling and oversampling are both fitted on the full dataset
# here. If a held-out split is taken from X_torch / Y_torch afterwards, the
# scaler statistics and the synthetic rows will have seen held-out data —
# confirm whether that is intended.

# Features as float32; labels flattened to a 1-D integer vector.
X_numpy = X.values.astype(np.float32)
Y_numpy = Y.values.astype(np.int64).ravel()

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_numpy)

# Class balance before oversampling
print("Class distribution before SMOTE:", Counter(Y_numpy))

# Oversample with SMOTE (fixed seed)
smote = SMOTE(random_state=42)
X_resampled, Y_resampled = smote.fit_resample(X_scaled, Y_numpy)

# Class balance after oversampling
print("Class distribution after SMOTE:", Counter(Y_resampled))

# Convert to PyTorch tensors; labels become a float column of shape (n_samples, 1)
X_torch = torch.tensor(X_resampled, dtype=torch.float32)
Y_torch = torch.tensor(Y_resampled, dtype=torch.float32)[:, None]

In [None]:
# Wrap the resampled feature matrix in a DataFrame for inspection.
X_resampled_df = pd.DataFrame(X_resampled)

In [None]:
# Last row of the resampled feature matrix.
foo = X_resampled_df.iloc[-1, :]

In [None]:
# Last row of the scaled feature matrix from before resampling.
bar = pd.DataFrame(X_scaled).iloc[-1, :]

In [None]:
# Number of feature positions in which the two rows differ.
np.sum(foo - bar != 0)

In [None]:
# Shape of the row taken from the resampled frame.
foo.shape

# establish the model

In [None]:
# curate the dataset
class MAFLDDataset(Dataset):
    """Map-style dataset pairing each feature row with its label.

    A Dataset subclass must provide ``__init__``, ``__len__`` and
    ``__getitem__``.

    Args:
        X: Features as an array-like or a tensor; stored as a float32 tensor.
        y: Labels as an array-like or a tensor; stored as a float32 tensor.

    Raises:
        ValueError: If ``X`` and ``y`` differ in their number of samples.
    """

    def __init__(self, X, y):
        self.X = self._to_float_tensor(X)
        self.y = self._to_float_tensor(y)
        # Compare only the leading (sample) dimension; 0-d inputs have none.
        if self.X.shape[:1] != self.y.shape[:1]:
            raise ValueError(
                f"X and y must have the same number of samples, "
                f"got {tuple(self.X.shape)} and {tuple(self.y.shape)}"
            )

    @staticmethod
    def _to_float_tensor(values):
        """Return an independent float32 tensor copy of *values*."""
        if isinstance(values, torch.Tensor):
            # torch.tensor(existing_tensor) emits a UserWarning; detach+clone
            # is the recommended way to copy-construct from a tensor.
            return values.detach().clone().to(torch.float32)
        return torch.tensor(values, dtype=torch.float32)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# Wrap the tensors in the dataset and serve them as shuffled mini-batches of 64.
dataset = MAFLDDataset(X=X_torch, y=Y_torch)
train_loader = DataLoader(dataset=dataset, shuffle=True, batch_size=64)

In [None]:
# define by subclassing nn.Module and initialize the neural network layers in __init__.
class NeuralNetwork(nn.Module):
    """Fully connected network that outputs one raw logit per sample.

    Args:
        input_dim: Number of input features. When omitted, the width of the
            notebook-level feature frame ``X`` (``X.shape[1]``) is used, which
            is what this class always did before the parameter existed.
    """

    def __init__(self, input_dim=None):
        super().__init__()  # inherit init from parent class
        if input_dim is None:
            # Backward-compatible default: read the width from the global X.
            input_dim = X.shape[1]
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(input_dim, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
        )
        # Not applied in forward(): the network returns logits. The attribute
        # is kept so the module's existing attributes do not change.
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the raw (pre-sigmoid) output of the stack for *x*."""
        return self.linear_relu_stack(x)

In [None]:
# create an instance of NeuralNetwork and move it to the selected device
model = NeuralNetwork()
model = model.to(device)
# print(model)

# Loss function and optimizer.
# BCEWithLogitsLoss is fed the raw model outputs (logits).
# Alternatives tried earlier: nn.MSELoss(), nn.BCELoss() (the latter would
# need sigmoid-activated outputs rather than raw logits).
loss_fn = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)  # baseline learning rate

# train the model on the full dataset

In [None]:
num_epochs = 30 # typically between 10-50 for small datasets

model.train()  # make sure training-mode layers (if any) are active
for epoch in range(num_epochs):
    for batch_X, batch_y in train_loader:
        # move data to device
        batch_X = batch_X.to(device)
        batch_y = batch_y.to(device)

        # reset the gradients accumulated by the previous step
        optimizer.zero_grad()

        # forward pass
        outputs = model(batch_X)

        # compute loss
        loss = loss_fn(outputs, batch_y)

        # backpropagate and update the weights
        loss.backward()
        optimizer.step()

    # Report once per epoch (loss of the last mini-batch). This print used to
    # sit outside the epoch loop, so only the final epoch was ever shown.
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

## evaluate performance on predicting binary outcome

In [None]:
# evaluate
# X_torch is already a tensor: cast/move it instead of re-wrapping it with
# torch.tensor(), which emits a copy-construct warning.
X_input = X_torch.to(device=device, dtype=torch.float32)

model.eval()  # inference mode for layers such as dropout / batchnorm
with torch.no_grad():  # no gradients are needed for evaluation
    Y_hat = model(X_input)  # raw logits

# The model outputs logits, so convert them to probabilities before applying
# the 0.5 cut-off (thresholding the logits at 0.5 uses the wrong boundary).
predictions = (torch.sigmoid(Y_hat) >= 0.5).float()  # 0 if p < 0.5, 1 if p >= 0.5
print(f'Predicted classes: {predictions}')

In [None]:
# check performance
# Score against Y_torch, the labels that line up row-for-row with the X_torch
# used for `predictions`. The original frame Y has a different number of rows
# once the data has been resampled.
print(confusion_matrix(Y_torch.cpu().numpy(), predictions.cpu().detach().numpy()))
print(classification_report(Y_torch.cpu().numpy(), predictions.cpu().detach().numpy()))

# train model on train/test split

In [None]:
# split into train/test: hold out 30% of the rows, with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X_torch,
    Y_torch,
    random_state=42,
    test_size=0.3,
)

# Mini-batches of 64 drawn in shuffled order from the training split only.
train_dataset = MAFLDDataset(X=X_train, y=y_train)
train_data = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)

In [None]:
# train model for 30 epochs on the training split
num_epochs = 30 # typically between 10-50 for small datasets

# Start from freshly initialised weights and optimizer state. The previous
# `model` was already fitted on the full dataset (which contains the test
# rows), so continuing to train it would make the test metrics optimistic.
model = NeuralNetwork().to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)

model.train()
for epoch in range(num_epochs):
    for batch_X, batch_y in train_data:
        # Batches from the DataLoader are already tensors; just move them.
        # (Re-wrapping with torch.tensor() only triggers a copy warning.)
        batch_X = batch_X.to(device)
        batch_y = batch_y.to(device)

        # reset the gradients accumulated by the previous step
        optimizer.zero_grad()

        # forward pass
        outputs = model(batch_X)

        # compute loss
        loss = loss_fn(outputs, batch_y)

        # backpropagate and update the weights
        loss.backward()
        optimizer.step()

    # Report once per epoch (loss of the last mini-batch); previously this
    # print ran only once, after the final epoch.
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

In [None]:
# run model on test data
model.eval()  # inference mode for layers such as dropout / batchnorm
with torch.no_grad():  # no gradients are needed for evaluation
    Y_hat_test = model(X_test.float().to(device))  # raw logits
Y_hat_probs = torch.sigmoid(Y_hat_test)
Y_pred_binary = (Y_hat_probs > 0.5).float()
# evaluate via auroc
# AUROC ranks samples by score, so it must be given the predicted
# probabilities; hard 0/1 predictions reduce the ROC curve to a single
# operating point and misstate the area.
print(roc_auc_score(y_test, Y_hat_probs.cpu().detach().numpy().ravel()))

In [None]:
# Per-class precision / recall / F1 on the test split, using the thresholded predictions.
print(classification_report(y_test, Y_pred_binary.cpu().detach().numpy()))

In [None]:
# Confusion matrix on the test split, plus a labelled DataFrame view of it
# (rows labelled as true classes, columns as predicted classes).
cf = confusion_matrix(y_test, Y_pred_binary.cpu().detach().numpy())
df_cf = pd.DataFrame(
    data=cf,
    columns=['Predicted no progression', 'Predicted progression'],
    index=['True no progression', 'True progression'],
)

In [None]:
# Raw counts of the confusion matrix.
print(cf)

In [None]:
import seaborn as sns

# Heatmap of the confusion matrix with every cell shown as a share of all
# test samples (counts divided by the grand total, formatted as percentages).
# To plot raw counts instead: sns.heatmap(df_cf, annot=True, cmap='Blues')
sns.heatmap(
    data=df_cf / cf.sum(),
    cmap='Blues',
    fmt='.2%',
    annot=True,
)

# tweaking model design

In [None]:
# original model
class NeuralNetwork(nn.Module):
    """Baseline fully connected network that outputs one raw logit per sample.

    Args:
        input_dim: Number of input features. When omitted, the width of the
            notebook-level feature frame ``X`` (``X.shape[1]``) is used, which
            is what this class always did before the parameter existed.
    """

    def __init__(self, input_dim=None):
        super().__init__()  # inherit init from parent class
        if input_dim is None:
            # Backward-compatible default: read the width from the global X.
            input_dim = X.shape[1]
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(input_dim, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),  # no activation follows this layer
        )

    def forward(self, x):
        """Return the raw output of the stack for *x*."""
        pred = self.linear_relu_stack(x)
        return pred

In [None]:
# adding dropout, switching to LeakyReLU, adding batchnorm layers
class NeuralNetwork(nn.Module):
    """Variant of the network using LeakyReLU, dropout and one batchnorm layer.

    Args:
        input_dim: Number of input features. When omitted, the width of the
            notebook-level feature frame ``X`` (``X.shape[1]``) is used, which
            is what this class always did before the parameter existed.
    """

    def __init__(self, input_dim=None):
        super().__init__()  # inherit init from parent class
        if input_dim is None:
            # Backward-compatible default: read the width from the global X.
            input_dim = X.shape[1]
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(input_dim, 512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU(),
            nn.Dropout(0.2),

            nn.Linear(512, 256),
            nn.LeakyReLU(),

            nn.Linear(256, 128),
            nn.Dropout(0.2),
            nn.LeakyReLU(),

            nn.Linear(128, 64),
            nn.LeakyReLU(),

            nn.Linear(64, 1)  # single raw output, no activation
        )

    def forward(self, x):
        """Return the raw output of the stack for *x*."""
        pred = self.linear_relu_stack(x)
        return pred

In [None]:
# attempting skip connections
class ResidualBlock(nn.Module):
    """Linear -> BatchNorm -> ReLU -> Linear -> BatchNorm branch with a skip connection.

    The input is added to the branch output and the sum goes through a ReLU,
    so input and output both have width ``dim``.
    """

    def __init__(self, dim):
        super().__init__()
        branch_layers = [
            nn.Linear(dim, dim),
            nn.BatchNorm1d(dim),
            nn.ReLU(),
            nn.Linear(dim, dim),
            nn.BatchNorm1d(dim),
        ]
        self.block = nn.Sequential(*branch_layers)
        self.relu = nn.ReLU()

    def forward(self, x):
        branch_out = self.block(x)
        summed = x + branch_out  # skip connection
        return self.relu(summed)

class NeuralNetwork(nn.Module):
    """Residual network: input projection, three residual blocks, one raw output.

    Args:
        input_dim: Number of input features.
    """

    def __init__(self, input_dim):
        super().__init__()
        hidden_dim = 256
        self.input_layer = nn.Linear(input_dim, hidden_dim)

        self.resblock1 = ResidualBlock(hidden_dim)
        self.resblock2 = ResidualBlock(hidden_dim)
        self.resblock3 = ResidualBlock(hidden_dim)

        self.output_layer = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        hidden = self.input_layer(x)
        # Apply the residual blocks in the order they were registered.
        for res_block in (self.resblock1, self.resblock2, self.resblock3):
            hidden = res_block(hidden)
        return self.output_layer(hidden)

In [None]:
# creating an experiment manager that can test run the various edits we want to make
from itertools import product

# Hyper-parameter grid: each key maps to the list of candidate values to try.
search_space = dict(
    hidden_sizes=[[512, 128], [1024, 512, 128]],
    activation=["relu", "leaky_relu"],
    dropout=[0.0, 0.2],
    use_batchnorm=[True, False],
    learning_rate=[1e-3, 1e-4],
)

# One config dict per element of the Cartesian product of all candidate lists.
all_configs = [
    dict(zip(search_space, combo))
    for combo in product(*search_space.values())
]

In [None]:
import torch.nn as nn

def get_activation(name):
    """Return a newly constructed activation module for *name*.

    Supported names are "relu" and "leaky_relu" (negative slope 0.01).
    Any other name raises KeyError.
    """
    factories = {
        "relu": nn.ReLU,
        "leaky_relu": lambda: nn.LeakyReLU(0.01),
    }
    build = factories[name]
    return build()

class FlexibleNetwork(nn.Module):
    """Configurable fully connected network ending in a single raw output.

    Each hidden size adds Linear -> [BatchNorm1d] -> activation -> [Dropout];
    a final Linear layer maps the last hidden width to one output.

    Args:
        input_dim: Number of input features.
        hidden_sizes: Widths of the hidden layers, in order.
        activation: Activation name understood by ``get_activation``.
        dropout: Dropout probability; no dropout layer is added unless it is > 0.
        use_batchnorm: Whether to add BatchNorm1d after each hidden Linear layer.
    """

    def __init__(self, input_dim, hidden_sizes, activation, dropout, use_batchnorm):
        super().__init__()
        widths = [input_dim, *hidden_sizes]
        modules = []
        # Walk consecutive (in, out) width pairs to build the hidden stages.
        for fan_in, fan_out in zip(widths, widths[1:]):
            modules.append(nn.Linear(fan_in, fan_out))
            if use_batchnorm:
                modules.append(nn.BatchNorm1d(fan_out))
            modules.append(get_activation(activation))
            if dropout > 0.0:
                modules.append(nn.Dropout(dropout))
        modules.append(nn.Linear(widths[-1], 1))
        self.model = nn.Sequential(*modules)

    def forward(self, x):
        return self.model(x)


In [None]:
# training loop and evaluator
def train_model(model, train_loader, val_loader, lr, device="cpu", epochs=10):
    """Train *model* with Adam and MSE loss; return the last epoch's validation loss.

    Args:
        model: Module mapping a feature batch to one output per sample.
        train_loader: Iterable of ``(x, y)`` training batches.
        val_loader: Sized iterable of ``(x, y)`` validation batches.
        lr: Learning rate for Adam.
        device: Device the model and the batches are moved to.
        epochs: Number of passes over ``train_loader``.

    Returns:
        Mean per-batch validation loss measured after the final epoch.

    NOTE(review): the loss is MSE on the raw model output. If the targets are
    binary labels, a logits-based classification loss may be the better fit —
    confirm before comparing these numbers with the rest of the notebook.
    """
    model.to(device)
    loss_fn = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    val_losses = []

    def batch_loss(x, y):
        """Loss for one batch, with targets shaped exactly like the predictions."""
        preds = model(x.to(device))
        # Previously the predictions were squeezed to (batch,) while the
        # targets stayed (batch, 1); MSELoss then broadcast the pair to
        # (batch, batch) and compared every prediction with every target.
        return loss_fn(preds, y.to(device).reshape(preds.shape))

    for _ in range(epochs):
        model.train()
        for x, y in train_loader:
            loss = batch_loss(x, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Evaluate
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for x, y in val_loader:
                val_loss += batch_loss(x, y).item()
        val_losses.append(val_loss / len(val_loader))
    return val_losses[-1]  # return final validation loss


In [None]:
# run experiments
def run_experiments(X_train, y_train, X_val, y_val):
    """Train one FlexibleNetwork per entry of ``all_configs`` and rank them.

    Args:
        X_train, y_train: Training features and labels.
        X_val, y_val: Validation features and labels.

    Returns:
        List of ``(config, val_loss)`` tuples sorted by ascending validation loss.
    """
    from torch.utils.data import DataLoader

    # The data is the same for every config, so build the datasets and
    # loaders once instead of re-copying the tensors on every iteration.
    train_loader = DataLoader(MAFLDDataset(X_train, y_train), batch_size=64, shuffle=True)
    val_loader = DataLoader(MAFLDDataset(X_val, y_val), batch_size=64)

    results = []
    for config in all_configs:
        print(f"Running config: {config}")
        model = FlexibleNetwork(
            input_dim=X_train.shape[1],
            hidden_sizes=config["hidden_sizes"],
            activation=config["activation"],
            dropout=config["dropout"],
            use_batchnorm=config["use_batchnorm"]
        )

        val_loss = train_model(model, train_loader, val_loader, lr=config["learning_rate"])
        results.append((config, val_loss))
        print(f"Validation loss: {val_loss:.4f}")

    return sorted(results, key=lambda x: x[1])  # sorted by val loss


In [None]:
# Run every config in the grid; the result is a list of (config, val_loss) sorted by validation loss.
# NOTE(review): the test split is passed as the validation set here, so the best
# config is selected on test data — consider a separate validation split.
run_experiments(X_train, y_train, X_test, y_test)

In [None]:
# split into train/test: hold out 30% of the rows, with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X_torch,
    Y_torch,
    random_state=42,
    test_size=0.3,
)

# Mini-batches of 64 drawn in shuffled order from the training split only.
train_dataset = MAFLDDataset(X=X_train, y=y_train)
train_data = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)

In [None]:
# Size of the second dimension of the training features.
X_train.shape[1]

In [None]:
# Shape of the training labels.
y_train.shape