# Train a neural network to predict Therapeutic Dose of Warfarin that achieves a given INR
---
#### This notebook uses PyTorch to train a feed‑forward network
  
## How it works
---
* Read the CSV, treating Therapeutic Dose of Warfarin (mg/week) as the target.  

* All other columns are used as features, including the patient's INR measured on their current dose. At inference time you can set the INR column to the value you *want* the patient to reach (e.g. 2.5) and obtain a recommended weekly dose.  

* The numeric features are z‑score normalised with `sklearn.StandardScaler`.  

* A simple fully‑connected network (3 hidden layers) is trained with mean‑squared‑error (MSE) loss.  

* Validation metrics (RMSE & MAE) are printed every epoch.  

* The best model (lowest validation RMSE) is saved to `best_model.pt`.  

---
# LIBRARY
---

In [1]:
from typing import Dict, List
from tqdm import tqdm
import pandas as pd
import numpy as np
import pickle

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

from sklearn.metrics import root_mean_squared_error as rmse
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import r2_score as r2

from torch.utils.data import DataLoader, Dataset
from torch import nn
import torch

---
# CLASS
---

## Data wrapper around ndarray

In [2]:
class WarfarinDataset(Dataset):
    """Tensor-ready wrapper for (X, y) numpy arrays."""

    def __init__(self, X:np.ndarray, y:np.ndarray):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32).unsqueeze(1)
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

## Feed Forward Neural Network

In [3]:
class FeedForwardNN(nn.Module):
    def __init__(self, in_dim:int):
        super().__init__()
        self.NN = nn.Sequential(
            nn.Linear(in_dim, 128),
            nn.LeakyReLU(),
            nn.Dropout(0.2),
            
            nn.Linear(128, 64),
            nn.LeakyReLU(),

            nn.Linear(64, 1), # output layer – single continuous value (mg/week)
            nn.ReLU()  # ensures dose ≥ 0
        )

        # Apply good weight initialisation across all sub‑modules
        self.apply(self._init_weights)

    @staticmethod
    def _init_weights(m):
        """Kaiming-uniform initialisation suited for LeakyReLU."""
        if isinstance(m, nn.Linear):
            nn.init.kaiming_uniform_(m.weight, nonlinearity='leaky_relu')
            nn.init.zeros_(m.bias)
    
    def forward(self, X:torch.Tensor) -> torch.Tensor:
        return self.NN(X)

## Trainer

In [4]:
"""------------------ CONFIGURE ------------------"""
MODEL_WEIGHTS: str = 'best_nn.pt'  # where to save checkpointed weights
SCALER_FILE: str = 'scaler.pkl'    # where to save fitted StandardScaler
RANDOM_STATE: int = 42
TEST_SIZE: float = 0.2

In [5]:
def train(df:pd.DataFrame, target_col:str, epochs:int=100, batch_size:int=64, lr:float=1e-3):
    """------------------ 1) Load data ------------------"""
    print('[*] Loading data...')
    # Split dataset into features and target
    X = df.drop(columns=[target_col]).values.astype(np.float32)
    y = df[target_col].values.astype(np.float32)

    # Train:Test -> 8:2
    X_train, X_test, y_train, y_test = train_test_split(
        X, y,
        test_size=TEST_SIZE,
        random_state=RANDOM_STATE,
    )

    """"------------------ 2) Fit scaler ------------------"""
    print('[*] Scaling features...')
    scaler = StandardScaler()
    # scaler = MinMaxScaler()

    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.fit_transform(X_test)

    # Persist scaler to re‑use at inference time
    with open(SCALER_FILE, 'wb') as filo:
        pickle.dump(scaler, filo)
    

    """------------------ 3) Build datasets/loaders ------------------"""
    print('[*] Wrapping train/test datasets...')
    train_ds = WarfarinDataset(X_train_scaled, y_train)
    test_ds = WarfarinDataset(X_test_scaled, y_test)

    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False)

    """------------------ 4) Model/optimiser ------------------"""
    print('[*] Initializing neural network...')
    device = torch.device('cuda' if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")
    print(f'\tUsing {device}.')

    model = FeedForwardNN(X_train.shape[1]).to(device)
    print(f'\tModel:\n{model}\n')

    criterion = nn.MSELoss()
    optimizer = torch.optim.AdamW(params=model.parameters(), lr=lr, weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, patience=10, factor=0.5)
    print(
        f'\tCirterion:\n{criterion}\n\n'
        f'\tOptimizer:\n{optimizer}\n\n'
        f'\tScheduler:\n{scheduler}\n\n'
    )

    """------------------ 5) Training loop ------------------"""
    print(f'[*] Start training({epochs} epochs):')
    best_test_rmse = float('inf')
    for epoch in range(1, epochs + 1):
        # ---- train ----
        model.train()
        train_losses: List[float] = []
        for x_batch, y_batch in train_loader:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            
            optimizer.zero_grad()
            y_preds = model(x_batch)
            loss = criterion(y_preds, y_batch)
            loss.backward()
            optimizer.step()
            
            train_losses.append(loss.item())
        
        # ---- test ----
        model.eval()
        test_losses: List[float] = []
        test_preds, test_targets = [], []
        with torch.no_grad():
            for x_batch, y_batch in test_loader:
                x_batch, y_batch = x_batch.to(device), y_batch.to(device)
                
                y_preds = model(x_batch)
                loss = criterion(y_preds, y_batch)
                
                test_losses.append(loss.item())
                test_preds.append(y_preds.cpu().numpy())
                test_targets.append(y_batch.cpu().numpy())
        
        test_preds_np = np.concatenate(test_preds).squeeze()
        test_targets_np = np.concatenate(test_targets).squeeze()
        test_rmse = rmse(test_targets_np, test_preds_np)
        test_mae = mae(test_targets_np, test_preds_np)
        test_r2 = r2(test_targets_np, test_preds_np)
        
        print(f'\r\tEpoch {epoch:03d}: Train MSE = {np.mean(train_losses):.4f} | Test RMSE = {test_rmse:.4f} | Test MAE = {test_mae:.4f} | Test R² = {test_r2:.4f}', end='')

        # Plateau scheduler – auto LR decay if progress stalls
        scheduler.step(test_rmse)

        # Checkpoint if this epoch is best so far
        if test_rmse < best_test_rmse:
            best_test_rmse = test_rmse
            torch.save(model.state_dict(), MODEL_WEIGHTS)
        
    """------------------ 6) Done ------------------"""
    print(
        f'\n[*] Training complete, best validation RMSE: {best_test_rmse:.4f}.\n'
        f'[*] Model saved to {MODEL_WEIGHTS}\n'
        f'[*] Scaler saved to {SCALER_FILE}'
    )

---
# TRAIN NEURAL NETWORK
---

In [6]:
DATA_CSV: str = '../datasets/NN_Training_Data.csv'
TARGET_COLUMN: str = 'Therapeutic Dose of Warfarin'

In [7]:
df = pd.read_csv(DATA_CSV)
df

Unnamed: 0,INR on Reported Therapeutic Dose of Warfarin,Therapeutic Dose of Warfarin,Weight (kg),Height (cm),Gender_male,Age,VKORC1 -1639 consensus_A/G,VKORC1 -1639 consensus_G/G,CYP2C9 consensus_*1/*2,CYP2C9 consensus_*2/*3,CYP2C9 consensus_*1/*3,Amiodarone (Cordarone),CYP2C9 consensus_*2/*2,CYP2C9 consensus_*1/*14,CYP2C9 consensus_*3/*3,Anti-fungal Azoles,Current Smoker,Diabetes
0,2.13,35.000000,75.50,173.482,1,84.5,1,0,0,0,0,0.0,0,0,0,0.0,0.0,0.0
1,2.33,17.500000,70.00,166.116,1,84.5,1,0,0,0,1,0.0,0,0,0,0.0,0.0,0.0
2,1.90,20.000000,88.60,176.022,1,74.5,1,0,1,0,0,0.0,0,0,0,0.0,0.0,1.0
3,2.83,30.000000,92.00,176.022,1,74.5,1,0,1,0,0,0.0,0,0,0,0.0,0.0,0.0
4,2.33,42.000000,114.00,178.562,1,54.5,1,0,0,0,0,0.0,0,0,0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1482,2.30,39.974286,77.27,180.340,1,64.5,1,0,0,0,0,0.0,0,0,0,0.0,1.0,0.0
1483,2.30,28.000000,84.55,180.340,1,74.5,1,0,1,0,0,0.0,0,0,0,0.0,0.0,1.0
1484,2.90,49.980000,90.91,185.420,1,54.5,0,1,1,0,0,0.0,0,0,0,0.0,1.0,0.0
1485,2.50,42.490000,86.36,157.480,0,74.5,0,1,0,0,0,0.0,0,0,0,0.0,0.0,0.0


In [8]:
train(
    df=df,
    target_col=TARGET_COLUMN,
    # epochs=500,
)

[*] Loading data...
[*] Scaling features...
[*] Wrapping train/test datasets...
[*] Initializing neural network...
	Using mps.
	Model:
FeedForwardNN(
  (NN): Sequential(
    (0): Linear(in_features=17, out_features=128, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
    (2): Dropout(p=0.2, inplace=False)
    (3): Linear(in_features=128, out_features=64, bias=True)
    (4): LeakyReLU(negative_slope=0.01)
    (5): Linear(in_features=64, out_features=1, bias=True)
    (6): ReLU()
  )
)

	Cirterion:
MSELoss()

	Optimizer:
AdamW (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 1e-05
)

	Scheduler:
<torch.optim.lr_scheduler.ReduceLROnPlateau object at 0x13a7d01a0>


[*] Start training(100 epochs):
	Epoch 100: Train MSE = 166.8431 | Test RMSE = 20.3996 | Test MAE = 10.2971 | Test R² = 0.168438
[*] Training complete, best validat