In [None]:
# --- Optional: pin SymPy only if you need 1.12 specifically ---
# Left commented out on purpose: the installed torch 2.8.0 requires
# sympy>=1.13.3, so forcing 1.12 makes pip report a dependency conflict.
# If you really need the pin, uncomment the two lines below; %pip (rather than
# !pip) installs into the environment of the running kernel.
# %pip uninstall -y sympy
# %pip install sympy==1.12

# ========================
# Standard Library
# ========================
import os
import random
from copy import deepcopy
from collections import defaultdict

# ========================
# Core Scientific Stack
# ========================
import numpy as np
import pandas as pd
from scipy import stats

# ========================
# Machine Learning Utilities
# ========================
from sklearn.model_selection import train_test_split

# ========================
# Deep Learning (PyTorch)
# ========================
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader

# ========================
# Visualization
# ========================
import matplotlib.pyplot as plt
import seaborn as sns  # used for heatmaps; remove if you want matplotlib-only

Found existing installation: sympy 1.13.3
Uninstalling sympy-1.13.3:
  Successfully uninstalled sympy-1.13.3
Collecting sympy==1.12
  Downloading sympy-1.12-py3-none-any.whl.metadata (12 kB)
Downloading sympy-1.12-py3-none-any.whl (5.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.7/5.7 MB[0m [31m29.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sympy
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
torch 2.8.0+cu126 requires sympy>=1.13.3, but you have sympy 1.12 which is incompatible.[0m[31m
[0mSuccessfully installed sympy-1.12


In [None]:
def seed_all(seed: int = 42, deterministic: bool = True):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (through ``torch.manual_seed``, ``torch.cuda.manual_seed`` and
    ``torch.cuda.manual_seed_all``), and writes ``PYTHONHASHSEED`` into the
    process environment.

    Args:
        seed: Value handed to every generator.
        deterministic: When truthy, cuDNN is put in deterministic mode and
            its benchmark mode is switched off; otherwise both flags are set
            the other way round.
    """
    # Environment variable, pure-Python and NumPy generators.
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators.
    for seeder in (torch.manual_seed,
                   torch.cuda.manual_seed,
                   torch.cuda.manual_seed_all):
        seeder(seed)

    # The two cuDNN switches are always set to opposite values.
    use_deterministic = bool(deterministic)
    torch.backends.cudnn.deterministic = use_deterministic
    torch.backends.cudnn.benchmark = not use_deterministic

In [None]:
# Reset every RNG to the default seed (42) at the top of the cell. The rest of
# this cell only defines functions, so the reseed matters for later cells only.
seed_all()

# === Categorical Mapping Utilities ===
def BinCat2Num(df: pd.DataFrame) -> pd.DataFrame:
    """Encode the categorical label columns of ``df`` as integer codes.

    Only the columns listed in the lookup table below are touched, and only
    when they are present in ``df``; every other column is passed through.
    The input frame is not modified.

    Args:
        df: Frame whose categorical columns hold the string labels used as
            keys in the lookup table (note the dataset spells the negative
            "Extra pk-En" label as "Fasle").

    Returns:
        A copy of ``df`` in which each known categorical column has been
        replaced by its integer codes.

    Raises:
        ValueError: If a known column contains a value (including a missing
            value) that has no entry in the lookup table.
    """
    codes_by_column = {
        "Gender": {"Male": 0, "Female": 1},
        "Ethnic": {"Asian": 0, "African": 1, "Caucasian": 2, "Other": 3},
        "Base Drug Combo": {
            "FTC + TDF": 0,
            "3TC + ABC": 1,
            "FTC + TAF": 2,
            "DRV + FTC + TDF": 3,
            "FTC + RTVB + TDF": 4,
            "Other": 5,
        },
        "Comp. INI": {"DTG": 0, "RAL": 1, "EVG": 2, "Not Applied": 3},
        "Comp. NNRTI": {"NVP": 0, "EFV": 1, "RPV": 2, "Not Applied": 3},
        "Extra PI": {
            "DRV": 0, "RTVB": 1, "LPV": 2, "RTV": 3, "ATV": 4, "Not Applied": 5,
        },
        "Extra pk-En": {"Fasle": 0, "True": 1},
    }

    encoded = df.copy()
    present = [name for name in codes_by_column if name in encoded.columns]
    for name in present:
        codes = encoded[name].map(codes_by_column[name])
        unmapped = codes.isnull()
        if unmapped.any():
            # Report the offending raw values, not the NaNs produced by map().
            bad_vals = encoded.loc[unmapped, name].unique()
            raise ValueError(f"Unmapped values in column '{name}': {bad_vals}")
        encoded[name] = codes.astype(int)
    return encoded

def BinCat2Str(df: pd.DataFrame) -> pd.DataFrame:
    """Decode integer category codes in ``df`` back into their string labels.

    Only the columns listed below are touched, and only when present in
    ``df``. Unlike ``BinCat2Num`` nothing is validated here: a code without
    an entry in the table comes out of ``Series.map`` as a missing value.
    The input frame is not modified.

    Args:
        df: Frame whose categorical columns hold integer codes.

    Returns:
        A copy of ``df`` with the known categorical columns replaced by
        their labels.
    """
    # Position in each list is the integer code of the label.
    labels_by_column = {
        "Gender": ["Male", "Female"],
        "Ethnic": ["Asian", "African", "Caucasian", "Other"],
        "Base Drug Combo": [
            "FTC + TDF", "3TC + ABC", "FTC + TAF",
            "DRV + FTC + TDF", "FTC + RTVB + TDF", "Other",
        ],
        "Comp. INI": ["DTG", "RAL", "EVG", "Not Applied"],
        "Comp. NNRTI": ["NVP", "EFV", "RPV", "Not Applied"],
        "Extra PI": ["DRV", "RTVB", "LPV", "RTV", "ATV", "Not Applied"],
        "Extra pk-En": ["Fasle", "True"],
    }

    decoded = df.copy()
    for name, labels in labels_by_column.items():
        if name not in decoded.columns:
            continue
        decoded[name] = decoded[name].map(dict(enumerate(labels)))
    return decoded


In [None]:
# Reset every RNG to the default seed (42) at the top of the cell. The rest of
# this cell only defines functions, so the reseed matters for later cells only.
seed_all()

# === Box-Cox Transformation Parameters ===
def compute_boxcox_params(df: pd.DataFrame,
                          columns: list = ["VL", "CD4", "Rel CD4"],
                          eps: float = 1e-3) -> dict:
    """Fit a Box-Cox transform per column and record its scaling constants.

    For every requested column the missing values are dropped, ``eps`` is
    added, and ``scipy.stats.boxcox`` is called without a fixed lambda so
    that it returns both the transformed data and the fitted lambda.

    Args:
        df: Frame holding the columns to fit.
        columns: Names of the columns to fit.
        eps: Constant added to the data before the transform.

    Returns:
        ``{column: {"lambda": ..., "min": ..., "max": ...}}``. NOTE: despite
        its name, ``"max"`` stores the *range* (max - min) of the
        transformed data, i.e. the divisor used for min-max scaling.
    """
    def _fit(series):
        shifted = series.dropna().astype(float) + eps
        transformed, fitted_lambda = stats.boxcox(shifted)
        lowest = transformed.min()
        return {
            "lambda": fitted_lambda,
            "min": lowest,
            "max": transformed.max() - lowest,
        }

    return {name: _fit(df[name]) for name in columns}

# === Apply Box-Cox + Min-Max Normalization ===
def apply_boxcox_minmax_transform(df: pd.DataFrame,
                                   params: dict,
                                   columns: list = ["VL", "CD4", "Rel CD4"],
                                   eps: float = 1e-3) -> pd.DataFrame:
    """Apply a fixed-lambda Box-Cox transform followed by min-max scaling.

    For each requested column the non-missing values are shifted by ``eps``,
    Box-Cox transformed with ``params[col]["lambda"]`` and rescaled as
    ``(value - params[col]["min"]) / params[col]["max"]``. Missing values
    stay missing. The input frame is not modified.

    Args:
        df: Frame holding the columns to transform.
        params: Per-column dict with the keys ``"lambda"``, ``"min"`` and
            ``"max"``; ``"max"`` is used as the divisor.
        columns: Names of the columns to transform.
        eps: Constant added to the data before the transform; it has to be
            the value the parameters were fitted with for the scaling to
            line up.

    Returns:
        A copy of ``df`` whose transformed columns are float typed.

    Raises:
        KeyError: If a requested column is missing from ``df`` or ``params``.
    """
    df_transformed = df.copy()
    for col in columns:
        col_params = params[col]

        # Cast first: the scaled values are floats, and writing floats into an
        # integer (or object) column through .loc relies on silent upcasting,
        # which pandas has deprecated and newer versions reject.
        df_transformed[col] = df_transformed[col].astype(float)

        mask = df_transformed[col].notna()
        shifted = df_transformed.loc[mask, col].to_numpy() + eps
        boxcox_transformed = stats.boxcox(shifted, lmbda=col_params["lambda"])
        scaled = (boxcox_transformed - col_params["min"]) / col_params["max"]
        df_transformed.loc[mask, col] = scaled
    return df_transformed

# === Inverse Box-Cox for PyTorch Tensors ===
def inverse_boxcox_torch(data: torch.Tensor, lmbda: float, eps: float = 1e-3) -> torch.Tensor:
    """Invert a Box-Cox transform element-wise and remove the ``eps`` shift.

    Args:
        data: Box-Cox transformed values.
        lmbda: Box-Cox lambda the values were transformed with.
        eps: Constant subtracted from the inverted values.

    Returns:
        ``exp(log(lmbda * data + 1) / lmbda) - eps`` when ``lmbda`` is
        non-zero, ``exp(data) - eps`` otherwise. Entries for which
        ``lmbda * data + 1`` is not positive are not guarded against.
    """
    if lmbda == 0:
        # Zero lambda: the forward transform is a plain logarithm.
        restored = torch.exp(data)
    else:
        log_of_original = torch.log(lmbda * data + 1) / lmbda
        restored = torch.exp(log_of_original)
    return restored - eps

# === Backtransform a Tensor to Original Values ===
def backtransform_art_tensor(tensor: torch.Tensor,
                              feature_names: list,
                              transform_params: dict,
                              real_columns: list = ["VL", "CD4", "Rel CD4"],
                              eps: float = 1e-3) -> pd.DataFrame:
    """Map the scaled real-valued features of a 2-D tensor back to raw values.

    For every column in ``real_columns`` the min-max scaling is undone
    (``value * p['max'] + p['min']``) and the result is passed through
    ``inverse_boxcox_torch``. All other columns are returned unchanged. The
    input tensor is cloned first and therefore not modified.

    Args:
        tensor: Tensor indexed as ``[row, feature]``.
        feature_names: Feature name for each tensor column, in order; any
            sequence is accepted.
        transform_params: Per-column dict with the keys ``'lambda'``,
            ``'min'`` and ``'max'``.
        real_columns: Names of the columns to back-transform.
        eps: Shift removed by the inverse Box-Cox step. It must match the
            ``eps`` used in the forward transform; the default is the same
            as the forward functions' default.

    Returns:
        A DataFrame with one column per entry of ``feature_names``.

    Raises:
        ValueError: If a name in ``real_columns`` is not in ``feature_names``.
        KeyError: If a name in ``real_columns`` is not in ``transform_params``.
    """
    restored = tensor.clone()
    # Materialise as a list so that .index() also works for tuples, Index
    # objects and other sequences.
    names = list(feature_names)
    for col in real_columns:
        idx = names.index(col)
        p = transform_params[col]
        # Undo the min-max scaling, then the Box-Cox transform.
        restored[:, idx] = restored[:, idx] * p['max'] + p['min']
        restored[:, idx] = inverse_boxcox_torch(restored[:, idx], p['lambda'], eps=eps)
    return pd.DataFrame(restored.detach().cpu().numpy(), columns=names)


In [None]:
# === Set Seed for Reproducibility ===
seed_all()

# === Step 0–1: Load and Preprocess Raw Data ===
raw_url = "https://figshare.com/ndownloader/files/40584980"

# Read the CSV, discard the three "(M)" columns and turn the integer category
# codes into readable labels, all in one non-mutating chain.
All_Data = (
    pd.read_csv(raw_url)
    .drop(columns=['VL (M)', 'CD4 (M)', 'Drug (M)'])
    .replace({
        "Gender": {1: "Male", 2: "Female"},
        "Ethnic": {1: "Asian", 2: "African", 3: "Caucasian", 4: "Other"},
        "Base Drug Combo": {
            0: "FTC + TDF",
            1: "3TC + ABC",
            2: "FTC + TAF",
            3: "DRV + FTC + TDF",
            4: "FTC + RTVB + TDF",
            5: "Other",
        },
        "Comp. INI": {0: "DTG", 1: "RAL", 2: "EVG", 3: "Not Applied"},
        "Comp. NNRTI": {0: "NVP", 1: "EFV", 2: "RPV", 3: "Not Applied"},
        "Extra PI": {
            0: "DRV", 1: "RTVB", 2: "LPV", 3: "RTV", 4: "ATV", 5: "Not Applied",
        },
        # "Fasle" is the spelling the BinCat2Num / BinCat2Str tables expect.
        "Extra pk-En": {0: "Fasle", 1: "True"},
    })
)

In [None]:
# Preview the first rows now that the category codes have been replaced by labels.
All_Data.head()

Unnamed: 0,VL,CD4,Rel CD4,Gender,Ethnic,Base Drug Combo,Comp. INI,Comp. NNRTI,Extra PI,Extra pk-En,PatientID,Timestep
0,29.944271,793.4583,30.834505,Male,Caucasian,FTC + TDF,DTG,Not Applied,Not Applied,Fasle,0,0
1,29.24198,467.4189,30.35598,Male,Caucasian,FTC + TDF,DTG,Not Applied,Not Applied,Fasle,0,1
2,28.748991,465.12485,30.40532,Male,Caucasian,FTC + TDF,DTG,Not Applied,Not Applied,Fasle,0,2
3,28.101835,692.0069,30.248816,Male,Caucasian,FTC + TDF,DTG,Not Applied,Not Applied,Fasle,0,3
4,28.813837,641.75714,29.944712,Male,Caucasian,FTC + TDF,DTG,Not Applied,Not Applied,Fasle,0,4


In [None]:
# === Step 2–4: Transform Dataset ===
# Drop the bookkeeping columns. errors='ignore' keeps this cell re-runnable:
# All_Data is rebound here, so on a second execution the two columns are
# already gone and an unconditional drop would raise KeyError.
All_Data = All_Data.drop(columns=['PatientID', 'Timestep'], errors='ignore')

# Labels -> integer codes, then fit the Box-Cox parameters and apply them.
ART_Data_Num = BinCat2Num(All_Data)
art_transformation_params = compute_boxcox_params(ART_Data_Num)
ART_Data_Transformed = apply_boxcox_minmax_transform(ART_Data_Num, art_transformation_params)

In [None]:
# Preview the frame returned by BinCat2Num (categorical labels encoded as integers).
ART_Data_Num.head()

Unnamed: 0,VL,CD4,Rel CD4,Gender,Ethnic,Base Drug Combo,Comp. INI,Comp. NNRTI,Extra PI,Extra pk-En
0,29.944271,793.4583,30.834505,0,2,0,0,3,5,0
1,29.24198,467.4189,30.35598,0,2,0,0,3,5,0
2,28.748991,465.12485,30.40532,0,2,0,0,3,5,0
3,28.101835,692.0069,30.248816,0,2,0,0,3,5,0
4,28.813837,641.75714,29.944712,0,2,0,0,3,5,0


In [None]:
# Fitted parameters per column; note that 'max' holds the range (max - min) of
# the Box-Cox transformed data, as computed in compute_boxcox_params.
art_transformation_params

{'VL': {'lambda': np.float64(-0.10280050659808612),
  'min': np.float64(-2.5667126025418563),
  'max': np.float64(9.308206289656969)},
 'CD4': {'lambda': np.float64(-0.15837078646408756),
  'min': np.float64(1.768426319414879),
  'max': np.float64(3.3780829615538304)},
 'Rel CD4': {'lambda': np.float64(-0.07011648493830457),
  'min': np.float64(0.613685356331402),
  'max': np.float64(4.526683845025821)}}

In [None]:
# Preview the frame after Box-Cox + min-max scaling of VL, CD4 and Rel CD4.
ART_Data_Transformed.head()

Unnamed: 0,VL,CD4,Rel CD4,Gender,Ethnic,Base Drug Combo,Comp. INI,Comp. NNRTI,Extra PI,Extra pk-En
0,0.583964,0.696381,0.537696,0,2,0,0,3,5,0
1,0.582164,0.63962,0.534977,0,2,0,0,3,5,0
2,0.580872,0.639069,0.53526,0,2,0,0,3,5,0
3,0.579139,0.68216,0.534362,0,2,0,0,3,5,0
4,0.581044,0.67419,0.532604,0,2,0,0,3,5,0


In [None]:
###===>>>++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Copyright (c) 2021. by Nicholas Kuo & Sebastiano Barbieri, UNSW.                    +
# All rights reserved. This file is part of the Health Gym, and is released under the +
# "MIT License Agreement". Please see the LICENSE file that should have been included +
# as part of this package.                                                            +
###===###++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

###===>>>
# This is the 3rd of all files for WGAN on Sepsis (reused in this notebook on the ART data)

###===>>>
import  numpy               as  np
import  pandas              as  pd

import  itertools
import  random

import  torch
import  torch.utils.data    as  utils

# Defaults for Execute_C003; they describe how the flat table is reshaped.
Cur_Len   = 60      # rows (timesteps) per sequence
Feats_Len = 10      # feature columns per row
Pat_Len   = 8916    # number of sequences in the full dataset

###===>>>
def Execute_C003(
        df, Hyper001_BatchSize,
        Cur_Len = Cur_Len, Feats_Len = Feats_Len, Pat_Len = Pat_Len):
    """Build the shuffled training loader and gather one pass of its batches.

    The values of ``df`` are reshaped to ``(-1, Cur_Len, Feats_Len)`` and
    wrapped in a ``TensorDataset`` together with a per-sequence length tensor
    filled with ``Cur_Len``.

    Args:
        df: Frame whose values can be reshaped to
            ``(-1, Cur_Len, Feats_Len)`` and converted to a float tensor.
        Hyper001_BatchSize: Batch size of the loader.
        Cur_Len: Number of rows per sequence.
        Feats_Len: Number of feature columns.
        Pat_Len: Kept for backward compatibility only. The number of
            sequences is now taken from the reshaped data, so frames with a
            different number of sequences no longer fail the dataset's size
            check.

    Returns:
        ``(trn_loader, All_Trainable_Data)``. The loader shuffles and drops
        the last incomplete batch. ``All_Trainable_Data`` is the
        concatenation along ``dim=1`` of every batch yielded by one pass over
        the loader, i.e. shaped ``(batch size, n_batches * Cur_Len,
        Feats_Len)``.
        NOTE(review): concatenating along dim=1 stacks the batches on the
        time axis rather than the sequence axis; kept as is because callers
        may rely on this shape. Confirm it is intended.
    """
    ###===>>>
    data = df.values.reshape((-1, Cur_Len, Feats_Len))

    # One length entry per sequence actually present in the data.
    n_sequences = data.shape[0]
    data = utils.TensorDataset(
                    torch.from_numpy(data).float(),
                    torch.full((n_sequences, 1, 1), Cur_Len),
                )

    trn_loader = utils.DataLoader(
            data, batch_size=Hyper001_BatchSize, shuffle=True, drop_last=True
        )

    ###===>>>
    # A single pass over the loader; the length tensors are not needed here.
    All_Trainable_Data = torch.cat([x for x, _ in trn_loader], dim = 1)

    ###===###
    return trn_loader, All_Trainable_Data

In [None]:
# Batch size handed to the DataLoader that Execute_C003 builds.
Hyper001_BatchSize  = 32

# Build the shuffled training loader plus the concatenation of every batch it yields.
Train_Loader, All_Trainable_Data = Execute_C003(ART_Data_Transformed, Hyper001_BatchSize)

In [None]:
# Number of batches per pass; the loader is built with drop_last=True, so only full batches count.
len(Train_Loader)

278

In [None]:
# Take a single batch from the loader for inspection and stop iterating.
# Cur_Data holds the two tensors of that batch: the sequences and their lengths.
for Cur_Data in Train_Loader:
    break

In [None]:
# Shape of the sequence tensor in the batch: (batch size, Cur_Len, Feats_Len).
Cur_Data[0].shape

torch.Size([32, 60, 10])

In [None]:
# === Set Seed for Reproducibility ===
seed_all()

# === Step 0–1: Load and Preprocess Raw Data ===
raw_url = "https://figshare.com/ndownloader/files/40584980"

# Same load as earlier in the notebook: read the CSV, discard the three "(M)"
# columns and turn the integer category codes into readable labels. The frame
# is rebuilt from the source, so PatientID and Timestep are present again.
All_Data = (
    pd.read_csv(raw_url)
    .drop(columns=['VL (M)', 'CD4 (M)', 'Drug (M)'])
    .replace({
        "Gender": {1: "Male", 2: "Female"},
        "Ethnic": {1: "Asian", 2: "African", 3: "Caucasian", 4: "Other"},
        "Base Drug Combo": {
            0: "FTC + TDF",
            1: "3TC + ABC",
            2: "FTC + TAF",
            3: "DRV + FTC + TDF",
            4: "FTC + RTVB + TDF",
            5: "Other",
        },
        "Comp. INI": {0: "DTG", 1: "RAL", 2: "EVG", 3: "Not Applied"},
        "Comp. NNRTI": {0: "NVP", 1: "EFV", 2: "RPV", 3: "Not Applied"},
        "Extra PI": {
            0: "DRV", 1: "RTVB", 2: "LPV", 3: "RTV", 4: "ATV", 5: "Not Applied",
        },
        # "Fasle" is the spelling the BinCat2Num / BinCat2Str tables expect.
        "Extra pk-En": {0: "Fasle", 1: "True"},
    })
)

In [None]:
# Preview the first five rows of the reloaded frame.
All_Data.head(n = 5)

Unnamed: 0,VL,CD4,Rel CD4,Gender,Ethnic,Base Drug Combo,Comp. INI,Comp. NNRTI,Extra PI,Extra pk-En,PatientID,Timestep
0,29.944271,793.4583,30.834505,Male,Caucasian,FTC + TDF,DTG,Not Applied,Not Applied,Fasle,0,0
1,29.24198,467.4189,30.35598,Male,Caucasian,FTC + TDF,DTG,Not Applied,Not Applied,Fasle,0,1
2,28.748991,465.12485,30.40532,Male,Caucasian,FTC + TDF,DTG,Not Applied,Not Applied,Fasle,0,2
3,28.101835,692.0069,30.248816,Male,Caucasian,FTC + TDF,DTG,Not Applied,Not Applied,Fasle,0,3
4,28.813837,641.75714,29.944712,Male,Caucasian,FTC + TDF,DTG,Not Applied,Not Applied,Fasle,0,4


In [None]:
Cur_Len = 60
# View the flat table as blocks of Cur_Len consecutive rows with 12 columns each.
arr12 = All_Data.values.reshape((-1, Cur_Len, 12))

demo = arr12[0, :5, :]   # patient 0, timesteps 0..4
print("###===###")
for timestep_row in demo:
    print(timestep_row)
    print("#---")
  print("#---")

###===###
[29.944271 793.4583 30.834505 'Male' 'Caucasian' 'FTC + TDF' 'DTG'
 'Not Applied' 'Not Applied' 'Fasle' 0 0]
#---
[29.24198 467.4189 30.35598 'Male' 'Caucasian' 'FTC + TDF' 'DTG'
 'Not Applied' 'Not Applied' 'Fasle' 0 1]
#---
[28.748991 465.12485 30.40532 'Male' 'Caucasian' 'FTC + TDF' 'DTG'
 'Not Applied' 'Not Applied' 'Fasle' 0 2]
#---
[28.101835 692.0069 30.248816 'Male' 'Caucasian' 'FTC + TDF' 'DTG'
 'Not Applied' 'Not Applied' 'Fasle' 0 3]
#---
[28.813837 641.75714 29.944712 'Male' 'Caucasian' 'FTC + TDF' 'DTG'
 'Not Applied' 'Not Applied' 'Fasle' 0 4]
#---


In [None]:
demo = arr12[1, :5, :]   # patient 1, timesteps 0..4
print("###===###")
for timestep_row in demo:
    print(timestep_row)
    print("#---")

###===###
[31409.234 215.05347 12.571113 'Male' 'Other' 'FTC + TDF' 'Not Applied'
 'RPV' 'Not Applied' 'Fasle' 1 0]
#---
[12623.555 376.45508 24.313864 'Male' 'Other' 'FTC + TDF' 'Not Applied'
 'RPV' 'Not Applied' 'Fasle' 1 1]
#---
[32045.723 446.03433 23.397942 'Male' 'Other' 'FTC + TDF' 'Not Applied'
 'RPV' 'Not Applied' 'Fasle' 1 2]
#---
[8450.791 301.51492 15.54874 'Male' 'Other' 'FTC + TDF' 'Not Applied'
 'RPV' 'Not Applied' 'Fasle' 1 3]
#---
[20871.664 400.9121 29.138647 'Male' 'Other' 'FTC + TDF' 'Not Applied'
 'RPV' 'Not Applied' 'Fasle' 1 4]
#---
