In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load the table; the first CSV column is consumed as the row index.
df = pd.read_csv("../data/physionet_wo_missing.csv", index_col=0)

from sklearn.preprocessing import StandardScaler

# Raw feature matrix, shape: (num_samples, num_features)
data_np = df.to_numpy()

# Standardise every column; the fitted scaler is kept so the transform can be
# reused or inverted later.
scaler = StandardScaler()
data_scaled_np = scaler.fit_transform(data_np)

print("Shape of scaled data:", data_scaled_np.shape)

# Sanity check: the per-column mean and std of the scaled matrix.
print("Mean per column after scaling:", data_scaled_np.mean(axis=0))
print("Std per column after scaling:", data_scaled_np.std(axis=0))

Shape of scaled data: (1598, 39)
Mean per column after scaling: [ 8.89290032e-18  4.44645016e-17 -7.11432026e-17 -5.78038521e-17
  0.00000000e+00 -4.44645016e-17  4.89109518e-17 -1.11161254e-17
  1.35616730e-16  1.47844468e-16  3.11251511e-17 -2.53447659e-16
 -4.89109518e-17  8.89290032e-18  8.00361029e-17 -1.05769933e-15
 -1.22277379e-15  4.22412765e-17  4.00180515e-17  1.88974132e-16
 -6.73637199e-16  6.26393666e-16 -4.40198566e-16 -1.29502861e-16
 -5.33574019e-17 -4.40198566e-16  4.53537916e-16 -2.77903135e-17
 -2.62340560e-16  3.37930212e-16  3.94622452e-17 -1.40063180e-16
 -1.26723830e-15  1.70076719e-16  1.37839955e-16  3.21256024e-16
 -2.93465711e-16 -5.33574019e-17  0.00000000e+00]
Std per column after scaling: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]


In [2]:
# Copy the scaled NumPy matrix into a float32 tensor allocated on `device`.
data_full = torch.tensor(data_scaled_np, dtype=torch.float32, device=device)

print("Final tensor shape:", data_full.shape)

Final tensor shape: torch.Size([1598, 39])


In [None]:
# Row / column counts of the scaled matrix (unpacking requires a 2-D tensor).
num_samples, num_features = data_full.size()
print("Data shape:", data_full.shape)

In [9]:
class MaskedImputationDataset(torch.utils.data.Dataset):
    """Row-wise dataset that hides a random subset of features on every access.

    Each ``__getitem__`` call draws a fresh Bernoulli mask, so the same row is
    corrupted differently every time it is requested.

    Args:
        data_tensor: 2-D tensor; ``shape[0]`` is used as the number of samples
            and ``shape[1]`` as the number of features.
        mask_ratio: probability with which each individual feature is masked.
    """

    def __init__(self, data_tensor, mask_ratio=0.1):
        super().__init__()
        self.data = data_tensor
        self.mask_ratio = mask_ratio
        self.num_samples = data_tensor.shape[0]
        self.num_features = data_tensor.shape[1]

    def __len__(self):
        """Return the number of rows in the wrapped tensor."""
        return self.num_samples

    def __getitem__(self, idx):
        """Return ``(masked_row, mask, row)`` for the row at ``idx``.

        ``mask`` is a float tensor holding 1.0 where the feature was hidden and
        0.0 elsewhere. ``masked_row`` is a copy of ``row`` with the hidden
        entries set to 0.0; the stored data is never modified.
        """
        row = self.data[idx]

        # Draw the mask on the same device as the data so that all three
        # returned tensors live together (no CPU mask indexing a CUDA row).
        hidden = torch.rand(self.num_features, device=row.device) < self.mask_ratio
        mask = hidden.float()

        # masked_fill allocates a new tensor, leaving `self.data` untouched.
        masked_row = row.masked_fill(hidden, 0.0)

        return masked_row, mask, row

In [10]:
class PositionalEncoding(nn.Module):
    """Learned per-column embedding that is added to every feature token.

    Args:
        d_model: size of each embedding vector (must match the last dimension
            of the input to ``forward``).
        max_len: number of distinct column positions the table can hold.
    """

    def __init__(self, d_model, max_len):
        super().__init__()
        self.col_embed = nn.Embedding(max_len, d_model)

    def forward(self, x):
        """Add the column embedding to ``x``.

        Args:
            x: tensor of shape ``(batch_size, num_features, d_model)``.

        Returns:
            Tensor of the same shape as ``x``.

        Raises:
            ValueError: if ``x`` is not 3-D (raised by the shape unpacking).
            IndexError: if ``x`` has more columns than the embedding table.
        """
        _, num_features, _ = x.size()

        # Fail early with a readable message instead of letting the embedding
        # lookup fail on an out-of-range index.
        table_size = self.col_embed.num_embeddings
        if num_features > table_size:
            raise IndexError(
                f"input has {num_features} feature columns but the positional "
                f"embedding only covers {table_size}"
            )

        # One index per column; the (num_features, d_model) result broadcasts
        # over the batch dimension, so no per-sample copy of the indices is needed.
        positions = torch.arange(num_features, device=x.device)
        return x + self.col_embed(positions)

class TabTransformer(nn.Module):
    """Transformer encoder that treats every table column as one token.

    Each scalar feature is projected to ``d_model`` dimensions, a learned
    column embedding is added, the token sequence is passed through a stack of
    encoder layers, and every token is projected back to a single scalar.

    Args:
        num_features: number of columns; sizes the positional embedding table.
        d_model: token embedding width.
        nhead: number of attention heads per encoder layer.
        num_layers: number of stacked encoder layers.
    """

    def __init__(self, num_features, d_model=64, nhead=4, num_layers=2):
        super().__init__()
        self.num_features = num_features
        self.d_model = d_model

        # Scalar value -> d_model vector, shared across all columns.
        self.value_embedding = nn.Linear(1, d_model)

        self.pos_encoder = PositionalEncoding(d_model, max_len=num_features)

        # batch_first=True lets the encoder consume (batch, features, d_model)
        # directly, so forward() needs no transposes.
        encoder_layers = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=256,
            dropout=0.1,
            activation='relu',
            batch_first=True,
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers=num_layers)

        # d_model vector -> reconstructed scalar for each column.
        self.output_layer = nn.Linear(d_model, 1)

    def forward(self, x, mask=None):
        """Predict a value for every column of every row.

        Args:
            x: tensor of shape ``(batch_size, num_features)``.
            mask: accepted for interface compatibility; it is not used by the
                computation below.

        Returns:
            Tensor of shape ``(batch_size, num_features)``.

        Raises:
            ValueError: if ``x`` is not 2-D.
        """
        if x.dim() != 2:
            raise ValueError(
                f"expected a 2-D (batch_size, num_features) tensor, got shape {tuple(x.shape)}"
            )

        tokens = self.value_embedding(x.unsqueeze(-1))  # (batch, features, d_model)
        tokens = self.pos_encoder(tokens)
        tokens = self.transformer_encoder(tokens)

        return self.output_layer(tokens).squeeze(-1)


In [None]:
train_dataset = MaskedImputationDataset(data_full, mask_ratio=0.1)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)

model = TabTransformer(num_features=num_features, d_model=64, nhead=4, num_layers=2).to(device)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

num_epochs = 20
model.train()
for epoch in range(num_epochs):
    total_loss = 0.0
    num_steps = 0
    for masked_row, mask, original_row in train_loader:
        # Put every batch tensor on the model's device explicitly instead of
        # relying on where the dataset happened to create them.
        masked_row = masked_row.to(device)
        original_row = original_row.to(device)
        mask = mask.to(device)
        masked_positions = mask == 1

        # The loss is computed on masked cells only. A batch in which nothing
        # was masked would give MSELoss an empty selection (NaN), so skip it.
        if not masked_positions.any():
            continue

        optimizer.zero_grad()
        outputs = model(masked_row)

        loss = criterion(outputs[masked_positions], original_row[masked_positions])

        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        num_steps += 1

    # Average over the optimisation steps that actually ran.
    avg_loss = total_loss / max(num_steps, 1)
    print(f"Epoch [{epoch+1}/{num_epochs}] - Loss: {avg_loss:.4f}")


In [None]:
def create_test_masked_data(data_tensor, mask_ratio=0.1):
    """Hide a random subset of entries of ``data_tensor`` for evaluation.

    Every entry is masked independently with probability ``mask_ratio``.

    Returns:
        A tuple ``(masked_data, mask, data_tensor)`` where ``masked_data`` is a
        new tensor with the hidden entries set to 0.0, ``mask`` is a float
        tensor with 1.0 at hidden entries and 0.0 elsewhere, and the last item
        is the input tensor itself (unmodified).
    """
    mask = torch.rand_like(data_tensor).lt(mask_ratio).float()
    # masked_fill returns a fresh tensor, so the input stays intact.
    masked_data = data_tensor.masked_fill(mask.bool(), 0.0)
    return masked_data, mask, data_tensor

# Evaluation input: a copy of the full scaled matrix with a freshly drawn mask.
# NOTE(review): these are the same rows the model was trained on in this
# notebook, so the numbers below are not a held-out estimate.
data_test = data_full.clone()
masked_test, test_mask, original_test = create_test_masked_data(data_test, mask_ratio=0.1)

# Inference only: disable dropout and gradient tracking.
model.eval()
with torch.no_grad():
    predictions = model(masked_test)

# Score the hidden cells only.
masked_indices = test_mask == 1
pred_vals, true_vals = predictions[masked_indices], original_test[masked_indices]

mse = (pred_vals - true_vals).pow(2).mean()
rmse = mse.sqrt()

# Normalise the RMSE by the value range of the hidden ground-truth cells.
val_min, val_max = true_vals.min(), true_vals.max()
norm_factor = val_max - val_min

nrmse = (rmse / norm_factor).item()

print(f"Test RMSE: {rmse.item():.4f}")
print(f"Test NRMSE: {nrmse:.4f}")
