In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

In [2]:
#########################################
# 1. Data Loading and Preprocessing
#########################################

# Load the Excel file (make sure you have openpyxl installed: pip install openpyxl)
data = pd.read_excel("colored_columns_output_filtered.xlsx")  # Replace with your file path

# Define the 7 input columns.
input_cols = [
    "SchiefstandBewegungMmDurchschnitt_links",
    "SchiefstandBewegungMmDurchschnitt_rechts",
    "SchiefstandRuheMmDurchschnitt_links",
    "SchiefstandRuheMmDurchschnitt_rechts",
    "AuftrittDurchschnitt_links",
    "AuftrittDurchschnitt_rechts",
    "Schuhgröße"
]

# Define the binary (yes/no) target columns.
binary_target_cols = [
    "Schmerz_Vorfuß_Links", "Schmerz_Vorfuß_Rechts",
    "Schmerz_Mittelfuß_Links", "Schmerz_Mittelfuß_Rechts",
    "Schmerz_Ferse_Links", "Schmerz_Ferse_Rechts",
    "Schmerz_Handgelenk_links", "Schmerz_Handgelenk_rechts",
    "Schmerz_Ellenbogen_links", "Schmerz_Ellenbogen_rechts",
    "Schmerz_Finger_links", "Schmerz_Finger_rechts",
    "Schmerz_Oberarm_links", "Schmerz_Oberarm_rechts",
    "Schmerz_Daumen_links", "Schmerz_Daumen_rechts",
    "Schmerz_Unterarm_links", "Schmerz_Unterarm_rechts"
]

# Assume that any "Schmerz_*" column that is not in binary_target_cols is ordinal.
all_schmerz_cols = [col for col in data.columns if col.startswith("Schmerz_")]
ordinal_target_cols = [col for col in all_schmerz_cols if col not in binary_target_cols]

# Treat +/-inf as missing, then drop every row that lacks an input feature or an
# ordinal label. Missing inputs would otherwise flow into the scaler and the
# network as NaN and poison the loss.
data = data.replace([np.inf, -np.inf], np.nan)
data = data.dropna(subset=input_cols + ordinal_target_cols)

# Convert binary targets to 0/1.
# Series.map() returns NaN for any value that is not a key of the mapping, so the
# result is checked explicitly instead of letting NaN labels reach the loss.
binary_value_map = {True: 1, 'TRUE': 1, 'True': 1,
                    False: 0, 'FALSE': 0, 'False': 0}
for col in binary_target_cols:
    mapped = data[col].map(binary_value_map)
    unmapped = mapped.isna()
    if unmapped.any():
        bad_values = data.loc[unmapped, col].unique().tolist()
        raise ValueError(
            f"Column {col!r} contains values that cannot be mapped to 0/1: {bad_values}"
        )
    data[col] = mapped.astype(int)

# Ensure ordinal targets are integers.
for col in ordinal_target_cols:
    data[col] = data[col].astype(int)

# Extract features and targets.
X = data[input_cols].values.astype(np.float32)
y_binary = data[binary_target_cols].values.astype(np.float32)
y_ordinal = data[ordinal_target_cols].values.astype(np.int64)  # each column: class labels 0–5

# Split the data into training and test sets (80/20 split).
X_train, X_test, yb_train, yb_test, yo_train, yo_test = train_test_split(
    X, y_binary, y_ordinal, test_size=0.2, random_state=42
)

# Scale the input features. The scaler is fitted on the training split only so
# that no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [4]:
#########################################
# 2. PyTorch Dataset and DataLoader
#########################################

class PainDataset(Dataset):
    """In-memory dataset yielding (features, binary targets, ordinal targets).

    All three inputs are copied into tensors once at construction time:
    features and binary targets as float32, ordinal targets as int64.
    """

    def __init__(self, X, y_binary, y_ordinal):
        # Binary targets are float because BCE-style losses expect float targets;
        # ordinal targets stay integer class labels, one column per ordinal target.
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y_binary = torch.tensor(y_binary, dtype=torch.float32)
        self.y_ordinal = torch.tensor(y_ordinal, dtype=torch.long)

    def __len__(self):
        """Number of samples (first dimension of the feature tensor)."""
        return self.X.shape[0]

    def __getitem__(self, idx):
        """Return the (features, binary targets, ordinal targets) triple at `idx`."""
        sample = (self.X[idx], self.y_binary[idx], self.y_ordinal[idx])
        return sample

# Wrap the train and test splits in PainDataset instances.
train_dataset, test_dataset = (
    PainDataset(X_train, yb_train, yo_train),
    PainDataset(X_test, yb_test, yo_test),
)

# Mini-batch loaders: the training data is reshuffled every epoch, the test
# data keeps its original order.
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

In [5]:
#########################################
# 3. Define the Multi–Task PyTorch Model
#########################################

class MultiTaskModel(nn.Module):
    """Shared MLP trunk with one binary head and one small head per ordinal target.

    Args:
        input_dim: number of input features.
        num_binary: number of binary targets (width of the binary logit vector).
        num_ordinal: number of ordinal targets (one head is created for each).
        num_ordinal_classes: number of ordinal classes K; every ordinal head
            emits K-1 threshold logits.

    Note: every ordinal head ends in its own unconstrained Linear(16, K-1), so the
    K-1 threshold logits of a head are learned independently of each other.
    """

    def __init__(self, input_dim, num_binary, num_ordinal, num_ordinal_classes=6):
        super().__init__()

        # Shared trunk: input_dim -> 64 -> 32, each stage followed by batch norm
        # and dropout (see forward() for the exact ordering).
        self.shared_fc1 = nn.Linear(input_dim, 64)
        self.bn1 = nn.BatchNorm1d(64)
        self.dropout1 = nn.Dropout(0.2)
        self.shared_fc2 = nn.Linear(64, 32)
        self.bn2 = nn.BatchNorm1d(32)
        self.dropout2 = nn.Dropout(0.2)

        # Binary head: a single linear layer producing one logit per binary target.
        self.binary_out = nn.Linear(32, num_binary)

        # Ordinal heads: K classes are encoded with K-1 threshold logits per head.
        self.num_thresholds = num_ordinal_classes - 1
        heads = []
        for _ in range(num_ordinal):
            heads.append(self._make_ordinal_head(self.num_thresholds))
        self.ordinal_heads = nn.ModuleList(heads)

    @staticmethod
    def _make_ordinal_head(num_thresholds):
        """Build one ordinal head: 32 -> 16 -> num_thresholds with ReLU and dropout."""
        return nn.Sequential(
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(16, num_thresholds),
        )

    def forward(self, x):
        """Return (binary_logits, ordinal_logits).

        binary_logits has shape [batch_size, num_binary]; ordinal_logits is a list
        with one [batch_size, num_thresholds] tensor per ordinal target. Both are
        raw logits (no sigmoid applied).
        """
        # Shared trunk: linear -> ReLU -> batch norm -> dropout, twice.
        hidden = self.dropout1(self.bn1(F.relu(self.shared_fc1(x))))
        hidden = self.dropout2(self.bn2(F.relu(self.shared_fc2(hidden))))

        binary_logits = self.binary_out(hidden)

        ordinal_logits = []
        for head in self.ordinal_heads:
            ordinal_logits.append(head(hidden))
        return binary_logits, ordinal_logits

# Instantiate the model.
# Seed torch's global RNG first so that weight initialisation (and any later
# use of the global generator, such as DataLoader shuffling) is reproducible
# on a fresh kernel.
torch.manual_seed(42)

# Derive the layer sizes from the column lists so they cannot drift apart from
# the data actually fed to the network.
input_dim = len(input_cols)  # number of input features
num_binary = len(binary_target_cols)
num_ordinal = len(ordinal_target_cols)
num_ordinal_classes = 6  # e.g., pain levels 0-5; must exceed the largest ordinal label

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MultiTaskModel(input_dim, num_binary, num_ordinal, num_ordinal_classes).to(device)

In [6]:
#########################################
# 4. Training Setup
#########################################

# Loss for the binary targets: sigmoid + binary cross entropy computed on raw
# logits, averaged over every element of the batch.
criterion_binary = nn.BCEWithLogitsLoss(reduction="mean")

# Define the custom CORAL loss for ordinal targets.
def coral_loss(logits, labels):
    """
    Threshold-wise binary cross entropy for one ordinal target.

    logits: tensor of shape (batch_size, K-1), one raw logit per threshold
    labels: tensor of shape (batch_size,), with integer class labels in {0,1,...,K-1}

    Returns the mean binary cross entropy over all (sample, threshold) pairs.
    """
    # Unpacking enforces that logits is 2-D: (batch_size, num_thresholds).
    _, num_thresholds = logits.size()

    # Threshold indices 0..K-2; broadcasting against labels[:, None] produces the
    # (batch_size, K-1) comparison grid without materialising an expanded copy.
    threshold_ids = torch.arange(num_thresholds, device=labels.device)

    # Cumulative encoding: entry k is 1 exactly when the label exceeds threshold k.
    exceeds = labels.unsqueeze(1) > threshold_ids.unsqueeze(0)
    target = exceeds.float()

    return F.binary_cross_entropy_with_logits(logits, target)

# Optionally, set weights for each loss term (adjust as needed).
loss_weight_binary = 1.0
loss_weight_ordinal = 1.0

# Use the Adam optimizer.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Halve the learning rate when the monitored loss has not improved for 5 epochs.
# The `verbose` argument is deliberately not passed: it is deprecated in recent
# PyTorch releases. To inspect the current learning rate, read
# optimizer.param_groups[0]["lr"] after scheduler.step(...).
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.5, patience=5
)

num_epochs = 50  # adjust as needed



In [7]:
#########################################
# 5. Training Loop
#########################################

for epoch in range(num_epochs):
    model.train()  # enable dropout and batch-norm statistic updates
    total_loss = 0.0

    for batch in train_loader:
        # Move features, binary targets and ordinal targets to the model's device.
        # yo_batch has shape [batch_size, num_ordinal].
        X_batch, yb_batch, yo_batch = (tensor.to(device) for tensor in batch)

        optimizer.zero_grad()
        binary_logits, ordinal_logits_list = model(X_batch)

        # Binary part: one BCE-with-logits loss over all binary targets.
        loss_binary = criterion_binary(binary_logits, yb_batch)

        # Ordinal part: head i is scored against label column i; the per-head
        # losses are summed (not averaged) across heads.
        loss_ordinal = sum(
            (
                coral_loss(head_logits, yo_batch[:, head_idx])
                for head_idx, head_logits in enumerate(ordinal_logits_list)
            ),
            0.0,
        )

        # Weighted sum of both components, then one optimisation step.
        loss = loss_weight_binary * loss_binary + loss_weight_ordinal * loss_ordinal
        loss.backward()
        optimizer.step()

        # Weight the batch-mean loss by the batch size so that the epoch average
        # is taken per sample, even when the last batch is smaller.
        total_loss += loss.item() * len(X_batch)

    avg_loss = total_loss / len(train_dataset)
    print(f"Epoch {epoch+1}/{num_epochs}, Training Loss: {avg_loss:.4f}")

    # The plateau scheduler monitors the average training loss of the epoch.
    scheduler.step(avg_loss)

Epoch 1/50, Training Loss: 13.0068
Epoch 2/50, Training Loss: 12.0387
Epoch 3/50, Training Loss: 11.2948
Epoch 4/50, Training Loss: 10.9307
Epoch 5/50, Training Loss: 10.7485
Epoch 6/50, Training Loss: 10.6838
Epoch 7/50, Training Loss: 10.6127
Epoch 8/50, Training Loss: 10.5549
Epoch 9/50, Training Loss: 10.5380
Epoch 10/50, Training Loss: 10.4907
Epoch 11/50, Training Loss: 10.4840
Epoch 12/50, Training Loss: 10.4587
Epoch 13/50, Training Loss: 10.4490
Epoch 14/50, Training Loss: 10.4617
Epoch 15/50, Training Loss: 10.4298
Epoch 16/50, Training Loss: 10.4130
Epoch 17/50, Training Loss: 10.3860
Epoch 18/50, Training Loss: 10.3960
Epoch 19/50, Training Loss: 10.3983
Epoch 20/50, Training Loss: 10.3817
Epoch 21/50, Training Loss: 10.3789
Epoch 22/50, Training Loss: 10.3697
Epoch 23/50, Training Loss: 10.3671
Epoch 24/50, Training Loss: 10.3755
Epoch 25/50, Training Loss: 10.3419
Epoch 26/50, Training Loss: 10.3507
Epoch 27/50, Training Loss: 10.3360
Epoch 28/50, Training Loss: 10.3377
E

In [8]:
#########################################
# 7. Inference Example
#########################################

model.eval()  # disable dropout, use running batch-norm statistics
with torch.no_grad():
    # Assume X_new is a numpy array of shape (n_samples, 7) (and already scaled!)
    # For demonstration, we use X_test.
    X_new = torch.tensor(X_test, dtype=torch.float32).to(device)
    binary_logits, ordinal_logits_list = model(X_new)

    # For binary outputs: apply sigmoid to get probabilities.
    binary_preds = torch.sigmoid(binary_logits)

    # For ordinal outputs: count how many thresholds are passed.
    ordinal_preds = []
    for logits in ordinal_logits_list:
        # Probability that the label exceeds each threshold.
        prob_thresholds = torch.sigmoid(logits)
        # The number of thresholds with probability > 0.5 is the predicted label.
        # The training targets were built as (label > k) from the unshifted
        # labels, so this count is already on the same scale as the labels in
        # the data; adding an offset here would make every prediction off by one.
        pred_class = (prob_thresholds > 0.5).sum(dim=1)
        ordinal_preds.append(pred_class.cpu().numpy())

    # For example, print first 5 predictions.
    print("Binary predictions (first 5):")
    print(binary_preds[:5].cpu().numpy())
    print("Ordinal predictions (first ordinal target, first 5):")
    print(ordinal_preds[0][:5])

Binary predictions (first 5):
[[0.38379854 0.40166175 0.33546326 0.3647682  0.18747738 0.19689755
  0.15379342 0.20028712 0.07101373 0.10339792 0.21425523 0.27681246
  0.1677659  0.22043556 0.14158237 0.1467198  0.06142606 0.07107121]
 [0.49840266 0.44312155 0.39570817 0.42525753 0.25400436 0.24262118
  0.24148947 0.31179896 0.14494485 0.21396242 0.34880567 0.40847367
  0.24583411 0.34780803 0.2582178  0.32210392 0.11206195 0.11801917]
 [0.41499674 0.40447053 0.33111814 0.37698865 0.21661083 0.21829653
  0.12861454 0.1824023  0.11278104 0.13221511 0.21244527 0.2155318
  0.14272492 0.19313501 0.1321195  0.13210255 0.06439297 0.12196535]
 [0.36432073 0.38618973 0.2643989  0.3047712  0.22311851 0.25332028
  0.19281267 0.23335063 0.16253036 0.16305634 0.25093216 0.28314835
  0.15621722 0.15561493 0.18524674 0.182464   0.09820769 0.14699021]
 [0.3645084  0.2619948  0.31710893 0.28480968 0.28303197 0.23915073
  0.15096016 0.18667065 0.10777742 0.11775968 0.2737351  0.23522149
  0.18061462 0.

In [9]:
#########################################
# 7. Inference Example
#########################################

# To make predictions on new data:
model.eval()  # disable dropout, use running batch-norm statistics
with torch.no_grad():
    # Assume X_new is a numpy array of shape (n_samples, 7) (and already scaled!)
    # For demonstration, we use X_test.
    X_new = torch.tensor(X_test, dtype=torch.float32).to(device)
    binary_logits, ordinal_logits_list = model(X_new)

    # For binary outputs: apply sigmoid to get probabilities.
    binary_preds = torch.sigmoid(binary_logits)

    # For ordinal outputs: each head emits K-1 *threshold* logits, not K class
    # scores, so argmax over them does not yield a class. The predicted label is
    # the number of thresholds passed; sigmoid(z) > 0.5 is equivalent to z > 0.
    # Because training used the labels unshifted, this count is already on the
    # label scale of the data and no offset is added.
    ordinal_preds = [
        (logits > 0).sum(dim=1).cpu().numpy() for logits in ordinal_logits_list
    ]

    # For example, print first 5 predictions.
    print("Binary predictions (first 5):")
    print(binary_preds[:5].cpu().numpy())
    print("Ordinal predictions (first ordinal target, first 5):")
    print(ordinal_preds[0][:5])

Binary predictions (first 5):
[[0.38379854 0.40166175 0.33546326 0.3647682  0.18747738 0.19689755
  0.15379342 0.20028712 0.07101373 0.10339792 0.21425523 0.27681246
  0.1677659  0.22043556 0.14158237 0.1467198  0.06142606 0.07107121]
 [0.49840266 0.44312155 0.39570817 0.42525753 0.25400436 0.24262118
  0.24148947 0.31179896 0.14494485 0.21396242 0.34880567 0.40847367
  0.24583411 0.34780803 0.2582178  0.32210392 0.11206195 0.11801917]
 [0.41499674 0.40447053 0.33111814 0.37698865 0.21661083 0.21829653
  0.12861454 0.1824023  0.11278104 0.13221511 0.21244527 0.2155318
  0.14272492 0.19313501 0.1321195  0.13210255 0.06439297 0.12196535]
 [0.36432073 0.38618973 0.2643989  0.3047712  0.22311851 0.25332028
  0.19281267 0.23335063 0.16253036 0.16305634 0.25093216 0.28314835
  0.15621722 0.15561493 0.18524674 0.182464   0.09820769 0.14699021]
 [0.3645084  0.2619948  0.31710893 0.28480968 0.28303197 0.23915073
  0.15096016 0.18667065 0.10777742 0.11775968 0.2737351  0.23522149
  0.18061462 0.