In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

In [2]:
# Seed the NumPy and PyTorch global random generators for reproducible runs
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x7ff9993addf0>

In [3]:
# Synthetic dataset: two float32 feature matrices drawn uniformly from [0, 1)
num_samples = 1000
n_features_a, n_features_b = 5, 6

# input_A is sampled first, then input_B (the draw order fixes the values
# obtained from the global NumPy generator)
x_a = np.random.rand(num_samples, n_features_a).astype(np.float32)
x_b = np.random.rand(num_samples, n_features_b).astype(np.float32)

# Main target: linear in the first three input_A columns plus standard-normal
# noise (adding the float64 noise makes the result float64)
noise_main = np.random.randn(num_samples)
y_main = 3 * x_a[:, 0] + 2 * x_a[:, 1] - 5 * x_a[:, 2] + noise_main

# Auxiliary target: linear in the first two input_B columns plus its own
# independent standard-normal noise
noise_aux = np.random.randn(num_samples)
y_aux = 0.5 * x_b[:, 0] + 1.5 * x_b[:, 1] + noise_aux

# Hold out 20% of the rows for validation; all four arrays are split with the
# same row permutation, returned as (train, valid) pairs in argument order
split_arrays = train_test_split(x_a, x_b, y_main, y_aux, test_size=0.2, random_state=42)
(
    X_train_A, X_valid_A,
    X_train_B, X_valid_B,
    y_train_main, y_valid_main,
    y_train_aux, y_valid_aux,
) = split_arrays

In [4]:
# Define the model
class CustomModel(nn.Module):
    """Two-input, two-output regression network.

    ``input_B`` is sent through the deep path
    (Linear -> ReLU -> BatchNorm1d -> Linear). ``input_A`` bypasses that path
    and is concatenated with the deep path's output just before the main
    head. The auxiliary head reads the deep path's output alone.

    Args:
        wide_features: Number of columns in ``input_A``.
        deep_features: Number of columns in ``input_B``.
        hidden_units: Width of both hidden layers.

    The defaults (5, 6, 30) reproduce the originally hard-coded architecture,
    so ``CustomModel()`` builds the same layers in the same order.
    """

    def __init__(self, wide_features=5, deep_features=6, hidden_units=30):
        super().__init__()
        self.fc1 = nn.Linear(deep_features, hidden_units)
        self.bn1 = nn.BatchNorm1d(hidden_units)
        self.fc2 = nn.Linear(hidden_units, hidden_units)
        # The main head sees the wide input concatenated with the deep output.
        self.main_output = nn.Linear(wide_features + hidden_units, 1)
        self.aux_output = nn.Linear(hidden_units, 1)

    def forward(self, input_A, input_B):
        """Run both heads.

        Args:
            input_A: Tensor of shape (batch, wide_features).
            input_B: Tensor of shape (batch, deep_features).

        Returns:
            Tuple ``(main_output, aux_output)``, each of shape (batch, 1).
        """
        x = F.relu(self.fc1(input_B))
        x = self.bn1(x)
        # NOTE(review): no activation follows fc2, so fc2 composes linearly
        # with both output heads - confirm this is intended.
        x = self.fc2(x)

        # Concatenate the wide input with the deep path's output
        concat = torch.cat((input_A, x), dim=1)

        main_output = self.main_output(concat)
        aux_output = self.aux_output(x)

        return main_output, aux_output

In [9]:
# Convert the train/validation splits to float32 torch tensors.
# The splits are built from NumPy arrays, and ndarrays have no
# .clone()/.detach(), so each one goes through torch.as_tensor first. That call
# also accepts tensors, so re-running this cell on already-converted data works.
def _to_float_tensor(values, as_column=False):
    """Return an independent float32 tensor copy of ``values``.

    Args:
        values: NumPy array or torch tensor to convert.
        as_column: When True, reshape the result to (n, 1) so a 1-D target
            vector lines up with the model's (batch, 1) outputs.

    Returns:
        A detached float32 tensor that does not share memory with ``values``.
    """
    # as_tensor may alias the source buffer; clone() makes the copy explicit.
    tensor = torch.as_tensor(values, dtype=torch.float32).detach().clone()
    return tensor.reshape(-1, 1) if as_column else tensor


X_train_A = _to_float_tensor(X_train_A)
X_train_B = _to_float_tensor(X_train_B)
y_train_main = _to_float_tensor(y_train_main, as_column=True)
y_train_aux = _to_float_tensor(y_train_aux, as_column=True)

X_valid_A = _to_float_tensor(X_valid_A)
X_valid_B = _to_float_tensor(X_valid_B)
y_valid_main = _to_float_tensor(y_valid_main, as_column=True)
y_valid_aux = _to_float_tensor(y_valid_aux, as_column=True)


In [10]:
# Build the network with its default layer sizes
model = CustomModel()

# Plain SGD over every model parameter; each head is scored with its own
# mean-squared-error criterion
optimizer = optim.SGD(params=model.parameters(), lr=1e-3)
criterion_main, criterion_aux = nn.MSELoss(), nn.MSELoss()

# TensorDataset needs torch tensors, so X_train_A, X_train_B, y_train_main,
# y_train_aux and their validation counterparts must already be converted.
# Only the training loader reshuffles its rows each epoch.
BATCH_SIZE = 32
train_dataset = TensorDataset(X_train_A, X_train_B, y_train_main, y_train_aux)
valid_dataset = TensorDataset(X_valid_A, X_valid_B, y_valid_main, y_valid_aux)
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(dataset=valid_dataset, batch_size=BATCH_SIZE)

# Train for a fixed number of epochs, reporting per-sample average losses for
# both heads on the training and validation sets after every epoch
num_epochs = 20
for epoch in range(num_epochs):
    # ---- training pass (BatchNorm uses batch statistics) ----
    model.train()
    epoch_main_loss = 0.0
    epoch_aux_loss = 0.0

    for wide_batch, deep_batch, main_target, aux_target in train_loader:
        rows_in_batch = wide_batch.size(0)

        optimizer.zero_grad()
        main_pred, aux_pred = model(wide_batch, deep_batch)
        loss_main = criterion_main(main_pred, main_target)
        loss_aux = criterion_aux(aux_pred, aux_target)
        # The optimised objective favours the main task 9:1 over the auxiliary one
        total_loss = 0.9 * loss_main + 0.1 * loss_aux
        total_loss.backward()
        optimizer.step()

        # MSELoss returns a batch mean; weight it by the batch size so the
        # epoch figure is a true per-sample average
        epoch_main_loss += loss_main.item() * rows_in_batch
        epoch_aux_loss += loss_aux.item() * rows_in_batch

    train_rows = len(train_loader.dataset)
    epoch_main_loss /= train_rows
    epoch_aux_loss /= train_rows

    # ---- validation pass (no gradients, BatchNorm uses running statistics) ----
    model.eval()
    valid_main_loss = 0.0
    valid_aux_loss = 0.0
    with torch.no_grad():
        for wide_batch, deep_batch, main_target, aux_target in valid_loader:
            rows_in_batch = wide_batch.size(0)
            main_pred, aux_pred = model(wide_batch, deep_batch)
            valid_main_loss += criterion_main(main_pred, main_target).item() * rows_in_batch
            valid_aux_loss += criterion_aux(aux_pred, aux_target).item() * rows_in_batch

    valid_rows = len(valid_loader.dataset)
    valid_main_loss /= valid_rows
    valid_aux_loss /= valid_rows

    epoch_report = (
        f"Epoch [{epoch + 1}/{num_epochs}] "
        f"Main Loss: Train {epoch_main_loss:.4f}, Valid {valid_main_loss:.4f} "
        f"Aux Loss: Train {epoch_aux_loss:.4f}, Valid {valid_aux_loss:.4f}"
    )
    print(epoch_report)

# Use the trained model for predictions, etc.

Epoch [1/20] Main Loss: Train 4.3268, Valid 3.7887 Aux Loss: Train 1.6637, Valid 1.6789
Epoch [2/20] Main Loss: Train 4.2782, Valid 3.7871 Aux Loss: Train 1.6513, Valid 1.6892
Epoch [3/20] Main Loss: Train 4.2501, Valid 3.7577 Aux Loss: Train 1.6450, Valid 1.7024
Epoch [4/20] Main Loss: Train 4.1867, Valid 3.7392 Aux Loss: Train 1.6270, Valid 1.6786
Epoch [5/20] Main Loss: Train 4.1784, Valid 3.7170 Aux Loss: Train 1.6200, Valid 1.6690
Epoch [6/20] Main Loss: Train 4.1173, Valid 3.6940 Aux Loss: Train 1.5940, Valid 1.6644
Epoch [7/20] Main Loss: Train 4.1198, Valid 3.6716 Aux Loss: Train 1.5852, Valid 1.6438
Epoch [8/20] Main Loss: Train 4.0855, Valid 3.6438 Aux Loss: Train 1.5948, Valid 1.6410
Epoch [9/20] Main Loss: Train 4.0499, Valid 3.6170 Aux Loss: Train 1.5731, Valid 1.6201
Epoch [10/20] Main Loss: Train 4.0190, Valid 3.5941 Aux Loss: Train 1.5632, Valid 1.6270
Epoch [11/20] Main Loss: Train 3.9890, Valid 3.5714 Aux Loss: Train 1.5462, Valid 1.6216
Epoch [12/20] Main Loss: Train