In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from torch import nn, optim
from tqdm import tqdm

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

### Load dataset

In [3]:
def create_dataset(train_x: torch.Tensor, train_y: torch.Tensor, test_size=0.3):
    indices = torch.randperm(train_x.shape[0])
    test_size = int(train_x.shape[0] * test_size)
    test_indices, train_indices = indices[:test_size], indices[test_size:]
    return {'train_input': train_x[train_indices].to(device), 'test_input': train_x[test_indices].to(device),
            'train_label': train_y[train_indices].to(device), 'test_label': train_y[test_indices].to(device)}

In [4]:
# Read the train set files 
df_x = pd.read_csv('dataset/X_train.csv').drop(columns=['id'])
df_y = pd.read_csv('dataset/y_train.csv').drop(columns=['id', 'faulty'])
df_y['trq_target'] = df_x['trq_measured'] / (df_y['trq_margin'] / 100 + 1)
df_x_normalized = (df_x - df_x.mean()) / df_x.std()
df_y_normalized = (df_y['trq_target'] - df_y['trq_target'].mean()) / df_y['trq_target'].std()
df_x_normalized.drop(columns=['trq_measured'], inplace=True)

train_x_1 = torch.tensor(df_x_normalized.values, dtype=torch.float32, device=device)
train_y_1 = torch.tensor(df_y_normalized.values, dtype=torch.float32, device=device).unsqueeze(1)
assert (train_x_1.shape[0] == train_y_1.shape[0])

In [5]:
# Downsample to 10%
random_indices = np.random.choice(train_x_1.shape[0], size=train_x_1.shape[0] // 10, replace=False)
train_x_10 = train_x_1[random_indices]
train_y_10 = train_y_1[random_indices]
# Downsample to 1%
random_indices = np.random.choice(train_x_1.shape[0], size=train_x_1.shape[0] // 100, replace=False)
train_x_100 = train_x_1[random_indices]
train_y_100 = train_y_1[random_indices]

In [24]:
def train(model, optimizer, inputs: torch.Tensor, labels, epochs=5, batch_size=100, deterministic=False):
    criterion_stochastic = nn.GaussianNLLLoss()
    criterion_deterministic = nn.MSELoss()
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.999)
    for epoch in range(epochs):
        model.train()
        with tqdm(range(inputs.size(0) // batch_size)) as pbar:
            for i in pbar:
                x = inputs[i * batch_size:(i + 1) * batch_size].to(device)
                y = labels[i * batch_size:(i + 1) * batch_size].to(device)
                optimizer.zero_grad()
                if deterministic:
                    output = model(x)
                    loss = criterion_deterministic(output, y)
                else:
                    mu, var = model(x)
                    loss = criterion_stochastic(mu, y, var)
                loss.backward()
                optimizer.step(closure=lambda: loss)
                pbar.set_postfix(loss=loss.item(), lr=optimizer.param_groups[0]['lr'])
                scheduler.step()

### MLP training

In [12]:
class MLP(nn.Module):
    def __init__(self, layers_size, deterministic=False):
        super(MLP, self).__init__()
        layers = []
        for i in range(len(layers_size) - 1):
            layers.append(nn.Linear(layers_size[i], layers_size[i + 1]))
            if i < len(layers_size) - 2:  # Add activation for all layers except the last one
                layers.append(nn.Sigmoid())
        self.model = nn.Sequential(*layers)
        self.deterministic = deterministic

    def forward(self, x):
        output = self.model(x)
        if self.deterministic:
            return output
        else:
            mu = output[:, 0]
            var = F.softplus(output[:, 1]) + 1e-6
            return mu, var

In [13]:
mlp = MLP([6, 256, 256, 1], deterministic=True).to(device)

In [25]:
train(mlp, optim.Adam(mlp.parameters(), lr=0.0004), train_x_10, train_y_10, epochs=50,
      batch_size=1024, deterministic=True)

100%|██████████| 72/72 [00:00<00:00, 166.56it/s, loss=0.000323, lr=0.000373]
100%|██████████| 72/72 [00:00<00:00, 321.93it/s, loss=0.000318, lr=0.000347]
100%|██████████| 72/72 [00:00<00:00, 312.90it/s, loss=0.000315, lr=0.000323]
100%|██████████| 72/72 [00:00<00:00, 329.93it/s, loss=0.00031, lr=0.0003]   
100%|██████████| 72/72 [00:00<00:00, 323.61it/s, loss=0.000306, lr=0.000279]
100%|██████████| 72/72 [00:00<00:00, 312.98it/s, loss=0.000301, lr=0.00026] 
100%|██████████| 72/72 [00:00<00:00, 327.26it/s, loss=0.000297, lr=0.000242]
100%|██████████| 72/72 [00:00<00:00, 319.92it/s, loss=0.000293, lr=0.000225]
100%|██████████| 72/72 [00:00<00:00, 339.72it/s, loss=0.000289, lr=0.000209]
100%|██████████| 72/72 [00:00<00:00, 327.12it/s, loss=0.000285, lr=0.000195]
100%|██████████| 72/72 [00:00<00:00, 313.00it/s, loss=0.000281, lr=0.000181]
100%|██████████| 72/72 [00:00<00:00, 327.71it/s, loss=0.000278, lr=0.000169]
100%|██████████| 72/72 [00:00<00:00, 318.57it/s, loss=0.000275, lr=0.000157]

In [30]:
torch.save(mlp.state_dict(), 'models/torque_target_deterministic_mlp.pt')

In [32]:
mlp.load_state_dict(torch.load('models/torque_target_deterministic_mlp.pt', weights_only=True))
mlp.eval()
for i in range(10):
    x_test = train_x_1[i].unsqueeze(0)
    y_test = train_y_1[i].unsqueeze(0)
    print(y_test[0].item(), mlp(x_test))

-0.2830093502998352 tensor([[-0.2956]], device='cuda:0', grad_fn=<AddmmBackward0>)
-1.267451524734497 tensor([[-1.2753]], device='cuda:0', grad_fn=<AddmmBackward0>)
-0.4435439705848694 tensor([[-0.4572]], device='cuda:0', grad_fn=<AddmmBackward0>)
-0.3035033643245697 tensor([[-0.3209]], device='cuda:0', grad_fn=<AddmmBackward0>)
-0.5718362927436829 tensor([[-0.5891]], device='cuda:0', grad_fn=<AddmmBackward0>)
-0.8585125803947449 tensor([[-0.8682]], device='cuda:0', grad_fn=<AddmmBackward0>)
1.492055892944336 tensor([[1.4708]], device='cuda:0', grad_fn=<AddmmBackward0>)
-0.8199540972709656 tensor([[-0.8420]], device='cuda:0', grad_fn=<AddmmBackward0>)
1.0689382553100586 tensor([[1.0729]], device='cuda:0', grad_fn=<AddmmBackward0>)
2.143380641937256 tensor([[2.1397]], device='cuda:0', grad_fn=<AddmmBackward0>)
