In [1]:
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from tqdm import trange

from bikebench.prediction.prediction_utils import TorchStandardScaler, Preprocessor
from bikebench.prediction import aesthetics_predictor
from bikebench.data_loading import data_loading
from bikebench.resource_utils import models_and_scalers_path
from bikebench.prediction.prediction_utils import TorchStandardScaler
from bikebench.prediction.aesthetics_predictor import get_aesthetics_model

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
X_tv, Y_tv = data_loading.load_aesthetics_train()

In [3]:
X_tv_tens = torch.tensor(X_tv.values, dtype=torch.float32, device=device)
Y_tv_tens = torch.tensor(Y_tv, dtype=torch.float32, device=device)

In [4]:
X_tv_tens = aesthetics_predictor.remove_wall_thickness_and_material(X_tv_tens, device)

In [5]:
scaler = TorchStandardScaler().to(device)
scaler.fit(X_tv_tens)


scaler_path = models_and_scalers_path("aesthetics_scaler.pt")
torch.save(scaler, scaler_path)

X_tv_tens = torch.tensor(scaler.transform(X_tv_tens), dtype=torch.float32, device=device)

  X_tv_tens = torch.tensor(scaler.transform(X_tv_tens), dtype=torch.float32, device=device)


In [6]:
X_train, X_val, Y_train, Y_val = train_test_split(X_tv_tens, Y_tv_tens, test_size=0.2, random_state=42)

In [7]:
model = get_aesthetics_model(dropout_on=True).to(device)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0003)

# Training loop
num_epochs = 200 
batch_size = 32
best_val_loss = float('inf')
best_model = None

bar = trange(num_epochs, desc="Training")
for epoch in bar:
    model.train()
    permutation = torch.randperm(X_train.size(0))
    for i in range(0, X_train.size(0), batch_size):
        indices = permutation[i : i + batch_size]
        batch_x, batch_y = X_train[indices], Y_train[indices]

        optimizer.zero_grad()
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val)
        val_loss = criterion(val_outputs, Y_val)

    if val_loss.item() < best_val_loss:
        best_val_loss = val_loss.item()
        best_model = model

    bar.set_postfix({
        'loss': f'{loss.item():.4f}',
        'val_loss': f'{val_loss.item():.4f}',
        'best_val': f'{best_val_loss:.4f}'
    })


Training: 100%|██████████| 200/200 [4:19:40<00:00, 77.90s/it, loss=0.0028, val_loss=0.0026, best_val=0.0026]  


In [None]:
save_path = models_and_scalers_path("aesthetics_model_weights.pt")
# torch.save(best_model.state_dict(), save_path)

In [9]:
state  = torch.load(save_path, weights_only=True)

model = get_aesthetics_model(dropout_on = False).to(device)
model.load_state_dict(state)

<All keys matched successfully>

In [10]:
import evaluators

evaluators.evaluate_aesthetics(model, Preprocessor(scaler_path=scaler_path, preprocess_fn=aesthetics_predictor.remove_wall_thickness_and_material, device=device), device=device)

Predicted embedding more similar to GT than : 99.57% of test set designs, on average.


0.0060665542259812355