In [None]:
import torch
from torch import nn
from pipeline import pipeline, preprocessing
from util import sample, generate_submission

In [None]:
# Runtime setup: seed PyTorch, pick the compute device, and load the data once.
SEED = 42
# Seed PyTorch's random number generators so a fresh top-to-bottom run starts
# from the same random state. Re-running a single training cell later does NOT
# re-seed. NOTE(review): `preprocessing` / `pipeline` may also draw from other
# RNGs (e.g. `random`, NumPy) that this call does not cover — confirm there.
torch.manual_seed(SEED)

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
DATA_PATH = './data/train.txt'

# `preprocessing` yields a 9-item bundle. The bundle is kept as a whole because
# every later `pipeline(...)` call receives it unchanged; the individual pieces
# are unpacked for direct use in the evaluation / submission cells.
data_bundle = preprocessing(DATA_PATH)
train_df, val_df, test_df, user_map, movie_map, n_users, n_movies, min_value, max_value = data_bundle
print(f"Users: {n_users}, Movies: {n_movies}")

# Registry of models returned by `pipeline`, keyed by model name. Later cells
# both add to it and pass it on (NCF and the ensemble read from it).
trained_models = {}

Scaling ratings from [1, 5] to [0, 1].
Users: 943, Movies: 1680


In [46]:
# Hyperparameters for the 'gmf' run. Keys are read by `pipeline`; their exact
# meaning is defined there.
gmf_config = dict(
    epochs=7,
    batch_size=512,
    learning_rate=0.00065,
    weight_decay=1e-5,
    embedding_dim=64,
    device=device,
)

# Store the result under 'gmf' so later cells can look it up in the registry
# (the ensemble cell lists 'gmf' among its members).
trained_models['gmf'] = pipeline('gmf', gmf_config, data_bundle)


Training Model: best_gmf
Epoch 1/7 | Train Loss: 0.0734 | Val Loss: 0.0669 | Val RMSE: 1.0344
Epoch 2/7 | Train Loss: 0.0635 | Val Loss: 0.0581 | Val RMSE: 0.9642
Epoch 3/7 | Train Loss: 0.0549 | Val Loss: 0.0524 | Val RMSE: 0.9152
Epoch 4/7 | Train Loss: 0.0495 | Val Loss: 0.0498 | Val RMSE: 0.8923
Epoch 5/7 | Train Loss: 0.0458 | Val Loss: 0.0488 | Val RMSE: 0.8832
Epoch 6/7 | Train Loss: 0.0422 | Val Loss: 0.0480 | Val RMSE: 0.8766
Epoch 7/7 | Train Loss: 0.0387 | Val Loss: 0.0479 | Val RMSE: 0.8754
Best Val RMSE: 0.8754
Test RMSE: 0.9173


In [63]:
# Hyperparameters for the 'dmf' run. The user and item sides get separate
# hidden-layer size lists, here set to the same values.
dmf_config = dict(
    epochs=20,
    batch_size=512,
    learning_rate=0.003,
    weight_decay=1e-6,
    embedding_dim=64,
    user_hidden_dims=[64, 32],
    item_hidden_dims=[64, 32],
    dropout=0.25,
    device=device,
)

# Store the result under 'dmf' in the shared model registry.
trained_models['dmf'] = pipeline('dmf', dmf_config, data_bundle)


Training Model: best_dmf
Epoch 1/20 | Train Loss: 0.0712 | Val Loss: 0.0555 | Val RMSE: 0.9427
Epoch 2/20 | Train Loss: 0.0570 | Val Loss: 0.0533 | Val RMSE: 0.9234
Epoch 3/20 | Train Loss: 0.0553 | Val Loss: 0.0523 | Val RMSE: 0.9147
Epoch 4/20 | Train Loss: 0.0547 | Val Loss: 0.0524 | Val RMSE: 0.9157
Epoch 5/20 | Train Loss: 0.0540 | Val Loss: 0.0533 | Val RMSE: 0.9237
Epoch 6/20 | Train Loss: 0.0538 | Val Loss: 0.0518 | Val RMSE: 0.9107
Epoch 7/20 | Train Loss: 0.0536 | Val Loss: 0.0516 | Val RMSE: 0.9085
Epoch 8/20 | Train Loss: 0.0533 | Val Loss: 0.0519 | Val RMSE: 0.9113
Epoch 9/20 | Train Loss: 0.0533 | Val Loss: 0.0515 | Val RMSE: 0.9075
Epoch 10/20 | Train Loss: 0.0531 | Val Loss: 0.0512 | Val RMSE: 0.9050
Epoch 11/20 | Train Loss: 0.0526 | Val Loss: 0.0511 | Val RMSE: 0.9039
Epoch 12/20 | Train Loss: 0.0525 | Val Loss: 0.0515 | Val RMSE: 0.9074
Epoch 13/20 | Train Loss: 0.0524 | Val Loss: 0.0511 | Val RMSE: 0.9045
Epoch 14/20 | Train Loss: 0.0521 | Val Loss: 0.0527 | Val RM

In [47]:
# Hyperparameters for the 'attention' run: embedding size, hidden-layer sizes,
# number of attention blocks / heads, and regularisation settings.
attn_config = dict(
    epochs=11,
    batch_size=512,
    learning_rate=0.00085,
    weight_decay=1e-3,
    embedding_dim=64,
    hidden_dims=[128, 64],
    n_attention_blocks=2,
    n_heads=4,
    dropout=0.3,
    device=device,
)

# Store the result under 'attention' in the shared model registry.
trained_models['attention'] = pipeline('attention', attn_config, data_bundle)


Training Model: best_attention
Epoch 1/11 | Train Loss: 0.1083 | Val Loss: 0.0665 | Val RMSE: 1.0312
Epoch 2/11 | Train Loss: 0.0650 | Val Loss: 0.0588 | Val RMSE: 0.9703
Epoch 3/11 | Train Loss: 0.0585 | Val Loss: 0.0547 | Val RMSE: 0.9358
Epoch 4/11 | Train Loss: 0.0560 | Val Loss: 0.0548 | Val RMSE: 0.9364
Epoch 5/11 | Train Loss: 0.0549 | Val Loss: 0.0531 | Val RMSE: 0.9220
Epoch 6/11 | Train Loss: 0.0539 | Val Loss: 0.0524 | Val RMSE: 0.9157
Epoch 7/11 | Train Loss: 0.0536 | Val Loss: 0.0527 | Val RMSE: 0.9187
Epoch 8/11 | Train Loss: 0.0530 | Val Loss: 0.0532 | Val RMSE: 0.9222
Epoch 9/11 | Train Loss: 0.0527 | Val Loss: 0.0521 | Val RMSE: 0.9128
Epoch 10/11 | Train Loss: 0.0523 | Val Loss: 0.0519 | Val RMSE: 0.9112
Epoch 11/11 | Train Loss: 0.0520 | Val Loss: 0.0512 | Val RMSE: 0.9054
Best Val RMSE: 0.9054
Test RMSE: 0.9388


In [48]:
# Hyperparameters for the 'ncf' run. No embedding size is given here; the run
# is instead handed the already-fitted models via `pretrained_models`, and
# `freeze_pretrained=False` is passed alongside.
ncf_config = dict(
    epochs=15,
    batch_size=256,
    learning_rate=0.00035,
    weight_decay=0.001,
    dropout=0.3,
    freeze_pretrained=False,
    device=device,
)

# NOTE(review): this passes whatever objects are currently in `trained_models`,
# so the cells that populate it must have been run first. Unlike the ensemble
# cell, no `best_*.pth` checkpoint is reloaded here beforehand — confirm which
# weights `pipeline` is expected to start from.
trained_models['ncf'] = pipeline(
    'ncf',
    ncf_config,
    data_bundle,
    pretrained_models=trained_models,
)


Training Model: best_ncf
Epoch 1/15 | Train Loss: 0.0638 | Val Loss: 0.0493 | Val RMSE: 0.8880
Epoch 2/15 | Train Loss: 0.0528 | Val Loss: 0.0483 | Val RMSE: 0.8788
Epoch 3/15 | Train Loss: 0.0515 | Val Loss: 0.0483 | Val RMSE: 0.8789
Epoch 4/15 | Train Loss: 0.0505 | Val Loss: 0.0486 | Val RMSE: 0.8822
Epoch 5/15 | Train Loss: 0.0500 | Val Loss: 0.0486 | Val RMSE: 0.8816
Epoch 6/15 | Train Loss: 0.0480 | Val Loss: 0.0480 | Val RMSE: 0.8766
Epoch 7/15 | Train Loss: 0.0471 | Val Loss: 0.0484 | Val RMSE: 0.8804
Epoch 8/15 | Train Loss: 0.0467 | Val Loss: 0.0483 | Val RMSE: 0.8789
Epoch 9/15 | Train Loss: 0.0463 | Val Loss: 0.0488 | Val RMSE: 0.8841
Epoch 10/15 | Train Loss: 0.0447 | Val Loss: 0.0498 | Val RMSE: 0.8929
Epoch 11/15 | Train Loss: 0.0442 | Val Loss: 0.0495 | Val RMSE: 0.8895
Epoch 12/15 | Train Loss: 0.0439 | Val Loss: 0.0501 | Val RMSE: 0.8955
Epoch 13/15 | Train Loss: 0.0430 | Val Loss: 0.0501 | Val RMSE: 0.8956
Epoch 14/15 | Train Loss: 0.0426 | Val Loss: 0.0507 | Val RM

In [66]:
# Hyperparameters for the 'nmf' run: a larger batch and embedding than the
# other matrix-factorisation cells, plus a three-layer MLP size list.
nmf_config = dict(
    epochs=9,
    batch_size=1024,
    learning_rate=0.0005,
    weight_decay=1e-6,
    embedding_dim=128,
    mlp_hidden_dims=[128, 64, 32],
    dropout=0.1,
    device=device,
)

# Store the result under 'nmf' in the shared model registry.
trained_models['nmf'] = pipeline('nmf', nmf_config, data_bundle)


Training Model: best_nmf
Epoch 1/9 | Train Loss: 0.2065 | Val Loss: 0.0586 | Val RMSE: 0.9681
Epoch 2/9 | Train Loss: 0.0619 | Val Loss: 0.0550 | Val RMSE: 0.9378
Epoch 3/9 | Train Loss: 0.0587 | Val Loss: 0.0547 | Val RMSE: 0.9359
Epoch 4/9 | Train Loss: 0.0569 | Val Loss: 0.0522 | Val RMSE: 0.9143
Epoch 5/9 | Train Loss: 0.0523 | Val Loss: 0.0502 | Val RMSE: 0.8961
Epoch 6/9 | Train Loss: 0.0464 | Val Loss: 0.0494 | Val RMSE: 0.8893
Epoch 7/9 | Train Loss: 0.0406 | Val Loss: 0.0481 | Val RMSE: 0.8775
Epoch 8/9 | Train Loss: 0.0350 | Val Loss: 0.0489 | Val RMSE: 0.8842
Epoch 9/9 | Train Loss: 0.0297 | Val Loss: 0.0486 | Val RMSE: 0.8817
Best Val RMSE: 0.8775
Test RMSE: 0.9013


In [67]:
# Hyperparameters for the 'lightgcn' run. In addition to the usual settings the
# config carries `train_df`: the first element of `data_bundle`, i.e. the
# training frame as unpacked in the setup cell.
lightgcn_config = dict(
    epochs=30,
    batch_size=512,
    learning_rate=0.002,
    weight_decay=1e-6,
    embedding_dim=128,
    n_layers=6,
    device=device,
    train_df=data_bundle[0],
)

# Store the result under 'lightgcn'; the ensemble cell lists it as a member.
trained_models['lightgcn'] = pipeline('lightgcn', lightgcn_config, data_bundle)


Training Model: best_lightgcn
Epoch 1/30 | Train Loss: 0.1484 | Val Loss: 0.1030 | Val RMSE: 1.2837
Epoch 2/30 | Train Loss: 0.0895 | Val Loss: 0.0847 | Val RMSE: 1.1641
Epoch 3/30 | Train Loss: 0.0768 | Val Loss: 0.0751 | Val RMSE: 1.0962
Epoch 4/30 | Train Loss: 0.0698 | Val Loss: 0.0693 | Val RMSE: 1.0532
Epoch 5/30 | Train Loss: 0.0656 | Val Loss: 0.0662 | Val RMSE: 1.0289
Epoch 6/30 | Train Loss: 0.0628 | Val Loss: 0.0633 | Val RMSE: 1.0062
Epoch 7/30 | Train Loss: 0.0610 | Val Loss: 0.0616 | Val RMSE: 0.9930
Epoch 8/30 | Train Loss: 0.0597 | Val Loss: 0.0603 | Val RMSE: 0.9822
Epoch 9/30 | Train Loss: 0.0588 | Val Loss: 0.0592 | Val RMSE: 0.9736
Epoch 10/30 | Train Loss: 0.0581 | Val Loss: 0.0587 | Val RMSE: 0.9693
Epoch 11/30 | Train Loss: 0.0575 | Val Loss: 0.0581 | Val RMSE: 0.9642
Epoch 12/30 | Train Loss: 0.0571 | Val Loss: 0.0575 | Val RMSE: 0.9595
Epoch 13/30 | Train Loss: 0.0567 | Val Loss: 0.0572 | Val RMSE: 0.9570
Epoch 14/30 | Train Loss: 0.0563 | Val Loss: 0.0566 | V

In [11]:
# Hyperparameters for the 'lightgcnpp' run, grouped as: optimisation settings,
# model size, then runtime inputs. `train_df` is the first element of
# `data_bundle`, i.e. the training frame as unpacked in the setup cell.
lightgcnpp_config = dict(
    # optimisation
    epochs=12,
    batch_size=1024,
    learning_rate=0.001,
    weight_decay=1e-6,
    # model size
    embedding_dim=48,
    n_layers=3,
    # runtime inputs
    device=device,
    train_df=data_bundle[0],
)

# Store the result under 'lightgcnpp' in the shared model registry.
trained_models['lightgcnpp'] = pipeline('lightgcnpp', lightgcnpp_config, data_bundle)


Training Model: best_lightgcnpp
Epoch 1/12 | Train Loss: 0.0702 | Val Loss: 0.0628 | Val RMSE: 1.0020
Epoch 2/12 | Train Loss: 0.0597 | Val Loss: 0.0568 | Val RMSE: 0.9535
Epoch 3/12 | Train Loss: 0.0536 | Val Loss: 0.0533 | Val RMSE: 0.9235
Epoch 4/12 | Train Loss: 0.0494 | Val Loss: 0.0512 | Val RMSE: 0.9050
Epoch 5/12 | Train Loss: 0.0460 | Val Loss: 0.0496 | Val RMSE: 0.8908
Epoch 6/12 | Train Loss: 0.0429 | Val Loss: 0.0485 | Val RMSE: 0.8813
Epoch 7/12 | Train Loss: 0.0399 | Val Loss: 0.0482 | Val RMSE: 0.8782
Epoch 8/12 | Train Loss: 0.0368 | Val Loss: 0.0480 | Val RMSE: 0.8767
Epoch 9/12 | Train Loss: 0.0338 | Val Loss: 0.0481 | Val RMSE: 0.8774
Epoch 10/12 | Train Loss: 0.0308 | Val Loss: 0.0486 | Val RMSE: 0.8817
Epoch 11/12 | Train Loss: 0.0279 | Val Loss: 0.0493 | Val RMSE: 0.8880
Epoch 12/12 | Train Loss: 0.0250 | Val Loss: 0.0496 | Val RMSE: 0.8910
Best Val RMSE: 0.8767
Test RMSE: 0.8973


In [38]:
# Hyperparameters for the 'simgcl' run, grouped as: optimisation / model size,
# the three SimGCL-specific knobs (`eps`, `temperature`, `lambda_cl`), then
# runtime inputs. `train_df` is the first element of `data_bundle`, i.e. the
# training frame as unpacked in the setup cell.
simgcl_config = dict(
    # optimisation / model size
    epochs=30,
    batch_size=1024,
    learning_rate=0.0017,
    weight_decay=1e-6,
    embedding_dim=64,
    n_layers=3,
    # SimGCL-specific settings (semantics defined inside `pipeline`)
    eps=0.1,
    temperature=0.2,
    lambda_cl=0.1,
    # runtime inputs
    device=device,
    train_df=data_bundle[0],
)

# Store the result under 'simgcl'; the ensemble cell lists it as a member.
trained_models['simgcl'] = pipeline('simgcl', simgcl_config, data_bundle)


Training Model: best_simgcl
Epoch 1/30 | Train Loss: 0.7550 | Val Loss: 0.0628 | Val RMSE: 1.0023
Epoch 2/30 | Train Loss: 0.5769 | Val Loss: 0.0581 | Val RMSE: 0.9644
Epoch 3/30 | Train Loss: 0.5605 | Val Loss: 0.0563 | Val RMSE: 0.9488
Epoch 4/30 | Train Loss: 0.5525 | Val Loss: 0.0551 | Val RMSE: 0.9394
Epoch 5/30 | Train Loss: 0.5478 | Val Loss: 0.0546 | Val RMSE: 0.9349
Epoch 6/30 | Train Loss: 0.5446 | Val Loss: 0.0542 | Val RMSE: 0.9310
Epoch 7/30 | Train Loss: 0.5424 | Val Loss: 0.0539 | Val RMSE: 0.9290
Epoch 8/30 | Train Loss: 0.5406 | Val Loss: 0.0537 | Val RMSE: 0.9269
Epoch 9/30 | Train Loss: 0.5394 | Val Loss: 0.0536 | Val RMSE: 0.9262
Epoch 10/30 | Train Loss: 0.5383 | Val Loss: 0.0534 | Val RMSE: 0.9244
Epoch 11/30 | Train Loss: 0.5373 | Val Loss: 0.0535 | Val RMSE: 0.9248
Epoch 12/30 | Train Loss: 0.5364 | Val Loss: 0.0534 | Val RMSE: 0.9243
Epoch 13/30 | Train Loss: 0.5358 | Val Loss: 0.0533 | Val RMSE: 0.9234
Epoch 14/30 | Train Loss: 0.5352 | Val Loss: 0.0534 | Val

In [68]:
# Members of the ensemble. Each must already exist in `trained_models` and have
# a `best_<name>.pth` file in the working directory.
ensemble_models = ['lightgcn', 'ncf', 'gmf', 'simgcl']

# Overwrite each member's in-memory weights with the contents of its
# `best_<name>.pth` file before the ensemble is fitted on top of them.
# Tensors are mapped to CPU on load; `load_state_dict` then copies them into
# the existing model.
# NOTE(review): `torch.load` unpickles the file — only load checkpoints that
# were produced locally.
for model_name in ensemble_models:
    state = torch.load(f"best_{model_name}.pth", map_location="cpu")
    trained_models[model_name].load_state_dict(state)

# Hyperparameters for the 'ensemble' run; `ensemble_models` tells the run which
# registry entries to combine and `learn_weights=True` is passed alongside.
ensemble_config = dict(
    epochs=8,
    batch_size=1024,
    learning_rate=0.04,
    weight_decay=0.0001,
    ensemble_models=ensemble_models,
    learn_weights=True,
    device=device,
)

# The registry is passed as the fourth positional argument, exactly as before.
trained_models['ensemble'] = pipeline(
    'ensemble',
    ensemble_config,
    data_bundle,
    trained_models,
)


Training Model: best_ensemble
Epoch 1/8 | Train Loss: 0.0468 | Val Loss: 0.0503 | Val RMSE: 0.8969
Epoch 2/8 | Train Loss: 0.0467 | Val Loss: 0.0503 | Val RMSE: 0.8968
Epoch 3/8 | Train Loss: 0.0467 | Val Loss: 0.0502 | Val RMSE: 0.8961
Epoch 4/8 | Train Loss: 0.0467 | Val Loss: 0.0502 | Val RMSE: 0.8959
Epoch 5/8 | Train Loss: 0.0467 | Val Loss: 0.0502 | Val RMSE: 0.8964
Epoch 6/8 | Train Loss: 0.0467 | Val Loss: 0.0502 | Val RMSE: 0.8962
Epoch 7/8 | Train Loss: 0.0467 | Val Loss: 0.0502 | Val RMSE: 0.8963
Epoch 8/8 | Train Loss: 0.0467 | Val Loss: 0.0501 | Val RMSE: 0.8956
Best Val RMSE: 0.8956
Test RMSE: 0.9129


In [49]:
# Pick the model to inspect; the submission cell below reuses `model_name`.
model_name = 'ncf'

# Restore that model's weights from its `best_<name>.pth` file (mapped to CPU
# on load) and switch it to evaluation mode.
# NOTE(review): `torch.load` unpickles the file — only load checkpoints that
# were produced locally.
state = torch.load(f"best_{model_name}.pth", map_location="cpu")
eval_model = trained_models[model_name]
eval_model.load_state_dict(state)
eval_model.eval()

# Run `sample` on the held-out test frame with n_samples=700.
sample(model=eval_model, test_df=test_df, n_samples=700, device=device)

# Ending the cell on a bare None keeps the notebook from rendering the value
# returned by `sample` as the cell result.
None

 userId  movieId  rating  predicted_rating  error  absolute_error
    250      271     4.0              3.36  -0.64            0.64
    222      145     2.0              1.81  -0.19            0.19
    889       81     4.0              3.45  -0.55            0.55
    208      523     4.0              3.80  -0.20            0.20
    312      614     4.0              4.08   0.08            0.08
    293      419     3.0              3.22   0.22            0.22
    422      672     3.0              2.74  -0.26            0.26
    220      289     4.0              3.31  -0.69            0.69
    151      736     4.0              4.22   0.22            0.22
    884      640     1.0              3.44   2.44            2.44

----------------------------------------------------------------------------------------------------
RMSE: 0.9383
MAE: 0.7351


In [50]:
# Build the submission file for the model chosen via `model_name`, which must
# already be set by an earlier cell. The output file is named `<model_name>.csv`.
# NOTE(review): `prompt_path` is an absolute Kaggle path while the training data
# is read from a relative `./data/` path — confirm this matches the environment
# the notebook is run in.
submission = generate_submission(
    # model and the training frame
    model=trained_models[model_name],
    train_df=train_df,
    # id mappings from preprocessing
    user_map=user_map,
    movie_map=movie_map,
    # input prompts and output location
    prompt_path="/kaggle/input/recsys/test.txt",
    output_path=f"{model_name}.csv",
    device=device,
)

Saved submission file to: ncf.csv
   Id     Score
0   1  3.882270
1   2  3.730960
2   3  4.067957
3   4  3.261101
4   5  2.164522
