In [None]:
import torch
from pipeline import pipeline, preprocessing
from hungq.util import generate_submission, sample

In [None]:
# Seed PyTorch's global RNG before any loaders or models are built so that
# torch-driven randomness (e.g. weight initialisation) repeats across
# Restart & Run All.
# NOTE(review): `preprocessing` / `pipeline` may also draw from NumPy or
# Python's `random`; if so those need seeding as well — TODO confirm.
SEED = 42
torch.manual_seed(SEED)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# DATA_PATH feeds `preprocessing`; TEST_PATH is handed to
# `generate_submission` as its `prompt_path` later in the notebook.
DATA_PATH = '/data/train.txt'
TEST_PATH = '/data/test.txt'

# `preprocessing` returns an 11-item bundle. It is kept whole because every
# `pipeline(...)` call receives it, and unpacked for the cells that need
# individual pieces (train_df, test_df, user_map, movie_map, ...).
data_bundle = preprocessing(DATA_PATH)
(
    train_df, val_df, test_df,
    train_loader, val_loader,
    user_map, movie_map,
    n_users, n_movies,
    min_value, max_value,
) = data_bundle
print(f"Users: {n_users}, Movies: {n_movies}")

# Registry of fitted models keyed by model name.
# Re-running this cell resets it to empty.
trained_models = {}

Scaling ratings from [1, 5] to [0, 1].
Users: 943, Movies: 1680


In [167]:
# Hyper-parameters for the 'gmf' model; key order matches the original literal.
gmf_config = dict(
    epochs=7,
    batch_size=512,
    learning_rate=0.00065,
    weight_decay=1e-5,
    embedding_dim=64,
    device=device,
)

# Train through the shared pipeline and register the result under 'gmf'.
trained_models['gmf'] = pipeline('gmf', gmf_config, data_bundle)


Training Model: best_gmf
Epoch 1/7 | Train Loss: 0.0757 | Val Loss: 0.0701 | Val RMSE: 1.0588
Epoch 2/7 | Train Loss: 0.0689 | Val Loss: 0.0650 | Val RMSE: 1.0196
Epoch 3/7 | Train Loss: 0.0624 | Val Loss: 0.0595 | Val RMSE: 0.9759
Epoch 4/7 | Train Loss: 0.0553 | Val Loss: 0.0561 | Val RMSE: 0.9470
Epoch 5/7 | Train Loss: 0.0495 | Val Loss: 0.0542 | Val RMSE: 0.9313
Epoch 6/7 | Train Loss: 0.0445 | Val Loss: 0.0534 | Val RMSE: 0.9243
Epoch 7/7 | Train Loss: 0.0399 | Val Loss: 0.0532 | Val RMSE: 0.9230
Best Val RMSE: 0.9230
Test RMSE: 0.9236


In [171]:
# Hyper-parameters for the 'attention' model.
attn_config = dict(
    # optimisation
    epochs=10,
    batch_size=512,
    learning_rate=0.002,
    weight_decay=1e-3,
    # architecture
    embedding_dim=64,
    hidden_dims=[32],
    n_attention_blocks=2,
    n_heads=4,
    dropout=0.3,
    # runtime
    device=device,
)

# Train through the shared pipeline and register the result under 'attention'.
trained_models['attention'] = pipeline('attention', attn_config, data_bundle)


Training Model: best_attention
Epoch 1/10 | Train Loss: 0.0968 | Val Loss: 0.0637 | Val RMSE: 1.0096
Epoch 2/10 | Train Loss: 0.0622 | Val Loss: 0.0582 | Val RMSE: 0.9652
Epoch 3/10 | Train Loss: 0.0579 | Val Loss: 0.0563 | Val RMSE: 0.9489
Epoch 4/10 | Train Loss: 0.0565 | Val Loss: 0.0562 | Val RMSE: 0.9484
Epoch 5/10 | Train Loss: 0.0557 | Val Loss: 0.0555 | Val RMSE: 0.9423
Epoch 6/10 | Train Loss: 0.0550 | Val Loss: 0.0551 | Val RMSE: 0.9389
Epoch 7/10 | Train Loss: 0.0545 | Val Loss: 0.0552 | Val RMSE: 0.9397
Epoch 8/10 | Train Loss: 0.0541 | Val Loss: 0.0546 | Val RMSE: 0.9345
Epoch 9/10 | Train Loss: 0.0537 | Val Loss: 0.0546 | Val RMSE: 0.9347
Epoch 10/10 | Train Loss: 0.0533 | Val Loss: 0.0549 | Val RMSE: 0.9370
Best Val RMSE: 0.9345
Test RMSE: 0.9397


In [40]:
# Hyper-parameters for the 'ncf' model.
# NOTE(review): this run receives the current `trained_models` registry as
# `pretrained_models`, so it presumably expects specific entries to have been
# trained by earlier cells — confirm which keys `pipeline('ncf', ...)` reads
# and run those cells first.
ncf_config = dict(
    epochs=20,
    batch_size=256,
    learning_rate=0.001,
    weight_decay=0.001,
    dropout=0.3,
    freeze_pretrained=False,
    device=device,
)

# Train through the shared pipeline and register the result under 'ncf'.
trained_models['ncf'] = pipeline(
    'ncf',
    ncf_config,
    data_bundle,
    pretrained_models=trained_models,
)


Training Model: best_ncf
Epoch 1/20 | Train Loss: 0.0610 | Val Loss: 0.0544 | Val RMSE: 0.9331
Epoch 2/20 | Train Loss: 0.0535 | Val Loss: 0.0544 | Val RMSE: 0.9330
Epoch 3/20 | Train Loss: 0.0522 | Val Loss: 0.0541 | Val RMSE: 0.9305
Epoch 4/20 | Train Loss: 0.0516 | Val Loss: 0.0544 | Val RMSE: 0.9329
Epoch 5/20 | Train Loss: 0.0508 | Val Loss: 0.0539 | Val RMSE: 0.9285
Epoch 6/20 | Train Loss: 0.0503 | Val Loss: 0.0546 | Val RMSE: 0.9348
Epoch 7/20 | Train Loss: 0.0500 | Val Loss: 0.0542 | Val RMSE: 0.9314
Epoch 8/20 | Train Loss: 0.0493 | Val Loss: 0.0549 | Val RMSE: 0.9372
Epoch 9/20 | Train Loss: 0.0468 | Val Loss: 0.0540 | Val RMSE: 0.9296
Epoch 10/20 | Train Loss: 0.0462 | Val Loss: 0.0544 | Val RMSE: 0.9329
Epoch 11/20 | Train Loss: 0.0457 | Val Loss: 0.0547 | Val RMSE: 0.9352
Epoch 12/20 | Train Loss: 0.0438 | Val Loss: 0.0553 | Val RMSE: 0.9407
Epoch 13/20 | Train Loss: 0.0431 | Val Loss: 0.0553 | Val RMSE: 0.9404
Epoch 14/20 | Train Loss: 0.0430 | Val Loss: 0.0553 | Val RM

In [222]:
# Hyper-parameters for the 'nmf' model.
nmf_config = dict(
    # optimisation
    epochs=9,
    batch_size=1024,
    learning_rate=0.0005,
    weight_decay=1e-6,
    # architecture
    embedding_dim=128,
    mlp_hidden_dims=[128, 64, 32],
    dropout=0.1,
    # runtime
    device=device,
)

# Train through the shared pipeline and register the result under 'nmf'.
trained_models['nmf'] = pipeline('nmf', nmf_config, data_bundle)


Training Model: best_nmf
Epoch 1/9 | Train Loss: 0.0733 | Val Loss: 0.0596 | Val RMSE: 0.9764
Epoch 2/9 | Train Loss: 0.0570 | Val Loss: 0.0561 | Val RMSE: 0.9474
Epoch 3/9 | Train Loss: 0.0541 | Val Loss: 0.0551 | Val RMSE: 0.9392
Epoch 4/9 | Train Loss: 0.0520 | Val Loss: 0.0546 | Val RMSE: 0.9349
Epoch 5/9 | Train Loss: 0.0497 | Val Loss: 0.0539 | Val RMSE: 0.9284
Epoch 6/9 | Train Loss: 0.0461 | Val Loss: 0.0534 | Val RMSE: 0.9241
Epoch 7/9 | Train Loss: 0.0412 | Val Loss: 0.0530 | Val RMSE: 0.9207
Epoch 8/9 | Train Loss: 0.0352 | Val Loss: 0.0528 | Val RMSE: 0.9190
Epoch 9/9 | Train Loss: 0.0291 | Val Loss: 0.0528 | Val RMSE: 0.9191
Best Val RMSE: 0.9190
Test RMSE: 0.9198


In [None]:
# Hyper-parameters for the 'lightgcn' model. Unlike the non-graph configs in
# this notebook, this one also carries the training frame under 'train_df'.
lightgcn_config = dict(
    epochs=30,
    batch_size=512,
    learning_rate=0.002,
    weight_decay=1e-6,
    embedding_dim=128,
    n_layers=6,
    device=device,
    train_df=train_df,
)

# Train through the shared pipeline and register the result under 'lightgcn'.
trained_models['lightgcn'] = pipeline('lightgcn', lightgcn_config, data_bundle)

In [None]:
# Hyper-parameters for the 'lightgcnpp' model. With 100 epochs this is the
# longest run configured in the notebook.
lightgcnpp_config = dict(
    # optimisation
    epochs=100,
    batch_size=1024,
    learning_rate=0.00065,
    weight_decay=1e-6,
    # architecture
    embedding_dim=32,
    n_layers=8,
    # runtime / data handed to the pipeline through the config
    device=device,
    train_df=train_df,
)

# Train through the shared pipeline and register the result under 'lightgcnpp'.
trained_models['lightgcnpp'] = pipeline('lightgcnpp', lightgcnpp_config, data_bundle)


Training Model: best_lightgcnpp
Epoch 1/100 | Train Loss: 0.0746 | Val Loss: 0.0686 | Val RMSE: 1.0474
Epoch 2/100 | Train Loss: 0.0685 | Val Loss: 0.0648 | Val RMSE: 1.0179
Epoch 3/100 | Train Loss: 0.0643 | Val Loss: 0.0620 | Val RMSE: 0.9959
Epoch 4/100 | Train Loss: 0.0611 | Val Loss: 0.0603 | Val RMSE: 0.9826
Epoch 5/100 | Train Loss: 0.0586 | Val Loss: 0.0586 | Val RMSE: 0.9685
Epoch 6/100 | Train Loss: 0.0567 | Val Loss: 0.0570 | Val RMSE: 0.9551
Epoch 7/100 | Train Loss: 0.0555 | Val Loss: 0.0560 | Val RMSE: 0.9462
Epoch 8/100 | Train Loss: 0.0540 | Val Loss: 0.0560 | Val RMSE: 0.9464
Epoch 9/100 | Train Loss: 0.0526 | Val Loss: 0.0565 | Val RMSE: 0.9505
Epoch 10/100 | Train Loss: 0.0516 | Val Loss: 0.0541 | Val RMSE: 0.9301
Epoch 11/100 | Train Loss: 0.0504 | Val Loss: 0.0537 | Val RMSE: 0.9273
Epoch 12/100 | Train Loss: 0.0490 | Val Loss: 0.0531 | Val RMSE: 0.9221
Epoch 13/100 | Train Loss: 0.0485 | Val Loss: 0.0529 | Val RMSE: 0.9199
Epoch 14/100 | Train Loss: 0.0474 | Val 

KeyboardInterrupt: 

In [None]:
# Hyper-parameters for the 'simgcl' model.
simgcl_config = dict(
    # optimisation
    epochs=10,
    batch_size=1024,
    learning_rate=0.002,
    weight_decay=1e-6,
    # architecture
    embedding_dim=64,
    n_layers=3,
    # settings that only this config defines
    eps=0.1,
    temperature=0.2,
    lambda_cl=0.1,
    # runtime / data handed to the pipeline through the config
    device=device,
    train_df=train_df,
)

# Train through the shared pipeline and register the result under 'simgcl'.
trained_models['simgcl'] = pipeline('simgcl', simgcl_config, data_bundle)


Training Model: best_simgcl
Epoch 1/10 | Train Loss: 0.8007 | Val Loss: 0.0652 | Val RMSE: 1.0211
Epoch 2/10 | Train Loss: 0.5778 | Val Loss: 0.0601 | Val RMSE: 0.9804
Epoch 3/10 | Train Loss: 0.5597 | Val Loss: 0.0578 | Val RMSE: 0.9614
Epoch 4/10 | Train Loss: 0.5512 | Val Loss: 0.0566 | Val RMSE: 0.9519
Epoch 5/10 | Train Loss: 0.5454 | Val Loss: 0.0560 | Val RMSE: 0.9470
Epoch 6/10 | Train Loss: 0.5415 | Val Loss: 0.0558 | Val RMSE: 0.9447
Epoch 7/10 | Train Loss: 0.5386 | Val Loss: 0.0556 | Val RMSE: 0.9429
Epoch 8/10 | Train Loss: 0.5366 | Val Loss: 0.0555 | Val RMSE: 0.9419
Epoch 9/10 | Train Loss: 0.5346 | Val Loss: 0.0554 | Val RMSE: 0.9412
Epoch 10/10 | Train Loss: 0.5337 | Val Loss: 0.0554 | Val RMSE: 0.9415
Best Val RMSE: 0.9412
Test RMSE: 0.9428


In [None]:
# Which entry of `trained_models` to inspect; the submission cell reuses it.
model_name = 'nmf'

# To evaluate a checkpoint saved on disk instead of the in-memory weights,
# first restore it into the registered model:
#     state = torch.load(f"best_{model_name}.pth", map_location="cpu")
#     trained_models[model_name].load_state_dict(state)

# Put the model in evaluation mode before running predictions.
trained_models[model_name].eval()

# Run `sample` against the held-out test frame. The trailing semicolon keeps
# the notebook from echoing the call's return value beneath the cell.
sample(
    model=trained_models[model_name],
    device=device,
    test_df=test_df,
    n_samples=7000,
);

 userId  movieId  rating  predicted_rating  error  absolute_error
    312      156     3.0              4.15   1.15            1.15
    474       44     3.0              4.01   1.01            1.01
    450      714     4.0              3.96  -0.04            0.04
    345       14     4.0              4.20   0.20            0.20
    655      315     4.0              3.28  -0.72            0.72
    487      978     1.0              2.21   1.21            1.21
    318      158     5.0              2.75  -2.25            2.25
     60      665     4.0              3.45  -0.55            0.55
    435       54     4.0              3.12  -0.88            0.88
    864      710     2.0              4.00   2.00            2.00

----------------------------------------------------------------------------------------------------
RMSE: 0.9169
MAE: 0.7228


In [None]:
# Build the submission for the model selected by `model_name`, reading prompts
# from TEST_PATH and writing the result to "<model_name>.csv".
submission = generate_submission(
    model=trained_models[model_name],
    train_df=train_df,
    user_map=user_map,
    movie_map=movie_map,
    prompt_path=TEST_PATH,
    output_path=f"{model_name}.csv",
    device=device,
)

Saved submission file to: nmf.csv
   Id     Score
0   1  3.326147
1   2  2.766855
2   3  4.220539
3   4  4.108191
4   5  2.616639
