# Preparation

In [1]:
import torch
torch.__version__

'2.3.0a0+6ddf5cf85e.nv24.04'

In [2]:
import os
import pandas as pd
import numpy as np
import random
from torch import nn

from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

from model import *
from dataset import *
from train import *

In [3]:
# Choose the compute backend: CUDA first, then MPS, otherwise fall back to the CPU.
# MPS availability is only probed when CUDA is not available.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using cuda device


In [4]:
# Notebook-level constants (batch size and epoch count, judging by their names).
# NOTE(review): neither constant is referenced by the cells visible in this notebook;
# the model_train calls pass num_of_epochs=30 explicitly. Confirm whether these are
# still needed or should be wired into those calls.
BATCH_SIZE: int = 256
NUMEPOCHS: int = 50

In [5]:
# import pandas as pd
# df = pd.read_csv("Data/ml-1m.train.rating",sep='\t', header=None, names=['uid', 'mid', 'rating', 'timestamp'], engine='python')
# df

In [6]:
# df2 = pd.read_csv("Data/ml-1m.test.negative",sep = "\t",header=None)
# df2

# Train and Test

### Compare the three methods 

In [7]:
# Configurations for the three models compared in this section.
# "model_type" takes one of "GMF", "MLP" or "NeuMF"; "model_mark" is a free-form label
# for the run. How model_train consumes the remaining keys is defined in the project
# modules, not in this notebook.

# GMF with an 8-dimensional MF embedding (no MLP keys).
model_config_mf = {
    "model_mark": "gmf(mf_dim=8)",
    "embedding_dim_mf": 8,
    "model_type": "GMF",
}

# MLP with three layers.
model_config_mlp = {
    "model_mark": "mlp(mlp_layer=3)",
    "mlp_layers(X)": 3,
    "mlp_layers": [32, 16, 8],
    "model_type": "MLP",
}

# NeuMF: carries both the MF embedding size and the three-layer MLP settings.
model_config_neumf = {
    "model_mark": "neumf(mf_dim=8,mlp_layer=3)",
    "embedding_dim_mf": 8,
    "mlp_layers(X)": 3,
    "mlp_layers": [32, 16, 8],
    "model_type": "NeuMF",
}

# Run the GMF configuration.
# NOTE(review): the saved output under this cell reports 'model_type': 'NeuMF', so it
# appears to come from an earlier run with a different config; re-run to refresh it.
model_train(
    model_config_mf,
    seed=42,
    num_of_negatives=4,
    num_of_epochs=30,
)


Using cuda device
Preprocessing data beginning...
Preprocessing finished and saving to preprocessed_data/ml.pkl.
config of model: 
 {'embedding_dim_mf': 8, 'mlp_layers(X)': 3, 'mlp_layers': [32, 16, 8], 'model_type': 'NeuMF', 'num_users': 6040, 'num_items': 3706}


  3%|▎         | 1/30 [02:06<1:01:01, 126.25s/it]

Epoch 1, Loss: 0.0015, HR@10: 0.4462, NDCG@10: 0.2497


  7%|▋         | 2/30 [03:43<50:51, 108.99s/it]  

Epoch 2, Loss: 0.0014, HR@10: 0.4482, NDCG@10: 0.2512


  7%|▋         | 2/30 [05:34<1:17:56, 167.03s/it]


KeyboardInterrupt: 

In [None]:
# Run the three-layer MLP configuration with the same seed, negatives and epoch count
# as the GMF run so the results are comparable.
model_train(
    model_config_mlp,
    seed=42,
    num_of_negatives=4,
    num_of_epochs=30,
)


In [None]:
# Run the NeuMF configuration with the same seed, negatives and epoch count as the
# GMF and MLP runs.
model_train(
    model_config_neumf,
    seed=42,
    num_of_negatives=4,
    num_of_epochs=30,
)


Results on MovieLens

Metrics: HR@10, NDCG@10

-   `bias = False` for some linear layers
    -   GMF(Factor-8, X-0): 0.3490, 0.1849
    -   MLP(Factor-8, X-3): 0.4235, 0.2279
    -   NeuMF(Factor-8, X-3): 0.4233, 0.2258

## Reproduce the ablation study: MLP with different numbers of layers

 K: 8
 
 layers (X): 0, 1, 2, 3, 4

In [8]:
# MLP depth ablation, X = 0, 1, 2. "mlp_layers(X)" carries the depth label X and
# "mlp_layers" presumably lists the layer widths (confirm against the model code).
# NOTE(review): mlp0 and mlp1 share "mlp_layers": [8] and differ only in "mlp_layers(X)"
# and "model_mark". Whether the model reads "mlp_layers(X)" is not visible from this
# notebook; confirm that these two configs actually build different networks.
model_config_mlp0 = {
    "model_mark": "mlp(mlp_layer=0)",
    "mlp_layers(X)": 0,
    "mlp_layers": [8],
    "model_type": "MLP",
}

model_config_mlp1 = {
    "model_mark": "mlp(mlp_layer=1)",
    "mlp_layers(X)": 1,
    "mlp_layers": [8],
    "model_type": "MLP",
}

model_config_mlp2 = {
    "model_mark": "mlp(mlp_layer=2)",
    "mlp_layers(X)": 2,
    "mlp_layers": [16, 8],
    "model_type": "MLP",
}

# MLP depth ablation, X = 3.
# "mlp_layers(X)" is the depth label and must agree with both "model_mark"
# (mlp_layer=3) and the three entries in "mlp_layers". It was previously 4,
# a copy-paste of the X = 4 config.
model_config_mlp3 = {
    "model_mark": "mlp(mlp_layer=3)",
    "mlp_layers(X)": 3,
    "mlp_layers": [32, 16, 8],
    "model_type": "MLP",
}

# MLP depth ablation, X = 4: four entries in "mlp_layers", halving from 64 down to 8.
model_config_mlp4 = {
    "model_mark": "mlp(mlp_layer=4)",
    "mlp_layers(X)": 4,
    "mlp_layers": [64, 32, 16, 8],
    "model_type": "MLP",
}

# Run the X = 0 ablation config.
# NOTE(review): the saved progress bar under this cell counts to 20 epochs while this
# call requests 30, so that output appears to come from an earlier run; re-run to refresh.
model_train(
    model_config_mlp0,
    seed=42,
    num_of_negatives=4,
    num_of_epochs=30,
)


Using cuda device
Loading preprocessed data from preprocessed_data/ml.pkl
config of model: 
 {'mlp_layers(X)': 0, 'mlp_layers': [8], 'model_type': 'MLP', 'num_users': 6040, 'num_items': 3706}


  5%|▌         | 1/20 [00:55<17:32, 55.37s/it]

Epoch 1, Loss: 0.0016, HR@10: 0.4503, NDCG@10: 0.2517


 10%|█         | 2/20 [02:09<19:55, 66.42s/it]

Epoch 2, Loss: 0.0014, HR@10: 0.4488, NDCG@10: 0.2491


 15%|█▌        | 3/20 [03:30<20:38, 72.86s/it]

Epoch 3, Loss: 0.0014, HR@10: 0.4457, NDCG@10: 0.2488


 20%|██        | 4/20 [04:26<17:43, 66.49s/it]

Epoch 4, Loss: 0.0014, HR@10: 0.4510, NDCG@10: 0.2499


 25%|██▌       | 5/20 [05:54<18:33, 74.27s/it]

Epoch 5, Loss: 0.0014, HR@10: 0.4512, NDCG@10: 0.2503


 30%|███       | 6/20 [06:46<15:34, 66.75s/it]

Epoch 6, Loss: 0.0014, HR@10: 0.4485, NDCG@10: 0.2507


 30%|███       | 6/20 [07:25<17:20, 74.33s/it]


KeyboardInterrupt: 

In [None]:
# Run the X = 1 ablation config with the same seed, negatives and epoch count as the
# other ablation runs.
model_train(
    model_config_mlp1,
    seed=42,
    num_of_negatives=4,
    num_of_epochs=30,
)


In [None]:
# Run the X = 2 ablation config with the same seed, negatives and epoch count as the
# other ablation runs.
model_train(
    model_config_mlp2,
    seed=42,
    num_of_negatives=4,
    num_of_epochs=30,
)


In [None]:
# Run the X = 3 ablation config with the same seed, negatives and epoch count as the
# other ablation runs.
model_train(
    model_config_mlp3,
    seed=42,
    num_of_negatives=4,
    num_of_epochs=30,
)


In [None]:
# Run the X = 4 ablation config with the same seed, negatives and epoch count as the
# other ablation runs.
model_train(
    model_config_mlp4,
    seed=42,
    num_of_negatives=4,
    num_of_epochs=30,
)


Metrics: HR@10, NDCG@10

-   MLP0: 0.4368, 0.2394
-   MLP1: 0.4368, 0.2394
-   MLP2: 0.4222, 0.2244
-   MLP3: 