In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from dataset import BprMFData
from model import MFbpr
from evaluation import evaluate
from torch.utils.data import DataLoader
import torch 

Load the data

In [2]:
data = pd.read_csv("../../../preprocessed_data/ratings.csv")
rec_data = data[['user_id','app_id','implicit_rating']].copy()


In [3]:
rec_train, rec_test = train_test_split(rec_data,test_size= 0.2, random_state= 42)
users_train,games_train = rec_train["user_id"].unique(), rec_train["app_id"].unique()
rec_test =rec_test.loc[rec_test["user_id"].isin(users_train) & rec_test["app_id"].isin(games_train)]
testset = list(zip(rec_test.user_id.values, rec_test.app_id.values, rec_test.implicit_rating.values))
print(rec_train.shape)
print(rec_test.shape)

(3483739, 3)
(870400, 3)


Remap the index of userId and gamesId

In [4]:
users_id_map = {id : idx for idx, id in enumerate(rec_data.user_id.unique())}
games_id_map = {id : idx for idx, id in enumerate(rec_data.app_id.unique())}

def generate(df):
    dataset = {}
    for user_id, user_group in df.groupby("user_id"):
        user_like = user_group[user_group["implicit_rating"]== True]["app_id"].tolist()
        user_dislike = user_group[user_group["implicit_rating"] == False]["app_id"].tolist()
        if len(user_like) > 0 and len(user_dislike) > 0:
            dataset[users_id_map[user_id]] = ([games_id_map[like_item] for like_item in user_like], [games_id_map[dis_item] for dis_item in user_dislike])
    return dataset
train = generate(rec_train)
test = generate(rec_train)


In [5]:
trainset =BprMFData(train)
testset = BprMFData(test)
train_loader = DataLoader(trainset, batch_size = 1024, shuffle = True)
test_loader = DataLoader(testset, batch_size = 1024, shuffle = True)

In [6]:
def train_bpr(model, train_loader, test_loader, test_data, epochs=20, lr=0.1):
    optimizer = torch.optim.SGD([model.U, model.V], lr=lr, momentum= 0.4)
    for epoch in range(epochs):
        model.train()
        epoch_loss = 0
        for users, items_pos, items_neg in train_loader:
            users, items_pos, items_neg = users.long(), items_pos.long(), items_neg.long()
            optimizer.zero_grad()
            _, _, loss = model(users, items_pos, items_neg)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

        epoch_loss /= len(train_loader)
        print(f'Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss:.4f}')
        evaluate(model, test_loader, test_data)

In [7]:
model = MFbpr(len(rec_data.user_id.unique()), len(rec_data.app_id.unique()))
train_bpr(model, train_loader, test_loader, test, epochs=80, lr=0.08)

Epoch 1/80, Loss: 12516.2088
Accuracy: 0.6434 - NDCG@5: 0.9398 - F1@20: 0.5665
Epoch 2/80, Loss: 10397.4838
Accuracy: 0.6496 - NDCG@5: 0.9386 - F1@20: 0.5679
Epoch 3/80, Loss: 9011.0985
Accuracy: 0.6566 - NDCG@5: 0.9387 - F1@20: 0.5697
Epoch 4/80, Loss: 7968.3745
Accuracy: 0.6675 - NDCG@5: 0.9399 - F1@20: 0.5725
Epoch 5/80, Loss: 7132.4542
Accuracy: 0.6677 - NDCG@5: 0.9412 - F1@20: 0.5725
Epoch 6/80, Loss: 6432.1628
Accuracy: 0.6811 - NDCG@5: 0.9429 - F1@20: 0.5746
Epoch 7/80, Loss: 5821.9996
Accuracy: 0.6854 - NDCG@5: 0.9459 - F1@20: 0.5763
Epoch 8/80, Loss: 5307.4375
Accuracy: 0.6899 - NDCG@5: 0.9461 - F1@20: 0.5772
Epoch 9/80, Loss: 4859.7869
Accuracy: 0.6939 - NDCG@5: 0.9471 - F1@20: 0.5777
Epoch 10/80, Loss: 4510.8651
Accuracy: 0.6971 - NDCG@5: 0.9483 - F1@20: 0.5790
Epoch 11/80, Loss: 4136.5329
Accuracy: 0.6990 - NDCG@5: 0.9481 - F1@20: 0.5796
Epoch 12/80, Loss: 3854.8135
Accuracy: 0.7067 - NDCG@5: 0.9499 - F1@20: 0.5816
Epoch 13/80, Loss: 3587.4287
Accuracy: 0.7113 - NDCG@5: 0.9