In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from dataset import BprMFData
from model import MFbpr
from evaluation import evaluate
from torch.utils.data import DataLoader
import torch 

Load the data

In [2]:
data = pd.read_csv("../../preprocessed_data/ratings.csv")
rec_data = data[['user_id','app_id','implicit_rating']].copy()


In [3]:
rec_train, rec_test = train_test_split(rec_data,test_size= 0.2, random_state= 42)
users_train,games_train = rec_train["user_id"].unique(), rec_train["app_id"].unique()
rec_test =rec_test.loc[rec_test["user_id"].isin(users_train) & rec_test["app_id"].isin(games_train)]
testset = list(zip(rec_test.user_id.values, rec_test.app_id.values, rec_test.implicit_rating.values))
print(rec_train.shape)
print(rec_test.shape)

(3483739, 3)
(870400, 3)


Remap the index of userId and gamesId

In [4]:
users_id_map = {id : idx for idx, id in enumerate(rec_data.user_id.unique())}
games_id_map = {id : idx for idx, id in enumerate(rec_data.app_id.unique())}

def generate(df):
    dataset = {}
    for user_id, user_group in df.groupby("user_id"):
        user_like = user_group[user_group["implicit_rating"]== True]["app_id"].tolist()
        user_dislike = user_group[user_group["implicit_rating"] == False]["app_id"].tolist()
        if len(user_like) > 0 and len(user_dislike) > 0:
            dataset[users_id_map[user_id]] = ([games_id_map[like_item] for like_item in user_like], [games_id_map[dis_item] for dis_item in user_dislike])
    return dataset
train = generate(rec_train)
test = generate(rec_train)


In [5]:
trainset =BprMFData(train)
testset = BprMFData(test)
train_loader = DataLoader(trainset, batch_size = 1024, shuffle = True)
test_loader = DataLoader(testset, batch_size = 1024, shuffle = True)

In [6]:
def train_bpr(model, train_loader, test_loader, test_data, epochs=20, lr=0.1):
    optimizer = torch.optim.SGD([model.U, model.V], lr=lr, momentum= 0.4)
    for epoch in range(epochs):
        model.train()
        epoch_loss = 0
        for users, items_pos, items_neg in train_loader:
            users, items_pos, items_neg = users.long(), items_pos.long(), items_neg.long()
            optimizer.zero_grad()
            _, _, loss = model(users, items_pos, items_neg)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

        epoch_loss /= len(train_loader)
        print(f'Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss:.4f}')
        evaluate(model, test_loader, test_data)

In [7]:
model = MFbpr(len(rec_data.user_id.unique()), len(rec_data.app_id.unique()))
train_bpr(model, train_loader, test_loader, test, epochs=60, lr=0.08)

Epoch 1/60, Loss: 12524.5381
Accuracy: 0.6455 - NDCG@5: 0.9438 - F1@10: 0.9159
Epoch 2/60, Loss: 10358.5524
Accuracy: 0.6524 - NDCG@5: 0.9417 - F1@10: 0.9175
Epoch 3/60, Loss: 9034.0020
Accuracy: 0.6640 - NDCG@5: 0.9410 - F1@10: 0.9186
Epoch 4/60, Loss: 7969.7298
Accuracy: 0.6704 - NDCG@5: 0.9421 - F1@10: 0.9204
Epoch 5/60, Loss: 7122.1549
Accuracy: 0.6810 - NDCG@5: 0.9427 - F1@10: 0.9228
Epoch 6/60, Loss: 6400.8289
Accuracy: 0.6793 - NDCG@5: 0.9428 - F1@10: 0.9217
Epoch 7/60, Loss: 5827.3283
Accuracy: 0.6827 - NDCG@5: 0.9420 - F1@10: 0.9237
Epoch 8/60, Loss: 5333.5035
Accuracy: 0.6914 - NDCG@5: 0.9461 - F1@10: 0.9258
Epoch 9/60, Loss: 4853.5401
Accuracy: 0.6933 - NDCG@5: 0.9467 - F1@10: 0.9281
Epoch 10/60, Loss: 4504.7171
Accuracy: 0.6981 - NDCG@5: 0.9486 - F1@10: 0.9287
Epoch 11/60, Loss: 4135.2013
Accuracy: 0.6980 - NDCG@5: 0.9498 - F1@10: 0.9296
Epoch 12/60, Loss: 3847.8058
Accuracy: 0.7085 - NDCG@5: 0.9523 - F1@10: 0.9312
Epoch 13/60, Loss: 3580.7293
Accuracy: 0.7124 - NDCG@5: 0.9