In [1]:
from neu import NeuMF
import pickle
import pandas as pd
import torch
from torch import nn
from sklearn.preprocessing import LabelEncoder
from concurrent.futures import ProcessPoolExecutor
import numpy as np
from operator import itemgetter
THREADS = 16
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
import time
import torch.nn.functional as F
# Mean-squared-error criterion; used both as the training loss and to derive RMSE.
loss_func = torch.nn.MSELoss()
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# NeuMF hyperparameters, forwarded to the NeuMF constructor through train().
# NOTE(review): presumably the MLP / GMF embedding sizes and the MLP layer
# widths -- confirm against the NeuMF definition in neu.py.
mlp_factors = 8
gmf_factors = 32 
layers = [64,32,16,8]

In [2]:
def binary_predictions(true_ratings, predicted_ratings):
    """Score the predictions as a binary "rating >= 3" classification task.

    Both sequences are thresholded at 3 (``>= 3`` becomes 1, anything else 0)
    and the two label lists are compared with the scikit-learn metrics.

    Args:
        true_ratings: indexable sequence of ground-truth ratings.
        predicted_ratings: indexable sequence of predicted ratings, same length.

    Returns:
        Tuple ``(precision, recall, f1)``.

    Raises:
        AssertionError: if the two inputs differ in length.
    """
    assert len(true_ratings) == len(predicted_ratings)
    count = len(true_ratings)

    # Index-based access is kept on purpose so any indexable container works.
    labels_true = [1 if true_ratings[k] >= 3 else 0 for k in range(count)]
    labels_pred = [1 if predicted_ratings[k] >= 3 else 0 for k in range(count)]

    precision = precision_score(labels_true, labels_pred)
    recall = recall_score(labels_true, labels_pred)
    f1 = f1_score(labels_true, labels_pred)
    return precision, recall, f1

In [3]:
def arg_accuracy_int(ratings, predictions):
    """Fraction of predictions whose rounded value is within one point of the truth.

    A prediction is a hit when ``round(prediction)`` lies in the closed
    interval ``[int(rating) - 1, int(rating) + 1]``.

    Args:
        ratings: torch tensor of true ratings.
        predictions: torch tensor of predicted ratings, one per rating.

    Returns:
        The hit ratio as a Python float, or ``0.0`` when ``ratings`` is empty
        (the previous implementation raised ``ZeroDivisionError``).
    """
    # Flatten so that a trailing singleton dimension cannot trigger broadcasting.
    true_vals = ratings.detach().cpu().numpy().reshape(-1)
    pred_vals = predictions.detach().cpu().numpy().reshape(-1)

    total_nr = len(true_vals)
    if total_nr == 0:
        return 0.0

    # np.rint rounds half to even exactly like round(); np.trunc mirrors int().
    hits = np.abs(np.rint(pred_vals) - np.trunc(true_vals)) <= 1
    return float(np.count_nonzero(hits)) / total_nr


def round_of_rating(number):
    """Snap ``number`` to the nearest multiple of 0.5.

    Ties are resolved by the built-in ``round`` (round half to even) applied
    to the doubled value.
    """
    doubled = number * 2
    return round(doubled) / 2

In [4]:
def RMSE(data, model):
    """Evaluate ``model`` on ``data`` and return regression and classification metrics.

    Args:
        data: DataFrame read positionally: column 0 is used as the user
            indices, column 1 as the item indices and column 4 as the rating.
        model: callable mapping ``(users, items)`` index tensors to predicted
            ratings (an ``nn.Module`` in this notebook).

    Returns:
        Tuple ``(rmse, mae, precision, recall, f1, accuracy)``. ``rmse`` and
        ``mae`` are scalar tensors; the remaining values come from
        ``binary_predictions`` and ``arg_accuracy_int``.
    """
    users = torch.LongTensor(data.iloc[:, 0].values).to(DEVICE)
    items = torch.LongTensor(data.iloc[:, 1].values).to(DEVICE)
    rating = torch.FloatTensor(data.iloc[:, 4].values).to(DEVICE)

    # Evaluate in eval mode and without autograd: this function is called in
    # the middle of training, and the metrics must neither build a graph over
    # the whole evaluation set nor be affected by train-only layers.
    is_module = isinstance(model, torch.nn.Module)
    was_training = is_module and model.training
    if is_module:
        model.eval()
    try:
        with torch.no_grad():
            prediction = model(users, items)
            rmse = loss_func(prediction, rating) ** 0.5
            mae = torch.nn.L1Loss()(prediction, rating)
    finally:
        # Leave the model in whatever mode the caller had it in.
        if was_training:
            model.train()

    p, r, f = binary_predictions(rating, prediction)
    accuracy = arg_accuracy_int(rating, prediction)
    return rmse, mae, p, r, f, accuracy

In [5]:
import json
import pandas as pd
# Load the train and test splits from CSV files in the working directory.
trainset = pd.read_csv('train.csv')
testset = pd.read_csv('test.csv')
# Disabled rescaling of the 'user_rating' column, kept for reference:
#trainset['user_rating'] = (trainset['user_rating'] + 1) * 2 + 1
#testset['user_rating'] = (testset['user_rating'] + 1) * 2 + 1
def traite_train_test(df):
    """Decode the JSON-encoded ``authors`` and ``genres`` columns of ``df``.

    Each cell of those two columns is parsed with ``json.loads``. The frame
    is modified in place and also returned for convenience.
    """
    for column in ('authors', 'genres'):
        df[column] = df[column].apply(json.loads)
    return df
# Decode the JSON list columns of both splits (the helper works in place).
trainset, testset = traite_train_test(trainset), traite_train_test(testset)

# Training frame: a copy of the train split relabelled 0..n-1, because train()
# selects each batch with label-based .loc lookups.
hehe_test = trainset.copy()
hehe_test.index = range(len(hehe_test))

# Evaluation frame: a copy of the test split with explicit numeric dtypes for
# the id and rating columns, relabelled 0..n-1 as well.
df_empty = testset.copy().astype({'user': 'int', 'rating': 'float', 'item': 'int'})
df_empty.index = range(len(df_empty))

In [6]:
# Stack the train and test frames so that the user / item counts computed
# from `hehe` cover both splits. Row labels are not reset, so they repeat.
hehe = pd.concat([hehe_test,df_empty])

In [7]:
def train(lr, mlp_factors,gmf_factors, layers, reg, batch_size, num_epochs, train, test):
    """Train a NeuMF rating predictor with Adam and report metrics after every epoch.

    Reads the module-level ``n_users``, ``n_items``, ``DEVICE``, ``loss_func``
    and ``tqdm``, which must be defined before this function is called.

    Args:
        lr: initial learning rate for Adam.
        mlp_factors, gmf_factors, layers: forwarded to the ``NeuMF`` constructor.
        reg: Adam ``weight_decay``.
        batch_size: number of rows per optimisation step.
        num_epochs: number of passes over ``train``.
        train: training DataFrame read positionally (column 0 = user index,
            column 1 = item index, column 4 = rating). Rows are looked up by
            the labels ``0..len(train)-1``, so the index must contain them.
        test: evaluation DataFrame with the same column layout; passed to
            ``RMSE`` after every epoch.

    Returns:
        The trained model.
    """
    model = NeuMF(n_users, n_items, mlp_factors,gmf_factors, layers).to(DEVICE)
    optimizer = torch.optim.Adam(params=model.parameters(), lr=lr, weight_decay=reg)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.5, patience=10, threshold_mode='abs', threshold=0.005)

    num_example = len(train)
    indices = list(range(num_example))

    # Materialise the three training columns once instead of doing three
    # pandas .loc lookups per batch. The label-based selection is kept, so
    # the row order is the same as before.
    # NOTE(review): batches are visited in the same order every epoch (no
    # shuffling) -- behaviour preserved here, confirm that this is intended.
    all_users = torch.LongTensor(train.iloc[:, 0].loc[indices].values)
    all_items = torch.LongTensor(train.iloc[:, 1].loc[indices].values)
    all_ratings = torch.FloatTensor(train.iloc[:, 4].loc[indices].values)

    for epoch in range(num_epochs):
        model.train()
        for start in tqdm(range(0, num_example, batch_size)):
            stop = min(start + batch_size, num_example)
            users = all_users[start:stop].to(DEVICE)
            items = all_items[start:stop].to(DEVICE)
            rating = all_ratings[start:stop].to(DEVICE)

            optimizer.zero_grad()
            prediction = model(users, items)
            err = loss_func(prediction, rating)
            err.backward()
            optimizer.step()

        # Evaluate on the frame the caller passed in; the previous code
        # silently ignored `test` and read the global `testset` instead.
        rmse, mae, p, r, f, accuracy = RMSE(test, model)
        scheduler.step(rmse)
        # Note: `lr` printed here is the initial value, not the scheduler's current rate.
        print("Learning rate: ", lr, "Regulation: ", reg,"Bath_size:",batch_size)
        print("RMSE: ", rmse, "MAE: ", mae)
        print("Accuracy: ", accuracy, "Precision: ", p, "Recall: ", r, "F1 score: ", f)

    return model

In [9]:
# Number of distinct user / item ids over train + test; train() passes these
# to the NeuMF constructor.
# NOTE(review): a distinct count only bounds the ids if they are contiguous
# in [0, n) -- confirm how the ids in the CSV files were encoded.
n_users = hehe['user'].nunique()
n_items = hehe['item'].nunique()

In [11]:
from tqdm import tqdm
# Optimisation hyperparameters for this run.
lr = 0.002       # initial Adam learning rate
reg = 1e-4       # passed to train() as `reg` (Adam weight_decay)
batch_size = 128
num_epochs = 10
# hehe_test is the training frame; df_empty is passed as the `test` argument.
model_neumf = train(lr, mlp_factors,gmf_factors, layers, reg, batch_size, num_epochs, hehe_test,df_empty)

100%|██████████| 413/413 [00:09<00:00, 45.24it/s]
  1%|          | 5/413 [00:00<00:08, 48.56it/s]

Learning rate:  0.002 Regulation:  0.0001 Bath_size: 128
RMSE:  tensor(0.9997, grad_fn=<PowBackward0>) MAE:  tensor(0.8067, grad_fn=<L1LossBackward>)
Accuracy:  0.8924348089751365 Precision:  0.9224235989990142 Recall:  0.9996712689020382 F1 score:  0.9594951686057976


100%|██████████| 413/413 [00:08<00:00, 46.50it/s]
  1%|          | 5/413 [00:00<00:08, 48.40it/s]

Learning rate:  0.002 Regulation:  0.0001 Bath_size: 128
RMSE:  tensor(0.9697, grad_fn=<PowBackward0>) MAE:  tensor(0.7746, grad_fn=<L1LossBackward>)
Accuracy:  0.9192692540933899 Precision:  0.9223771983020013 Recall:  1.0 F1 score:  0.959621451104101


100%|██████████| 413/413 [00:08<00:00, 47.25it/s]
  1%|          | 5/413 [00:00<00:08, 49.57it/s]

Learning rate:  0.002 Regulation:  0.0001 Bath_size: 128
RMSE:  tensor(0.9622, grad_fn=<PowBackward0>) MAE:  tensor(0.7613, grad_fn=<L1LossBackward>)
Accuracy:  0.9201030927835051 Precision:  0.9223771983020013 Recall:  1.0 F1 score:  0.959621451104101


100%|██████████| 413/413 [00:08<00:00, 47.71it/s]
  1%|          | 5/413 [00:00<00:08, 49.21it/s]

Learning rate:  0.002 Regulation:  0.0001 Bath_size: 128
RMSE:  tensor(0.9533, grad_fn=<PowBackward0>) MAE:  tensor(0.7523, grad_fn=<L1LossBackward>)
Accuracy:  0.9180563978168587 Precision:  0.9223771983020013 Recall:  1.0 F1 score:  0.959621451104101


100%|██████████| 413/413 [00:08<00:00, 47.53it/s]
  1%|          | 5/413 [00:00<00:08, 47.93it/s]

Learning rate:  0.002 Regulation:  0.0001 Bath_size: 128
RMSE:  tensor(0.9290, grad_fn=<PowBackward0>) MAE:  tensor(0.7297, grad_fn=<L1LossBackward>)
Accuracy:  0.917449969678593 Precision:  0.9235581324382057 Recall:  0.994904667981591 F1 score:  0.9579047317613547


100%|██████████| 413/413 [00:08<00:00, 46.58it/s]
  1%|          | 5/413 [00:00<00:08, 48.18it/s]

Learning rate:  0.002 Regulation:  0.0001 Bath_size: 128
RMSE:  tensor(0.9116, grad_fn=<PowBackward0>) MAE:  tensor(0.7112, grad_fn=<L1LossBackward>)
Accuracy:  0.9154032747119466 Precision:  0.9273756006820648 Recall:  0.9833168967784353 F1 score:  0.9545273234942163


100%|██████████| 413/413 [00:08<00:00, 47.99it/s]
  1%|▏         | 6/413 [00:00<00:07, 51.75it/s]

Learning rate:  0.002 Regulation:  0.0001 Bath_size: 128
RMSE:  tensor(0.9084, grad_fn=<PowBackward0>) MAE:  tensor(0.7075, grad_fn=<L1LossBackward>)
Accuracy:  0.9144936325045482 Precision:  0.9304955627110657 Recall:  0.9737015121630507 F1 score:  0.9516083691418015


100%|██████████| 413/413 [00:08<00:00, 47.75it/s]
  1%|▏         | 6/413 [00:00<00:07, 51.86it/s]

Learning rate:  0.002 Regulation:  0.0001 Bath_size: 128
RMSE:  tensor(0.9124, grad_fn=<PowBackward0>) MAE:  tensor(0.7107, grad_fn=<L1LossBackward>)
Accuracy:  0.9105518496058217 Precision:  0.9327190404321233 Recall:  0.9649901380670611 F1 score:  0.9485801995395242


100%|██████████| 413/413 [00:08<00:00, 48.83it/s]
  1%|▏         | 6/413 [00:00<00:07, 51.04it/s]

Learning rate:  0.002 Regulation:  0.0001 Bath_size: 128
RMSE:  tensor(0.9210, grad_fn=<PowBackward0>) MAE:  tensor(0.7185, grad_fn=<L1LossBackward>)
Accuracy:  0.9075197089144936 Precision:  0.9338962605548854 Recall:  0.9543885601577909 F1 score:  0.9440312157054018


100%|██████████| 413/413 [00:08<00:00, 47.73it/s]


Learning rate:  0.002 Regulation:  0.0001 Bath_size: 128
RMSE:  tensor(0.9359, grad_fn=<PowBackward0>) MAE:  tensor(0.7313, grad_fn=<L1LossBackward>)
Accuracy:  0.9008489993935719 Precision:  0.9354154774524994 Recall:  0.9427186061801447 F1 score:  0.9390528426998486


In [12]:
# Save only the learned parameters (state_dict), wrapped in a dict under the
# 'model' key; loading therefore needs a NeuMF instance built with the same
# constructor arguments.
state = { 'model': model_neumf.state_dict()}   
torch.save(state, 'model_neumf.pkl')
                    