In [21]:
from neu import NeuMF
import pickle
import pandas as pd
import torch
from torch import nn
from sklearn.preprocessing import LabelEncoder
from concurrent.futures import ProcessPoolExecutor
import numpy as np
from operator import itemgetter
THREADS = 16
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
import time
import torch.nn.functional as F
# Mean-squared-error criterion shared by the training loop and by RMSE(),
# which takes the square root of its value.
loss_func = torch.nn.MSELoss()
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Architecture settings forwarded to the NeuMF constructor through train().
# Presumably the MLP / GMF embedding sizes and the MLP layer widths -- TODO confirm against neu.NeuMF.
mlp_factors = 8
gmf_factors = 32 
layers = [64,32,16,8]

In [22]:
def binary_predictions(true_ratings, predicted_ratings):
    """Binarise ratings at 3 and score the predictions as a binary classifier.

    A rating is mapped to 1 when it is ``>= 3`` and to 0 otherwise; the same
    cut-off is applied to the true and to the predicted ratings.

    Args:
        true_ratings: indexable sequence of ground-truth ratings.
        predicted_ratings: indexable sequence of predicted ratings; must have
            the same length as ``true_ratings``.

    Returns:
        Tuple ``(precision, recall, f1)`` computed by the sklearn
        ``precision_score``, ``recall_score`` and ``f1_score`` functions on
        the binarised labels.

    Raises:
        AssertionError: if the two sequences differ in length.
    """
    assert len(true_ratings) == len(predicted_ratings)

    def as_label(value):
        # 1 = rating at or above the cut-off, 0 = below it.
        return 1 if value >= 3 else 0

    positions = range(len(true_ratings))
    y_true = [as_label(true_ratings[k]) for k in positions]
    y_pred = [as_label(predicted_ratings[k]) for k in positions]

    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    return precision, recall, f1

In [23]:
def arg_accuracy_int(ratings, predictions):
    """Fraction of predictions that land within one point of the true rating.

    Each prediction is rounded to the nearest integer (ties go to the even
    integer, as with Python's ``round``) and each true rating is truncated
    toward zero. A prediction is a hit when the two integers differ by at
    most 1.

    Args:
        ratings: torch tensor of ground-truth ratings.
        predictions: torch tensor of predicted ratings with the same number
            of elements. It may live on any device and may require grad; it
            is detached and moved to the CPU here.

    Returns:
        float: hits / number of ratings, or ``0.0`` when there are no ratings
        (instead of raising ``ZeroDivisionError``).

    Raises:
        ValueError: if the two tensors do not hold the same number of elements.
    """
    # Flatten so that (N,) and (N, 1) inputs are compared element by element
    # rather than being broadcast against each other.
    true_np = ratings.cpu().detach().numpy().reshape(-1)
    pred_np = predictions.cpu().detach().numpy().reshape(-1)
    if true_np.size != pred_np.size:
        raise ValueError(
            "ratings and predictions must have the same number of elements "
            "(got %d and %d)" % (true_np.size, pred_np.size))

    total_nr = true_np.size
    if total_nr == 0:
        return 0.0

    # np.rint mirrors round() (half-to-even) and np.trunc mirrors int().
    # A NaN prediction yields a NaN gap, which compares False and so counts as a miss.
    gap = np.abs(np.rint(pred_np) - np.trunc(true_np))
    total_pred = int(np.count_nonzero(gap <= 1))

    return float(total_pred) / total_nr


def round_of_rating(number):
    """Snap ``number`` to the nearest multiple of 0.5.

    The value is doubled, rounded with the built-in ``round`` and halved
    again, so exact quarter values follow ``round``'s tie-breaking
    (e.g. 2.25 -> 2.0, 2.75 -> 3.0).
    """
    doubled = number * 2
    return round(doubled) / 2

In [24]:
def RMSE(data, model):
    """Evaluate ``model`` on every row of ``data`` and return the metrics.

    Columns are read by position: column 0 supplies the user indices,
    column 1 the item indices and column 5 the ratings.
    NOTE(review): the column names behind those positions are not visible
    here -- confirm they match the CSV layout.

    The forward pass runs in eval mode with autograd disabled, so no graph is
    built for the evaluation set and the returned tensors carry no
    ``grad_fn``. The model's previous train/eval mode is restored afterwards.

    Args:
        data: pandas DataFrame with the positional layout described above.
        model: module called as ``model(users, items)`` that returns one
            predicted rating per row.

    Returns:
        Tuple ``(rmse, mae, precision, recall, f1, accuracy)``. ``rmse`` and
        ``mae`` are 0-dim tensors; precision/recall/f1 come from
        ``binary_predictions`` and accuracy from ``arg_accuracy_int``.
    """
    users = torch.LongTensor(data.iloc[:, 0].values).to(DEVICE)
    items = torch.LongTensor(data.iloc[:, 1].values).to(DEVICE)
    rating = torch.FloatTensor(data.iloc[:, 5].values).to(DEVICE)

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            prediction = model(users, items)
            mse = loss_func(prediction, rating)
            mae = torch.nn.L1Loss()(prediction, rating)
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)

    # The metric helpers walk the tensors element by element; do that on the
    # CPU to avoid one device synchronisation per element.
    rating_cpu = rating.detach().cpu()
    prediction_cpu = prediction.detach().cpu()
    p, r, f = binary_predictions(rating_cpu, prediction_cpu)
    accuracy = arg_accuracy_int(rating_cpu, prediction_cpu)
    return mse ** 0.5, mae, p, r, f, accuracy

In [25]:
import json
# Load the train / test splits from CSV files in the current working directory.
trainset = pd.read_csv('train.csv')
testset = pd.read_csv('test.csv')
# Disabled: affine rescaling of user_rating, r -> (r + 1) * 2 + 1. Kept for reference only.
#trainset['user_rating'] = (trainset['user_rating'] + 1) * 2 + 1
#testset['user_rating'] = (testset['user_rating'] + 1) * 2 + 1
def traite_train_test(df):
    """Decode the JSON-encoded columns of ``df`` in place.

    The ``actors``, ``director`` and ``genre`` columns hold JSON text; each
    cell is replaced by the Python object ``json.loads`` produces for it.

    Args:
        df: DataFrame containing the three columns above. It is modified
            in place.

    Returns:
        The same DataFrame object that was passed in.
    """
    for column in ('actors', 'director', 'genre'):
        df[column] = df[column].apply(json.loads)
    return df
# Decode the JSON columns of both splits (the helper mutates and returns its argument).
trainset, testset = traite_train_test(trainset), traite_train_test(testset)

# Working copy of the training split with a fresh 0..n-1 index.
hehe_test = trainset.copy().reset_index(drop=True)

# Working copy of the test split with explicit id / rating dtypes and a fresh 0..n-1 index.
df_empty = (
    testset.copy()
    .astype({'user_id': 'int', 'user_rating': 'float', 'movie': 'int'})
    .reset_index(drop=True)
)

In [26]:
# Stack the train and test working copies row-wise. No ignore_index is passed,
# so the result keeps each frame's own index labels (labels repeat across the two parts).
hehe = pd.concat([hehe_test,df_empty])

In [27]:
def train(lr, mlp_factors,gmf_factors, layers, reg, batch_size, num_epochs, train, test):
    """Train a NeuMF model and report evaluation metrics after every epoch.

    Columns of ``train`` are read by position: column 0 supplies the user
    indices, column 1 the item indices and column 5 the ratings. Rows are
    visited in their stored order, in consecutive mini-batches.
    NOTE(review): batches are not shuffled between epochs -- confirm this is intended.

    Args:
        lr: initial learning rate for Adam.
        mlp_factors, gmf_factors, layers: forwarded to the ``NeuMF`` constructor.
        reg: ``weight_decay`` passed to Adam.
        batch_size: number of rows per mini-batch.
        num_epochs: number of passes over ``train``.
        train: training DataFrame (positional layout described above).
        test: evaluation DataFrame handed to ``RMSE`` after each epoch.

    Returns:
        The trained model. It is left in eval mode when at least one epoch ran.

    Relies on the module-level ``n_users``, ``n_items``, ``DEVICE``,
    ``loss_func`` and ``RMSE``.
    """
    # Imported here so the function does not depend on a later notebook cell
    # having been executed first.
    from tqdm import tqdm

    model = NeuMF(n_users, n_items, mlp_factors, gmf_factors, layers).to(DEVICE)
    optimizer = torch.optim.Adam(params=model.parameters(), lr=lr, weight_decay=reg)
    # Halve the learning rate when the evaluation RMSE has not improved by at
    # least 0.005 (absolute) for more than 10 consecutive epochs.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.5, patience=10, threshold_mode='abs', threshold=0.005)

    # Convert the three columns once instead of re-slicing the DataFrame for
    # every batch. Positional slicing also removes the dependence on the
    # frame having a 0..n-1 index.
    all_users = torch.LongTensor(train.iloc[:, 0].values)
    all_items = torch.LongTensor(train.iloc[:, 1].values)
    all_ratings = torch.FloatTensor(train.iloc[:, 5].values)
    num_example = len(train)

    for epoch in range(num_epochs):
        model.train()
        for start in tqdm(range(0, num_example, batch_size)):
            stop = min(start + batch_size, num_example)
            users = all_users[start:stop].to(DEVICE)
            items = all_items[start:stop].to(DEVICE)
            rating = all_ratings[start:stop].to(DEVICE)

            optimizer.zero_grad()
            prediction = model(users, items)
            err = loss_func(prediction, rating)
            err.backward()
            optimizer.step()

        # Evaluate on the frame the caller passed in (not a module-level
        # global), in eval mode and without building an autograd graph.
        model.eval()
        with torch.no_grad():
            rmse, mae, p, r, f, accuracy = RMSE(test, model)
        scheduler.step(rmse)
        print("Learning rate: ", lr, "Regulation: ", reg,"Bath_size:",batch_size)
        print("RMSE: ", rmse, "MAE: ", mae)
        print("Accuracy: ", accuracy, "Precision: ", p, "Recall: ", r, "F1 score: ", f)

    return model

In [28]:
# Number of distinct user and movie ids across train + test; passed to NeuMF by train().
# NOTE(review): presumably ids are already encoded as 0..n-1 -- confirm before relying on these as table sizes.
n_users = hehe['user_id'].nunique()
n_items = hehe['movie'].nunique()

In [29]:
from tqdm import tqdm
# Optimiser / schedule hyper-parameters for this run.
lr = 0.002
reg = 1e-4
batch_size = 128
num_epochs = 15

# Fit NeuMF on the training copy and evaluate on the test copy after each epoch.
model_neumf = train(
    lr=lr,
    mlp_factors=mlp_factors,
    gmf_factors=gmf_factors,
    layers=layers,
    reg=reg,
    batch_size=batch_size,
    num_epochs=num_epochs,
    train=hehe_test,
    test=df_empty,
)

100%|██████████| 542/542 [00:11<00:00, 45.18it/s]
  1%|          | 5/542 [00:00<00:10, 49.01it/s]

Learning rate:  0.002 Regulation:  0.0001 Bath_size: 128
RMSE:  tensor(1.0718, grad_fn=<PowBackward0>) MAE:  tensor(0.8727, grad_fn=<L1LossBackward>)
Accuracy:  0.8259789765507681 Precision:  0.8426065162907268 Recall:  0.9370122630992196 F1 score:  0.8873053576141462


100%|██████████| 542/542 [00:11<00:00, 47.28it/s]
  1%|          | 5/542 [00:00<00:12, 43.59it/s]

Learning rate:  0.002 Regulation:  0.0001 Bath_size: 128
RMSE:  tensor(1.0141, grad_fn=<PowBackward0>) MAE:  tensor(0.8103, grad_fn=<L1LossBackward>)
Accuracy:  0.8635208501790459 Precision:  0.8602797019997386 Recall:  0.9172240802675585 F1 score:  0.8878397518041411


100%|██████████| 542/542 [00:11<00:00, 48.16it/s]
  1%|          | 5/542 [00:00<00:11, 47.33it/s]

Learning rate:  0.002 Regulation:  0.0001 Bath_size: 128
RMSE:  tensor(0.9845, grad_fn=<PowBackward0>) MAE:  tensor(0.7804, grad_fn=<L1LossBackward>)
Accuracy:  0.8758230333833892 Precision:  0.8758469646157006 Recall:  0.8916527313266444 F1 score:  0.8836791768808478


100%|██████████| 542/542 [00:10<00:00, 49.59it/s]
  1%|          | 6/542 [00:00<00:10, 52.12it/s]

Learning rate:  0.002 Regulation:  0.0001 Bath_size: 128
RMSE:  tensor(0.9703, grad_fn=<PowBackward0>) MAE:  tensor(0.7674, grad_fn=<L1LossBackward>)
Accuracy:  0.8813099226059836 Precision:  0.884620776671807 Recall:  0.8793199554069119 F1 score:  0.881962401285904


100%|██████████| 542/542 [00:10<00:00, 50.81it/s]
  1%|          | 5/542 [00:00<00:10, 49.94it/s]

Learning rate:  0.002 Regulation:  0.0001 Bath_size: 128
RMSE:  tensor(0.9633, grad_fn=<PowBackward0>) MAE:  tensor(0.7608, grad_fn=<L1LossBackward>)
Accuracy:  0.8854684070694235 Precision:  0.8880022597274204 Recall:  0.8761845039018952 F1 score:  0.882053800021043


100%|██████████| 542/542 [00:11<00:00, 48.62it/s]
  1%|          | 6/542 [00:00<00:10, 52.36it/s]

Learning rate:  0.002 Regulation:  0.0001 Bath_size: 128
RMSE:  tensor(0.9599, grad_fn=<PowBackward0>) MAE:  tensor(0.7573, grad_fn=<L1LossBackward>)
Accuracy:  0.8866235416426014 Precision:  0.8900694149312934 Recall:  0.8755574136008919 F1 score:  0.882753775904461


100%|██████████| 542/542 [00:10<00:00, 51.76it/s]
  1%|          | 6/542 [00:00<00:10, 51.91it/s]

Learning rate:  0.002 Regulation:  0.0001 Bath_size: 128
RMSE:  tensor(0.9580, grad_fn=<PowBackward0>) MAE:  tensor(0.7551, grad_fn=<L1LossBackward>)
Accuracy:  0.8875476493011436 Precision:  0.8913536498936924 Recall:  0.8763238573021181 F1 score:  0.8837748577050101


100%|██████████| 542/542 [00:10<00:00, 50.35it/s]
  1%|          | 6/542 [00:00<00:10, 51.54it/s]

Learning rate:  0.002 Regulation:  0.0001 Bath_size: 128
RMSE:  tensor(0.9547, grad_fn=<PowBackward0>) MAE:  tensor(0.7518, grad_fn=<L1LossBackward>)
Accuracy:  0.8892225944322514 Precision:  0.8930067447639333 Recall:  0.8763935340022296 F1 score:  0.8846221472025881


100%|██████████| 542/542 [00:10<00:00, 51.05it/s]
  1%|          | 6/542 [00:00<00:10, 52.90it/s]

Learning rate:  0.002 Regulation:  0.0001 Bath_size: 128
RMSE:  tensor(0.9436, grad_fn=<PowBackward0>) MAE:  tensor(0.7414, grad_fn=<L1LossBackward>)
Accuracy:  0.8929767817950791 Precision:  0.8954276339253869 Recall:  0.8746516164994426 F1 score:  0.8849176976490077


100%|██████████| 542/542 [00:12<00:00, 41.88it/s]
  1%|          | 5/542 [00:00<00:11, 45.79it/s]

Learning rate:  0.002 Regulation:  0.0001 Bath_size: 128
RMSE:  tensor(0.9392, grad_fn=<PowBackward0>) MAE:  tensor(0.7373, grad_fn=<L1LossBackward>)
Accuracy:  0.893034538523738 Precision:  0.8982212021230813 Recall:  0.8725613154960981 F1 score:  0.8852053438891638


100%|██████████| 542/542 [00:11<00:00, 47.78it/s]
  1%|          | 5/542 [00:00<00:10, 49.38it/s]

Learning rate:  0.002 Regulation:  0.0001 Bath_size: 128
RMSE:  tensor(0.9376, grad_fn=<PowBackward0>) MAE:  tensor(0.7361, grad_fn=<L1LossBackward>)
Accuracy:  0.8948249971121636 Precision:  0.9011695062084898 Recall:  0.8697742474916388 F1 score:  0.8851935895617644


100%|██████████| 542/542 [00:12<00:00, 44.83it/s]
  1%|          | 4/542 [00:00<00:15, 35.86it/s]

Learning rate:  0.002 Regulation:  0.0001 Bath_size: 128
RMSE:  tensor(0.9394, grad_fn=<PowBackward0>) MAE:  tensor(0.7373, grad_fn=<L1LossBackward>)
Accuracy:  0.8937853759963036 Precision:  0.9018031718444492 Recall:  0.8676839464882943 F1 score:  0.8844146159582401


100%|██████████| 542/542 [00:13<00:00, 41.41it/s]
  1%|          | 5/542 [00:00<00:11, 46.62it/s]

Learning rate:  0.002 Regulation:  0.0001 Bath_size: 128
RMSE:  tensor(0.9422, grad_fn=<PowBackward0>) MAE:  tensor(0.7389, grad_fn=<L1LossBackward>)
Accuracy:  0.892514727965808 Precision:  0.9024832994481556 Recall:  0.8660117056856187 F1 score:  0.8838714265396103


100%|██████████| 542/542 [00:11<00:00, 55.74it/s]
  1%|          | 6/542 [00:00<00:09, 56.96it/s]

Learning rate:  0.002 Regulation:  0.0001 Bath_size: 128
RMSE:  tensor(0.9468, grad_fn=<PowBackward0>) MAE:  tensor(0.7417, grad_fn=<L1LossBackward>)
Accuracy:  0.891475106849948 Precision:  0.9019935972060535 Recall:  0.8637820512820513 F1 score:  0.8824743735763098


100%|██████████| 542/542 [00:11<00:00, 48.37it/s]


Learning rate:  0.002 Regulation:  0.0001 Bath_size: 128
RMSE:  tensor(0.9521, grad_fn=<PowBackward0>) MAE:  tensor(0.7451, grad_fn=<L1LossBackward>)
Accuracy:  0.8898001617188402 Precision:  0.9016058394160584 Recall:  0.8606465997770345 F1 score:  0.8806502210181093


In [30]:
# Persist only the learned weights (the state_dict), stored under the 'model' key.
# Loading them back needs a NeuMF instance to load the state_dict into.
state = { 'model': model_neumf.state_dict()}   
torch.save(state, 'model_neumf.pkl')
                    