In [1]:
# %load main.py
import os
import random

import torch
import numpy as np

from time import time
from tqdm import tqdm
from copy import deepcopy
import logging
from prettytable import PrettyTable

from utils.parser import parse_args
from utils.data_loader import load_data
from utils.evaluate import test
from utils.helper import early_stopping

# Dataset sizes read as globals by get_feed_dict; both start at zero and are
# overwritten in the __main__ block with the values returned by load_data.
n_users = n_items = 0


def get_feed_dict(train_entity_pairs, train_pos_set, start, end, n_negs=1):
    """Assemble one training batch with freshly sampled negative items.

    Reads the module-level globals ``n_items``, ``K`` and ``device``.

    Args:
        train_entity_pairs: 2-D tensor whose rows are (user, positive item)
            pairs; column 0 is used as the user, column 1 as the item.
        train_pos_set: mapping from user id to the collection of items that
            must never be drawn as a negative for that user.
        start: first row (inclusive) of the batch slice.
        end: last row (exclusive) of the batch slice.
        n_negs: multiplier; each row receives ``n_negs * K`` negatives.

    Returns:
        dict with keys ``'users'``, ``'pos_items'`` and ``'neg_items'``.
        ``'neg_items'`` is a LongTensor moved to ``device`` holding one list
        of sampled item ids per row of the batch.
    """

    def sampling(user_item, train_set, n):
        # Rejection sampling: draw uniformly from [0, n_items) and retry
        # whenever the draw is one of the user's known positives.
        per_row = []
        for uid, _ in user_item.cpu().numpy():
            uid = int(uid)
            drawn = []
            while len(drawn) < n:
                candidate = random.choice(range(n_items))
                if candidate in train_set[uid]:
                    continue
                drawn.append(candidate)
            per_row.append(drawn)
        return per_row

    batch_pairs = train_entity_pairs[start:end]
    feed_dict = {
        'users': batch_pairs[:, 0],
        'pos_items': batch_pairs[:, 1],
    }
    negatives = sampling(batch_pairs, train_pos_set, n_negs * K)
    feed_dict['neg_items'] = torch.LongTensor(negatives).to(device)
    return feed_dict


if __name__ == '__main__':
    # Script entry point: seed RNGs, load data, build the model, then train
    # with periodic evaluation, early stopping and optional checkpointing.

    # fix the random seed
    seed = 2020
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # read args
    # NOTE: `device`, `K` and `n_items` assigned in this block are module-level
    # names; get_feed_dict reads them as globals.
    args = parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_id)
    device = torch.device("cuda:0") if args.cuda else torch.device("cpu")

    # build dataset
    train_cf, user_dict, n_params, norm_mat = load_data(args)
    train_cf_size = len(train_cf)
    # Keep only the first two columns (user, item) of every training record.
    train_cf = torch.LongTensor(np.array([[cf[0], cf[1]] for cf in train_cf], np.int32))

    n_users = n_params['n_users']
    n_items = n_params['n_items']
    n_negs = args.n_negs
    K = args.K

    # define model
    from modules.LightGCN import LightGCN
    if args.gnn == 'lightgcn':
        model = LightGCN(n_params, args, norm_mat).to(device)
    else:
        # Fail with a clear message instead of a NameError on `model` below.
        raise ValueError('unsupported gnn: %r' % (args.gnn,))

    # define optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    cur_best_pre_0 = 0
    stopping_step = 0
    should_stop = False

    print("start training ...")
    for epoch in range(args.epoch):
        # shuffle training data
        index = np.arange(len(train_cf))
        np.random.shuffle(index)
        train_cf_ = train_cf[index].to(device)

        # training
        model.train()
        loss, s = 0.0, 0
        train_s_t = time()
        # Only full batches are processed; a trailing partial batch is skipped.
        while s + args.batch_size <= len(train_cf):
            batch = get_feed_dict(train_cf_,
                                  user_dict['train_user_set'],
                                  s, s + args.batch_size,
                                  n_negs)

            batch_loss, _, _ = model(epoch, batch)

            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

            # Detach so the running total does not carry autograd history
            # from every batch of the epoch.
            loss = loss + batch_loss.detach()
            s += args.batch_size

        train_e_t = time()
        # float() works both for the accumulated tensor and for the initial
        # 0.0 when no full batch fits in the training set.
        epoch_loss = float(loss)

        if epoch % 5 == 0:
            # testing
            train_res = PrettyTable()
            train_res.field_names = ["Epoch", "training time(s)", "tesing time(s)", "Loss", "recall", "ndcg", "precision", "hit_ratio"]

            model.eval()
            test_s_t = time()
            test_ret = test(model, user_dict, n_params, mode='test')
            test_e_t = time()
            train_res.add_row(
                [epoch, train_e_t - train_s_t, test_e_t - test_s_t, epoch_loss, test_ret['recall'], test_ret['ndcg'],
                 test_ret['precision'], test_ret['hit_ratio']])

            if user_dict['valid_user_set'] is None:
                # No validation split: model selection falls back to test metrics.
                valid_ret = test_ret
            else:
                test_s_t = time()
                valid_ret = test(model, user_dict, n_params, mode='valid')
                test_e_t = time()
                train_res.add_row(
                    [epoch, train_e_t - train_s_t, test_e_t - test_s_t, epoch_loss, valid_ret['recall'], valid_ret['ndcg'],
                     valid_ret['precision'], valid_ret['hit_ratio']])
            print(train_res)

            # *********************************************************
            # early stopping when cur_best_pre_0 is decreasing for 10 successive steps.
            cur_best_pre_0, stopping_step, should_stop = early_stopping(valid_ret['recall'][0], cur_best_pre_0,
                                                                        stopping_step, expected_order='acc',
                                                                        flag_step=10)
            if should_stop:
                break

            # save weight: only when this evaluation produced the current best recall
            if valid_ret['recall'][0] == cur_best_pre_0 and args.save:
                if args.out_dir:
                    os.makedirs(args.out_dir, exist_ok=True)
                torch.save(model.state_dict(), os.path.join(args.out_dir, 'model_.ckpt'))
        else:
            # logging.info('training loss at epoch %d: %f' % (epoch, epoch_loss))
            print('using time %.4fs, training loss at epoch %d: %.4f' % (train_e_t - train_s_t, epoch, epoch_loss))

    if should_stop:
        print('early stopping at %d, recall@20:%.4f' % (epoch, cur_best_pre_0))
    else:
        # Training ran through all configured epochs without triggering early stopping.
        print('training finished after %d epochs, recall@20:%.4f' % (args.epoch, cur_best_pre_0))


reading train and test user-item set ...
building the adj mat ...
{'n_users': 640, 'n_items': 4165}
loading over ...
start training ...
+-------+---------------------+-------------------+-------------------+--------------+--------------+--------------+--------------+
| Epoch |   training time(s)  |   tesing time(s)  |        Loss       |    recall    |     ndcg     |  precision   |  hit_ratio   |
+-------+---------------------+-------------------+-------------------+--------------+--------------+--------------+--------------+
|   0   | 0.27814292907714844 | 2.192070722579956 | 4.153146266937256 | [0.01520858] | [0.01429468] | [0.01182609] | [0.10608696] |
+-------+---------------------+-------------------+-------------------+--------------+--------------+--------------+--------------+
using time 0.6289s, training loss at epoch 1: 4.1454
using time 0.4771s, training loss at epoch 2: 4.1320
using time 0.4620s, training loss at epoch 3: 4.1112
using time 0.2787s, training loss at epoch 4:

using time 0.4082s, training loss at epoch 46: 2.1675
using time 0.3796s, training loss at epoch 47: 2.1290
using time 0.5543s, training loss at epoch 48: 2.1020
using time 0.4627s, training loss at epoch 49: 2.0760
+-------+--------------------+-------------------+--------------------+--------------+--------------+--------------+--------------+
| Epoch |  training time(s)  |   tesing time(s)  |        Loss        |    recall    |     ndcg     |  precision   |  hit_ratio   |
+-------+--------------------+-------------------+--------------------+--------------+--------------+--------------+--------------+
|   50  | 0.5627434253692627 | 2.394721269607544 | 2.0608396530151367 | [0.09267205] | [0.08630028] | [0.04869565] | [0.32347826] |
+-------+--------------------+-------------------+--------------------+--------------+--------------+--------------+--------------+
using time 0.6810s, training loss at epoch 51: 2.0455
using time 0.4475s, training loss at epoch 52: 2.0246
using time 0.426

using time 0.3316s, training loss at epoch 96: 1.3037
using time 0.2865s, training loss at epoch 97: 1.2968
using time 0.4030s, training loss at epoch 98: 1.2595
using time 0.4865s, training loss at epoch 99: 1.2789
+-------+---------------------+-------------------+--------------------+--------------+--------------+--------------+--------------+
| Epoch |   training time(s)  |   tesing time(s)  |        Loss        |    recall    |     ndcg     |  precision   |  hit_ratio   |
+-------+---------------------+-------------------+--------------------+--------------+--------------+--------------+--------------+
|  100  | 0.43477439880371094 | 1.727527141571045 | 1.2521851062774658 | [0.10341318] | [0.09776345] | [0.05565217] | [0.34608696] |
+-------+---------------------+-------------------+--------------------+--------------+--------------+--------------+--------------+
using time 0.4404s, training loss at epoch 101: 1.2464
using time 0.3388s, training loss at epoch 102: 1.2166
using tim

using time 0.3088s, training loss at epoch 146: 0.8097
using time 0.2357s, training loss at epoch 147: 0.8077
using time 0.3840s, training loss at epoch 148: 0.8070
using time 0.5166s, training loss at epoch 149: 0.8014
+-------+--------------------+-------------------+--------------------+--------------+-------------+--------------+--------------+
| Epoch |  training time(s)  |   tesing time(s)  |        Loss        |    recall    |     ndcg    |  precision   |  hit_ratio   |
+-------+--------------------+-------------------+--------------------+--------------+-------------+--------------+--------------+
|  150  | 0.6812434196472168 | 2.729287624359131 | 0.7916152477264404 | [0.10708669] | [0.1020011] | [0.05634783] | [0.35304348] |
+-------+--------------------+-------------------+--------------------+--------------+-------------+--------------+--------------+
using time 0.6067s, training loss at epoch 151: 0.7845
using time 0.3184s, training loss at epoch 152: 0.7712
using time 0.24

using time 0.3256s, training loss at epoch 196: 0.5432
using time 0.3629s, training loss at epoch 197: 0.5505
using time 0.4313s, training loss at epoch 198: 0.5376
using time 0.5933s, training loss at epoch 199: 0.5325
+-------+---------------------+-------------------+--------------------+-------------+--------------+--------------+--------------+
| Epoch |   training time(s)  |   tesing time(s)  |        Loss        |    recall   |     ndcg     |  precision   |  hit_ratio   |
+-------+---------------------+-------------------+--------------------+-------------+--------------+--------------+--------------+
|  200  | 0.43787550926208496 | 2.094268560409546 | 0.5348901748657227 | [0.1058871] | [0.10219963] | [0.05634783] | [0.35304348] |
+-------+---------------------+-------------------+--------------------+-------------+--------------+--------------+--------------+
using time 0.3981s, training loss at epoch 201: 0.5552
using time 0.3833s, training loss at epoch 202: 0.5303
using time