In [1]:
# %load main.py
import os
import random

import torch
import numpy as np

from time import time
from tqdm import tqdm
from copy import deepcopy
import logging
from prettytable import PrettyTable

from utils.parser import parse_args
from utils.data_loader import load_data
from utils.evaluate import test
from utils.helper import early_stopping

# Dataset cardinalities. Both start at zero as placeholders and are
# overwritten from `n_params` by the script body below once the data has been
# loaded; the negative sampler in `get_feed_dict` reads `n_items`.
n_users = n_items = 0


def get_feed_dict(train_entity_pairs, train_pos_set, start, end, n_negs=1):
    """Assemble one training batch with freshly sampled negative items.

    Rows ``start:end`` of ``train_entity_pairs`` are taken as the batch.
    Column 0 is used as the user id and column 1 as the positive item id.
    For every row, ``n_negs * K`` negative item ids are drawn uniformly from
    ``range(n_items)`` by rejection sampling against that user's entry in
    ``train_pos_set``.

    This function reads the module-level names ``n_items``, ``K`` and
    ``device``, which the script body assigns before training starts.

    Args:
        train_entity_pairs: 2-D tensor of (user, positive item) pairs.
        train_pos_set: mapping from user id to a container of the items the
            user interacted with in training; it must support ``in`` and
            ``len``.
        start: index of the first row of the batch (inclusive).
        end: index one past the last row of the batch (exclusive).
        n_negs: multiplier for the number of negatives; ``n_negs * K`` items
            are sampled per row.

    Returns:
        dict with keys ``'users'`` and ``'pos_items'`` (column slices of the
        batch rows) and ``'neg_items'`` (a ``torch.LongTensor`` with one row
        of sampled negatives per batch row, moved to ``device``).

    Raises:
        ValueError: if negatives are requested for a user whose training set
            already contains every id in ``range(n_items)``. Without this
            check the rejection loop would never terminate.
    """

    def sampling(user_item, train_set, n):
        # Draw `n` negatives for each row of `user_item`.
        neg_items = []
        # Only the user column is needed, so only that column is moved to CPU.
        for user in user_item[:, 0].cpu().numpy():
            user = int(user)
            positives = train_set[user]
            # The length test is a cheap pre-filter; the exhaustive scan only
            # runs for users that could possibly cover the whole catalogue.
            if (n > 0 and len(positives) >= n_items
                    and all(item in positives for item in range(n_items))):
                raise ValueError(
                    'cannot sample a negative item for user %d: every one of '
                    'the %d items is in its training set' % (user, n_items))
            negitems = []
            for _ in range(n):  # sample n times
                while True:
                    negitem = random.randrange(n_items)
                    if negitem not in positives:
                        break
                negitems.append(negitem)
            neg_items.append(negitems)
        return neg_items

    entity_pairs = train_entity_pairs[start:end]
    feed_dict = {
        'users': entity_pairs[:, 0],
        'pos_items': entity_pairs[:, 1],
        'neg_items': torch.LongTensor(
            sampling(entity_pairs, train_pos_set, n_negs * K)).to(device),
    }
    return feed_dict


if __name__ == '__main__':
    # Script entry point: seed the RNGs, load the data, build the model and
    # run the training loop with periodic evaluation and early stopping.
    # Names assigned here are module-level globals; `get_feed_dict` reads
    # `n_items`, `K` and `device` from them.

    # fix the random seed
    seed = 2020
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # read args
    args = parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_id)
    device = torch.device("cuda:0") if args.cuda else torch.device("cpu")

    # build dataset
    train_cf, user_dict, n_params, norm_mat = load_data(args)
    train_cf_size = len(train_cf)
    train_cf = torch.LongTensor(np.array([[cf[0], cf[1]] for cf in train_cf], np.int32))

    n_users = n_params['n_users']
    n_items = n_params['n_items']
    n_negs = args.n_negs
    K = args.K

    # define model
    from modules.LightGCN import LightGCN
    if args.gnn == 'lightgcn':
        model = LightGCN(n_params, args, norm_mat).to(device)
    else:
        # Fail here with a clear message instead of a NameError on `model`
        # a few lines further down.
        raise ValueError("unsupported gnn %r: only 'lightgcn' is available" % (args.gnn,))

    # define optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    cur_best_pre_0 = 0
    stopping_step = 0
    should_stop = False

    print("start training ...")
    for epoch in range(args.epoch):
        # shuffle training data
        index = np.arange(len(train_cf))
        np.random.shuffle(index)
        train_cf_ = train_cf[index].to(device)

        # training
        model.train()
        # `loss` is a plain Python float: accumulating the tensors themselves
        # would keep every batch's autograd graph alive until the epoch ends,
        # and would leave an int (no `.item()`) when no batch is processed.
        loss, s = 0.0, 0
        train_s_t = time()
        # Only full batches are processed; a trailing partial batch is skipped.
        while s + args.batch_size <= len(train_cf):
            batch = get_feed_dict(train_cf_,
                                  user_dict['train_user_set'],
                                  s, s + args.batch_size,
                                  n_negs)

            batch_loss, _, _ = model(epoch, batch)

            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

            loss += batch_loss.item()
            s += args.batch_size

        train_e_t = time()

        if epoch % 5 == 0:
            # testing (every 5th epoch)

            train_res = PrettyTable()
            train_res.field_names = ["Epoch", "training time(s)", "tesing time(s)", "Loss", "recall", "ndcg", "precision", "hit_ratio"]

            model.eval()
            test_s_t = time()
            test_ret = test(model, user_dict, n_params, mode='test')
            test_e_t = time()
            train_res.add_row(
                [epoch, train_e_t - train_s_t, test_e_t - test_s_t, loss, test_ret['recall'], test_ret['ndcg'],
                 test_ret['precision'], test_ret['hit_ratio']])

            if user_dict['valid_user_set'] is None:
                # No validation split: model selection falls back to the test metrics.
                valid_ret = test_ret
            else:
                test_s_t = time()
                valid_ret = test(model, user_dict, n_params, mode='valid')
                test_e_t = time()
                train_res.add_row(
                    [epoch, train_e_t - train_s_t, test_e_t - test_s_t, loss, valid_ret['recall'], valid_ret['ndcg'],
                     valid_ret['precision'], valid_ret['hit_ratio']])
            print(train_res)

            # *********************************************************
            # early stopping driven by the first recall value; the helper is
            # called with flag_step=10 (presumably 10 successive evaluations
            # without improvement -- confirm in utils.helper).
            cur_best_pre_0, stopping_step, should_stop = early_stopping(valid_ret['recall'][0], cur_best_pre_0,
                                                                        stopping_step, expected_order='acc',
                                                                        flag_step=10)
            if should_stop:
                break

            # save weight when this evaluation matches the best recall so far
            if valid_ret['recall'][0] == cur_best_pre_0 and args.save:
                torch.save(model.state_dict(), args.out_dir + 'model_' + '.ckpt')
        else:
            # logging.info('training loss at epoch %d: %f' % (epoch, loss))
            print('using time %.4fs, training loss at epoch %d: %.4f' % (train_e_t - train_s_t, epoch, loss))

    print('early stopping at %d, recall@20:%.4f' % (epoch, cur_best_pre_0))


reading train and test user-item set ...
building the adj mat ...
{'n_users': 4626, 'n_items': 10083}
loading over ...
start training ...
+-------+---------------------+--------------------+--------------------+--------------+--------------+--------------+--------------+
| Epoch |   training time(s)  |   tesing time(s)   |        Loss        |    recall    |     ndcg     |  precision   |  hit_ratio   |
+-------+---------------------+--------------------+--------------------+--------------+--------------+--------------+--------------+
|   0   | 0.47034788131713867 | 1.8267369270324707 | 14.540289878845215 | [0.02154832] | [0.01160856] | [0.00223974] | [0.03528529] |
+-------+---------------------+--------------------+--------------------+--------------+--------------+--------------+--------------+
using time 0.4846s, training loss at epoch 1: 14.4346
using time 0.4728s, training loss at epoch 2: 14.0251
using time 0.5142s, training loss at epoch 3: 13.1932
using time 0.5081s, training l

using time 0.4893s, training loss at epoch 46: 2.6081
using time 0.4725s, training loss at epoch 47: 2.5378
using time 0.4928s, training loss at epoch 48: 2.5520
using time 0.4796s, training loss at epoch 49: 2.4378
+-------+---------------------+--------------------+-------------------+--------------+--------------+--------------+--------------+
| Epoch |   training time(s)  |   tesing time(s)   |        Loss       |    recall    |     ndcg     |  precision   |  hit_ratio   |
+-------+---------------------+--------------------+-------------------+--------------+--------------+--------------+--------------+
|   50  | 0.49297547340393066 | 1.8141717910766602 | 2.435375928878784 | [0.04021117] | [0.02096495] | [0.00449199] | [0.07657658] |
+-------+---------------------+--------------------+-------------------+--------------+--------------+--------------+--------------+
using time 0.4739s, training loss at epoch 51: 2.3748
using time 0.4533s, training loss at epoch 52: 2.2830
using time 

using time 0.4851s, training loss at epoch 96: 1.2162
using time 0.4561s, training loss at epoch 97: 1.2394
using time 0.4238s, training loss at epoch 98: 1.2155
using time 0.5402s, training loss at epoch 99: 1.1800
+-------+--------------------+--------------------+-------------------+--------------+--------------+--------------+--------------+
| Epoch |  training time(s)  |   tesing time(s)   |        Loss       |    recall    |     ndcg     |  precision   |  hit_ratio   |
+-------+--------------------+--------------------+-------------------+--------------+--------------+--------------+--------------+
|  100  | 0.4588742256164551 | 1.9226725101470947 | 1.210059642791748 | [0.04283721] | [0.02219167] | [0.00471722] | [0.07782783] |
+-------+--------------------+--------------------+-------------------+--------------+--------------+--------------+--------------+
using time 0.4737s, training loss at epoch 101: 1.1531
using time 0.4682s, training loss at epoch 102: 1.1935
using time 0.5

using time 0.6212s, training loss at epoch 146: 0.8034
using time 0.5009s, training loss at epoch 147: 0.7957
using time 0.6418s, training loss at epoch 148: 0.8082
using time 0.5347s, training loss at epoch 149: 0.7817
+-------+--------------------+-------------------+--------------------+--------------+--------------+--------------+--------------+
| Epoch |  training time(s)  |   tesing time(s)  |        Loss        |    recall    |     ndcg     |  precision   |  hit_ratio   |
+-------+--------------------+-------------------+--------------------+--------------+--------------+--------------+--------------+
|  150  | 0.5315370559692383 | 2.535181999206543 | 0.7870030403137207 | [0.04364122] | [0.02269023] | [0.00469219] | [0.07507508] |
+-------+--------------------+-------------------+--------------------+--------------+--------------+--------------+--------------+
using time 0.4837s, training loss at epoch 151: 0.8038
using time 0.5778s, training loss at epoch 152: 0.7890
using time