In [1]:
# %load main.py
import os
import random

import torch
import numpy as np

from time import time
from tqdm import tqdm
from copy import deepcopy
import logging
from prettytable import PrettyTable

from utils.parser import parse_args
from utils.data_loader import load_data
from utils.evaluate import test
from utils.helper import early_stopping

# Dataset sizes. Both stay 0 until the __main__ section overwrites them from
# n_params; get_feed_dict() draws candidate item ids from range(n_items).
n_users = n_items = 0


def get_feed_dict(train_entity_pairs, train_pos_set, start, end, n_negs=1):
    """Build one training batch with randomly sampled negative items.

    Rows ``start:end`` of ``train_entity_pairs`` form the batch. For every row,
    ``n_negs * K`` item ids are drawn uniformly from ``range(n_items)`` and
    re-drawn until they are absent from that user's entry in
    ``train_pos_set``.

    This function reads three module-level names that the ``__main__`` section
    assigns before training starts: ``n_items`` (size of the item id range),
    ``K`` (multiplier applied to ``n_negs``) and ``device`` (where the sampled
    negatives are moved to).

    Args:
        train_entity_pairs: 2-D tensor; column 0 is used as user ids and
            column 1 as positive item ids.
        train_pos_set: container indexed by int user id whose values are the
            collections of items to exclude for that user.
        start: first row of the batch (inclusive).
        end: last row of the batch (exclusive).
        n_negs: number of negatives per row before the ``K`` multiplier.

    Returns:
        dict with keys ``'users'`` and ``'pos_items'`` (columns of the sliced
        tensor) and ``'neg_items'`` (LongTensor built from one list of
        ``n_negs * K`` sampled ids per row, placed on ``device``).

    Raises:
        ValueError: if negatives are requested for a user whose excluded items
            cover all of ``range(n_items)``; rejection sampling could never
            finish in that case.
    """

    def sampling(user_item, train_set, n):
        # Built once; every draw below picks uniformly from this range.
        candidates = range(n_items)
        neg_items = []
        for user in user_item[:, 0].tolist():
            positives = train_set[user]
            # Covering every id needs at least n_items entries, so the cheap
            # length test screens out almost all users before the full check.
            if (n > 0 and len(positives) >= n_items
                    and set(candidates).issubset(positives)):
                raise ValueError(
                    'no negative item available for user %d: all %d items '
                    'are in its positive set' % (user, n_items))
            user_negs = []
            for _ in range(n):  # sample n times
                # Rejection sampling: redraw until the item is not a positive.
                while True:
                    negitem = random.choice(candidates)
                    if negitem not in positives:
                        break
                user_negs.append(negitem)
            neg_items.append(user_negs)
        return neg_items

    entity_pairs = train_entity_pairs[start:end]
    sampled = sampling(entity_pairs, train_pos_set, n_negs * K)
    return {
        'users': entity_pairs[:, 0],
        'pos_items': entity_pairs[:, 1],
        'neg_items': torch.LongTensor(sampled).to(device),
    }


if __name__ == '__main__':
    # Script entry point: seed the RNGs, load the data, build the model, then
    # train with periodic evaluation, early stopping and optional checkpoints.
    # Everything assigned here is a module global; get_feed_dict() reads
    # n_items, K and device from this scope.

    # fix the random seed
    seed = 2020
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # read args
    args = parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_id)
    device = torch.device("cuda:0") if args.cuda else torch.device("cpu")

    # build dataset
    train_cf, user_dict, n_params, norm_mat = load_data(args)
    # Keep only the first two fields of every training record as a 2-column
    # LongTensor (used as user id / item id by get_feed_dict).
    train_cf = torch.LongTensor(np.array([[cf[0], cf[1]] for cf in train_cf], np.int32))

    n_users = n_params['n_users']
    n_items = n_params['n_items']
    n_negs = args.n_negs
    K = args.K

    # define model
    from modules.LightGCN import LightGCN
    if args.gnn == 'lightgcn':
        model = LightGCN(n_params, args, norm_mat).to(device)
    else:
        # Fail here with a clear message instead of a NameError on `model`.
        raise ValueError("unsupported gnn %r: this script only builds 'lightgcn'" % (args.gnn,))

    # define optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    cur_best_pre_0 = 0
    stopping_step = 0
    should_stop = False

    print("start training ...")
    for epoch in range(args.epoch):
        # shuffle training data
        index = np.arange(len(train_cf))
        np.random.shuffle(index)
        train_cf_ = train_cf[index].to(device)

        # training
        model.train()
        loss, s = 0, 0
        train_s_t = time()
        # Only full batches are processed: trailing rows that do not fill a
        # whole batch are skipped for this epoch.
        while s + args.batch_size <= len(train_cf):
            batch = get_feed_dict(train_cf_,
                                  user_dict['train_user_set'],
                                  s, s + args.batch_size,
                                  n_negs)

            batch_loss, _, _ = model(epoch, batch)

            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

            # Detach so the running total does not hold on to autograd history.
            loss += batch_loss.detach()
            s += args.batch_size

        train_e_t = time()
        train_time = train_e_t - train_s_t
        # float() handles both the tensor total and the initial int 0 that is
        # left untouched when no full batch fits into the training set.
        epoch_loss = float(loss)

        if epoch % 5 == 0:
            # testing
            train_res = PrettyTable()
            train_res.field_names = ["Epoch", "training time(s)", "tesing time(s)", "Loss", "recall", "ndcg", "precision", "hit_ratio"]

            model.eval()
            test_s_t = time()
            test_ret = test(model, user_dict, n_params, mode='test')
            test_e_t = time()
            train_res.add_row(
                [epoch, train_time, test_e_t - test_s_t, epoch_loss, test_ret['recall'], test_ret['ndcg'],
                 test_ret['precision'], test_ret['hit_ratio']])

            if user_dict['valid_user_set'] is None:
                # No validation split: model selection falls back to the test metrics.
                valid_ret = test_ret
            else:
                test_s_t = time()
                valid_ret = test(model, user_dict, n_params, mode='valid')
                test_e_t = time()
                train_res.add_row(
                    [epoch, train_time, test_e_t - test_s_t, epoch_loss, valid_ret['recall'], valid_ret['ndcg'],
                     valid_ret['precision'], valid_ret['hit_ratio']])
            print(train_res)

            # *********************************************************
            # early stopping when cur_best_pre_0 is decreasing for 10 successive steps.
            cur_best_pre_0, stopping_step, should_stop = early_stopping(valid_ret['recall'][0], cur_best_pre_0,
                                                                        stopping_step, expected_order='acc',
                                                                        flag_step=10)
            if should_stop:
                break

            # save weight: only when this evaluation produced the current best recall
            if valid_ret['recall'][0] == cur_best_pre_0 and args.save:
                # os.path.join also works when out_dir has no trailing separator.
                torch.save(model.state_dict(), os.path.join(args.out_dir, 'model_' + '.ckpt'))
        else:
            # logging.info('training loss at epoch %d: %f' % (epoch, loss.item()))
            print('using time %.4fs, training loss at epoch %d: %.4f' % (train_time, epoch, epoch_loss))

    # NOTE(review): this line is printed whether the loop ended by early
    # stopping or by running all epochs; the "@20" cutoff is hard-coded in the
    # message — confirm it matches the evaluation setting.
    print('early stopping at %d, recall@20:%.4f' % (epoch, cur_best_pre_0))


reading train and test user-item set ...
building the adj mat ...
{'n_users': 640, 'n_items': 4165}
loading over ...
start training ...
+-------+---------------------+-------------------+-------------------+--------------+--------------+--------------+-------------+
| Epoch |   training time(s)  |   tesing time(s)  |        Loss       |    recall    |     ndcg     |  precision   |  hit_ratio  |
+-------+---------------------+-------------------+-------------------+--------------+--------------+--------------+-------------+
|   0   | 0.27576303482055664 | 2.391097068786621 | 4.157106876373291 | [0.01403065] | [0.01255436] | [0.01078261] | [0.1026087] |
+-------+---------------------+-------------------+-------------------+--------------+--------------+--------------+-------------+
using time 0.4458s, training loss at epoch 1: 4.1535
using time 0.4973s, training loss at epoch 2: 4.1473
using time 0.5312s, training loss at epoch 3: 4.1380
using time 0.4206s, training loss at epoch 4: 4.12

using time 0.3935s, training loss at epoch 46: 2.5990
using time 0.3312s, training loss at epoch 47: 2.5640
using time 0.2568s, training loss at epoch 48: 2.5567
using time 0.2849s, training loss at epoch 49: 2.5338
+-------+---------------------+--------------------+--------------------+--------------+--------------+--------------+--------------+
| Epoch |   training time(s)  |   tesing time(s)   |        Loss        |    recall    |     ndcg     |  precision   |  hit_ratio   |
+-------+---------------------+--------------------+--------------------+--------------+--------------+--------------+--------------+
|   50  | 0.27422142028808594 | 2.4628729820251465 | 2.5026838779449463 | [0.09340186] | [0.08621054] | [0.04721739] | [0.32173913] |
+-------+---------------------+--------------------+--------------------+--------------+--------------+--------------+--------------+
using time 0.4185s, training loss at epoch 51: 2.5132
using time 0.5326s, training loss at epoch 52: 2.4972
using 

using time 0.3272s, training loss at epoch 96: 1.9181
using time 0.2674s, training loss at epoch 97: 1.9173
using time 0.2756s, training loss at epoch 98: 1.8737
using time 0.2969s, training loss at epoch 99: 1.8953
+-------+--------------------+--------------------+--------------------+--------------+--------------+-------------+--------------+
| Epoch |  training time(s)  |   tesing time(s)   |        Loss        |    recall    |     ndcg     |  precision  |  hit_ratio   |
+-------+--------------------+--------------------+--------------------+--------------+--------------+-------------+--------------+
|  100  | 0.2855873107910156 | 1.5228002071380615 | 1.8718390464782715 | [0.09737241] | [0.09083125] | [0.0513913] | [0.33565217] |
+-------+--------------------+--------------------+--------------------+--------------+--------------+-------------+--------------+
using time 0.2792s, training loss at epoch 101: 1.8815
using time 0.3014s, training loss at epoch 102: 1.8370
using time 0.3

using time 0.5007s, training loss at epoch 146: 1.4335
using time 0.3213s, training loss at epoch 147: 1.4104
using time 0.2295s, training loss at epoch 148: 1.4110
using time 0.2997s, training loss at epoch 149: 1.3955
+-------+---------------------+--------------------+--------------------+--------------+--------------+--------------+--------------+
| Epoch |   training time(s)  |   tesing time(s)   |        Loss        |    recall    |     ndcg     |  precision   |  hit_ratio   |
+-------+---------------------+--------------------+--------------------+--------------+--------------+--------------+--------------+
|  150  | 0.24651527404785156 | 1.8780498504638672 | 1.3883250951766968 | [0.10305894] | [0.09839716] | [0.05547826] | [0.34434783] |
+-------+---------------------+--------------------+--------------------+--------------+--------------+--------------+--------------+
using time 0.4046s, training loss at epoch 151: 1.3943
using time 0.3547s, training loss at epoch 152: 1.3561


using time 0.2983s, training loss at epoch 196: 1.0657
using time 0.2272s, training loss at epoch 197: 1.0611
using time 0.3495s, training loss at epoch 198: 1.0524
using time 0.5410s, training loss at epoch 199: 1.0622
+-------+---------------------+--------------------+--------------------+--------------+--------------+-----------+--------------+
| Epoch |   training time(s)  |   tesing time(s)   |        Loss        |    recall    |     ndcg     | precision |  hit_ratio   |
+-------+---------------------+--------------------+--------------------+--------------+--------------+-----------+--------------+
|  200  | 0.49773406982421875 | 2.4710826873779297 | 1.0514805316925049 | [0.10732894] | [0.10079624] |  [0.056]  | [0.34782609] |
+-------+---------------------+--------------------+--------------------+--------------+--------------+-----------+--------------+
using time 0.3185s, training loss at epoch 201: 1.0731
using time 0.2711s, training loss at epoch 202: 1.0439
using time 0.33

using time 0.3407s, training loss at epoch 246: 0.8798
using time 0.2738s, training loss at epoch 247: 0.8720
using time 0.3710s, training loss at epoch 248: 0.8299
using time 0.2095s, training loss at epoch 249: 0.8184
+-------+---------------------+--------------------+--------------------+--------------+--------------+--------------+--------------+
| Epoch |   training time(s)  |   tesing time(s)   |        Loss        |    recall    |     ndcg     |  precision   |  hit_ratio   |
+-------+---------------------+--------------------+--------------------+--------------+--------------+--------------+--------------+
|  250  | 0.21085858345031738 | 1.8385684490203857 | 0.8579568862915039 | [0.10585393] | [0.10118133] | [0.05634783] | [0.34956522] |
+-------+---------------------+--------------------+--------------------+--------------+--------------+--------------+--------------+
using time 0.3666s, training loss at epoch 251: 0.8309
using time 0.3493s, training loss at epoch 252: 0.7976
