In [1]:
# %load main.py
import os
import random

import torch
import numpy as np

from time import time
from tqdm import tqdm
from copy import deepcopy
import logging
from prettytable import PrettyTable

from utils.parser import parse_args
from utils.data_loader import load_data
from utils.evaluate import test
from utils.helper import early_stopping

n_users = 0
n_items = 0


def get_feed_dict(train_entity_pairs, train_pos_set, start, end, n_negs=1):
    """Assemble one training batch together with randomly drawn negatives.

    Rows ``start:end`` are sliced out of ``train_entity_pairs``; for each row,
    ``n_negs * K`` item ids are drawn that do not appear in that user's entry
    of ``train_pos_set``.

    Reads the module-level names ``n_items`` (size of the id range negatives
    are drawn from), ``K`` (multiplier on ``n_negs``) and ``device`` (where
    the negatives tensor is placed).

    Args:
        train_entity_pairs: 2-D tensor; column 0 is used as user ids and
            column 1 as positive item ids.
        train_pos_set: container indexed by user id whose entries support
            ``in`` tests against item ids.
        start: first row of the batch (inclusive).
        end: last row of the batch (exclusive).
        n_negs: base number of negatives per row, multiplied by ``K``.

    Returns:
        dict with keys ``'users'``, ``'pos_items'`` and ``'neg_items'``.
    """

    def sampling(user_item, train_set, n):
        # Rejection sampling: keep redrawing until the candidate is not one
        # of the user's known items. Never terminates if a user's entry
        # covers every id in range(n_items).
        candidates = range(n_items)
        sampled = []
        for uid, _ in user_item.tolist():
            uid = int(uid)
            picks = []
            while len(picks) < n:
                candidate = random.choice(candidates)
                if candidate in train_set[uid]:
                    continue
                picks.append(candidate)
            sampled.append(picks)
        return sampled

    batch_pairs = train_entity_pairs[start:end]
    batch_users = batch_pairs[:, 0]
    batch_positives = batch_pairs[:, 1]
    negatives = sampling(batch_pairs, train_pos_set, n_negs * K)
    return {
        'users': batch_users,
        'pos_items': batch_positives,
        'neg_items': torch.LongTensor(negatives).to(device),
    }


if __name__ == '__main__':
    # Training entry point: seed the RNGs, load the data, build the model,
    # then train with an evaluation / early-stopping check every 5 epochs.

    # Fix the random seed.
    seed = 2020
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Read args. Everything assigned in this block is a module-level name;
    # get_feed_dict() reads `device`, `n_items` and `K` from module scope.
    args = parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_id)
    device = torch.device("cuda:0") if args.cuda else torch.device("cpu")

    # Build dataset.
    train_cf, user_dict, n_params, norm_mat = load_data(args)
    train_cf_size = len(train_cf)
    train_cf = torch.LongTensor(np.array([[cf[0], cf[1]] for cf in train_cf], np.int32))

    n_users = n_params['n_users']
    n_items = n_params['n_items']
    n_negs = args.n_negs
    K = args.K

    # Define model.
    from modules.LightGCN import LightGCN
    if args.gnn != 'lightgcn':
        # Fail here with a clear message instead of a NameError on `model`
        # at the optimizer line below.
        raise ValueError("unsupported gnn %r: only 'lightgcn' is available" % (args.gnn,))
    model = LightGCN(n_params, args, norm_mat).to(device)

    # Define optimizer.
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    cur_best_pre_0 = 0
    stopping_step = 0
    should_stop = False

    print("start training ...")
    for epoch in range(args.epoch):
        # Shuffle training data (a fresh permutation every epoch).
        index = np.arange(len(train_cf))
        np.random.shuffle(index)
        train_cf_ = train_cf[index].to(device)

        # Training.
        model.train()
        # `loss` is a plain Python float: summing the loss tensors themselves
        # would keep every batch's autograd history reachable for the whole
        # epoch, and would leave an int (no `.item()`) if no batch ran.
        loss, s = 0.0, 0
        train_s_t = time()
        # Only full batches are used; a trailing partial batch is skipped.
        while s + args.batch_size <= len(train_cf):
            batch = get_feed_dict(train_cf_,
                                  user_dict['train_user_set'],
                                  s, s + args.batch_size,
                                  n_negs)

            batch_loss, _, _ = model(epoch, batch)

            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

            loss += batch_loss.item()
            s += args.batch_size

        train_e_t = time()

        if epoch % 5 == 0:
            # Testing.
            train_res = PrettyTable()
            train_res.field_names = ["Epoch", "training time(s)", "tesing time(s)", "Loss", "recall", "ndcg", "precision", "hit_ratio"]

            model.eval()
            test_s_t = time()
            test_ret = test(model, user_dict, n_params, mode='test')
            test_e_t = time()
            train_res.add_row(
                [epoch, train_e_t - train_s_t, test_e_t - test_s_t, loss, test_ret['recall'], test_ret['ndcg'],
                 test_ret['precision'], test_ret['hit_ratio']])

            if user_dict['valid_user_set'] is None:
                # No validation split: model selection falls back to the test metrics.
                valid_ret = test_ret
            else:
                test_s_t = time()
                valid_ret = test(model, user_dict, n_params, mode='valid')
                test_e_t = time()
                train_res.add_row(
                    [epoch, train_e_t - train_s_t, test_e_t - test_s_t, loss, valid_ret['recall'], valid_ret['ndcg'],
                     valid_ret['precision'], valid_ret['hit_ratio']])
            print(train_res)

            # *********************************************************
            # Early stopping, tracked on the first recall entry with flag_step=10.
            cur_best_pre_0, stopping_step, should_stop = early_stopping(valid_ret['recall'][0], cur_best_pre_0,
                                                                        stopping_step, expected_order='acc',
                                                                        flag_step=10)
            if should_stop:
                break

            # Save weights whenever this evaluation is the best seen so far.
            if valid_ret['recall'][0] == cur_best_pre_0 and args.save:
                torch.save(model.state_dict(), args.out_dir + 'model_' + '.ckpt')
        else:
            # logging.info('training loss at epoch %d: %f' % (epoch, loss))
            print('using time %.4fs, training loss at epoch %d: %.4f' % (train_e_t - train_s_t, epoch, loss))

    # Printed whether or not early stopping actually triggered.
    print('early stopping at %d, recall@20:%.4f' % (epoch, cur_best_pre_0))


reading train and test user-item set ...
building the adj mat ...
{'n_users': 7670, 'n_items': 10887}
loading over ...
start training ...
+-------+--------------------+-------------------+------------------+--------------+--------------+--------------+--------------+
| Epoch |  training time(s)  |   tesing time(s)  |       Loss       |    recall    |     ndcg     |  precision   |  hit_ratio   |
+-------+--------------------+-------------------+------------------+--------------+--------------+--------------+--------------+
|   0   | 0.7475862503051758 | 2.922084331512451 | 21.4107723236084 | [0.01504579] | [0.00814599] | [0.00237229] | [0.03526942] |
+-------+--------------------+-------------------+------------------+--------------+--------------+--------------+--------------+
using time 0.7197s, training loss at epoch 1: 20.5793
using time 0.6935s, training loss at epoch 2: 18.5607
using time 0.6713s, training loss at epoch 3: 16.3386
using time 0.6548s, training loss at epoch 4: 14.7

using time 0.7055s, training loss at epoch 46: 3.0338
using time 0.7239s, training loss at epoch 47: 2.9695
using time 0.7103s, training loss at epoch 48: 2.8998
using time 0.7053s, training loss at epoch 49: 2.8343
+-------+--------------------+--------------------+-------------------+--------------+------------+--------------+--------------+
| Epoch |  training time(s)  |   tesing time(s)   |        Loss       |    recall    |    ndcg    |  precision   |  hit_ratio   |
+-------+--------------------+--------------------+-------------------+--------------+------------+--------------+--------------+
|   50  | 0.7137954235076904 | 2.9489128589630127 | 2.712573528289795 | [0.03479958] | [0.019296] | [0.00503149] | [0.07879636] |
+-------+--------------------+--------------------+-------------------+--------------+------------+--------------+--------------+
using time 0.7762s, training loss at epoch 51: 2.7289
using time 0.6709s, training loss at epoch 52: 2.6409
using time 0.7031s, traini

using time 0.7090s, training loss at epoch 96: 1.2314
using time 0.6832s, training loss at epoch 97: 1.2264
using time 0.7177s, training loss at epoch 98: 1.2260
using time 0.6877s, training loss at epoch 99: 1.2109
+-------+--------------------+-------------------+-------------------+--------------+--------------+--------------+--------------+
| Epoch |  training time(s)  |   tesing time(s)  |        Loss       |    recall    |     ndcg     |  precision   |  hit_ratio   |
+-------+--------------------+-------------------+-------------------+--------------+--------------+--------------+--------------+
|  100  | 0.7056469917297363 | 2.972174644470215 | 1.177783727645874 | [0.03602409] | [0.02016001] | [0.00512946] | [0.07977607] |
+-------+--------------------+-------------------+-------------------+--------------+--------------+--------------+--------------+
using time 0.7409s, training loss at epoch 101: 1.2027
using time 0.6978s, training loss at epoch 102: 1.1461
using time 0.6708s,

using time 0.7040s, training loss at epoch 146: 0.7453
using time 0.7063s, training loss at epoch 147: 0.7552
using time 0.7247s, training loss at epoch 148: 0.7315
using time 0.7033s, training loss at epoch 149: 0.7322
+-------+--------------------+-------------------+--------------------+-------------+--------------+--------------+--------------+
| Epoch |  training time(s)  |   tesing time(s)  |        Loss        |    recall   |     ndcg     |  precision   |  hit_ratio   |
+-------+--------------------+-------------------+--------------------+-------------+--------------+--------------+--------------+
|  150  | 0.6892476081848145 | 3.000887393951416 | 0.7160650491714478 | [0.0361701] | [0.02033787] | [0.00522743] | [0.07935619] |
+-------+--------------------+-------------------+--------------------+-------------+--------------+--------------+--------------+
using time 0.7307s, training loss at epoch 151: 0.7152
using time 0.6862s, training loss at epoch 152: 0.7132
using time 0.67

using time 0.8408s, training loss at epoch 196: 0.5329
using time 0.8428s, training loss at epoch 197: 0.5275
using time 0.7708s, training loss at epoch 198: 0.5335
using time 0.7033s, training loss at epoch 199: 0.5208
+-------+--------------------+--------------------+--------------------+------------+--------------+--------------+--------------+
| Epoch |  training time(s)  |   tesing time(s)   |        Loss        |   recall   |     ndcg     |  precision   |  hit_ratio   |
+-------+--------------------+--------------------+--------------------+------------+--------------+--------------+--------------+
|  200  | 0.7057132720947266 | 2.9764344692230225 | 0.5392257571220398 | [0.037488] | [0.02050166] | [0.00529741] | [0.08047586] |
+-------+--------------------+--------------------+--------------------+------------+--------------+--------------+--------------+
using time 0.7911s, training loss at epoch 201: 0.5141
using time 0.7254s, training loss at epoch 202: 0.5109
using time 0.87

using time 0.7315s, training loss at epoch 246: 0.4293
using time 0.6979s, training loss at epoch 247: 0.4092
using time 0.7122s, training loss at epoch 248: 0.4158
using time 0.6812s, training loss at epoch 249: 0.4338
+-------+--------------------+--------------------+--------------------+--------------+--------------+--------------+--------------+
| Epoch |  training time(s)  |   tesing time(s)   |        Loss        |    recall    |     ndcg     |  precision   |  hit_ratio   |
+-------+--------------------+--------------------+--------------------+--------------+--------------+--------------+--------------+
|  250  | 0.7115204334259033 | 3.0247671604156494 | 0.3955393135547638 | [0.03764883] | [0.02015375] | [0.00526942] | [0.07977607] |
+-------+--------------------+--------------------+--------------------+--------------+--------------+--------------+--------------+
using time 0.7572s, training loss at epoch 251: 0.4169
using time 0.7046s, training loss at epoch 252: 0.4146
using

using time 0.6761s, training loss at epoch 296: 0.3316
using time 0.6753s, training loss at epoch 297: 0.3687
using time 0.7153s, training loss at epoch 298: 0.3650
using time 0.6874s, training loss at epoch 299: 0.3397
+-------+--------------------+--------------------+--------------------+--------------+--------------+--------------+-------------+
| Epoch |  training time(s)  |   tesing time(s)   |        Loss        |    recall    |     ndcg     |  precision   |  hit_ratio  |
+-------+--------------------+--------------------+--------------------+--------------+--------------+--------------+-------------+
|  300  | 0.7071404457092285 | 3.1493520736694336 | 0.3558851480484009 | [0.03765801] | [0.02021433] | [0.00526242] | [0.0786564] |
+-------+--------------------+--------------------+--------------------+--------------+--------------+--------------+-------------+
using time 0.7028s, training loss at epoch 301: 0.3524
using time 0.6797s, training loss at epoch 302: 0.3462
using time