<a href="https://colab.research.google.com/github/Thuandang297/BigData/blob/main/NCKH_THUAN_AN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
from Models.BUIR_ID import BUIR_ID
from Models.BUIR_NB import BUIR_NB

from Utils.data_loaders import ImplicitFeedback
from Utils.data_utils import load_dataset, build_adjmat
from Utils.evaluation import evaluate, print_eval_results

import torch
from torch.utils.data import DataLoader

import numpy as np
import argparse
import os, time

def run(args):
    """Train a BUIR model and print its best validation/test results.

    The dataset is loaded from ``os.path.join(args.path, args.dataset)``,
    the model selected by ``args.model`` is trained on the GPU with Adam, and
    every 10th epoch the model is evaluated. The evaluation with the highest
    validation ``'P50'`` score seen so far is remembered and printed again
    once training ends.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``path``, ``dataset``, ``train_ratio``, ``random_seed``,
            ``batch_size``, ``model``, ``latent_size``, ``momentum``,
            ``n_layers``, ``drop_flag``, ``lr``, ``weight_decay``,
            ``max_epochs`` and ``early_stop``.

    Raises:
        ValueError: If ``args.model`` is neither ``'buir-id'`` nor
            ``'buir-nb'``.
    """
    # Reject an unsupported model name before any expensive work (dataset
    # loading, adjacency construction) instead of failing later on an
    # unbound ``model`` variable.
    if args.model not in ('buir-id', 'buir-nb'):
        raise ValueError(
            "Unknown model '{}': expected 'buir-id' or 'buir-nb'".format(args.model))

    path = os.path.join(args.path, args.dataset)
    # filename = 'users.dat'
    filename = 'testuser.dat'

    user_count, item_count, train_mat, valid_mat, test_mat = load_dataset(
        path, filename, train_ratio=args.train_ratio, random_seed=args.random_seed)
    train_dataset = ImplicitFeedback(user_count, item_count, train_mat)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)

    if args.model == 'buir-id':
        model = BUIR_ID(user_count, item_count, args.latent_size, args.momentum)
    else:  # 'buir-nb' (guaranteed by the validation above)
        norm_adjmat = build_adjmat(user_count, item_count, train_mat, selfloop_flag=False)
        model = BUIR_NB(user_count, item_count, args.latent_size, norm_adjmat, args.momentum,
                        n_layers=args.n_layers, drop_flag=args.drop_flag)

    model = model.cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    best_score = -np.inf
    early_stop_cnt = 0
    # Stay None until the first evaluation that beats ``best_score``, so the
    # final report can tell "nothing recorded" apart from a real result.
    valid_result = None
    test_result = None

    for epoch in range(args.max_epochs):
        tic1 = time.time()

        # Evaluation below switches the model to eval mode, so training mode
        # is restored once at the start of every epoch.
        model.train()
        train_loss = []
        for batch in train_loader:
            batch = {key: value.cuda() for key, value in batch.items()}

            # Forward
            output = model(batch)
            batch_loss = model.get_loss(output)
            # Store a detached copy: keeping the attached tensor would hold
            # every batch's autograd graph alive until the epoch ends.
            train_loss.append(batch_loss.detach())

            # Backward and optimize
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

            model._update_target()

        train_loss = torch.mean(torch.stack(train_loss)).cpu().numpy()
        toc1 = time.time()

        # Evaluate only on every 10th epoch (0, 10, 20, ...).
        if epoch % 10 == 0:
            model.eval()
            with torch.no_grad():
                tic2 = time.time()
                eval_results = evaluate(model, train_loader, train_mat, valid_mat, test_mat)
                toc2 = time.time()

            if eval_results['valid']['P50'] > best_score:
                best_score = eval_results['valid']['P50']
                valid_result = eval_results['valid']
                test_result = eval_results['test']

                print('Epoch [{}/{}]'.format(epoch, args.max_epochs))
                print('Training Loss: {:.4f}, Elapsed Time for Training: {:.2f}s, for Testing: {:.2f}s\n'.format(train_loss, toc1-tic1, toc2-tic2))
                print_eval_results(eval_results)

            else:
                # NOTE(review): the counter is never reset on improvement, so
                # this stops after ``early_stop`` non-improving evaluations in
                # total, not consecutively — confirm this is intended.
                early_stop_cnt += 1
                if early_stop_cnt == args.early_stop:
                    break

    print('===== [FINAL PERFORMANCE] =====\n')
    if valid_result is None:
        # No evaluation ever improved on -inf (e.g. max_epochs == 0).
        print('No evaluation result was recorded.')
        return
    print_eval_results({'valid': valid_result, 'test': test_result})

def str2bool(v):
    """Convert a command-line token into a boolean (argparse ``type=`` helper).

    Args:
        v: Either an actual ``bool``, which is returned unchanged, or a
            string. Strings are matched case-insensitively.

    Returns:
        ``True`` for 'yes', 'true', 't', 'y', '1'; ``False`` for
        'no', 'false', 'f', 'n', '0'.

    Raises:
        argparse.ArgumentTypeError: If the string is not one of the
            recognised spellings.
    """
    # A value that is already a bool has no ``.lower()``; pass it through so
    # the helper can also be called directly from code.
    if isinstance(v, bool):
        return v

    lowered = v.lower()
    if lowered in ('yes', 'true', 't', 'y', '1'):
        return True
    if lowered in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Invalid boolean value')

if __name__ == '__main__':
    # Script entry point: parse options, select the GPU, seed the random
    # number generators, then start training.
    parser = argparse.ArgumentParser()

    # --- Model options ('buir-id' was the earlier default for --model) ---
    parser.add_argument('--model', default='buir-nb', type=str, help='buir-id | buir-nb')
    parser.add_argument('--latent_size', default=250, type=int)
    parser.add_argument('--n_layers', default=3, type=int)
    parser.add_argument('--drop_flag', default=False, type=str2bool)

    # --- Optimisation options (an earlier batch_size default was 1035) ---
    parser.add_argument('--batch_size', default=10024, type=int)
    parser.add_argument('--lr', default=0.001, type=float)
    parser.add_argument('--weight_decay', default=0.0001, type=float)
    parser.add_argument('--momentum', default=0.995, type=float)
    parser.add_argument('--max_epochs', default=1000, type=int)
    parser.add_argument('--early_stop', default=5, type=int)

    # --- Dataset options ---
    parser.add_argument('--path', default='./Data/', type=str)
    parser.add_argument('--dataset', default='toy-dataset', type=str)
    parser.add_argument('--train_ratio', default=0.5, type=float)
    parser.add_argument('--random_seed', default=0, type=int)

    # --- Hardware options ---
    parser.add_argument('--gpu', default=0, type=int)

    # parse_known_args (rather than parse_args) leaves unrecognised
    # command-line tokens in ``unknown`` instead of aborting.
    args, unknown = parser.parse_known_args()

    # GPU setting: expose only the requested device, numbered by PCI bus id.
    os.environ.update({
        'CUDA_DEVICE_ORDER': 'PCI_BUS_ID',
        'CUDA_VISIBLE_DEVICES': str(args.gpu),
    })

    # Random seed initialization: fixed seed of 1 for NumPy and for torch on
    # CPU and all CUDA devices (args.random_seed is passed on to run()).
    np.random.seed(1)
    torch.manual_seed(1)
    torch.cuda.manual_seed_all(1)

    run(args)



{0: 4, 1: 24, 2: 23, 3: 3, 4: 29, 5: 4, 6: 17, 7: 4, 8: 7, 9: 4, 10: 12, 11: 7, 12: 3, 13: 4, 14: 73, 15: 54, 16: 4, 17: 7, 18: 15, 19: 6, 20: 7, 21: 7, 22: 3, 23: 9, 24: 10, 25: 4, 26: 3, 27: 8, 28: 3, 29: 10, 30: 4, 31: 6, 32: 3, 33: 8, 34: 4, 35: 28, 36: 8, 37: 3, 38: 3, 39: 5, 40: 8, 41: 30, 42: 6, 43: 4, 44: 4, 45: 15, 46: 10, 47: 3, 48: 9, 49: 3, 50: 35, 51: 11, 52: 4, 53: 7, 54: 6, 55: 3, 56: 3, 57: 3, 58: 4, 59: 12, 60: 5, 61: 8, 62: 6, 63: 3, 64: 5, 65: 94, 66: 4, 67: 4, 68: 3, 69: 3, 70: 5, 71: 3, 72: 8, 73: 19, 74: 6, 75: 5, 76: 19, 77: 16, 78: 4, 79: 4, 80: 4, 81: 3, 82: 4, 83: 3, 84: 4, 85: 3, 86: 13, 87: 3, 88: 11, 89: 3, 90: 5, 91: 20, 92: 4, 93: 4, 94: 9, 95: 10, 96: 3, 97: 3, 98: 5, 99: 5, 100: 107, 101: 4, 102: 3, 103: 7, 104: 3, 105: 3, 106: 3, 107: 10, 108: 7, 109: 3, 110: 9, 111: 4, 112: 11, 113: 5, 114: 13, 115: 7, 116: 5, 117: 57, 118: 4, 119: 16, 120: 3, 121: 22, 122: 4, 123: 4, 124: 4, 125: 4, 126: 19, 127: 3, 128: 6, 129: 5, 130: 8, 131: 5, 132: 9, 133: 11, 13

  d_inv_sqrt = np.power(rowsum, -0.5).flatten()


Epoch [0/1000]
Training Loss: 3.9473, Elapsed Time for Training: 1.55s, for Testing: 26.60s

VALID P@10: 0.0028, R@10: 0.0024, N@10: 0.0024
VALID P@20: 0.0041, R@20: 0.0037, N@20: 0.0028
VALID P@50: 0.0089, R@50: 0.0087, N@50: 0.0040

TEST  P@10: 0.0018, R@10: 0.0016, N@10: 0.0013
TEST  P@20: 0.0027, R@20: 0.0024, N@20: 0.0016
TEST  P@50: 0.0085, R@50: 0.0083, N@50: 0.0031

Epoch [10/1000]
Training Loss: 1.4724, Elapsed Time for Training: 1.43s, for Testing: 26.93s

VALID P@10: 0.1004, R@10: 0.0957, N@10: 0.0733
VALID P@20: 0.1383, R@20: 0.1363, N@20: 0.0847
VALID P@50: 0.2036, R@50: 0.2031, N@50: 0.1014

TEST  P@10: 0.1020, R@10: 0.0975, N@10: 0.0745
TEST  P@20: 0.1367, R@20: 0.1348, N@20: 0.0850
TEST  P@50: 0.2026, R@50: 0.2021, N@50: 0.1022

Epoch [20/1000]
Training Loss: 1.1516, Elapsed Time for Training: 1.43s, for Testing: 26.85s

VALID P@10: 0.1173, R@10: 0.1122, N@10: 0.0859
VALID P@20: 0.1597, R@20: 0.1576, N@20: 0.0986
VALID P@50: 0.2315, R@50: 0.2310, N@50: 0.1172

TEST  P@1