In [1]:
# Seed value exported through PYTHONHASHSEED below.
GLOBAL_SEED = 42

import os
# Environment variables are set before the remaining libraries are imported.
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so assigning it
# here presumably only affects child processes -- confirm if hash determinism matters.
os.environ["PYTHONIOENCODING"] = "utf8"
os.environ['PYTHONHASHSEED'] = str(GLOBAL_SEED)
import sys
from glob import glob

# Data handling and general utilities.
import pandas as pd
import numpy as np
from numpy import random as np_rnd
import random as rnd
import shutil
import gc
import datetime
from collections import defaultdict, Counter
from tqdm import tqdm
from multiprocessing import Pool, cpu_count
import time
import pickle
import sklearn as skl
from sklearn import model_selection

# Deep-learning stack.
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn
import torch.nn.functional as F
from torch.optim import AdamW, Adam, SparseAdam
from transformers import get_polynomial_decay_schedule_with_warmup

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Bare expression: the installed PyTorch version is shown as the cell output.
torch.__version__


'2.1.2+cu118'

In [2]:
from torch_geometric.data import Data
from torch_geometric.utils import coalesce, is_undirected, to_undirected, sort_edge_index
from torch_geometric.sampler import BaseSampler
from torch_geometric.nn import GCNConv

In [3]:
class CFG:
    """Static configuration values shared by the cells of this notebook."""

    # --- event-type metadata (not referenced by the visible training cells) ---
    # Integer event code -> event name.
    contentType_mapper = pd.Series({0: "clicks", 1: "carts", 2: "orders"})
    # Three weights; presumably one per event type in mapper order -- TODO confirm.
    target_weight = (0.1, 0.3, 0.6)

    # --- training schedule ---
    n_folds = 3
    batch_size = 256
    # Upper bound on the number of epochs run by do_fold_training.
    epochs = 70
    # Training stops after this many consecutive epochs without a better validation loss.
    early_stopping_rounds = 10

    # --- optimisation ---
    # Learning rate handed to AdamW.
    eta = 0.0005
    weight_decay = 0.0001
    max_grad_norm = 100.0

    # --- model ---
    # Width of the node embedding and of every layer in GCN.
    embed_dim = 28

In [4]:
def pickleIO(obj, src, op="w"):
    """Pickle ``obj`` to the file ``src`` or unpickle an object from it.

    Args:
        obj: Object to serialise when ``op == "w"``; ignored when reading.
        src: Path of the pickle file.
        op: ``"w"`` to write, ``"r"`` to read.

    Returns:
        The loaded object for ``"r"``, ``None`` for ``"w"``. For any other
        ``op`` a message is printed and ``obj`` is returned unchanged.
    """
    if op not in ("w", "r"):
        print("unknown operation")
        return obj

    # The file is opened in binary mode: "wb" or "rb".
    with open(src, op + "b") as handle:
        if op == "r":
            # NOTE: unpickling can execute arbitrary code; only read trusted files.
            return pickle.load(handle)
        pickle.dump(obj, handle)

def seed_everything(seed=42):
    """Seed every random number generator this notebook may touch.

    Seeds Python's ``random``, NumPy and PyTorch, and also CuPy (``cp``) and
    TensorFlow (``tf_rnd``) when those names have been imported. Libraries
    that are not imported are skipped; any other failure is surfaced rather
    than silently ignored.

    Args:
        seed: Integer seed applied to all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    # python random
    rnd.seed(seed)
    # numpy random
    np_rnd.seed(seed)
    # RAPIDS random (optional: skipped when `cp` was never imported)
    try:
        cp.random.seed(seed)
    except NameError:
        pass
    # tf random (optional: skipped when `tf_rnd` was never imported)
    try:
        tf_rnd.set_seed(seed)
    except NameError:
        pass
    # pytorch random
    try:
        torch.manual_seed(seed)
        # Seed every visible CUDA device, not only the current one.
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        # Autotuned kernel selection can vary between runs, so keep it off.
        torch.backends.cudnn.benchmark = False
    except NameError:
        pass

In [5]:
# Unpickle the node feature tensor and place it on the selected device in one step.
node_feature = pickleIO(None, "node_feature.pkl", "r").to(device)
# Size of the first dimension; later passed to GCN as the embedding-table size.
n_aids = node_feature.shape[0]

In [6]:
seed_everything()

# Number of edges kept for each split (previously inlined as magic numbers).
N_TRAIN_EDGES = 512 * 10000
N_VALID_EDGES = 512 * 2000


def _sample_edges(parquet_path, n_edges, cache_path):
    """Read an edge list, keep a random subset of edges and cache it.

    Args:
        parquet_path: Parquet file whose values are loaded as an int64 tensor
            and transposed, so that each column of the result is one edge.
        n_edges: Maximum number of edges to keep; fewer are returned when the
            file holds fewer edges.
        cache_path: Pickle file the sampled (CPU) tensor is written to.

    Returns:
        The sampled edge tensor moved to ``device``.
    """
    edge_index = torch.tensor(pd.read_parquet(parquet_path).values, dtype=torch.int64).T
    # A random permutation of the edge columns; its first n_edges entries form the sample.
    shuffled_idx = torch.randperm(edge_index.shape[1])
    sampled = edge_index[:, shuffled_idx[:n_edges]]
    # The sample is pickled before it is moved to the device.
    pickleIO(sampled, cache_path, "w")
    return sampled.to(device)


# Train is sampled before valid so the random stream is consumed in the same order as before.
edge_train = _sample_edges("train_edge.parquet", N_TRAIN_EDGES, "sampled_edge_train.pkl")
edge_valid = _sample_edges("valid_edge.parquet", N_VALID_EDGES, "sampled_edge_valid.pkl")

# The full edge lists went out of scope with the helper; release what is left.
torch.cuda.empty_cache()
gc.collect()

0

In [7]:
def get_optimizer_params(model, eta, weight_decay):
    """Split a model's parameters into weight-decay and no-decay optimizer groups.

    A parameter goes into the no-decay group when its name contains any of
    ``"bias"``, ``"LayerNorm.bias"`` or ``"LayerNorm.weight"``; every other
    parameter receives ``weight_decay``.

    Args:
        model: Object exposing ``named_parameters()`` yielding ``(name, param)``.
        eta: Learning rate assigned to both groups.
        weight_decay: Weight decay assigned to the first group.

    Returns:
        A list of two parameter-group dicts: decayed parameters first, then
        the parameters with ``weight_decay`` fixed to ``0.0``.
    """
    no_decay = ("bias", "LayerNorm.bias", "LayerNorm.weight")
    decay_params, no_decay_params = [], []
    # Single pass over the parameters; the relative order within each group is preserved.
    for name, param in model.named_parameters():
        if any(nd in name for nd in no_decay):
            no_decay_params.append(param)
        else:
            decay_params.append(param)
    return [
        # weight decay is applied
        {'params': decay_params, 'lr': eta, 'weight_decay': weight_decay},
        # not applied to normalization layers / biases
        {'params': no_decay_params, 'lr': eta, 'weight_decay': 0.0},
    ]

def get_scheduler(optimizer, num_warmup_steps, num_training_steps, power=0.5):
    """Build a polynomial-decay learning-rate schedule with warmup.

    Thin wrapper around ``get_polynomial_decay_schedule_with_warmup`` that
    fixes the final learning rate to ``1e-7``.

    Args:
        optimizer: Optimizer whose learning rate is scheduled.
        num_warmup_steps: Number of warmup steps forwarded to the scheduler.
        num_training_steps: Total number of steps forwarded to the scheduler.
        power: Polynomial power forwarded to the scheduler.

    Returns:
        The scheduler object created by the wrapped function.
    """
    return get_polynomial_decay_schedule_with_warmup(
        optimizer,
        num_warmup_steps=num_warmup_steps,
        num_training_steps=num_training_steps,
        lr_end=1e-7,
        power=power,
    )

class AverageMeter:
    """Tracks the latest value and the weighted running average of a metric.

    Attributes:
        val: Most recent value passed to ``update``.
        sum: Sum of ``val * n`` over all updates.
        count: Sum of ``n`` over all updates.
        avg: ``sum / count`` after the most recent update.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Set all statistics back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the average."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count



In [8]:
class GCN(torch.nn.Module):
    """Embedding lookup followed by one GCNConv layer, ReLU and a linear layer.

    Args:
        n_aids: Number of rows in the embedding table.
        embed_dim: Width of the embedding and of every subsequent layer.
    """

    def __init__(self, n_aids, embed_dim):
        super().__init__()
        # Attribute names and creation order are kept as-is: they determine the
        # state_dict keys and the order in which initial weights are drawn.
        self.aid_factors = nn.Embedding(num_embeddings=n_aids, embedding_dim=embed_dim, sparse=False)
        self.gcn = GCNConv(in_channels=embed_dim, out_channels=embed_dim)
        self.gcn_act = nn.ReLU()
        self.lin = nn.Linear(in_features=embed_dim, out_features=embed_dim)

    def forward(self, x, edge_index):
        """Return one ``embed_dim``-wide vector per entry of ``x``.

        Args:
            x: Indices looked up in the embedding table.
            edge_index: Edge tensor forwarded to the graph convolution.
        """
        embedded = self.aid_factors(x)
        propagated = self.gcn(embedded, edge_index=edge_index)
        return self.lin(self.gcn_act(propagated))

In [9]:
def train_fn(fold, model, criterion, optimizer, scheduler, grad_scaler):
    """Run one full-graph training step and report its loss and accuracy.

    The model embeds ``node_feature`` over the ``edge_train`` graph. Each
    training edge is scored by the dot product of its endpoint embeddings
    (positive sample); negatives pair every source node with a randomly
    permuted destination node.

    Args:
        fold: Fold identifier (not used inside this function).
        model: Model called as ``model(node_feature, edge_train)``.
        criterion: Loss applied to the concatenated positive/negative scores
            with targets 1 and 0 respectively.
        optimizer: Optimizer stepped once per call.
        scheduler: Learning-rate scheduler stepped once per call.
        grad_scaler: ``torch.cuda.amp.GradScaler`` or ``None``. With ``None``
            (as passed when CUDA is unavailable) mixed precision is disabled
            and a plain backward/step is performed instead of failing.

    Returns:
        Dict with ``"loss"`` and ``"accuracy"`` AverageMeter objects.
    """
    model.train()
    metrics = {
        "loss": AverageMeter(),
        "accuracy": AverageMeter(),
    }

    # Mixed precision is only used together with a gradient scaler.
    use_amp = grad_scaler is not None

    with torch.cuda.amp.autocast(enabled=use_amp):
        # Get the embeddings
        output = model(node_feature, edge_train)
        # Gather the endpoint embeddings once; both score sets reuse them.
        src_emb = output[edge_train[0]]
        dst_emb = output[edge_train[1]]
        # Compute loss and accuracy
        output_neg = (src_emb * dst_emb[torch.randperm(len(edge_train[1]))]).sum(dim=-1)
        output_pos = (src_emb * dst_emb).sum(dim=-1)
        loss = criterion(torch.cat([output_pos, output_neg]), torch.cat([torch.ones_like(output_pos), torch.zeros_like(output_neg)]))

    optimizer.zero_grad()

    if use_amp:
        grad_scaler.scale(loss).backward()
        grad_scaler.step(optimizer)
        grad_scaler.update()
    else:
        loss.backward()
        optimizer.step()
    scheduler.step()

    metrics["loss"].update(loss.item())
    # Accuracy is a pure metric, so no autograd graph is needed for it.
    with torch.no_grad():
        metrics["accuracy"].update(torch.cat([output_pos.flatten().sigmoid() >= 0.5, output_neg.flatten().sigmoid() < 0.5]).float().mean().item())

    return metrics


def valid_fn(fold, model, criterion):
    """Evaluate the model on the validation edges without updating it.

    Embeddings are computed over the ``edge_train`` graph; the ``edge_valid``
    edges are then scored by the dot product of their endpoint embeddings.
    Negatives pair every source node with a randomly permuted destination.

    Args:
        fold: Fold identifier (not used inside this function).
        model: Model called as ``model(node_feature, edge_train)``.
        criterion: Loss applied to the concatenated positive/negative scores
            with targets 1 and 0 respectively.

    Returns:
        Dict with ``"loss"`` and ``"accuracy"`` AverageMeter objects.
    """
    model.eval()
    metrics = {
        "loss": AverageMeter(),
        "accuracy": AverageMeter(),
    }

    with torch.no_grad():
        output = model(node_feature, edge_train)
        # Gather the endpoint embeddings once; both score sets reuse them.
        src_emb = output[edge_valid[0]]
        dst_emb = output[edge_valid[1]]
        output_neg = (src_emb * dst_emb[torch.randperm(len(edge_valid[1]))]).sum(dim=-1)
        output_pos = (src_emb * dst_emb).sum(dim=-1)
        loss = criterion(torch.cat([output_pos, output_neg]), torch.cat([torch.ones_like(output_pos), torch.zeros_like(output_neg)]))

    metrics["loss"].update(loss.item())
    metrics["accuracy"].update(torch.cat([output_pos.flatten().sigmoid() >= 0.5, output_neg.flatten().sigmoid() < 0.5]).float().mean().item())

    return metrics


def infer_fn(data, model, force_to_cpu=True):
    """Run the model on ``data`` in eval mode without tracking gradients.

    The model and both input tensors are placed on the same device, so data
    held on another device no longer causes a device-mismatch error.

    Args:
        data: Object exposing ``x`` and ``edge_index``.
        model: Model called as ``model(data.x, data.edge_index)``.
        force_to_cpu: Run on the CPU when true, otherwise on the notebook-level
            ``device``.

    Returns:
        The model output, located on the device used for inference.
    """
    target = torch.device("cpu") if force_to_cpu else device
    model.to(target)
    model.eval()

    def _on_target(value):
        # Non-tensor inputs (e.g. None) are passed through untouched.
        return value.to(target) if hasattr(value, "to") else value

    with torch.no_grad():
        predictions = model(_on_target(data.x), _on_target(data.edge_index))

    return predictions


def do_fold_training(fold):
    """Train one fold with early stopping and save the best checkpoint.

    Builds a fresh GCN, optimizer and scheduler, runs up to ``CFG.epochs``
    epochs, and keeps the weights of the epoch with the lowest validation
    loss. Those weights are written to ``./model_fold{fold}_best.pth`` and a
    summary dict for that epoch is appended to the notebook-level list
    ``score_list``, which must exist before this function is called.

    Args:
        fold: Fold identifier; also used as the random seed.

    Raises:
        RuntimeError: If no epoch improved on the initial best score, so
            there is nothing to save.
    """
    seed_everything(fold)
    model = GCN(n_aids=n_aids, embed_dim=CFG.embed_dim).to(device)
    optimizer_parameters = get_optimizer_params(
        model,
        eta=CFG.eta,
        weight_decay=CFG.weight_decay
    )
    optimizer = AdamW(optimizer_parameters, lr=CFG.eta, weight_decay=CFG.weight_decay)
    scheduler = get_scheduler(
        optimizer,
        num_warmup_steps=0,
        num_training_steps=CFG.epochs
    )
    grad_scaler = torch.cuda.amp.GradScaler() if torch.cuda.is_available() else None
    criterion = nn.BCEWithLogitsLoss(reduction="mean")
    best_score = np.inf
    # Filled in the first time an epoch improves on best_score.
    return_score_dic = None
    model_save_dic = None

    early_stopping_cnt = 0
    for epoch in range(CFG.epochs):
        epoch_start_time = time.time()
        train_metrics = train_fn(fold, model, criterion, optimizer, scheduler, grad_scaler)
        valid_metrics = valid_fn(fold, model, criterion)

        score = valid_metrics["loss"].avg
        print("Epoch[{0}/{1}]\n train loss : {2}\n valid loss : {3}\n train accuracy : {4}\n valid accuracy : {5}\n eta : {6}\n Elapsed : {7}\n"
              .format(
                  epoch+1, CFG.epochs,
                  round(train_metrics["loss"].avg, 5), round(valid_metrics["loss"].avg, 5),
                  round(train_metrics["accuracy"].avg, 5), round(valid_metrics["accuracy"].avg, 5),
                  # get_last_lr() is the supported accessor; get_lr() warns when called outside step().
                  round(scheduler.get_last_lr()[0], 5), round(time.time() - epoch_start_time, 3)
              )
        )

        if score < best_score:
            best_score = score
            return_score_dic = {
                "fold": fold,
                "train_loss": train_metrics["loss"].avg,
                "valid_loss": valid_metrics["loss"].avg,
                "train_accuracy": train_metrics["accuracy"].avg,
                "valid_accuracy": valid_metrics["accuracy"].avg,
            }
            # state_dict() hands back references to the live parameters, which later
            # epochs keep updating. Copy them so the checkpoint really holds the best
            # epoch's weights rather than the last epoch's.
            model_save_dic = {
                'model': {
                    name: tensor.detach().to("cpu", copy=True)
                    for name, tensor in model.state_dict().items()
                }
            }
            early_stopping_cnt = 0
        else:
            early_stopping_cnt += 1

        if early_stopping_cnt == CFG.early_stopping_rounds:
            print("INFO : Early Stopped ! (Epoch[{0}/{1}])".format(epoch+1, CFG.epochs))
            break

    if model_save_dic is None:
        # Happens when CFG.epochs is 0 or the validation loss never compared below infinity (e.g. NaN).
        raise RuntimeError(
            "fold {0}: no epoch improved the validation loss; nothing to save".format(fold)
        )

    torch.save(
        model_save_dic,
        f"./model_fold{fold}_best.pth",
    )
    score_list.append(return_score_dic)

In [10]:
%%time

# do_fold_training appends one summary dict per trained fold to this global list.
score_list = []

# Train fold 0 only.
do_fold_training(0)

# Release cached GPU memory and run the garbage collector once training has finished.
torch.cuda.empty_cache()
gc.collect()



Epoch[1/70]
 train loss : 0.71534
 valid loss : 0.73259
 train accuracy : 0.50356
 valid accuracy : 0.49989
 eta : 0.0005
 Elapsed : 26.009

Epoch[2/70]
 train loss : 0.71212
 valid loss : 0.72963
 train accuracy : 0.50412
 valid accuracy : 0.50015
 eta : 0.00049
 Elapsed : 25.059

Epoch[3/70]
 train loss : 0.70931
 valid loss : 0.72707
 train accuracy : 0.50481
 valid accuracy : 0.50013
 eta : 0.00049
 Elapsed : 24.81

Epoch[4/70]
 train loss : 0.70652
 valid loss : 0.7245
 train accuracy : 0.50551
 valid accuracy : 0.50022
 eta : 0.00049
 Elapsed : 24.581

Epoch[5/70]
 train loss : 0.70401
 valid loss : 0.72216
 train accuracy : 0.50629
 valid accuracy : 0.5004
 eta : 0.00048
 Elapsed : 24.641

Epoch[6/70]
 train loss : 0.70169
 valid loss : 0.7204
 train accuracy : 0.50712
 valid accuracy : 0.50056
 eta : 0.00048
 Elapsed : 24.541

Epoch[7/70]
 train loss : 0.69956
 valid loss : 0.7183
 train accuracy : 0.50809
 valid accuracy : 0.50071
 eta : 0.00047
 Elapsed : 24.771

Epoch[8/70]


80

In [11]:
# Metrics recorded at the best (lowest validation loss) epoch of the first trained fold.
print(score_list[0])

{'fold': 0, 'train_loss': 0.6723130941390991, 'valid_loss': 0.6995790600776672, 'train_accuracy': 0.5523701906204224, 'valid_accuracy': 0.5100537538528442}
