In [1]:
import numpy as np
import pandas as pd
import csv
import random
import time
import matplotlib.pyplot as plt
from tqdm import tqdm
from pickle import load, dump
from scipy.special import softmax

In [2]:
# raw_data_path = './Data/tbrain_cc_training_48tags_hash_final.csv'
# Load one CSV from ./Data/chid/ (the commented path above is the alternative input).
raw_data_path = './Data/chid/10181291.csv'
df = pd.read_csv(raw_data_path)
# Replace every missing value with 0.0.
df = df.replace(np.nan, 0.0)

In [3]:
# Map the 'other' shop tag to '0' so the whole column can be cast to int8.
# Boolean-mask assignment has to go through .loc; .at is the accessor for a
# single scalar row/column label pair.
df.loc[df['shop_tag'] == 'other', 'shop_tag'] = '0'
df['shop_tag'] = df['shop_tag'].astype('int8')

# Shop tags kept when a dataset is built with less_class=True.
predictable_classes = np.array([2,6,10,12,13,15,18,19,21,22,25,26,36,37,39,48])
# Lookup table: shop tag (0..48) -> position of that tag in `predictable_classes`.
# Tags that are not in `predictable_classes` keep the value 0.
class2idx = np.zeros(49, dtype=int)
class2idx[predictable_classes] = np.arange(len(predictable_classes))
# Same 16 tags in two alternative orders (presumably by transaction frequency
# and by transaction amount, as the names suggest -- TODO confirm).
predictable_classes_sort_by_freq = np.array([37, 15, 36, 10, 2, 48, 12, 19, 25, 6, 18, 13, 22, 39, 21, 26])
predictable_classes_sort_by_amt = np.array([39, 10, 2, 37, 15, 36, 48, 19, 12, 6, 18, 26, 25, 21, 13, 22])

# consumer_ids, num_ids = np.unique(df['chid'], return_counts=True)

In [4]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, Subset

In [5]:
class LoadDataset(Dataset):
    """Dataset that turns one per-customer CSV into dense monthly arrays.

    Item `i` reads 'Data/chid/<chid[i]>.csv' and returns:
      * data: float array of shape (24, 49, 10) -- (month `dt` 1..24, shop tag,
        transaction feature) -- or (24, len(predictable_classes), 10) when
        `less_class` is true. Cells without a matching row stay 0.
      * chid_data: the profile columns of the file's last row, repeated for
        each of the 24 months -> shape (24, 6).
    """

    # Profile columns returned in `chid_data`.
    PROFILE_COLUMNS = ['masts', 'educd', 'naty', 'gender_code', 'age', 'primary_card']
    # Transaction columns, in the order they occupy the last axis of `data`.
    FEATURE_COLUMNS = [
        'txn_amt', 'txn_cnt',
        'domestic_offline_cnt', 'domestic_online_cnt',
        'overseas_offline_cnt', 'overseas_online_cnt',
        'domestic_offline_amt_pct', 'domestic_online_amt_pct',
        'overseas_offline_amt_pct', 'overseas_online_amt_pct',
    ]
    NUM_MONTHS = 24
    NUM_SHOP_TAGS = 49

    def __init__(self, chid, less_class=False):
        """Store the customer ids to serve and whether to keep only `predictable_classes`."""
        self.chid = chid
        self.less_class = less_class

    def __len__(self):
        return len(self.chid)

    def __getitem__(self, _idx):
        """Return (data, chid_data) for the customer at position `_idx`.

        Raises:
            FileNotFoundError: if the customer's CSV does not exist. The message
                names the dataset index and the path.
        """
        path = 'Data/chid/' + str(self.chid[_idx]) + '.csv'
        try:
            df_id = pd.read_csv(path)
        except FileNotFoundError as err:
            # Fail here with context instead of printing the index and crashing
            # later on an unbound `df_id`.
            raise FileNotFoundError(
                f'No data file for dataset index {_idx}: {path}'
            ) from err

        data = np.zeros((self.NUM_MONTHS, self.NUM_SHOP_TAGS, len(self.FEATURE_COLUMNS)))
        for month in range(1, self.NUM_MONTHS + 1):
            rows = df_id[df_id['dt'] == month]
            # shop_tag values are used directly as indices on the second axis.
            shop_tag = rows['shop_tag'].to_numpy()
            data[month - 1, shop_tag, :] = rows[self.FEATURE_COLUMNS].to_numpy()

        if self.less_class:
            data = data[:, predictable_classes]

        chid_data = np.tile(df_id.iloc[-1:][self.PROFILE_COLUMNS].to_numpy(), (self.NUM_MONTHS, 1))
        return data, chid_data

In [6]:
# loaddataset = LoadDataset(np.arange(10000000, 10500000), less_class=True)
# loader = DataLoader(dataset=loaddataset, batch_size=500, shuffle=False, num_workers=40)

# data_all = []
# chid_data_all = []
# for data, chid_data in tqdm(loader):
#     data_all.append(data)
#     chid_data_all.append(chid_data)
    
# data_all = np.concatenate(data_all, axis=0)
# chid_data_all = np.concatenate(chid_data_all, axis=0)
# print(data_all.shape, chid_data_all.shape)

# np.save('Data/data_all.npy', data_all)
# np.save('Data/chid_data_all.npy', chid_data_all)

In [7]:
class CustomDataset(Dataset):
    """Dataset that assembles model inputs/targets from the cached arrays.

    Relies on the module-level arrays `data_all`, `chid_data_all` and
    `norm_pred_all`, which must be loaded before any item is requested. Row
    `chid - CHID_OFFSET` of each array belongs to customer `chid`.
    """

    # Subtracted from a chid to get its row in the cached arrays.
    CHID_OFFSET = 10000000

    def __init__(self, chid, less_class=False):
        """Store the customer ids to serve; `less_class` is kept but not used by this class."""
        self.chid = chid
        self.less_class = less_class

    def __len__(self):
        return len(self.chid)

    @staticmethod
    def _rank_nonzero(values, top_k=None, offset=1):
        """Rank the non-zero entries of every row in ascending order; zeros stay 0.

        With `top_k` set, only the `top_k` largest entries keep distinct ranks
        (1..top_k) and every other non-zero entry collapses to 0 before `offset`
        is added. Without it, the 0-based ascending rank is used as is.
        """
        ranked = np.zeros(values.shape)
        for row, row_values in enumerate(values):
            nonzero = np.where(row_values != 0)[0]
            order = np.argsort(row_values[nonzero]).argsort()
            if top_k is not None:
                order = np.clip(order - len(order) + top_k, 0, None)
            ranked[row, nonzero] = order + offset
        return ranked

    def __getitem__(self, _idx):
        """Return (input, amount target, rank target, norm_pred) tensors for one customer.

        The input has one row per time step and concatenates, in this order:
        signed-log of the first 6 features per class, top-3 amount rank,
        rank of the cumulative amount, amount share, count share and the
        customer profile row. Both targets drop the first time step.
        """
        row = self.chid[_idx] - self.CHID_OFFSET
        data = data_all[row].copy()[:, :, :6]
        chid_data = chid_data_all[row].copy()
        norm_pred = norm_pred_all[row].copy()

        # Everything derived from raw amounts/counts is computed before the log transform.
        amount = data[:, :, 0]
        count = data[:, :, 1]
        output_data = amount.copy()
        sum_data = np.cumsum(amount, axis=0)
        norm_data = amount / (np.sum(amount, axis=1, keepdims=True) + 0.000001)
        norm_cnt_data = count / (np.sum(np.abs(count), axis=1, keepdims=True) + 0.000001)

        # Signed log1p keeps the sign of negative values.
        data = np.sign(data) * np.log(np.abs(data) + 1)

        # Non-zero classes get 12; the three largest get 13, 14, 15.
        rank = self._rank_nonzero(output_data, top_k=3, offset=12)
        rank_output = rank[1:]
        # 1-based ascending rank of the running total per class.
        sum_data_rank = self._rank_nonzero(sum_data)

        input_data = np.hstack([
            data.reshape((len(data), -1)),
            rank,
            sum_data_rank,
            norm_data,
            norm_cnt_data,
            chid_data,
        ])
        return torch.Tensor(input_data), torch.Tensor(output_data[1:]), torch.Tensor(rank_output), torch.Tensor(norm_pred)

In [8]:
# Cached arrays read by CustomDataset.__getitem__ (the commented-out cell above
# shows how they were originally produced and saved).
data_all = np.load('Data/data_all.npy')
chid_data_all = np.load('Data/chid_data_all.npy')
# chid_data_all = np.load('Data/chid_data_process.npy')

In [9]:
# Per-customer array returned as the 4th item by CustomDataset
# (presumably predictions from an earlier ensemble, judging by the file name -- TODO confirm).
norm_pred_all = np.load('726603_ensem.npy')

In [10]:
# Sanity check of the cached array shapes.
data_all.shape, chid_data_all.shape

((500000, 24, 16, 10), (500000, 24, 6))

In [11]:
# 100k-customer subset: `small_dataset` covers chids 10000000..10099999; the
# valid/test datasets are its first and second halves, so they overlap it.
small_dataset = CustomDataset(np.arange(10000000, 10100000), less_class=True)
small_valid_dataset = CustomDataset(np.arange(10000000, 10050000), less_class=True)
small_test_dataset = CustomDataset(np.arange(10050000, 10100000), less_class=True)
# Only the training loader shuffles; valid/test iterate in chid order.
small_loader = DataLoader(dataset=small_dataset, batch_size=500, shuffle=True, num_workers=10)
small_valid_loader = DataLoader(dataset=small_valid_dataset, batch_size=500, shuffle=False, num_workers=10)
small_test_loader = DataLoader(dataset=small_test_dataset, batch_size=500, shuffle=False, num_workers=10)

In [12]:
# Full range of 500k chids; `loader` shuffles, `test_loader` walks the same dataset in order.
dataset = CustomDataset(np.arange(10000000, 10500000), less_class=True)
loader = DataLoader(dataset=dataset, batch_size=500, shuffle=True, num_workers=10)
test_loader = DataLoader(dataset=dataset, batch_size=500, shuffle=False, num_workers=10)

In [13]:
import time
# Time how long fetching a single batch from the shuffled small loader takes.
t = time.time()
# xx/yy/zz/ww: one batch of the four tensors returned by CustomDataset.
# xx and yy are also read by init() to size the model.
xx, yy, zz, ww = next(iter(small_loader))
print(xx.shape, yy.shape, zz.shape, ww.shape, time.time()-t)

torch.Size([500, 24, 166]) torch.Size([500, 23, 16]) torch.Size([500, 23, 16]) torch.Size([500, 16]) 1.7437705993652344


In [14]:
class RNN(nn.Module):
    """Stacked LSTM with a two-layer head that emits class probabilities per time step."""

    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super().__init__()
        self.input_dim, self.hidden_dim, self.output_dim = input_dim, hidden_dim, output_dim
        # Attribute names and creation order are part of the checkpoint format
        # (state_dict keys) and of the seeded initialisation, so they stay fixed.
        self.rnn = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=0.2,
        )
        self.fc = nn.Linear(hidden_dim, 4 * hidden_dim)
        self.out = nn.Linear(4 * hidden_dim, output_dim)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=2)
        self.dropout = nn.Dropout(p=0.4)

    def forward(self, x):
        """Map a (batch, time, input_dim) tensor to (batch, time, output_dim) probabilities."""
        batch_size, seq_len = x.shape[0], x.shape[1]
        hidden_states, _ = self.rnn(x)

        # Fold batch and time together for the head, then unfold again.
        flat = hidden_states.reshape((batch_size * seq_len, self.hidden_dim))
        flat = self.fc(flat)
        flat = self.relu(flat)
        flat = self.dropout(flat)
        scores = self.out(flat).reshape((batch_size, seq_len, self.output_dim))
        return self.softmax(scores)

In [15]:
from sklearn.metrics import ndcg_score
def ndcg_s(y, output):
    """NDCG@3 for torch tensors; moves both to CPU/NumPy and defers to `ndcg_cpu`.

    Args:
        y: (rows, classes) tensor of true relevance values.
        output: (rows, classes) tensor of predicted scores (may require grad).

    Returns:
        (score, n) exactly as `ndcg_cpu` returns them.
    """
    return ndcg_cpu(y.cpu().numpy(), output.cpu().detach().numpy())

def ndcg_cpu(y, output):
    """NDCG@3 over the rows of `y` whose values do not sum to zero.

    Args:
        y: (rows, classes) NumPy array of true relevance values.
        output: (rows, classes) NumPy array of predicted scores.

    Returns:
        (score, n): the mean NDCG@3 over the scored rows and how many rows were
        scored. When no row qualifies the result is (0.0, 0), so that callers
        accumulating `score * n` are unaffected.
    """
    nonzero_idx = (np.sum(y, 1) != 0)
    n = np.sum(nonzero_idx)
    if n == 0:
        # ndcg_score cannot be evaluated on an empty selection.
        return 0.0, n
    return ndcg_score(y[nonzero_idx], output[nonzero_idx], k=3), n

In [16]:
def weighted_XE_NDCG_loss(output, y):
    """Cross-entropy between predicted probabilities and a randomised gain distribution.

    Each row's target distribution is `(2**y - gamma) / sum(2**y - gamma)` with
    `gamma` drawn uniformly from [0, 1) on every call, so the loss is stochastic.

    Args:
        output: probabilities with the class axis last, e.g. (batch, classes)
            or (batch, time, classes). Leading axes are flattened.
        y: relevance labels with the same shape as `output`.

    Returns:
        Scalar tensor: mean over rows of `-sum(target * log(output))`.
    """
    dim = output.shape[-1]
    output = output.reshape(-1, dim)
    y = y.reshape(-1, dim)
    # Sample on the CPU generator and move to wherever `y` lives, so the
    # function also works on CPU tensors instead of requiring CUDA.
    gamma = torch.rand(y.shape).to(y.device)
    gain = 2**y - gamma
    target = gain / torch.sum(gain, dim=1, keepdim=True)
    # Same epsilon as SoftmaxLoss: keeps the loss finite when a probability is exactly 0.
    return torch.mean(-1 * torch.sum(target * torch.log(output + 1e-20), dim=1))

In [17]:
def init():
    """Create a fresh CUDA model together with its optimizer and LR scheduler.

    The input/output widths come from the sample batch `xx` / `yy`, which must
    already exist. Returns (model, optimizer, scheduler).
    """
    net = RNN(xx.shape[2], 320, 2, yy.shape[2])
    net.cuda()
    # AdamW with a 5% learning-rate decay on every scheduler step.
    adamw = torch.optim.AdamW(net.parameters(), lr=0.005)
    decay = torch.optim.lr_scheduler.StepLR(adamw, step_size=1, gamma=0.95)
    return net, adamw, decay

# Seed the torch, random and numpy generators before building the model.
torch.manual_seed(232)
random.seed(232)
np.random.seed(232)
# NOTE(review): in the visible code MSE is only referenced from commented-out loss alternatives.
MSE = torch.nn.MSELoss()
# Module-level model used by the inspection cells below; the k-fold trainers build their own.
model, optimizer, scheduler = init()

In [18]:
def SoftmaxLoss(output, y, temp=1):
    """Cross-entropy between `output / temp` and the row-normalised labels.

    Args:
        output: probabilities with the class axis last; leading axes are flattened.
        y: non-negative labels with the same shape as `output`. Each row is
            divided by its sum; an all-zero row contributes 0 to the loss.
        temp: divisor applied to `output` before the logarithm.

    Returns:
        Scalar tensor: mean over rows of `-sum(y_norm * log(output / temp + 1e-20))`.
    """
    dim = output.shape[-1]
    output = output.reshape(-1, dim)/temp
    y = y.reshape(-1, dim)
    # The epsilons guard against division by zero and log(0).
    y = y/(torch.sum(y, dim=1, keepdim=True) + 1e-20)
    return torch.mean(-1*torch.sum(y*torch.log(output + 1e-20), dim=1))

In [39]:
# Show the layer structure of the module-level model.
print(model)

RNN(
  (rnn): LSTM(166, 320, num_layers=2, batch_first=True, dropout=0.2)
  (fc): Linear(in_features=320, out_features=1280, bias=True)
  (out): Linear(in_features=1280, out_features=16, bias=True)
  (relu): ReLU()
  (softmax): Softmax(dim=2)
  (dropout): Dropout(p=0.4, inplace=False)
)


In [49]:
from torchinfo import summary

In [56]:
from torchinfo import summary

# Summarise a fresh network under its own name: rebinding `model` here would
# replace the CUDA model built by init(), which later cells call with CUDA inputs.
summary_model = RNN(xx.shape[2], 320, 2, yy.shape[2])
# One sequence with the same time/feature sizes as the sample batch `xx`.
summary(summary_model, input_size=(1, xx.shape[1], xx.shape[2]), col_names=["kernel_size", "output_size", "num_params"])

Layer (type:depth-idx)                   Kernel Shape              Output Shape              Param #
RNN                                      --                        --                        --
├─LSTM: 1-1                              --                        [1, 24, 320]              1,446,400
├─Linear: 1-2                            [320, 1280]               [24, 1280]                410,880
├─ReLU: 1-3                              --                        [24, 1280]                --
├─Dropout: 1-4                           --                        [24, 1280]                --
├─Linear: 1-5                            [1280, 16]                [24, 16]                  20,496
├─Softmax: 1-6                           --                        [1, 24, 16]               --
Total params: 1,877,776
Trainable params: 1,877,776
Non-trainable params: 0
Total mult-adds (M): 45.07
Input size (MB): 0.02
Forward/backward pass size (MB): 0.31
Params size (MB): 7.51
Estimated Total Size (MB)

In [50]:
# NOTE(review): in the recorded run this call ended in a torchinfo RuntimeError (see the output below).
summary(model, input_size=(1,24,166))

RuntimeError: Failed to run torchinfo. See above stack traces for more details. Executed layers up to: []

In [43]:
# Shape of the sample input batch.
xx.shape

torch.Size([500, 24, 166])

In [38]:
# Forward the sample batch through the module-level model in eval mode (dropout disabled).
model.eval()
oo = model(xx.cuda())

In [20]:
# Loss of the sample predictions against the rank targets `zz`, first 22 time steps only.
weighted_XE_NDCG_loss(oo[:,:22], zz[:,:22].cuda())

tensor(2.7779, device='cuda:0', grad_fn=<MeanBackward0>)

In [21]:
# NDCG@3 of the sample predictions at step 22 against the amount targets `yy`,
# together with the number of rows that have a non-zero target.
ndcg_s(yy[:,22], oo[:,22])

(0.06260284707678863, 400)

# Use the first 100,000 customers for method evaluation

In [22]:
from sklearn.model_selection import KFold
def train_kfold(dataset, test=0, n_splits=2, fn='rnn.pt'):
    """Train one model per K-fold split and print per-fold and ensemble test NDCG.

    Per fold:
      * customers in the training split are fitted on target steps 0..21;
      * customers in the validation split are also used for gradient updates,
        but only on target steps 0..20 -- their step 21 is held out and drives
        checkpoint selection and early stopping;
      * target step 22 of every customer in `dataset` is the test target.

    Args:
        dataset: dataset yielding (input, amount target, rank target, extra)
            per item, e.g. a CustomDataset.
        test: if truthy, also compute and print test NDCG after every epoch.
        n_splits: number of folds (and of ensembled models).
        fn: checkpoint file name inside the 'Model/' directory.

    Returns:
        None. All results are printed.
    """
    import os

    max_epoch = 20
    patience = 3
    # torch.save does not create missing directories.
    os.makedirs('Model', exist_ok=True)
    pred_sum = np.zeros((len(dataset), 16))
    pred_sum2 = np.zeros((len(dataset), 16))
    ndcg_list = []
    kf = KFold(n_splits=n_splits)
    # KFold only needs the number of samples; it yields positional indices.
    for kfidx, (train_index, valid_index) in enumerate(kf.split(np.arange(len(dataset)))):
        train_subset = Subset(dataset, train_index)
        valid_subset = Subset(dataset, valid_index)
        train_loader = DataLoader(dataset=train_subset, batch_size=750, shuffle=True, num_workers=20)
        valid_loader = DataLoader(dataset=valid_subset, batch_size=750, shuffle=True, num_workers=20)
        valid_loader_ns = DataLoader(dataset=valid_subset, batch_size=750, shuffle=False, num_workers=20)
        test_loader = DataLoader(dataset=dataset, batch_size=750, shuffle=False, num_workers=20)
        # -inf guarantees that the first epoch always writes a checkpoint for this fold.
        best_ndcg = float('-inf')
        worse = 0
        model, optimizer, scheduler = init()
        for _epoch in range(max_epoch):
            train_ndcg = 0
            valid_ndcg = 0
            test_ndcg = 0
            train_cnt = 0
            valid_cnt = 0
            test_cnt = 0
            train_loss = 0
            model.train()
            # Training split: fit target steps 0..21.
            for x, _, y, _ in tqdm(train_loader):
                x = x.cuda()
                y = y.cuda()
                output = model(x)
                loss = weighted_XE_NDCG_loss(output[:,:22], y[:,:22])
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                train_loss += loss.item()
            # Validation split: fit target steps 0..20 only; step 21 stays held out.
            for x, y_value, y, _ in tqdm(valid_loader):
                x = x.cuda()
                y = y.cuda()
                output = model(x)
                loss = weighted_XE_NDCG_loss(output[:,:21], y[:,:21])
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                train_loss += loss.item()
                # "Train NDCG" is measured on step 21 with the pre-update predictions.
                s, n = ndcg_s(y_value[:,21], output[:,21])
                train_ndcg += s*n
                train_cnt += n
            model.eval()
            # Metrics only: no autograd graph needed.
            with torch.no_grad():
                for x, y_value, _, _ in tqdm(valid_loader_ns):
                    output = model(x.cuda())
                    s, n = ndcg_s(y_value[:,21], output[:,21])
                    valid_ndcg += s*n
                    valid_cnt += n
                if test:
                    for x, y_value, _, _ in tqdm(test_loader):
                        output = model(x.cuda())
                        s, n = ndcg_s(y_value[:,22], output[:,22])
                        test_ndcg += s*n
                        test_cnt += n

            if best_ndcg < valid_ndcg:
                best_ndcg = valid_ndcg
                worse = 0
                torch.save(model.state_dict(), 'Model/'+fn)
            elif _epoch >= 5:
                # Early-stopping patience only starts counting after epoch 5.
                worse += 1

            mean_loss = train_loss/(len(train_loader)+len(valid_loader))
            message = (f'KF: {kfidx}, Epoch: {_epoch}, Train Loss: {mean_loss:.4f}, '
                       f'Train NDCG: {train_ndcg/max(train_cnt, 1):.4f}, '
                       f'Valid NDCG: {valid_ndcg/max(valid_cnt, 1):.4f}')
            if test:
                message += f', Test NDCG: {test_ndcg/max(test_cnt, 1):.4f}'
            print(message)
            scheduler.step()
            if worse >= patience:
                break

        # Score the best checkpoint of this fold on the test step of every customer.
        model.load_state_dict(torch.load('Model/'+fn))
        model.eval()
        pred = []
        gt = []
        with torch.no_grad():
            for x, y_value, _, _ in tqdm(test_loader):
                output = model(x.cuda())
                pred.append(output[:,22].cpu().numpy())
                gt.append(y_value[:,22].numpy())
        pred = np.concatenate(pred, axis=0)
        gt = np.concatenate(gt, axis=0)
        s, n = ndcg_cpu(gt, pred)
        print(f'Test NDCG: {s:.6f}')
        ndcg_list.append(s)
        pred_sum += pred
        pred_sum2 += softmax(pred, axis=1)

    # Ensemble 1: mean of the fold probabilities.
    pred_sum = pred_sum/n_splits
    s, n = ndcg_cpu(gt, pred_sum)
    print(ndcg_list)
    print(f'Ensemble Test NDCG: {s:.6f}')
    # Ensemble 2: mean of the re-softmaxed fold probabilities.
    pred_sum2 = pred_sum2/n_splits
    s, n = ndcg_cpu(gt, pred_sum2)
    print(f'Ensemble Test NDCG: {s:.6f}')
    
def train_kfold_sep(dataset, test=0, n_splits=2, fn='rnn.pt', windows=7):
    """K-fold training where the model only ever sees a sliding window of `windows` steps.

    The prediction for target step `w-1` is the last output of the model run on
    input steps `w-windows .. w-1`. Per fold:
      * customers in the training split are fitted on windows ending at w = windows..22;
      * customers in the validation split are also used for gradient updates,
        on windows ending at w = windows..21; the window ending at 22
        (target step 21) is held out for checkpoint selection / early stopping;
      * the window ending at 23 (target step 22) of every customer is the test case.

    Args:
        dataset: dataset yielding (input, amount target, rank target, extra)
            per item, e.g. a CustomDataset.
        test: if truthy, also compute and print test NDCG after every epoch.
        n_splits: number of folds (and of ensembled models).
        fn: checkpoint file name inside the 'Model/' directory.
        windows: number of input steps fed to the model, 1..21.

    Returns:
        None. All results are printed.

    Raises:
        ValueError: if `windows` is outside 1..21 (no training window would be
            left for the validation split).
    """
    import os

    if not 1 <= windows <= 21:
        raise ValueError(f'windows must be between 1 and 21, got {windows}')

    def window_loss(model, x, y, last_end):
        """Sum the loss over all windows ending at windows..last_end (inclusive)."""
        loss = 0
        for w in range(windows, last_end + 1):
            output = model(x[:,w-windows:w])
            loss += weighted_XE_NDCG_loss(output[:,-1], y[:,w-1])
        return loss

    max_epoch = 20
    patience = 3
    # torch.save does not create missing directories.
    os.makedirs('Model', exist_ok=True)
    pred_sum = np.zeros((len(dataset), 16))
    pred_sum2 = np.zeros((len(dataset), 16))
    ndcg_list = []
    kf = KFold(n_splits=n_splits)
    # KFold only needs the number of samples; it yields positional indices.
    for kfidx, (train_index, valid_index) in enumerate(kf.split(np.arange(len(dataset)))):
        train_subset = Subset(dataset, train_index)
        valid_subset = Subset(dataset, valid_index)
        train_loader = DataLoader(dataset=train_subset, batch_size=750, shuffle=True, num_workers=20)
        valid_loader = DataLoader(dataset=valid_subset, batch_size=750, shuffle=True, num_workers=20)
        valid_loader_ns = DataLoader(dataset=valid_subset, batch_size=750, shuffle=False, num_workers=20)
        test_loader = DataLoader(dataset=dataset, batch_size=750, shuffle=False, num_workers=20)
        # -inf guarantees that the first epoch always writes a checkpoint for this fold.
        best_ndcg = float('-inf')
        worse = 0
        model, optimizer, scheduler = init()
        for _epoch in range(max_epoch):
            train_ndcg = 0
            valid_ndcg = 0
            test_ndcg = 0
            train_cnt = 0
            valid_cnt = 0
            test_cnt = 0
            train_loss = 0
            model.train()
            # Training split: windows ending at windows..22.
            for x, _, y, _ in tqdm(train_loader):
                x = x.cuda()
                y = y.cuda()
                loss = window_loss(model, x, y, 22)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                train_loss += loss.item()
            # Validation split: windows ending at windows..21; the window ending at 22 stays held out.
            for x, y_value, y, _ in tqdm(valid_loader):
                x = x.cuda()
                y = y.cuda()
                loss = window_loss(model, x, y, 21)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                train_loss += loss.item()
                # "Train NDCG": held-out window, model still in train mode; metric only.
                with torch.no_grad():
                    output = model(x[:,22-windows:22])
                s, n = ndcg_s(y_value[:,21], output[:,-1])
                train_ndcg += s*n
                train_cnt += n
            model.eval()
            # Metrics only: no autograd graph needed.
            with torch.no_grad():
                for x, y_value, _, _ in tqdm(valid_loader_ns):
                    output = model(x.cuda()[:,22-windows:22])
                    s, n = ndcg_s(y_value[:,21], output[:,-1])
                    valid_ndcg += s*n
                    valid_cnt += n
                if test:
                    for x, y_value, _, _ in tqdm(test_loader):
                        # Same window as the final test evaluation below, so the
                        # per-epoch number tracks the reported one.
                        output = model(x.cuda()[:,23-windows:23])
                        s, n = ndcg_s(y_value[:,22], output[:,-1])
                        test_ndcg += s*n
                        test_cnt += n

            if best_ndcg < valid_ndcg:
                best_ndcg = valid_ndcg
                worse = 0
                torch.save(model.state_dict(), 'Model/'+fn)
            elif _epoch >= 5:
                # Early-stopping patience only starts counting after epoch 5.
                worse += 1

            mean_loss = train_loss/(len(train_loader)+len(valid_loader))
            message = (f'KF: {kfidx}, Epoch: {_epoch}, Train Loss: {mean_loss:.4f}, '
                       f'Train NDCG: {train_ndcg/max(train_cnt, 1):.4f}, '
                       f'Valid NDCG: {valid_ndcg/max(valid_cnt, 1):.4f}')
            if test:
                message += f', Test NDCG: {test_ndcg/max(test_cnt, 1):.4f}'
            print(message)
            scheduler.step()
            if worse >= patience:
                break

        # Score the best checkpoint of this fold on the test window of every customer.
        model.load_state_dict(torch.load('Model/'+fn))
        model.eval()
        pred = []
        gt = []
        with torch.no_grad():
            for x, y_value, _, _ in tqdm(test_loader):
                output = model(x.cuda()[:,23-windows:23])
                pred.append(output[:,-1].cpu().numpy())
                gt.append(y_value[:,22].numpy())
        pred = np.concatenate(pred, axis=0)
        gt = np.concatenate(gt, axis=0)
        s, n = ndcg_cpu(gt, pred)
        print(f'Test NDCG: {s:.6f}')
        ndcg_list.append(s)
        pred_sum += pred
        pred_sum2 += softmax(pred, axis=1)

    # Ensemble 1: mean of the fold probabilities.
    pred_sum = pred_sum/n_splits
    s, n = ndcg_cpu(gt, pred_sum)
    print(ndcg_list)
    print(f'Ensemble Test NDCG: {s:.6f}')
    # Ensemble 2: mean of the re-softmaxed fold probabilities.
    pred_sum2 = pred_sum2/n_splits
    s, n = ndcg_cpu(gt, pred_sum2)
    print(f'Ensemble Test NDCG: {s:.6f}')

In [23]:
# 3-fold run on the 100k-customer subset, without the per-epoch test metric.
train_kfold(small_dataset, test=0, n_splits=3)

100%|█████████████████████████████████████████████████████████| 89/89 [00:08<00:00, 10.11it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:07<00:00,  6.35it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:06<00:00,  7.30it/s]
  0%|                                                                  | 0/89 [00:00<?, ?it/s]

KF: 0, Epoch: 0, Train Loss: 2.2186, Train NDCG: 0.7261, Valid NDCG: 0.7295


100%|█████████████████████████████████████████████████████████| 89/89 [00:09<00:00,  9.53it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:06<00:00,  6.62it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:06<00:00,  6.97it/s]
  0%|                                                                  | 0/89 [00:00<?, ?it/s]

KF: 0, Epoch: 1, Train Loss: 2.1754, Train NDCG: 0.7279, Valid NDCG: 0.7301


100%|█████████████████████████████████████████████████████████| 89/89 [00:09<00:00,  9.82it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:06<00:00,  6.51it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:06<00:00,  7.15it/s]
  0%|                                                                  | 0/89 [00:00<?, ?it/s]

KF: 0, Epoch: 2, Train Loss: 2.1705, Train NDCG: 0.7300, Valid NDCG: 0.7326


100%|█████████████████████████████████████████████████████████| 89/89 [00:08<00:00,  9.92it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:07<00:00,  6.07it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:06<00:00,  7.05it/s]
  0%|                                                                  | 0/89 [00:00<?, ?it/s]

KF: 0, Epoch: 3, Train Loss: 2.1676, Train NDCG: 0.7307, Valid NDCG: 0.7305


100%|█████████████████████████████████████████████████████████| 89/89 [00:09<00:00,  9.44it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:07<00:00,  6.23it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:05<00:00,  7.60it/s]
  0%|                                                                  | 0/89 [00:00<?, ?it/s]

KF: 0, Epoch: 4, Train Loss: 2.1656, Train NDCG: 0.7317, Valid NDCG: 0.7317


100%|█████████████████████████████████████████████████████████| 89/89 [00:09<00:00,  9.65it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:07<00:00,  6.05it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:05<00:00,  7.56it/s]
  0%|                                                                  | 0/89 [00:00<?, ?it/s]

KF: 0, Epoch: 5, Train Loss: 2.1637, Train NDCG: 0.7328, Valid NDCG: 0.7344


100%|█████████████████████████████████████████████████████████| 89/89 [00:08<00:00, 10.00it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:07<00:00,  6.37it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:05<00:00,  7.84it/s]
  0%|                                                                  | 0/89 [00:00<?, ?it/s]

KF: 0, Epoch: 6, Train Loss: 2.1625, Train NDCG: 0.7321, Valid NDCG: 0.7354


100%|█████████████████████████████████████████████████████████| 89/89 [00:08<00:00,  9.93it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:07<00:00,  6.19it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:05<00:00,  7.76it/s]
  0%|                                                                  | 0/89 [00:00<?, ?it/s]

KF: 0, Epoch: 7, Train Loss: 2.1609, Train NDCG: 0.7319, Valid NDCG: 0.7335


100%|█████████████████████████████████████████████████████████| 89/89 [00:08<00:00,  9.98it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:07<00:00,  6.27it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:05<00:00,  7.69it/s]
  0%|                                                                  | 0/89 [00:00<?, ?it/s]

KF: 0, Epoch: 8, Train Loss: 2.1594, Train NDCG: 0.7323, Valid NDCG: 0.7349


100%|█████████████████████████████████████████████████████████| 89/89 [00:08<00:00, 10.06it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:06<00:00,  6.48it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:06<00:00,  7.49it/s]
  0%|                                                                 | 0/134 [00:00<?, ?it/s]

KF: 0, Epoch: 9, Train Loss: 2.1578, Train NDCG: 0.7309, Valid NDCG: 0.7345


100%|███████████████████████████████████████████████████████| 134/134 [00:11<00:00, 12.13it/s]
  0%|                                                                  | 0/89 [00:00<?, ?it/s]

Test NDCG: 0.708027


100%|█████████████████████████████████████████████████████████| 89/89 [00:08<00:00, 10.29it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:06<00:00,  6.48it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:05<00:00,  7.73it/s]
  0%|                                                                  | 0/89 [00:00<?, ?it/s]

KF: 1, Epoch: 0, Train Loss: 2.2702, Train NDCG: 0.7223, Valid NDCG: 0.7284


100%|█████████████████████████████████████████████████████████| 89/89 [00:08<00:00, 10.56it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:07<00:00,  6.36it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:05<00:00,  7.74it/s]
  0%|                                                                  | 0/89 [00:00<?, ?it/s]

KF: 1, Epoch: 1, Train Loss: 2.1780, Train NDCG: 0.7273, Valid NDCG: 0.7317


100%|█████████████████████████████████████████████████████████| 89/89 [00:09<00:00,  9.83it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:06<00:00,  6.63it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:05<00:00,  7.72it/s]
  0%|                                                                  | 0/89 [00:00<?, ?it/s]

KF: 1, Epoch: 2, Train Loss: 2.1726, Train NDCG: 0.7287, Valid NDCG: 0.7314


100%|█████████████████████████████████████████████████████████| 89/89 [00:08<00:00, 10.13it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:06<00:00,  6.51it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:05<00:00,  7.61it/s]
  0%|                                                                  | 0/89 [00:00<?, ?it/s]

KF: 1, Epoch: 3, Train Loss: 2.1696, Train NDCG: 0.7297, Valid NDCG: 0.7325


100%|█████████████████████████████████████████████████████████| 89/89 [00:09<00:00,  9.74it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:07<00:00,  6.42it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:05<00:00,  7.79it/s]
  0%|                                                                  | 0/89 [00:00<?, ?it/s]

KF: 1, Epoch: 4, Train Loss: 2.1673, Train NDCG: 0.7300, Valid NDCG: 0.7324


100%|█████████████████████████████████████████████████████████| 89/89 [00:08<00:00, 10.65it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:07<00:00,  6.29it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:05<00:00,  7.56it/s]
  0%|                                                                  | 0/89 [00:00<?, ?it/s]

KF: 1, Epoch: 5, Train Loss: 2.1655, Train NDCG: 0.7306, Valid NDCG: 0.7330


100%|█████████████████████████████████████████████████████████| 89/89 [00:09<00:00,  9.87it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:07<00:00,  6.31it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:05<00:00,  7.74it/s]
  0%|                                                                  | 0/89 [00:00<?, ?it/s]

KF: 1, Epoch: 6, Train Loss: 2.1644, Train NDCG: 0.7312, Valid NDCG: 0.7316


100%|█████████████████████████████████████████████████████████| 89/89 [00:08<00:00,  9.94it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:06<00:00,  6.85it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:05<00:00,  7.53it/s]
  0%|                                                                  | 0/89 [00:00<?, ?it/s]

KF: 1, Epoch: 7, Train Loss: 2.1632, Train NDCG: 0.7315, Valid NDCG: 0.7339


100%|█████████████████████████████████████████████████████████| 89/89 [00:08<00:00, 10.59it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:07<00:00,  6.12it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:05<00:00,  7.96it/s]
  0%|                                                                  | 0/89 [00:00<?, ?it/s]

KF: 1, Epoch: 8, Train Loss: 2.1618, Train NDCG: 0.7308, Valid NDCG: 0.7343


100%|█████████████████████████████████████████████████████████| 89/89 [00:08<00:00,  9.93it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:07<00:00,  6.19it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:05<00:00,  7.76it/s]
  0%|                                                                  | 0/89 [00:00<?, ?it/s]

KF: 1, Epoch: 9, Train Loss: 2.1609, Train NDCG: 0.7307, Valid NDCG: 0.7339


100%|█████████████████████████████████████████████████████████| 89/89 [00:08<00:00, 10.05it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:07<00:00,  6.15it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:05<00:00,  7.67it/s]
  0%|                                                                  | 0/89 [00:00<?, ?it/s]

KF: 1, Epoch: 10, Train Loss: 2.1600, Train NDCG: 0.7313, Valid NDCG: 0.7338


100%|█████████████████████████████████████████████████████████| 89/89 [00:08<00:00, 10.66it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:06<00:00,  6.48it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:06<00:00,  7.18it/s]
  0%|                                                                 | 0/134 [00:00<?, ?it/s]

KF: 1, Epoch: 11, Train Loss: 2.1589, Train NDCG: 0.7310, Valid NDCG: 0.7337


100%|███████████████████████████████████████████████████████| 134/134 [00:11<00:00, 11.85it/s]
  0%|                                                                  | 0/89 [00:00<?, ?it/s]

Test NDCG: 0.710269


100%|█████████████████████████████████████████████████████████| 89/89 [00:08<00:00, 10.33it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:06<00:00,  6.95it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:05<00:00,  7.77it/s]
  0%|                                                                  | 0/89 [00:00<?, ?it/s]

KF: 2, Epoch: 0, Train Loss: 2.2588, Train NDCG: 0.7213, Valid NDCG: 0.7272


100%|█████████████████████████████████████████████████████████| 89/89 [00:08<00:00, 10.02it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:07<00:00,  6.28it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:05<00:00,  7.82it/s]
  0%|                                                                  | 0/89 [00:00<?, ?it/s]

KF: 2, Epoch: 1, Train Loss: 2.1780, Train NDCG: 0.7262, Valid NDCG: 0.7262


100%|█████████████████████████████████████████████████████████| 89/89 [00:08<00:00, 10.12it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:06<00:00,  6.60it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:06<00:00,  7.38it/s]
  0%|                                                                  | 0/89 [00:00<?, ?it/s]

KF: 2, Epoch: 2, Train Loss: 2.1720, Train NDCG: 0.7284, Valid NDCG: 0.7311


100%|█████████████████████████████████████████████████████████| 89/89 [00:08<00:00, 10.22it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:07<00:00,  6.21it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:05<00:00,  7.70it/s]
  0%|                                                                  | 0/89 [00:00<?, ?it/s]

KF: 2, Epoch: 3, Train Loss: 2.1689, Train NDCG: 0.7292, Valid NDCG: 0.7325


100%|█████████████████████████████████████████████████████████| 89/89 [00:08<00:00, 10.16it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:06<00:00,  6.61it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:05<00:00,  8.06it/s]
  0%|                                                                  | 0/89 [00:00<?, ?it/s]

KF: 2, Epoch: 4, Train Loss: 2.1668, Train NDCG: 0.7296, Valid NDCG: 0.7315


100%|█████████████████████████████████████████████████████████| 89/89 [00:08<00:00, 10.31it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:06<00:00,  6.67it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:05<00:00,  7.70it/s]
  0%|                                                                  | 0/89 [00:00<?, ?it/s]

KF: 2, Epoch: 5, Train Loss: 2.1653, Train NDCG: 0.7303, Valid NDCG: 0.7325


100%|█████████████████████████████████████████████████████████| 89/89 [00:08<00:00,  9.96it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:06<00:00,  6.69it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:05<00:00,  7.61it/s]
  0%|                                                                  | 0/89 [00:00<?, ?it/s]

KF: 2, Epoch: 6, Train Loss: 2.1642, Train NDCG: 0.7303, Valid NDCG: 0.7317


100%|█████████████████████████████████████████████████████████| 89/89 [00:08<00:00, 10.29it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:07<00:00,  6.28it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:05<00:00,  7.79it/s]
  0%|                                                                  | 0/89 [00:00<?, ?it/s]

KF: 2, Epoch: 7, Train Loss: 2.1632, Train NDCG: 0.7308, Valid NDCG: 0.7340


100%|█████████████████████████████████████████████████████████| 89/89 [00:09<00:00,  9.60it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:07<00:00,  6.23it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:05<00:00,  7.84it/s]
  0%|                                                                  | 0/89 [00:00<?, ?it/s]

KF: 2, Epoch: 8, Train Loss: 2.1619, Train NDCG: 0.7310, Valid NDCG: 0.7334


100%|█████████████████████████████████████████████████████████| 89/89 [00:09<00:00,  9.88it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:06<00:00,  6.74it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:05<00:00,  7.52it/s]
  0%|                                                                  | 0/89 [00:00<?, ?it/s]

KF: 2, Epoch: 9, Train Loss: 2.1613, Train NDCG: 0.7300, Valid NDCG: 0.7338


100%|█████████████████████████████████████████████████████████| 89/89 [00:08<00:00, 10.12it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:06<00:00,  6.61it/s]
100%|█████████████████████████████████████████████████████████| 45/45 [00:05<00:00,  7.77it/s]
  0%|                                                                 | 0/134 [00:00<?, ?it/s]

KF: 2, Epoch: 10, Train Loss: 2.1603, Train NDCG: 0.7312, Valid NDCG: 0.7326


100%|███████████████████████████████████████████████████████| 134/134 [00:11<00:00, 11.90it/s]


Test NDCG: 0.711107
[0.7080271526054656, 0.7102689241672943, 0.7111065262511872]
Ensemble Test NDCG: 0.711800
Ensemble Test NDCG: 0.711800


In [24]:
# train_kfold_sep(small_dataset, test=0, n_splits=2, windows=15)

In [25]:
from sklearn.model_selection import KFold
# K-fold training with early stopping, followed by test-set inference per fold.
# Test predictions of every fold are accumulated twice: as a plain mean
# (pred_sum) and as a mean weighted by each fold's best validation NDCG
# (pred_sum_weighted).
n_splits = 10
max_epoch = 20
patience = 3
# Ensemble accumulators: one row per test sample (must match the concatenated
# test predictions below), one column per predictable class.
pred_sum = np.zeros((500000, 16))
pred_sum_weighted = np.zeros((500000, 16))
ndcg_sum = 0
ndcg_list = []
cnt_sum = 0
# NOTE(review): no random_state is passed, so the fold assignment differs between runs.
kf = KFold(n_splits=n_splits, shuffle=True)
for kfidx, (train_index, valid_index) in enumerate(kf.split(np.arange(10000000, 10500000))):
    train_subset = Subset(dataset, train_index)
    valid_subset = Subset(dataset, valid_index)
    train_loader = DataLoader(dataset=train_subset, batch_size=750, shuffle=True, num_workers=30)
    # valid_loader (shuffled) is used for extra gradient steps on the held-out fold,
    # valid_loader2 (ordered) is used for scoring it.
    valid_loader = DataLoader(dataset=valid_subset, batch_size=750, shuffle=True, num_workers=30)
    valid_loader2 = DataLoader(dataset=valid_subset, batch_size=750, shuffle=False, num_workers=30)
    best_ndcg = 0
    worse = 0
    model, optimizer, scheduler = init()

    for _epoch in range(max_epoch):
        train_ndcg = 0
        valid_ndcg = 0
        train_cnt = 0
        valid_cnt = 0
        train_loss = 0
        model.train()
        # Pass 1: fit on the training fold using output/target slices [:, :23].
        for x, _, y, _ in tqdm(train_loader):
            x = x.cuda()
            y = y.cuda()
            output = model(x)
            loss = weighted_XE_NDCG_loss(output[:,:23], y[:,:23])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # Pass 2: also fit on the validation fold, but only on slices [:, :22], so
        # index 22 (the one scored below) is never used as a training target here.
        for x, y_value, y, _ in tqdm(valid_loader):
            x = x.cuda()
            y_value = y_value.cuda()
            y = y.cuda()
            output = model(x)
            loss = weighted_XE_NDCG_loss(output[:,:22], y[:,:22])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            # ndcg_s returns a (score, count) pair; presumably a mean and a sample
            # count, since the two are multiplied and re-normalised -- TODO confirm.
            s, n = ndcg_s(y_value[:,22], output[:,22])
            train_ndcg += s*n
            train_cnt += n

        # Score index 22 on the validation fold. no_grad() avoids building an
        # autograd graph for batches that are never back-propagated.
        model.eval()
        with torch.no_grad():
            for x, y_value, _, _ in tqdm(valid_loader2):
                x = x.cuda()
                y_value = y_value.cuda()
                output = model(x)
                s, n = ndcg_s(y_value[:,22], output[:,22])
                valid_ndcg += s*n
                valid_cnt += n

        # Early stopping compares the un-normalised NDCG sums of successive epochs.
        if best_ndcg < valid_ndcg:
            best_ndcg = valid_ndcg
            worse = 0
            torch.save(model.state_dict(), 'Model/best.pt')
        else:
            # Non-improving epochs only count against patience from epoch 5 onwards.
            if _epoch >= 5:
                worse += 1
        print(f'KF: {kfidx}, Epoch: {_epoch:}, Train Loss: {train_loss/(len(train_loader)+len(valid_loader)):.4f}, Train NDCG: {train_ndcg/train_cnt:.4f}, Valid NDCG: {valid_ndcg/valid_cnt:.4f}')
        scheduler.step()
        if worse >= patience: 
            break

    # Restore the best checkpoint of this fold before predicting on the test set.
    model.load_state_dict(torch.load('Model/best.pt'))
    model.eval()
    ndcg_sum += best_ndcg
    cnt_sum += valid_cnt
    ndcg_list.append(best_ndcg/valid_cnt)
    pred = []
    with torch.no_grad():
        for x, _, _, _ in tqdm(test_loader):
            x = x.cuda()
            output = model(x)
            # Index 23 of the model output is taken as the test prediction.
            pred.append(output[:,23].detach().cpu().numpy())
    pred = np.concatenate(pred, axis=0)
    pred_sum_weighted += pred*(best_ndcg/valid_cnt)
    pred_sum += pred
# Normalise the accumulators: weighted mean by fold NDCG, and plain mean over folds.
pred_sum_weighted = pred_sum_weighted/sum(ndcg_list)
pred_sum = pred_sum/n_splits
print(f'Valid NDCG: {ndcg_sum/cnt_sum:.6f}')

100%|███████████████████████████████████████████████████████| 600/600 [00:32<00:00, 18.20it/s]
100%|█████████████████████████████████████████████████████████| 67/67 [00:10<00:00,  6.53it/s]
100%|█████████████████████████████████████████████████████████| 67/67 [00:08<00:00,  7.57it/s]
  0%|                                                                 | 0/600 [00:00<?, ?it/s]

KF: 0, Epoch: 0, Train Loss: 2.1858, Train NDCG: 0.7116, Valid NDCG: 0.7138


100%|███████████████████████████████████████████████████████| 600/600 [00:33<00:00, 17.90it/s]
100%|█████████████████████████████████████████████████████████| 67/67 [00:10<00:00,  6.43it/s]
100%|█████████████████████████████████████████████████████████| 67/67 [00:07<00:00,  8.44it/s]
  0%|                                                                 | 0/600 [00:00<?, ?it/s]

KF: 0, Epoch: 1, Train Loss: 2.1582, Train NDCG: 0.7134, Valid NDCG: 0.7161


100%|███████████████████████████████████████████████████████| 600/600 [00:34<00:00, 17.57it/s]
100%|█████████████████████████████████████████████████████████| 67/67 [00:10<00:00,  6.25it/s]
100%|█████████████████████████████████████████████████████████| 67/67 [00:07<00:00,  8.43it/s]
  0%|                                                                 | 0/600 [00:00<?, ?it/s]

KF: 0, Epoch: 2, Train Loss: 2.1556, Train NDCG: 0.7151, Valid NDCG: 0.7161


100%|███████████████████████████████████████████████████████| 600/600 [00:32<00:00, 18.23it/s]
100%|█████████████████████████████████████████████████████████| 67/67 [00:10<00:00,  6.63it/s]
100%|█████████████████████████████████████████████████████████| 67/67 [00:08<00:00,  8.37it/s]
  0%|                                                                 | 0/600 [00:00<?, ?it/s]

KF: 0, Epoch: 3, Train Loss: 2.1542, Train NDCG: 0.7157, Valid NDCG: 0.7189


100%|███████████████████████████████████████████████████████| 600/600 [00:33<00:00, 18.03it/s]
100%|█████████████████████████████████████████████████████████| 67/67 [00:10<00:00,  6.40it/s]
100%|█████████████████████████████████████████████████████████| 67/67 [00:08<00:00,  8.02it/s]
  0%|                                                                 | 0/600 [00:00<?, ?it/s]

KF: 0, Epoch: 4, Train Loss: 2.1531, Train NDCG: 0.7166, Valid NDCG: 0.7175


100%|███████████████████████████████████████████████████████| 600/600 [00:33<00:00, 18.04it/s]
100%|█████████████████████████████████████████████████████████| 67/67 [00:09<00:00,  6.88it/s]
100%|█████████████████████████████████████████████████████████| 67/67 [00:08<00:00,  8.35it/s]
  0%|                                                                 | 0/600 [00:00<?, ?it/s]

KF: 0, Epoch: 5, Train Loss: 2.1522, Train NDCG: 0.7159, Valid NDCG: 0.7150


100%|███████████████████████████████████████████████████████| 600/600 [00:33<00:00, 18.10it/s]
100%|█████████████████████████████████████████████████████████| 67/67 [00:09<00:00,  7.06it/s]
100%|█████████████████████████████████████████████████████████| 67/67 [00:07<00:00,  8.46it/s]
  0%|                                                                 | 0/600 [00:00<?, ?it/s]

KF: 0, Epoch: 6, Train Loss: 2.1512, Train NDCG: 0.7159, Valid NDCG: 0.7174


100%|███████████████████████████████████████████████████████| 600/600 [00:32<00:00, 18.33it/s]
100%|█████████████████████████████████████████████████████████| 67/67 [00:09<00:00,  7.04it/s]
100%|█████████████████████████████████████████████████████████| 67/67 [00:08<00:00,  8.36it/s]
  0%|                                                                | 0/1000 [00:00<?, ?it/s]

KF: 0, Epoch: 7, Train Loss: 2.1502, Train NDCG: 0.7176, Valid NDCG: 0.7156


100%|█████████████████████████████████████████████████████| 1000/1000 [01:12<00:00, 13.81it/s]
100%|███████████████████████████████████████████████████████| 600/600 [00:32<00:00, 18.24it/s]
100%|█████████████████████████████████████████████████████████| 67/67 [00:09<00:00,  6.95it/s]
100%|█████████████████████████████████████████████████████████| 67/67 [00:08<00:00,  8.19it/s]
  0%|                                                                 | 0/600 [00:00<?, ?it/s]

KF: 1, Epoch: 0, Train Loss: 2.1736, Train NDCG: 0.7106, Valid NDCG: 0.7154


100%|███████████████████████████████████████████████████████| 600/600 [00:33<00:00, 18.03it/s]
100%|█████████████████████████████████████████████████████████| 67/67 [00:09<00:00,  6.72it/s]
100%|█████████████████████████████████████████████████████████| 67/67 [00:08<00:00,  8.22it/s]
  0%|                                                                 | 0/600 [00:00<?, ?it/s]

KF: 1, Epoch: 1, Train Loss: 2.1577, Train NDCG: 0.7133, Valid NDCG: 0.7136


100%|███████████████████████████████████████████████████████| 600/600 [00:33<00:00, 18.06it/s]
100%|█████████████████████████████████████████████████████████| 67/67 [00:09<00:00,  6.79it/s]
100%|█████████████████████████████████████████████████████████| 67/67 [00:08<00:00,  7.71it/s]
  0%|                                                                 | 0/600 [00:00<?, ?it/s]

KF: 1, Epoch: 2, Train Loss: 2.1552, Train NDCG: 0.7135, Valid NDCG: 0.7138


100%|███████████████████████████████████████████████████████| 600/600 [00:33<00:00, 18.04it/s]
100%|█████████████████████████████████████████████████████████| 67/67 [00:09<00:00,  6.91it/s]
100%|█████████████████████████████████████████████████████████| 67/67 [00:08<00:00,  7.80it/s]
  0%|                                                                 | 0/600 [00:00<?, ?it/s]

KF: 1, Epoch: 3, Train Loss: 2.1537, Train NDCG: 0.7148, Valid NDCG: 0.7148


100%|███████████████████████████████████████████████████████| 600/600 [00:33<00:00, 18.04it/s]
100%|█████████████████████████████████████████████████████████| 67/67 [00:09<00:00,  6.83it/s]
100%|█████████████████████████████████████████████████████████| 67/67 [00:08<00:00,  7.51it/s]
  0%|                                                                 | 0/600 [00:00<?, ?it/s]

KF: 1, Epoch: 4, Train Loss: 2.1525, Train NDCG: 0.7145, Valid NDCG: 0.7185


100%|███████████████████████████████████████████████████████| 600/600 [00:33<00:00, 18.07it/s]
100%|█████████████████████████████████████████████████████████| 67/67 [00:09<00:00,  6.70it/s]
100%|█████████████████████████████████████████████████████████| 67/67 [00:08<00:00,  8.24it/s]
  0%|                                                                 | 0/600 [00:00<?, ?it/s]

KF: 1, Epoch: 5, Train Loss: 2.1516, Train NDCG: 0.7161, Valid NDCG: 0.7151


100%|███████████████████████████████████████████████████████| 600/600 [00:33<00:00, 18.11it/s]
100%|█████████████████████████████████████████████████████████| 67/67 [00:10<00:00,  6.42it/s]
100%|█████████████████████████████████████████████████████████| 67/67 [00:08<00:00,  7.81it/s]
  0%|                                                                 | 0/600 [00:00<?, ?it/s]

KF: 1, Epoch: 6, Train Loss: 2.1505, Train NDCG: 0.7160, Valid NDCG: 0.7162


100%|███████████████████████████████████████████████████████| 600/600 [00:33<00:00, 18.16it/s]
100%|█████████████████████████████████████████████████████████| 67/67 [00:09<00:00,  6.95it/s]
100%|█████████████████████████████████████████████████████████| 67/67 [00:08<00:00,  8.16it/s]
  0%|                                                                | 0/1000 [00:00<?, ?it/s]

KF: 1, Epoch: 7, Train Loss: 2.1496, Train NDCG: 0.7166, Valid NDCG: 0.7178


  0%|                                                                | 0/1000 [00:00<?, ?it/s]


KeyboardInterrupt: 

In [26]:
# Per-fold best validation NDCG (best_ndcg / valid_cnt), one entry per completed fold.
ndcg_list

[0.7189372402328684, 0.7185490244900301]

In [27]:
# Pooled validation NDCG: ndcg_sum (sum of each fold's best_ndcg) divided by
# cnt_sum (sum of each fold's valid_cnt).
print(f'Valid NDCG: {ndcg_sum/cnt_sum:.6f}')

Valid NDCG: 0.718743


In [None]:
def write_results(top_3_tags_all, fn, first_chid=10000000):
    """Write the top-3 predicted tags of every consumer to a submission CSV.

    Args:
        top_3_tags_all: iterable of rows; each row is indexable and holds at
            least three tags, written in the given order as top1..top3.
            Row ``i`` is assigned the id ``first_chid + i``.
        fn: path of the CSV file to create; an existing file is overwritten.
        first_chid: id written for the first row. Defaults to 10000000, the
            value that used to be hard-coded in this function.

    Raises:
        IndexError: if a row has fewer than three entries.
    """
    # newline='' lets the csv module control line endings itself.
    with open(fn, mode='w', newline='') as submit_file:
        csv_writer = csv.writer(submit_file)
        csv_writer.writerow(['chid', 'top1', 'top2', 'top3'])
        for offset, tags in enumerate(top_3_tags_all):
            csv_writer.writerow([first_chid + offset, str(tags[0]), str(tags[1]), str(tags[2])])

In [None]:
# Column indices of the three largest scores per row, best first: take the last
# three columns of an ascending argsort and reverse them.
top_3_ind = np.flip(np.argsort(pred_sum, axis=1)[:, -3:], axis=1)
# Translate column indices into class ids through predictable_classes.
top_3_tags = predictable_classes[top_3_ind]
write_results(top_3_tags, 'Result/720565.csv')

In [None]:
# Save the raw pred_sum score matrix under the same 720565 file stem as the submission CSV.
np.save('Result/720565.npy', pred_sum)