In [None]:
"""Colab Drive Connection"""

from google.colab import drive

# Mount the user's Google Drive; the project paths defined later live under it.
GDRIVE_MOUNT_POINT = '/gdrive'
drive.mount(GDRIVE_MOUNT_POINT)

Mounted at /gdrive


In [None]:
# Re-point /etc/localtime at the Asia/Seoul zone file so the VM reports Korean time.
!rm /etc/localtime
!ln -s /usr/share/zoneinfo/Asia/Seoul /etc/localtime
# Print the current date/time to confirm the change.
!date

Mon Feb 22 21:00:52 KST 2021


In [None]:
import warnings
import os 

from collections import defaultdict
from copy import deepcopy

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns

import networkx as nx
import scipy.cluster.hierarchy as sch
from scipy.cluster.hierarchy import fcluster

import random

from tqdm import tqdm_notebook

from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.nn import functional as F

# install datatable
!pip install datatable
import datatable as dt

from numba import njit

import gc

# Silence every warning category for the rest of the session.
warnings.simplefilter("ignore")

# Kaggle kernel layout (kept for reference):
#   project_home = "/kaggle/input/jane-street-market-prediction"
#   model_home = "/kaggle/working"
#   data_home = project_home

# Colab layout: data, models and grid-search results all live under one Drive folder.
project_home = "/gdrive/MyDrive/colab/jane-street-market-prediction"
data_home, model_home, gs_home = (
    os.path.join(project_home, relative_dir)
    for relative_dir in ("input/data", "output/model", "output/grid_search")
)

Collecting datatable
[?25l  Downloading https://files.pythonhosted.org/packages/80/cb/21810c43b687a19d194c372192049f535fba28c55ce76d37e7e407159c52/datatable-0.11.1-cp36-cp36m-manylinux2010_x86_64.whl (83.7MB)
[K     |████████████████████████████████| 83.7MB 64kB/s 
[?25hInstalling collected packages: datatable
Successfully installed datatable-0.11.1


In [None]:
entire_seed = 1029


def seed_torch(seed=1029):
    """Seed Python's `random`, NumPy and PyTorch (CPU and CUDA) with `seed`.

    Also exports PYTHONHASHSEED and switches the cuDNN autotuner off.
    cuDNN's deterministic mode is explicitly left disabled.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Host-side generators.
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    # Device-side generators: the current device, then every device (multi-GPU).
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    torch.backends.cudnn.benchmark = False
    # torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.deterministic = False


seed_torch(entire_seed)

In [None]:
# Resolve the competition CSV files inside the data directory.
train_file, features_file, example_test_file, example_sample_submission_file = (
    os.path.join(data_home, csv_name)
    for csv_name in (
        'train.csv',
        'features.csv',
        'example_test.csv',
        'example_sample_submission.csv',
    )
)

# The training table is parsed with datatable and then converted to pandas.
train_data_datatable = dt.fread(train_file)
df_train = train_data_datatable.to_pandas()

# The remaining files are read with pandas directly.
df_features, df_example_test, df_example_sample_submission = (
    pd.read_csv(csv_path)
    for csv_path in (features_file, example_test_file, example_sample_submission_file)
)

In [None]:
# Column groups, selected by substring match on the training column names.
_train_columns = df_train.columns
features = [name for name in _train_columns if "feature" in name]
resps = [name for name in _train_columns if "resp" in name]
# Response names that contain no underscore (i.e. not of the form "resp_<k>").
target_resp = [name for name in resps if name.find("_") < 0]
target = ["weight", *target_resp, *features]

In [None]:
"""
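Reduce memory usage by downcasting columns (about 50% on this dataset, since float16 is not used; the source notebook quotes 75%)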
https://www.kaggle.com/tomwarrens/nan-values-depending-on-time-of-day
"""

## Reduce Memory

def reduce_memory_usage(df):
    """Downcast the columns of `df` in place to smaller dtypes and return `df`.

    * object columns are converted to ``category``;
    * signed-integer columns are cast to the smallest of int8/int16/int32
      whose range (bounds inclusive) contains the column's min and max;
    * float columns wider than 32 bits are cast to float32 when their finite
      min/max fit; float16 is deliberately not used (the original author
      avoided it for the later ``numpy.nanmean`` computation);
    * every other dtype (bool, unsigned, datetime, category, pandas extension
      dtypes) is left untouched instead of being pushed through the float
      branch.

    A column is never cast to a dtype that is wider than the one it already
    has. Columns whose min/max are NaN (empty or all-NaN) are left as they are
    because the range comparison is False for NaN.

    Prints the memory usage before and after, and the relative reduction.
    """
    start_memory = df.memory_usage().sum() / 1024**2
    print(f"Memory usage of dataframe is {start_memory} MB")

    for col in df.columns:
        col_type = df[col].dtype

        if pd.api.types.is_object_dtype(col_type):
            df[col] = df[col].astype('category')
            continue

        # Only plain NumPy signed-int ("i") and float ("f") columns are downcast.
        if not isinstance(col_type, np.dtype) or col_type.kind not in "if":
            continue

        c_min = df[col].min()
        c_max = df[col].max()

        if col_type.kind == "i":
            for candidate in (np.int8, np.int16, np.int32):
                limits = np.iinfo(candidate)
                if limits.min <= c_min and c_max <= limits.max:
                    # Smallest fitting type found; cast only if it actually saves space.
                    if np.dtype(candidate).itemsize < col_type.itemsize:
                        df[col] = df[col].astype(candidate)
                    break
        elif col_type.itemsize > np.dtype(np.float32).itemsize:
            # reducing float16 for calculating numpy.nanmean (float16 intentionally skipped)
            limits = np.finfo(np.float32)
            if limits.min <= c_min and c_max <= limits.max:
                df[col] = df[col].astype(np.float32)

    end_memory = df.memory_usage().sum() / 1024**2
    # Guard the percentage against a zero-sized starting frame.
    reduction = 100 * (start_memory - end_memory) / start_memory if start_memory else 0.0
    print(f"Memory usage of dataframe after reduction {end_memory} MB")
    print(f"Reduced by {reduction} % ")
    return df

# Downcast the training frame (the function modifies the frame it is given and
# returns it), then show the resulting dtypes and memory footprint.
df_train = reduce_memory_usage(df_train)
df_train.info()

Memory usage of dataframe is 2489.4869804382324 MB
Memory usage of dataframe after reduction 1247.0233011245728 MB
Reduced by 49.908422461199 % 
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2390491 entries, 0 to 2390490
Columns: 138 entries, date to ts_id
dtypes: float32(135), int16(1), int32(1), int8(1)
memory usage: 1.2 GB


In [None]:
# Keep only rows with date > 85 that also carry a strictly positive weight
# (zero-weight rows are dropped for training).
rows_to_keep = (df_train.date > 85) & (df_train.weight > 0)
df_train = df_train.loc[rows_to_keep]

# df_labels = df_train[['date','weight','resp_1','resp_2','resp_3','resp_4','resp']]

# df_train = df_train.drop(df_labels.columns,axis=1)

In [None]:
"""
The codes from 'Optimise Speed of Filling-NaN Function'
https://www.kaggle.com/gogo827jz/optimise-speed-of-filling-nan-function
"""

def for_loop(method, matrix, values):
    """Replace every row of `matrix` with `method(row, values)`.

    Rows are overwritten in place and the same `matrix` object is returned.
    """
    for row_idx in range(len(matrix)):
        current_row = matrix[row_idx]
        matrix[row_idx] = method(current_row, values)
    return matrix

def for_loop_ffill(method, matrix):
    """Replace every row of `matrix` with `method(row, previous_row)`.

    `previous_row` is the already-updated preceding row; for the first row it
    is a float32 zero vector with one entry per column. Rows are overwritten
    in place and the same `matrix` object is returned.
    """
    n_rows, n_cols = matrix.shape[0], matrix.shape[1]
    carried = np.zeros(n_cols, dtype=np.float32)
    for row_idx in range(n_rows):
        updated = method(matrix[row_idx], carried)
        matrix[row_idx] = updated
        # Carry the row as stored in the matrix forward to the next iteration.
        carried = matrix[row_idx]
    return matrix

@njit
def fillna_npwhere_njit(array, values):
    """Return `array` with NaN entries replaced by the matching entries of `values`.

    Compiled with numba's ``@njit``. The ``np.where`` replacement only runs
    when the sum of `array` is NaN (which is the case whenever `array`
    contains a NaN); otherwise `array` is returned unchanged.
    """
    # Cheap NaN presence check: any NaN element makes the sum NaN.
    if np.isnan(array.sum()):
        array = np.where(np.isnan(array), values, array)
    return array

In [None]:
# Convert to NumPy for efficient calculation.
# Features 1..129: every column listed in `features` except the first one.
np_ft_train = df_train.loc[:,features[1:]].values
np_ft_train.shape

# Feature 0 (not extracted):
# np_train_ft0 = df_train.loc[:,features[0]].values

(1571415, 129)

In [None]:
# Per-column mean of the selected feature columns, ignoring NaNs; used as the NaN fill value below.
f_mean = np.nanmean(np_ft_train,axis=0)

In [None]:
# The whole training frame as one NumPy array (all columns, in DataFrame column order).
np_train = df_train.values

In [None]:
print('fillna_npwhere_njit (mean-filling):')
# Row by row, replace NaNs in columns 8 .. second-to-last with the column means.
# NOTE(review): the 8:-1 slice presumably selects the same 129 feature columns
# that f_mean was computed from -- confirm against df_train.columns.
np_train[:,8:-1] = for_loop(fillna_npwhere_njit, np_train[:,8:-1], f_mean)

fillna_npwhere_njit (mean-filling):


In [None]:
# Map each column name to its position in df_train.columns.
dict_features = {col:idx for idx, col in enumerate(df_train.columns.tolist())}

In [None]:
# First two columns of the training matrix.
np_d_w = np_train[:, :2]

# Column positions of the response columns, in the order they appear in `resps`
# (original note: ['resp_1', 'resp_2', 'resp_3', 'resp_4', 'resp']).
idx_resps = [dict_features[resp_name] for resp_name in resps]
np_resps = np_train[:, idx_resps]

In [None]:
# Percentile rank used as the decision threshold for each response column.
resp_percentile_ranks = [50, 49, 49, 50, 50]

# Turn every rank into the corresponding value of its response column.
resps_prcntls = []
for column_pos, rank in enumerate(resp_percentile_ranks):
    resps_prcntls.append(np.percentile(np_resps[:, column_pos], rank))
resps_prcntls

[2.3540282199974172e-05,
 -2.6968382262566605e-05,
 -6.920687970705338e-05,
 7.239638944156468e-05,
 4.7192643251037225e-05]

In [None]:
# Binary target per response column: 1 where the response is strictly above
# its threshold, 0 otherwise. The comparison is done with one vectorised NumPy
# operation per column instead of a Python-level lambda call per element.
list_resps = list()
for idx, resps_prcntl in enumerate(resps_prcntls):
    result = (np_resps[:, idx] > resps_prcntl).astype(int)
    list_resps.append(result)
# Stack to (n_responses, n_rows) and transpose to (n_rows, n_responses).
np_targets = np.stack(list_resps).T

In [None]:
# Keep every column whose name contains neither "resp_" nor "ts_".
keep_column = [not ("resp_" in key or "ts_" in key) for key in dict_features]
idx_target = np.flatnonzero(keep_column)
X_np_train = np_train[:, idx_target]

In [None]:
# Short aliases: X is the selected-column matrix, y the binary target matrix.
X = X_np_train
y = np_targets

In [None]:
from collections import Counter, defaultdict
from sklearn import model_selection

# ---- StratifiedGroupKFold ----
class StratifiedGroupKFold(object):
    """
    StratifiedGroupKFold with optional random shuffle and a sklearn-like structure.

    Groups are assigned greedily, one at a time, to the fold that keeps the
    per-label share of samples most even across folds; all samples of a group
    always end up in the same fold.
    """

    def __init__(self, n_splits=4, shuffle=True, random_state=42):
        """
        n_splits: number of folds.
        shuffle: shuffle the groups (seeded by `random_state`) before the
            greedy assignment; with False the groups are processed in order
            of first appearance.
        random_state: seed of the private `random.Random` used for shuffling.
        """
        self.n_splits = n_splits
        self.shuffle = shuffle
        self.random_state = random_state

    def get_n_splits(self, X=None, y=None, group=None):
        """Return the number of folds (arguments are ignored)."""
        return self.n_splits

    def split(self, X, y, group):
        """Yield one `(train_idx, test_idx)` pair of index lists per fold.

        X: unused, kept for sklearn-style call compatibility.
        y: 1-D sequence of non-negative integer labels.
        group: 1-D sequence of hashable group ids, aligned with `y`.
        """
        labels_num = int(np.max(y)) + 1
        y_counts_per_group = defaultdict(lambda: np.zeros(labels_num))
        y_distr = Counter()
        # groups = X[group].values
        groups = group
        for label, g in zip(y, groups):
            y_counts_per_group[g][label] += 1
            y_distr[label] += 1

        y_counts_per_fold = defaultdict(lambda: np.zeros(labels_num))
        groups_per_fold = defaultdict(set)

        def eval_y_counts_per_fold(y_counts, fold):
            # Score of tentatively putting `y_counts` into `fold`: mean over
            # labels of the std (across folds) of each fold's share of that label.
            y_counts_per_fold[fold] += y_counts
            std_per_label = []
            for label in range(labels_num):
                # A label value that never occurs has count 0 in every fold;
                # use 1 as denominator so its share is 0 instead of 0/0.
                total = y_distr[label] or 1
                label_std = np.std([y_counts_per_fold[i][label] / total for i in range(self.n_splits)])
                std_per_label.append(label_std)
            y_counts_per_fold[fold] -= y_counts
            return np.mean(std_per_label)

        groups_and_y_counts = list(y_counts_per_group.items())
        # Honour the `shuffle` flag (it used to be stored but never consulted).
        if self.shuffle:
            random.Random(self.random_state).shuffle(groups_and_y_counts)

        # Most label-imbalanced groups first; the sort is stable, so ties keep
        # the (shuffled or first-appearance) order.
        for g, y_counts in sorted(groups_and_y_counts, key=lambda x: -np.std(x[1])):
            best_fold = None
            min_eval = None
            for i in range(self.n_splits):
                fold_eval = eval_y_counts_per_fold(y_counts, i)
                if min_eval is None or fold_eval < min_eval:
                    min_eval = fold_eval
                    best_fold = i
            y_counts_per_fold[best_fold] += y_counts
            groups_per_fold[best_fold].add(g)

        all_groups = set(groups)
        for fold in range(self.n_splits):
            test_groups = groups_per_fold[fold]
            train_groups = all_groups - test_groups

            train_idx = [pos for pos, g in enumerate(groups) if g in train_groups]
            test_idx = [pos for pos, g in enumerate(groups) if g in test_groups]

            yield train_idx, test_idx

In [None]:
# 10-fold splitter seeded with the notebook-wide seed.
cv = StratifiedGroupKFold(n_splits=10, random_state=entire_seed)

In [None]:
# Materialise the folds once: labels are the last target column, groups are column 0 of X.
cv_idxes = list(cv.split(X, y[:, -1], group=X[:, 0]))

# Print which group values (column 0 of X) fall on the train / test side of each fold.
for idx, (train_idx, test_idx) in enumerate(cv_idxes):
    train_dates = np.unique(X[train_idx, 0])
    test_dates = np.unique(X[test_idx, 0])
    print(f"fold {idx+1}"+"*"*30)
    print(train_dates)
    print(test_dates)

fold 1******************************
[ 86.  87.  89.  90.  91.  93.  95.  96.  97.  99. 100. 101. 102. 104.
 105. 106. 107. 108. 109. 110. 111. 112. 113. 114. 115. 116. 117. 118.
 119. 120. 121. 122. 123. 124. 125. 126. 127. 128. 129. 130. 131. 132.
 133. 135. 136. 137. 138. 139. 140. 141. 142. 143. 144. 145. 146. 147.
 148. 149. 150. 152. 153. 154. 155. 157. 158. 159. 160. 162. 163. 164.
 165. 166. 167. 168. 169. 170. 171. 172. 173. 174. 175. 176. 177. 178.
 179. 180. 181. 182. 183. 184. 186. 187. 188. 189. 190. 191. 192. 193.
 194. 195. 196. 197. 198. 199. 200. 201. 202. 203. 204. 205. 206. 207.
 208. 209. 210. 211. 212. 213. 214. 215. 216. 217. 218. 219. 220. 221.
 223. 224. 225. 226. 228. 229. 230. 231. 232. 233. 234. 235. 237. 239.
 241. 242. 243. 244. 245. 246. 247. 248. 249. 250. 251. 252. 253. 254.
 255. 256. 257. 258. 259. 260. 261. 262. 265. 266. 267. 268. 269. 270.
 271. 272. 273. 274. 275. 276. 277. 278. 279. 280. 281. 282. 283. 284.
 285. 286. 287. 288. 291. 292. 293. 295.

In [None]:
class JaneDataset(Dataset):
    """Dataset over a sample matrix `np_X` and a target matrix `np_y`.

    Each item is a triple `(X_util, X, y)`:
      * `X_util` - the first three columns of the sample, returned as stored
        (the original comment names them date, weight, resp);
      * `X` - the remaining columns as a float tensor;
      * `y` - the matching targets as a float tensor.
    """

    def __init__(self, np_X, np_y):
        super().__init__()
        self.X, self.y = np_X, np_y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, index):
        # Column split: [0, 3) are utility columns, [3, end) are model inputs.
        util_cols, input_cols = slice(None, 3), slice(3, None)
        X_util = self.X[index, util_cols]
        inputs = torch.tensor(self.X[index, input_cols], dtype=torch.float)
        targets = torch.tensor(self.y[index], dtype=torch.float)
        return X_util, inputs, targets

In [None]:
def utility_score(X_d_w,X_r,y):
    """Compute the utility score of a set of binary actions.

    X_d_w: 2-D array whose column 0 is the date and column 1 the weight.
    X_r:   1-D array of responses, aligned with the rows of `X_d_w`.
    y:     1-D array of actions (0/1), from a threshold or a prediction.

    For every distinct date i the daily value is
        p_i = sum(weight * resp * action)
    and, with `period` the number of distinct dates,
        t = sum(p_i) / sqrt(sum(p_i ** 2)) * sqrt(250 / period)
        utility = min(max(t, 0), 6) * sum(p_i)

    Returns 0.0 for empty input and when every p_i is zero (for example when
    no action is taken); the ratio t would otherwise be 0/0 and the NaN would
    propagate into any running total built from the result.
    """
    X_d_w = np.asarray(X_d_w)
    if X_d_w.shape[0] == 0:
        return 0.0

    dates = X_d_w[:, 0]
    weights = X_d_w[:, 1]

    # Position of each row's date within the sorted distinct dates.
    unq_dates, date_pos = np.unique(dates, return_inverse=True)
    period = len(unq_dates)

    # Per-row contribution, summed per date in a single pass.
    row_value = weights * np.asarray(X_r) * np.asarray(y)
    np_p = np.bincount(np.ravel(date_pos), weights=row_value, minlength=period)

    norm = np.sqrt(np.sum(np.power(np_p, 2)))
    if norm == 0:
        return 0.0

    t = np.sum(np_p) / norm * np.sqrt(250 / period)
    return min(max(t, 0), 6) * np_p.sum()

In [None]:
class EarlyStopping:
    """Patience-based early stopping that saves the model on every improvement.

    patience: number of non-improving calls after which `early_stop` is set.
    mode:     "max" if a larger score is better, "min" if a smaller one is.
    delta:    stored for compatibility; the comparison does not use it.

    Attributes updated by each call:
      best_score - best score so far (negated in "min" mode), None until the
                   first finite score is seen;
      val_score  - raw score of the last checkpoint call;
      counter    - consecutive non-improving calls;
      early_stop - True once `counter` reaches `patience`.
    """

    def __init__(self, patience=7, mode="max", delta=0.001):
        self.patience = patience
        self.counter = 0
        self.mode = mode
        self.best_score = None
        self.early_stop = False
        self.delta = delta
        # np.inf instead of the np.Inf alias, which NumPy 2.0 removed.
        if self.mode == "min":
            self.val_score = np.inf
        else:
            self.val_score = -np.inf

    def __call__(self, epoch_score, model, model_path):
        """Register `epoch_score`; save `model` to `model_path` if it is the best so far."""

        if self.mode == "min":
            score = -1.0 * epoch_score
        else:
            score = np.copy(epoch_score)

        if not np.isfinite(score):
            # NaN/inf cannot be ranked against the best score. Count it as a
            # non-improving epoch instead of letting it become `best_score`
            # (after which every comparison would be False).
            self._count_no_improvement()
        elif self.best_score is None or not (score < self.best_score):
            self.best_score = score
            # ema.apply_shadow()
            self.save_checkpoint(epoch_score, model, model_path)
            # ema.restore()
            self.counter = 0
        else:  # score < self.best_score  (+ self.delta is not applied)
            self._count_no_improvement()

    def _count_no_improvement(self):
        """Advance the patience counter and raise the stop flag when it runs out."""
        self.counter += 1
        # print('EarlyStopping counter: {} out of {}'.format(self.counter, self.patience))
        if self.counter >= self.patience:
            self.early_stop = True

    def save_checkpoint(self, epoch_score, model, model_path):
        """Write `model.state_dict()` to `model_path` when `epoch_score` is finite."""
        # np.isfinite catches every NaN; a membership test against np.nan does not.
        if np.isfinite(epoch_score):
            print(f"Validation score improved ({self.val_score:.4f} --> {epoch_score:.4f})")
            # if not DEBUG:
            torch.save(model.state_dict(), model_path)
        self.val_score = epoch_score

In [None]:
class EarlyStopping_GS:
    """Patience-based early stopping for grid search: tracks scores, never writes a file.

    patience: number of non-improving calls after which `early_stop` is set.
    mode:     "max" if a larger score is better, "min" if a smaller one is.
    delta:    stored for compatibility; the comparison does not use it.

    Attributes updated by each call:
      best_score - best score so far (negated in "min" mode), None until the
                   first finite score is seen;
      val_score  - raw score of the last checkpoint call;
      counter    - consecutive non-improving calls;
      early_stop - True once `counter` reaches `patience`.
    """

    def __init__(self, patience=7, mode="max", delta=0.001):
        self.patience = patience
        self.counter = 0
        self.mode = mode
        self.best_score = None
        self.early_stop = False
        self.delta = delta
        # np.inf instead of the np.Inf alias, which NumPy 2.0 removed.
        if self.mode == "min":
            self.val_score = np.inf
        else:
            self.val_score = -np.inf

    def __call__(self, epoch_score, model):
        """Register `epoch_score`; `model` is accepted but not stored anywhere."""

        if self.mode == "min":
            score = -1.0 * epoch_score
        else:
            score = np.copy(epoch_score)

        if not np.isfinite(score):
            # NaN/inf cannot be ranked against the best score. Count it as a
            # non-improving epoch instead of letting it become `best_score`
            # (after which every comparison would be False).
            self._count_no_improvement()
        elif self.best_score is None or not (score < self.best_score):
            self.best_score = score
            # ema.apply_shadow()
            # print(f"Validation score improved ({self.val_score:.4f} --> {epoch_score:.4f}). Saving model!")
            self.save_checkpoint(epoch_score, model)
            # ema.restore()
            self.counter = 0
        else:  # score < self.best_score  (+ self.delta is not applied)
            self._count_no_improvement()

    def _count_no_improvement(self):
        """Advance the patience counter and raise the stop flag when it runs out."""
        self.counter += 1
        # print('EarlyStopping counter: {} out of {}'.format(self.counter, self.patience))
        if self.counter >= self.patience:
            self.early_stop = True

    def save_checkpoint(self, epoch_score, model):
        """Report the improvement and remember the score; nothing is written to disk."""
        # np.isfinite catches every NaN; a membership test against np.nan does not.
        if np.isfinite(epoch_score):
            print(f"Validation score improved ({self.val_score:.4f} --> {epoch_score:.4f})")
            # if not DEBUG:
            # torch.save(model.state_dict(), model_path)
        self.val_score = epoch_score

In [None]:
# Loop sizes.
epochs, batch_size = 100, 4096

# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

In [None]:
class ResnetLinear(nn.Module):
    """Fully connected network whose layers are fed concatenated activations.

    The input is batch-normalised, then passed through `num_layers` blocks of
    Linear -> BatchNorm1d -> activation -> Dropout. The first block receives
    the normalised input; every later block receives the concatenation of the
    two most recent activations (see `forward`). A final Linear layer maps the
    last concatenation to `num_classes` outputs (raw values, no activation).

    The architecture hyper-parameters (width, depth, activation, dropout,
    embed_dim) are fixed inside `__init__`. Because `embed_dim` is 0, the
    tag-embedding branch is disabled and `df_features` / `device` are not used.
    """

    def __init__(self, num_features, num_classes, df_features, device, verbose=False):
        """
        num_features: number of input columns.
        num_classes:  number of outputs.
        df_features:  feature/tag table, only read when embed_dim > 0.
        device:       device for the tag matrix, only used when embed_dim > 0.
        verbose:      print the fixed hyper-parameters when True.
        """
        super(ResnetLinear,self).__init__()

        # Fixed architecture hyper-parameters.
        self.hidden_layer = 256
        self.num_layers = 2
        self.decreasing = False
        
        self.f_act = nn.SiLU()
        self.dropout = 0.49627361377205387
        
        # 0 disables the tag-embedding branch below.
        self.embed_dim = 0

        self.num_features = num_features
        self.num_classes = num_classes
        # self.hidden_layers = hidden_layers
        # self.dropout = dropout
        # self.embed_dim = 0
        self.hidden_layers = None
        self.emb_mode = None

        if verbose:
            print("ResnetLinear Trial")
            print(f"hidden_layer:{self.hidden_layer}; num_layers:{self.num_layers}; decreasing:{self.decreasing}; f_act:{self.f_act}; dropout:{self.dropout}; embed_dim:{self.embed_dim}")        

        if self.embed_dim == 0:
            self.emb_mode = False

        else:
            self.emb_mode = True

            # df_features tag num is 29(fixed value)
            self.n_feat_tags = 29
            # self.embed_dim = selfembed_dim
            self.device = device
            
            # Drop the first column of df_features and add an extra "tag_29"
            # column that is 1 for the first row only.
            self.df_features = df_features.loc[:,df_features.columns[1:]]
            self.df_features["tag_29"] = np.array([1]+[0] * (self.df_features.shape[0]-1))
            self.df_features = self.df_features.astype("int8")
            self.features_tag_matrix = torch.tensor(self.df_features.values).to(self.device)
            
            self.n_feat_tags += 1
            self.tag_embedding = nn.Embedding(self.n_feat_tags+1, self.embed_dim)
            # Defined here but not referenced by forward() or features2emb().
            self.tag_weights = nn.Linear(self.n_feat_tags, 1)
            

        # Input normalisation over the raw features plus the embedding columns.
        self.bn_d0 = nn.BatchNorm1d(self.num_features+ self.embed_dim)
                
        # Layer widths: halve per layer when `decreasing`, otherwise constant.
        if self.decreasing:
          self.hidden_layers = [int(self.hidden_layer/2**(i)) for i in range(self.num_layers)]
        else:
          self.hidden_layers = [int(self.hidden_layer) for i in range(self.num_layers)]

        # Prepend the input width so that hidden_layers[i] is the input size of block i.
        self.hidden_layers = [int(self.num_features + self.embed_dim)] + self.hidden_layers

        denses = list()
        
        for i in range(len(self.hidden_layers)-1):
            if i==0:
                denses.append(self.make_layers(self.hidden_layers[i], self.hidden_layers[i+1], self.dropout, self.f_act))
            else:
                # Later blocks take the concatenation of the two preceding activations.
                denses.append(self.make_layers(self.hidden_layers[i-1]+self.hidden_layers[i], self.hidden_layers[i+1], self.dropout, self.f_act))

        self.denses = nn.Sequential(*denses)
        
        # The head also consumes a concatenation of the last two activations.
        self.out_dense = nn.Linear(self.hidden_layers[-1] + self.hidden_layers[-2], self.num_classes)

    def make_layers(self, in_channels, out_channels, dropout=None, f_act=nn.ReLU()):
        """Build one block: Linear -> BatchNorm1d -> `f_act` (-> Dropout if `dropout` is truthy)."""
        layers = list()
        layers.append(nn.Linear(in_channels, out_channels))
        layers.append(nn.BatchNorm1d(out_channels))
        layers.append(f_act)
        
        if dropout:
            layers.append(nn.Dropout(dropout))
        
        module = nn.Sequential(*layers)
        
        return module
    
    # function to make embedding vector of Tag information per Features_0...129
    def features2emb(self):
        """Return one embedding per feature: the mean of the embeddings of its tags."""
        # one tag embedding to embed_dim dimension (1,embed_dim) per element
        all_tag_idxs = torch.LongTensor(np.arange(self.n_feat_tags)).to(self.device)
        tag_bools = self.features_tag_matrix
        # The tag embeddings are repeated 130 times (hard-coded), once per feature row.
        f_emb = self.tag_embedding(all_tag_idxs).repeat(130,1,1)
        # Keep only the embeddings of the tags that apply to each feature (mask with the tag flags).
        f_emb = f_emb * tag_bools[:,:,None]
        
        # How many tags does each feature have?
        s = torch.sum(tag_bools,dim=1)
        # Sum the masked tag embeddings over the tag axis for each feature, then divide by that feature's tag count.
        f_emb = torch.sum(f_emb, dim=-2) / s[:,None]
        
        return f_emb
        
    def forward(self, x):
        """Return the raw outputs (one column per class) for the batch `x`."""
        
        # if embedding
        if self.emb_mode:
            f_emb = self.features2emb()
            x = x.view(-1, self.num_features)
            x_emb = torch.matmul(x,f_emb)
            x = torch.hstack((x,x_emb))
        
        # num_features + embed_dim 
        x = self.bn_d0(x)
        
        x_prev = None
        x_now = None

        # After each block, x is the concatenation [previous activation, new activation],
        # which becomes the input of the next block.
        for idx, dense in enumerate(self.denses):
            if idx == 0:
                x_prev = x
                x_now = dense(x_prev)
                x = torch.cat([x_prev,x_now],1)
                x_prev = x_now
            else:
                x_now = dense(x)
                x = torch.cat([x_prev,x_now],1)
                x_prev = x_now

        x5 = self.out_dense(x)
        
        return x5

In [None]:
class FFN(nn.Module):
    """Plain feed-forward stack: BatchNorm1d on the input, then one
    Linear -> BatchNorm1d -> activation (-> Dropout) block per entry of
    `hidden_layers`, optionally followed by an output Linear layer and an
    output activation.

    num_features:  number of input columns.
    num_classes:   size of the output layer; 0 (or less) means no output
                   layer, the last hidden activation is returned.
    hidden_layers: list with the width of every hidden block.
    dropout:       dropout probability; a falsy value adds no Dropout layer.
    f_act:         activation module used in every hidden block.
    is_op_act:     when True, apply Sigmoid (num_classes 1 or 2) or Softmax
                   over the last axis (num_classes > 2) to the output.
    """

    def __init__(self, num_features, num_classes, hidden_layers, dropout, f_act, is_op_act =False):
        super(FFN,self).__init__()
        
        self.bn_d0 = nn.BatchNorm1d(num_features)

        self.hidden_layers = [num_features] + hidden_layers

        denses = list()
        for i in range(len(self.hidden_layers)-1):
            denses.append(self.make_layers(self.hidden_layers[i],self.hidden_layers[i+1],f_act,dropout))

        self.denses = nn.Sequential(*denses)

        self.out_dense = None
        
        if num_classes > 0:
            self.out_dense = nn.Linear(self.hidden_layers[-1], num_classes)
            
        # The activation used to be initialised as `out_activ` but assigned as
        # `out_active`, so forward() never applied it. `out_active` is now the
        # single source of truth; `out_activ` stays as an alias for old readers.
        self.out_active = None
        
        if is_op_act:
            if num_classes == 1 or num_classes == 2:
                self.out_active = nn.Sigmoid()
            elif num_classes > 2:
                self.out_active = nn.Softmax(dim=-1)

        self.out_activ = self.out_active
    
    def make_layers(self, in_channels, out_channels, f_act, dropout):
        """Build one block: Linear -> BatchNorm1d -> `f_act` (-> Dropout if `dropout` is truthy)."""
        layers = list()
        layers.append(nn.Linear(in_channels, out_channels))
        layers.append(nn.BatchNorm1d(out_channels))
        layers.append(f_act)

        if dropout:
            layers.append(nn.Dropout(dropout))
            
        module = nn.Sequential(*layers)
        
        return module

    def forward(self, x):
        """Run the stack on `x`; the output layer and activation are applied only if configured."""
        
        x = self.bn_d0(x)

        x = self.denses(x)

        # Explicit None checks: module truthiness is not a reliable "is configured" test.
        if self.out_dense is not None:
            x = self.out_dense(x)
        if self.out_active is not None:
            x = self.out_active(x)
            
        return x

class Emb_NN_Model(nn.Module):
    """Feed-forward classifier whose input is extended with tag-embedding features.

    Each feature gets an embedding (the mean of the embeddings of its tags,
    see `features2emb`). The input batch is multiplied by that feature-embedding
    matrix, the product is appended to the raw input, and the result goes
    through an `FFN` body followed by a Linear head with `num_classes` raw outputs.

    The architecture hyper-parameters (width, depth, activation, dropout,
    embed_dim) are fixed inside `__init__`.
    """

    def __init__(self, num_features, num_tags, num_classes, df_features, device, verbose=False):
        """
        num_features: number of input columns.
        num_tags:     number of tag columns in `df_features` (one more is added here).
        num_classes:  number of outputs.
        df_features:  feature/tag table; its first column is dropped, the rest
                      is used as a 0/1 feature-by-tag matrix.
        device:       device the tag matrix and tag indices are placed on.
        verbose:      print the fixed hyper-parameters when True.
        """
        super(Emb_NN_Model,self).__init__()

        self.num_features = num_features
        self.n_feat_tags = num_tags
        self.num_classes = num_classes

        # self.hidden_layers = hidden_layers
        # self.embed_dim = embed_dim
        # Fixed architecture hyper-parameters.
        self.hidden_layer = 256
        self.num_layers = 4
        self.decreasing = True

        # Layer widths: halve per layer when `decreasing`, otherwise constant.
        if self.decreasing:
          self.hidden_layers = [int(self.hidden_layer/2**(i)) for i in range(self.num_layers)]
        else:
          self.hidden_layers = [int(self.hidden_layer) for i in range(self.num_layers)]

        self.f_act = nn.SiLU()
        self.dropout = 0.17971171427796284

        self.embed_dim = 5

        self.embed_mode = None

        if verbose:
            print("Embed-NN Trial")
            print(f"hidden_layer:{self.hidden_layer}; num_layers:{self.num_layers}; decraesing:{self.decreasing}; f_act:{self.f_act}; dropout:{self.dropout}; embed_dim:{self.embed_dim}")

        if self.embed_dim == 0:
            self.embed_mode = False
        if self.embed_dim > 0:
            self.embed_mode = True

            self.device = device
        
            # Drop the first column of df_features and add an extra "tag_29"
            # column that is 1 for the first row only.
            self.df_features = df_features.loc[:,df_features.columns[1:]]
            self.df_features["tag_29"] = np.array([1]+[0] * (self.df_features.shape[0]-1))
            self.df_features = self.df_features.astype("int8")
            # Plain tensor attribute (not a parameter or registered buffer).
            self.features_tag_matrix = torch.tensor(self.df_features.values).to(self.device)
        
            self.n_feat_tags += 1
            self.tag_embedding = nn.Embedding(self.n_feat_tags+1, self.embed_dim)
            # Defined here but not referenced by forward() or features2emb().
            self.tag_weights = nn.Linear(self.n_feat_tags, 1)
        
        # Body without an output layer (num_classes=0); the head is `self.dense`.
        self.ffn = FFN(num_features=(self.num_features+self.embed_dim), num_classes=0, hidden_layers=self.hidden_layers, f_act=self.f_act, dropout=self.dropout)
        self.dense = nn.Linear(self.hidden_layers[-1], self.num_classes)

    # function to make embedding vector of Tag information per Features_0...129
    def features2emb(self):
        """Return one embedding per feature: the mean of the embeddings of its tags."""
        # one tag embedding to embed_dim dimension (1,embed_dim) per element
        all_tag_idxs = torch.LongTensor(np.arange(self.n_feat_tags)).to(self.device)
        tag_bools = self.features_tag_matrix
        # The tag embeddings are repeated 130 times (hard-coded), once per feature row.
        f_emb = self.tag_embedding(all_tag_idxs).repeat(130,1,1)
        # Keep only the embeddings of the tags that apply to each feature (mask with the tag flags).
        f_emb = f_emb * tag_bools[:,:,None]
        
        # How many tags does each feature have?
        s = torch.sum(tag_bools,dim=1)
        # Sum the masked tag embeddings over the tag axis for each feature, then divide by that feature's tag count.
        f_emb = torch.sum(f_emb, dim=-2) / s[:,None]
        
        return f_emb
        
    def forward(self, x):
        """Return the raw outputs (one column per class) for the batch `x`."""
        if self.embed_mode:
            x = x.view(-1, self.num_features)
            # 130 X 5
            f_emb = self.features2emb()
            # N X 130 x 130 X 5 => N x 5 => 
            x_emb = torch.matmul(x, f_emb)    
            # N X 130 + N X 5 => 
            x = torch.hstack((x,x_emb))

        x = self.ffn(x)
        x = self.dense(x)
        
        return x

In [None]:
# Hyper-parameters for the embedding model.
# NOTE(review): presumably learning rate (lr_*) and weight decay (wd_*) from a
# tuning run; they are not referenced by the code visible in this section.
lr_emb = 0.000663767918321238
wd_emb = 2.6504094565959894e-07

# Hyper-parameters for the ResnetLinear model (same naming scheme).
lr_reslin = 2.9521544108896628e-05
wd_reslin = 5.679142529741758e-05

In [None]:
# NFOLDS = 1
# EARLYSTOP_NUM = 3
CACHE_PATH = model_home

# Running totals per blend, keyed by the (emb_pct, reslin_pct) weight pair.
dict_combine_loss = defaultdict(lambda: 0)
dict_combine_auc = defaultdict(lambda: 0)
dict_combine_util = defaultdict(lambda: 0)

# Only the first CV fold is evaluated.
train_idx, valid_idx = cv_idxes[0]
X_train = X[train_idx, :]
y_train = y[train_idx, :]

X_valid = X[valid_idx, :]
y_valid = y[valid_idx, :]

train_dataset = JaneDataset(X_train, y_train)
valid_dataset = JaneDataset(X_valid, y_valid)

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

seed_torch(seed=entire_seed)
torch.cuda.empty_cache()

# Load both trained models. map_location makes checkpoints that were saved on
# a GPU loadable when the notebook runs on a CPU-only runtime.
emb_model = Emb_NN_Model(130, 29, 5, df_features, device)
emb_model = emb_model.to(device)
emb_file_name = "Embed-nn.pth"
emb_file_path = os.path.join(CACHE_PATH, emb_file_name)
emb_model.load_state_dict(torch.load(emb_file_path, map_location=device))

reslin_model = ResnetLinear(130,5,df_features,device)
reslin_model = reslin_model.to(device)
reslin_file_name = "ResnetLinear.pth"
reslin_file_path = os.path.join(CACHE_PATH, reslin_file_name)
reslin_model.load_state_dict(torch.load(reslin_file_path, map_location=device))

criterion = nn.BCEWithLogitsLoss()

# es = EarlyStopping(EARLYSTOP_NUM, mode="max")

with torch.no_grad():
    emb_model.eval()
    reslin_model.eval()

    for idx, (X_utils, inputs, labels) in enumerate(tqdm_notebook(valid_dataloader)):

        # Utility columns of the batch: all but the last, and the last one.
        X_d_w = X_utils[:,:-1].detach().cpu().numpy()
        X_r = X_utils[:,-1].detach().cpu().numpy()

        inputs = inputs.to(device)
        labels = labels.to(device)

        emb_outputs = emb_model(inputs)
        reslin_outputs = reslin_model(inputs)

        true = labels.detach().cpu().numpy()[:,-1]

        # Probabilities of the last output column; they do not depend on the
        # blend weight, so they are computed once per batch.
        emb_prob = emb_outputs.sigmoid().cpu().numpy()[:,-1]
        reslin_prob = reslin_outputs.sigmoid().cpu().numpy()[:,-1]

        # Blend weights 0.1 .. 0.9 for the embedding model (the keys keep the
        # raw float products, e.g. 3*0.1).
        for i in range(1,10):
            emb_pct = i*0.1
            reslin_pct = 1-emb_pct

            # Blend of the raw outputs: used for the loss and the AUC.
            outputs = (emb_pct * emb_outputs) + (reslin_pct * reslin_outputs)

            # Blend of the probabilities: thresholded at 0.5 to get the action.
            combined_output = (emb_pct * emb_prob) + (reslin_pct * reslin_prob)
            # np.float64 replaces the np.float alias that NumPy 1.24 removed.
            combined_target = (combined_output > 0.5).astype(np.float64)

            # acc = (true == combined_target).sum() / outputs.shape[0]
            # NOTE(review): auc and util are computed per batch and summed,
            # and are not divided by the number of batches anywhere in this
            # cell; roc_auc_score also raises if a batch holds one class only.
            auc = roc_auc_score(true, outputs.cpu().numpy()[:,-1])
            util = utility_score(X_d_w,X_r,combined_target)

            dict_combine_auc[(emb_pct, reslin_pct)] += auc
            dict_combine_util[(emb_pct, reslin_pct)] += util

            # Sample-weighted sum of the batch loss (not yet divided by the dataset size).
            loss = criterion(outputs, labels)
            dict_combine_loss[(emb_pct, reslin_pct)] += loss.detach().item() * inputs.size(0)

    # dict_combine_loss = running_loss / len(valid_dataloader.dataset)

    # dict_combine_auc = running_auc / len(valid_dataloader)


# print(f"EPOCH:{epoch+1}|{epochs}; loss(train/valid):{epoch_loss:.4f}/{valid_loss:.4f}; acc(train/valid):{epoch_acc:.4f}/{valid_acc:.4f}; auc(train/valid):{epoch_auc:.4f}/{valid_auc:.4f}; utility(train/valid):{epoch_util:.4f}/{valid_util:.4f}")

# model_weights = os.path.join(CACHE_PATH,f"tuned1-embed-nn_{_fold}.pth")
# es(valid_auc, model, model_path=model_weights)
# if es.early_stop:
#   print("Early stopping")
#   break