In [None]:
import pandas as pd
import numpy as np
import glob
import math
import pickle
import random
from collections import namedtuple


import torch
from torch import nn
from torch.distributions import Gamma
from torch.nn import functional as F
import torch.tensor as Tensor
import torch.nn.init as init
from torch import optim
from torch.utils.data import DataLoader
from torch.autograd import Variable
from torch.optim.lr_scheduler import ExponentialLR

import networkx as nx
import matplotlib as mpl
import matplotlib.pyplot as plt
import scipy
from tqdm import tqdm_notebook
from multiprocessing import Pool
from collections import defaultdict

from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score
from dateutil.relativedelta import relativedelta
from rangerlars import RangerLars


from xgboost.sklearn import XGBClassifier
import warnings
warnings.filterwarnings("ignore")
np.set_printoptions(suppress=True)
pd.set_option('display.float_format', lambda x: '%.3f' % x)

# 1 Transactions Loading
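We first load the wire transactions to extract useful features.
Here we generate 8 edge features, listed as follows.
+ Number of outgoing transactions in one month whose amount is larger than 500,000
+ Number of outgoing wire transactions in one month
+ Total amount of the incoming transactions in one month whose amount is larger than 1,000,000
+ Total outgoing wire transaction amount in one month
+ Number of times an outgoing wire transaction occurred in one month
+ Average outgoing wire transaction amount in one month
+ Indicator: the incoming transactions larger than 8,000 add up to more than 60,000,000
+ Total amount of the outgoing transactions larger than 8,000 over nine months

Finally, we store the edge attributes of each month as a CSV file written from a `pandas` DataFrame.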

In [None]:
# Customer master data; `open_date` is the parsed form of the raw `Open_Date` column.
CustInfo = (
    pd.read_csv('../data/customerinformation.csv')
    .assign(open_date=lambda frame: pd.to_datetime(frame.Open_Date))
)

# Wire transactions (the first CSV column is used as the index);
# `trans_date` is the parsed form of the raw `trandt` column.
WireTrans = (
    pd.read_csv('../data/new_wire.csv', index_col=0)
    .assign(trans_date=lambda frame: pd.to_datetime(frame.trandt))
)

In [None]:
# Keep only the wire transactions whose customer is NOT in the 'IBW' customer segment.
non_ibw_customers = CustInfo.loc[CustInfo.Customer_Segment_Code != 'IBW', 'customerno']
# .copy() makes `new_wire` an independent frame, so the column assignments below
# do not trigger SettingWithCopyWarning or depend on pandas' view/copy heuristics.
new_wire = WireTrans[WireTrans.customerno.isin(non_ibw_customers)].copy()

# Derive the edge endpoints from the transfer direction:
#   'I' (incoming) -> the customer is the beneficiary, the originator is unknown (NaN)
#   'O' (outgoing) -> the customer is the originator, the beneficiary is unknown (NaN)
# Series.where keeps the value where the condition holds and puts NaN elsewhere; unlike the
# row-wise apply it replaces, it also works on an empty frame.
new_wire['bnf_cust'] = new_wire['customerno'].where(new_wire['Trf_Direction'] == 'I')
new_wire['org_cust'] = new_wire['customerno'].where(new_wire['Trf_Direction'] == 'O')

# NOTE(review): `fetch_data_by_month` reads `WireTrans`, not `new_wire`, so these two columns
# only reach the feature extraction if `WireTrans` already carries them -- confirm.

In [None]:
class DateYM:
    """Mutable (year, month) pair supporting month/year arithmetic and comparison.

    The month is stored zero-based (0 = January ... 11 = December) so that
    ``year * 12 + month`` is a linear month index; `export_tuple` converts back
    to the one-based month that was passed to the constructor.
    """

    def __init__(self, year, month):
        """
        :param year: calendar year
        :param month: calendar month, 1 (January) to 12 (December)
        """
        self.year = year
        self.month = month - 1    # 0 ~ 11, from Jan to Dec

    def __repr__(self):
        return "DateYM({}, {})".format(*self.export_tuple())

    def _month_index(self):
        """Number of months since year 0; used by all comparisons."""
        return self.year*12 + self.month

    def export_tuple(self):
        """Return ``(year, month)`` with a one-based month."""
        return (self.year, self.month+1)

    def add_year(self, y):
        self.year += y

    def substract_year(self, y):
        # Historical (misspelled) name, kept so existing callers keep working.
        self.year -= y

    # Correctly spelled alias of `substract_year`.
    subtract_year = substract_year

    def add_month(self, m):
        """Move forward by `m` months (a negative `m` moves backwards), carrying into the year."""
        carry, self.month = divmod(self.month + m, 12)
        self.year += carry

    def subtract_month(self, m):
        """Move backwards by `m` months, borrowing from the year when needed."""
        self.add_month(-m)

    def is_larger_than(self, ym):
        return self._month_index() > ym._month_index()

    def is_smaller_than(self, ym):
        # The first parameter used to be misspelled `slef`, so every call raised NameError.
        return self._month_index() < ym._month_index()

    def is_equal(self, ym):
        return self._month_index() == ym._month_index()

    
def list_date_tuples(from_date, to_date):
    """Enumerate the months from `from_date` to `to_date`, both ends included.

    :param from_date: (DateYM) first month of the range
    :param to_date: (DateYM) last month of the range
    :return: list of ``(year, month)`` tuples in chronological order; empty when
             `from_date` is later than `to_date`
    """
    # Walk a private copy so the caller's `from_date` is not advanced.
    cursor = DateYM(*from_date.export_tuple())
    months = []
    while True:
        if cursor.is_larger_than(to_date):
            return months
        months.append(cursor.export_tuple())
        cursor.add_month(1)


def fetch_data_by_month(date_ym, trans_offset=6):
    """Return the transaction window and the labelled customers for one month.

    Reads the module-level frames `WireTrans`, `CustInfo` and `SARCase`.
    NOTE(review): `SARCase` is not created in the cells shown above this function;
    it must be loaded before the first call.

    :param date_ym: ``(year, month)`` tuple with a one-based month
    :param trans_offset: how many months before the first day of `date_ym` the
                         transaction window starts
    :return: ``(view_wiretrans, view_customer)``, both indexed by `customerno`.
             `view_wiretrans` holds the transactions dated within
             ``[month start - trans_offset months, next month start)``;
             `view_customer` holds the customers opened before the next month start,
             with ``label`` = 1 when a `SARCase` row with ``Status_SAR == 4`` was
             created for them during the month, else 0.
    """
    year, month = date_ym

    # Built from components rather than a "m/d/Y" string, so no date-format guessing.
    from_date = pd.Timestamp(year=year, month=month, day=1)
    to_date = from_date + pd.DateOffset(months=1)
    offset_date = from_date - pd.DateOffset(months=trans_offset)

    # Half-open windows [start, end): a record stamped exactly at midnight on the first
    # day belongs to the window. With the former strict `>` such records were dropped,
    # and SAR cases created on the first of a month were never labelled in any month.
    view_wiretrans = WireTrans[(WireTrans.trans_date >= offset_date) &
                               (WireTrans.trans_date < to_date)]

    # .copy(): the label column is added below, which must not write into a slice of CustInfo.
    view_customer = CustInfo[CustInfo.open_date < to_date].copy()

    # Customers with a status-4 SAR case created during the month.
    target_list = SARCase[(SARCase.Status_SAR == 4) &
                          (SARCase.created_date >= from_date) &
                          (SARCase.created_date < to_date)]['customerno'].unique()

    # Vectorised membership test (replaces a per-row scan of `target_list`).
    view_customer['label'] = view_customer['customerno'].isin(target_list).astype(int)
    view_customer  = view_customer.set_index('customerno')
    view_wiretrans = view_wiretrans.set_index('customerno')
    return view_wiretrans, view_customer

In [None]:
import os

from_date_ym = DateYM(2017, 8)
to_date_ym = DateYM(2019, 6)
trans_out = 500000     # threshold for a "large" outgoing amount
trans_in  = 1000000    # threshold for a "large" incoming amount


def _fetch_edge_view(date_ym, trans_offset):
    """Fetch the transaction window of `date_ym` and tag every row with its edge id.

    The edge id is the string "<org_cust>_<bnf_cust>"; it is the grouping key of
    every feature computed below.
    """
    wiretrans, customer = fetch_data_by_month(date_ym, trans_offset=trans_offset)
    # Plain zip instead of a row-wise DataFrame.apply: same strings, far fewer Python calls.
    wiretrans['edge_group'] = ["{}_{}".format(org, bnf)
                               for org, bnf in zip(wiretrans.org_cust, wiretrans.bnf_cust)]
    return wiretrans, customer


# to_csv does not create missing directories.
os.makedirs("./Edge_Attribute", exist_ok=True)

list_date_seq = list_date_tuples(from_date_ym, to_date_ym)
projectors_shape = dict()
for date_ym in list_date_seq:
    print("Processing the data in {}".format(date_ym))

    view_wiretrans, view_customer = _fetch_edge_view(date_ym, trans_offset=1)
    outgoing = view_wiretrans[view_wiretrans.WIRE_AMTOT > 0]

    # The aggregations select the amount column BEFORE summing: `groupby(...).sum()[col]`
    # would first sum every column of the frame, which is wasteful and fails on
    # datetime/object columns in recent pandas versions.

    ## Number of outgoing transactions larger than `trans_out`
    Large_TransOut_Count = view_wiretrans[view_wiretrans.WIRE_AMTOT > trans_out].edge_group.value_counts().rename('Large_TransOut_Count')

    ## Number of outgoing transactions
    TransOut_Count = outgoing.edge_group.value_counts().rename('TransOut_Count')

    ## Total amount of the incoming transactions larger than `trans_in`
    Total_Large_TransIn = view_wiretrans[view_wiretrans.WIRE_AMTIN > trans_in].groupby('edge_group')['WIRE_AMTIN'].sum().rename('Total_Large_TransIn')

    ## Total outgoing amount
    Total_WireTrans = outgoing.groupby('edge_group')['WIRE_AMTOT'].sum().rename('Total_WireTrans')

    ## Number of times an outgoing transaction occurred
    Total_WireTrans_Times = outgoing.groupby('edge_group').size().rename('Total_WireTrans_Times')

    ## Average outgoing amount
    Average_WireTrans = (Total_WireTrans/Total_WireTrans_Times).rename('Average_WireTrans')

    ## Indicator: incoming transactions larger than 8,000 add up to more than 60,000,000
    incoming_8000 = view_wiretrans[view_wiretrans.WIRE_AMTIN > 8000].groupby('edge_group')['WIRE_AMTIN'].sum()
    WireTransIn_8000 = pd.Series(1, index=incoming_8000[incoming_8000 > 60000000].index, name='WireTransIn_8000')

    ## Same month, but with a window reaching nine months back
    view_wiretrans, view_customer = _fetch_edge_view(date_ym, trans_offset=9)
    ## Total amount of the outgoing transactions larger than 8,000 in that window
    WireTrans_Out_9mon = view_wiretrans[view_wiretrans.WIRE_AMTOT > 8000].groupby('edge_group')['WIRE_AMTOT'].sum().rename('WireTrans_Out_9mon')

    ## Output: one row per edge; edges missing from a feature get 0
    output = pd.concat([Large_TransOut_Count,
                        TransOut_Count,
                        Total_Large_TransIn,
                        Total_WireTrans,
                        Total_WireTrans_Times,
                        Average_WireTrans,
                        WireTransIn_8000,
                        WireTrans_Out_9mon], axis=1).fillna(0)
    # Prepend the two edge endpoints recovered from the edge id.
    # NOTE(review): this assumes customer ids never contain '_' -- confirm.
    output = pd.concat([pd.DataFrame([idx.split('_') for idx in output.index], index=output.index), output], axis=1)
    output.to_csv("./Edge_Attribute/Edge_attribue_{}-{}.csv".format(date_ym[0], date_ym[1]), index=False)

# 2 Conversion from `EdgeAttribute` to `EdgeEmbeddings` via `GammaVAE`
The 8 edge features are too many to use directly in the embedding extraction.
Thus, we train a `GammaVAE` to encode the 8 features into 3-dimensional embeddings.
Note that the classic **VAE** assumes a Gaussian-distributed latent variable, which can be negative. However, the edge weights should be non-negative according to our definition.
In light of this, the autoencoder we use must produce non-negative latent codes. The latent variable of GammaVAE is Gamma distributed, which is non-negative.

## 2.1 Parameters Setting

In [None]:
# Hyper-parameters of the GammaVAE; the field names are given as one space-separated string.
ModelConfig = namedtuple(
    'ModelConfig',
    'input_dims latent_dims hidden_dims gamma_shape prior_shape prior_rate prior_weight',
)
model_config = ModelConfig(**{
    'input_dims': 8,            # number of edge features fed to the encoder
    'latent_dims': 3,           # size of the edge embedding
    'hidden_dims': [16, 8, 5],  # widths of the encoder layers (reversed for the decoder)
    'gamma_shape': 8.,
    'prior_shape': 2.0,
    'prior_rate': 1.,
    'prior_weight': 0.001,      # weight of the KL term in the loss
})

# Hyper-parameters of the optimisation loop.
TrainConfig = namedtuple('TrainConfig', 'training_epochs batch_size learning_rate')
train_config = TrainConfig(**{
    'training_epochs': 200,
    'batch_size': 1000,
    'learning_rate': 1e-5,
})

## 2.2 Definition of `GammaVAE`

In [None]:
class GammaVAE(nn.Module):
    """Fully-connected variational auto-encoder whose latent code is Gamma distributed.

    The encoder maps an input row to the shape (`alpha`) and rate (`beta`) of one
    Gamma distribution per latent dimension; a latent sample is drawn with the
    shape-augmentation reparameterisation and decoded back to the input space.
    """

    def __init__(self,
                 in_channels: int,
                 latent_dim: int,
                 hidden_dims: list = None,
                 gamma_shape: float = 8.,
                 prior_shape: float = 2.0,
                 prior_rate: float = 1.,
                 prior_weight: float = 0.1,
                 **kwargs) -> None:
        """
        :param in_channels: number of input features
        :param latent_dim: number of latent dimensions
        :param hidden_dims: widths of the encoder layers (the decoder uses them reversed);
                            defaults to [32, 64, 128, 256, 512]
        :param gamma_shape: shape augmentation constant B added to alpha when sampling
        :param prior_shape: shape of the Gamma prior
        :param prior_rate: rate of the Gamma prior
        :param prior_weight: weight of the KL term; the reconstruction term gets 1 - prior_weight
        """
        super(GammaVAE, self).__init__()

        #
        # Parameters setting
        # --------------------------------------------------------------------------------------------------------------
        self.input_dim = in_channels
        self.latent_dim = latent_dim
        self.B = gamma_shape
        # Plain tensors (not buffers): loss_function moves them to the input's device.
        self.prior_alpha = torch.tensor([prior_shape])
        self.prior_beta = torch.tensor([prior_rate])
        self.prior_weight = prior_weight
        modules = []
        if hidden_dims is None:
            hidden_dims = [32, 64, 128, 256, 512]

        #
        # Build Encoder
        # --------------------------------------------------------------------------------------------------------------
        for h_dim in hidden_dims:
            modules.append(
                nn.Sequential(
                    nn.Linear(in_channels, out_features=h_dim),
                    nn.BatchNorm1d(h_dim),
                    nn.ELU(),
                ))
            in_channels = h_dim
        self.encoder = nn.Sequential(*modules)
        # Softmax keeps alpha and beta strictly positive. The latent axis is named explicitly:
        # for the [N x latent_dim] activations produced here it is the axis the implicit
        # (deprecated) default already picked.
        self.fc_mu = nn.Sequential(nn.Linear(hidden_dims[-1], latent_dim),
                                   nn.Softmax(dim=-1))
        self.fc_var = nn.Sequential(nn.Linear(hidden_dims[-1], latent_dim),
                                    nn.Softmax(dim=-1))

        #
        # Build Decoder
        # --------------------------------------------------------------------------------------------------------------
        modules = []
        self.decoder_input = nn.Sequential(nn.Linear(latent_dim, hidden_dims[-1]))
        hidden_dims = hidden_dims[::-1]
        for i in range(len(hidden_dims) - 1):
            modules.append(
                nn.Sequential(
                    nn.Linear(in_features=hidden_dims[i], out_features=hidden_dims[i + 1]),
                    nn.BatchNorm1d(hidden_dims[i + 1]),
                    nn.ELU(),
                ))
        self.decoder = nn.Sequential(*modules)
        self.final_layer = nn.Sequential(
            nn.Linear(in_features=hidden_dims[-1], out_features=self.input_dim),
            nn.BatchNorm1d(self.input_dim),
            nn.ELU(),
        )
        self.weight_init()

    def weight_init(self):
        """Apply `init_` to every sub-module, however deeply nested.

        Module.apply walks the module tree recursively. The former two-level loop only
        reached the direct children of each top-level block, so the Linear/BatchNorm
        layers wrapped in the nested Sequentials of the encoder and decoder were never
        initialised by `init_`.
        """
        self.apply(init_)

    def encode(self, input: torch.Tensor) -> list:
        """
        Encodes the input by passing it through the encoder network.
        :param input: (Tensor) Input batch [N x input_dim]
        :return: [alpha, beta], the shape and rate of the latent Gamma, each [N x latent_dim]
        """
        result = self.encoder(input)

        alpha = self.fc_mu(result)
        beta = self.fc_var(result)

        return [alpha, beta]

    def decode(self, z: torch.Tensor) -> torch.Tensor:
        """
        Maps latent codes back to the input space.
        :param z: (Tensor) Latent codes [N x latent_dim]
        :return: (Tensor) Reconstruction [N x input_dim]
        """
        result = self.decoder_input(z)
        result = self.decoder(result)
        result = self.final_layer(result)
        return result

    def reparameterize(self, alpha: torch.Tensor, beta: torch.Tensor) -> torch.Tensor:
        """
        Reparameterize the Gamma distribution by the shape augmentation trick.
        Reference:
        [1] https://arxiv.org/pdf/1610.05683.pdf

        :param alpha: (Tensor) Shape parameter of the latent Gamma
        :param beta: (Tensor) Rate parameter of the latent Gamma
        :return: (Tensor) Sample that is differentiable w.r.t. alpha and beta
        """
        # Sample from Gamma(alpha + B, 1) to guarantee acceptance
        alpha_ = alpha.clone().detach()
        z_hat = Gamma(alpha_ + self.B, torch.ones_like(alpha_)).sample()

        # Compute the eps ~ N(0,1) that produces z_hat. eps must be a constant of the graph:
        # computed from the attached alpha, h(alpha, h^-1(alpha, z_hat)) collapses to z_hat
        # and the sample carries a zero gradient w.r.t. alpha.
        eps = self.inv_h_func(alpha_ + self.B, z_hat)
        z = self.h_func(alpha + self.B, eps)

        # NOTE(review): the sample is not scaled back from shape alpha + B to shape alpha
        # (see [1]) -- confirm whether that is intended.

        # When beta != 1, scale by beta
        return z / beta

    @staticmethod
    def h_func(alpha: torch.Tensor, eps: torch.Tensor) -> torch.Tensor:
        """
        Reparameterize a sample eps ~ N(0, 1) so that h(z) ~ Gamma(alpha, 1)
        :param alpha: (Tensor) Shape parameter
        :param eps: (Tensor) Random sample to reparameterize
        :return: (Tensor)
        """

        z = (alpha - 1./3.) * (1 + eps / torch.sqrt(9. * alpha - 3.))**3
        return z

    @staticmethod
    def inv_h_func(alpha: torch.Tensor, z: torch.Tensor) -> torch.Tensor:
        """
        Inverse reparameterize the given z into eps.
        :param alpha: (Tensor)
        :param z: (Tensor)
        :return: (Tensor)
        """
        eps = torch.sqrt(9. * alpha - 3.) * ((z / (alpha - 1./3.))**(1. / 3.) - 1.)
        return eps

    @staticmethod
    def I_function(a, b, c, d):
        """E_{x ~ Gamma(scale=c, shape=d)}[log pdf(x; scale=a, shape=b)]."""
        return - c * d / a - b * torch.log(a) - torch.lgamma(b) + (b - 1) * (torch.digamma(d) + torch.log(c))

    def forward(self, input: torch.Tensor, **kwargs) -> list:
        """
        :param input: (Tensor) Input batch [N x input_dim]
        :return: [reconstruction, input, alpha, beta]
        """
        alpha, beta = self.encode(input)
        z = self.reparameterize(alpha, beta)
        return [self.decode(z), input, alpha, beta]

    def vae_gamma_kl_loss(self, a, b, c, d):
        """
        https://stats.stackexchange.com/questions/11646/kullback-leibler-divergence-between-two-gamma-distributions

        Returns KL( Gamma(shape=d, rate=c) || Gamma(shape=b, rate=a) ) summed over dim 1.
        b and d are Gamma shape parameters; a and c are RATE parameters, which are
        inverted into the scale parameters that `I_function` expects.
        (All, therefore, must be positive.)
        """

        a = 1 / a
        c = 1 / c
        losses = self.I_function(c, d, c, d) - self.I_function(a, b, c, d)
        return torch.sum(losses, dim=1)

    def loss_function(self, *args, **kwargs) -> dict:
        """
        :param args: (reconstruction, input, alpha, beta) as returned by `forward`
        :return: {'loss': scalar}, the batch mean of
                 (1 - prior_weight) * MSE + prior_weight * KL(posterior || prior)
        """
        recons = args[0]
        input = args[1]
        alpha = args[2]
        beta = args[3]

        curr_device = input.device
        recons_loss = torch.mean(F.mse_loss(recons, input, reduction='none'), dim=(1,))

        self.prior_alpha = self.prior_alpha.to(curr_device)
        self.prior_beta = self.prior_beta.to(curr_device)

        # KL(q || p) with q = Gamma(shape=alpha, rate=beta) and p = Gamma(prior_alpha, prior_beta).
        # vae_gamma_kl_loss takes (rate_p, shape_p, rate_q, shape_q); the previous call passed
        # (alpha, beta, prior_alpha, prior_beta), i.e. shapes in the rate slots and the
        # divergence taken from the prior to the posterior.
        kld_loss = self.vae_gamma_kl_loss(self.prior_beta, self.prior_alpha, beta, alpha)

        loss = (1 - self.prior_weight) * recons_loss + self.prior_weight * kld_loss
        loss = torch.mean(loss, dim = 0)
        return {'loss': loss}

    def sample(self, num_samples:int, current_device: int, **kwargs) -> torch.Tensor:
        """
        Samples latent codes from the prior and decodes them.
        :param num_samples: (Int) Number of samples
        :param current_device: (Int) Device to run the model
        :return: (Tensor) [num_samples x input_dim]
        """
        z = Gamma(self.prior_alpha, self.prior_beta).sample((num_samples, self.latent_dim))
        # Only drop the trailing singleton axis coming from the 1-element prior tensors; a bare
        # squeeze() would also drop the batch axis when num_samples == 1 (or latent_dim == 1).
        z = z.squeeze(-1).to(current_device)

        samples = self.decode(z)
        return samples

    def generate(self, x: torch.Tensor, **kwargs) -> torch.Tensor:
        """
        Given an input batch x, returns its reconstruction
        :param x: (Tensor) [N x input_dim]
        :return: (Tensor) [N x input_dim]
        """

        return self.forward(x)[0]


def init_(m):
    """Initialise a single layer in place; layers of any other type are left untouched.

    * Linear / Conv2d: orthogonal weight.
    * BatchNorm1d / BatchNorm2d: weight filled with 1.
    In both cases an existing bias is filled with 0.
    """
    is_dense = isinstance(m, (nn.Linear, nn.Conv2d))
    is_norm = isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d))
    if not (is_dense or is_norm):
        return

    if is_dense:
        init.orthogonal_(m.weight)
    else:
        m.weight.data.fill_(1)

    if m.bias is not None:
        m.bias.data.fill_(0)

## 2.3 Data Loading
Load the data used to train **GammaVAE**.

In [None]:
# Make CUDA device 0 the current device; the argument-less `.cuda()` calls in the
# following cells place their tensors and the model on the current device.
torch.cuda.set_device(0)

In [None]:
def _edge_attribute_month(path):
    """Sort key: the (year, month) encoded in '.../Edge_attribue_<year>-<month>.csv'."""
    year, month = path.split('_')[-1].split('.')[0].split('-')
    return int(year), int(month)


# Only the per-month attribute files: a bare '*.csv' would also pick up the
# 'Edge_Embedd_*.csv' files that a later cell writes into the same directory.
# glob returns files in arbitrary order, so sort chronologically to make the
# train/test split below well defined and reproducible.
urls = sorted(glob.glob('./Edge_Attribute/Edge_attribue_*.csv'), key=_edge_attribute_month)
dfs = [pd.read_csv(url, encoding = "ISO-8859-1", engine='python') for url in urls]

# The first 16 months are used for training, the remaining months for testing.
df_train = pd.concat(dfs[:16])
df_test  = pd.concat(dfs[16:])

# The first two columns are the edge endpoints; the rest are the edge features.
edge_attr_train = df_train.iloc[:,2:]
edge_attr_test  = df_test.iloc[:,2:]
edge_attr_train.head()

In [None]:
# Replace both splits by log(x + 1) of their values.
# Re-running this cell applies the transform a second time.
edge_attr_train, edge_attr_test = (
    np.log(split + 1.) for split in (edge_attr_train, edge_attr_test)
)
edge_attr_train.head()

In [None]:
# Fit the min-max scaler on the training split only, then apply it to both splits.
scaler = preprocessing.MinMaxScaler()
scaler.fit(edge_attr_train)
edge_attr_train_scaled, edge_attr_test_scaled = (
    scaler.transform(split) for split in (edge_attr_train, edge_attr_test)
)

In [None]:
def _as_cuda_float(matrix):
    """Convert a 2-D array-like into a float32 tensor on the current CUDA device."""
    return torch.tensor(pd.DataFrame(matrix).values, dtype=torch.float32).cuda()


edge_attr_train_scaled = _as_cuda_float(edge_attr_train_scaled)
edge_attr_test_scaled = _as_cuda_float(edge_attr_test_scaled)

## 2.4 Model & Optimizer Initialization

In [None]:
# Map the ModelConfig fields onto the GammaVAE constructor arguments.
vae_kwargs = dict(
    in_channels=model_config.input_dims,
    latent_dim=model_config.latent_dims,
    hidden_dims=model_config.hidden_dims,
    gamma_shape=model_config.gamma_shape,
    prior_shape=model_config.prior_shape,
    prior_rate=model_config.prior_rate,
    prior_weight=model_config.prior_weight,
)
vae = GammaVAE(**vae_kwargs)
vae = vae.cuda()    # move the parameters to the current CUDA device

In [None]:
# Adam over all parameters of the VAE, with the learning rate taken from `train_config`.
optimizer = torch.optim.Adam(vae.parameters(), lr=train_config.learning_rate)

## 2.5 Training

In [None]:
def _batch_loss_without_update(data):
    """Loss of the current model on one random batch of `data`, leaving the model untouched.

    The batch is evaluated in eval mode and without autograd, so BatchNorm running
    statistics are not updated by the data and no graph is kept alive. The previous
    train/eval mode of the model is restored afterwards.
    """
    n_rows = data.shape[0]
    # random.sample raises when asked for more items than available.
    indice = torch.tensor(random.sample(range(n_rows), min(train_config.batch_size, n_rows)))
    was_training = vae.training
    vae.eval()
    try:
        with torch.no_grad():
            x_output, x_input, x_alpha, x_beta = vae(data[indice])
            return vae.loss_function(x_output, x_input, x_alpha, x_beta)['loss'].item()
    finally:
        vae.train(was_training)


# Loss of the untrained model, as a baseline for the training log below.
train_loss_ini = _batch_loss_without_update(edge_attr_train_scaled)
test_loss_ini = _batch_loss_without_update(edge_attr_test_scaled)

print("Epoch:", '%04d' % (0),
      "Training loss=", "{:.9f}".format(train_loss_ini),
      "Testing loss=", "{:.9f}".format(test_loss_ini))

In [None]:
n_train_rows = edge_attr_train_scaled.shape[0]
n_test_rows = edge_attr_test_scaled.shape[0]
total_batch = int(n_train_rows / train_config.batch_size)

for epoch in range(train_config.training_epochs):
    # Python floats, not tensors: accumulating graph-attached loss tensors keeps their
    # autograd graphs alive for the whole epoch.
    avg_train_cost = 0.
    avg_test_cost = 0.
    for i in range(total_batch):

        ## train
        indice = torch.tensor(random.sample(range(n_train_rows), train_config.batch_size))
        batch_xs = edge_attr_train_scaled[indice]

        x_output, x_input, x_alpha, x_beta = vae(batch_xs)
        loss_dict = vae.loss_function(x_output, x_input, x_alpha, x_beta)
        # Each batch loss is weighted by batch_size / n_train_rows (unchanged), so that the
        # sum over the batches of an epoch approximates the mean batch loss.
        train_loss = loss_dict['loss'] / n_train_rows * train_config.batch_size

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        avg_train_cost += train_loss.item()

        ## valid
        # Evaluate in eval mode and without autograd: the test rows must not update the
        # BatchNorm running statistics, and no graph is needed.
        indice = torch.tensor(random.sample(range(n_test_rows), min(train_config.batch_size, n_test_rows)))
        batch_xs = edge_attr_test_scaled[indice]

        vae.eval()
        with torch.no_grad():
            x_output, x_input, x_alpha, x_beta = vae(batch_xs)
            loss_dict = vae.loss_function(x_output, x_input, x_alpha, x_beta)
            avg_test_cost += (loss_dict['loss'] / n_train_rows * train_config.batch_size).item()
        vae.train()

    # Display logs per epoch step. The test figure is the accumulated epoch value, like the
    # training figure (it used to be the loss of the last batch only, and indexing that last
    # batch raised IndexError when there were no batches).
    print("Epoch:", '%04d' % (epoch+1),
          "Training loss=", "{:.9f}".format(avg_train_cost), "Testing loss=", "{:.9f}".format(avg_test_cost))
    

## 2.6 Generating `EdgeEmbeddings`

In [None]:
# Inference only: eval mode makes BatchNorm use its running statistics (and stops this pass
# from updating them), no_grad avoids building a graph over the whole training split.
_was_training = vae.training
vae.eval()
with torch.no_grad():
    train_x_output, train_x_input, train_x_alpha, train_x_beta = vae(edge_attr_train_scaled)
vae.train(_was_training)    # restore the previous mode

# The embedding of an edge is the mean of its latent Gamma(shape=alpha, rate=beta): alpha / beta.
train_mean = train_x_alpha / train_x_beta
train_mean[:10,:]

In [None]:
# Inference only: eval mode makes BatchNorm use its running statistics (and stops this pass
# from updating them), no_grad avoids building a graph over the whole test split.
_was_training = vae.training
vae.eval()
with torch.no_grad():
    test_x_output, test_x_input, test_x_alpha, test_x_beta = vae(edge_attr_test_scaled)
vae.train(_was_training)    # restore the previous mode

# The embedding of an edge is the mean of its latent Gamma(shape=alpha, rate=beta): alpha / beta.
test_mean = test_x_alpha / test_x_beta
test_mean[:10,:]

In [None]:
# Stack the training embeddings on top of the test embeddings as one NumPy array.
VAE_res = np.concatenate(
    [split_mean.cpu().data.numpy() for split_mean in (train_mean, test_mean)],
    axis=0,
)

In [None]:
# `VAE_res` holds the rows of all months back to back, in the order of `dfs`;
# `count` is the offset of the current month's first row.
count = 0
index = 0
for url, month_df in zip(urls, dfs):
    date = url.split('_')[-1].split('.')[0]
    n_rows = month_df.shape[0]
    print (date, n_rows)
    endpoints = pd.DataFrame(month_df.iloc[:,:2].values)
    embeddings = pd.DataFrame(VAE_res[count:count+n_rows,:])
    tmp_df = pd.concat([endpoints, embeddings], axis=1)
    # Written with the row index but without a header line.
    tmp_df.to_csv('./Edge_Attribute/Edge_Embedd_{}.csv'.format(date), header=False)
    count += n_rows
    index += 1

# 3 Graph Embedding

## 3.1 Parameters Setting

In [None]:
# Model settings
latent_dim = 300    # not read in the cells shown here; presumably the vertex embedding size -- TODO confirm
epochs = 600        # not read in the cells shown here; presumably the number of outer iterations -- TODO confirm
lr = 100            # step size of the manual gradient steps in update_params
lambda_V = 1e-5     # coefficient of the V_t / V_t_plus / V_t_minus terms in their gradients
lambda_w = 1e-4     # coefficient of the w1 / w2 terms in their gradients
param_alpha = 1     # weight of the S_t (observed-edge) term in the graph gradients
focal_alpha = 0.1   # scale factor of the z_t terms in update_params
focal_gamma = 1.5   # exponent used in the z_t terms in update_params
edge_attrs = ['dim_1', 'dim_2', 'dim_3']    # edge attribute columns; one adjacency matrix is built per attribute
L = 20              # multiplier for the number of randomly sampled vertex pairs in update_params

# Graph fetching settings
weight_type = 'embed'  # other values listed by the author: [weight_none, weight_log, count_larger8000]; not read in the cells shown here
offset = 0             # not read in the cells shown here -- TODO confirm its use

# To store all the information of dynamic graphs, keyed by (year, month)
dyngraph_info = defaultdict(dict)

## 3.2 Fetch data by date

In [None]:
# Months covered by the dynamic graph: January 2018 to June 2019, both included.
from_date_ym = DateYM(2018, 1)
to_date_ym = DateYM(2019, 6)
# Chronological list of (year, month) tuples; position t in this list is the time step t.
list_date_seq = list_date_tuples(from_date_ym, to_date_ym)
seq_length = len(list_date_seq)
print(list_date_seq)

## 3.3 Load Edge Embeddings

In [None]:
def get_edge_embed(embed_path, groupby_method='sum'):
    """Load the edge embeddings of one month and merge them into undirected edges.

    The file is expected to have no header line and six columns: row index,
    the two edge endpoints and the three embedding dimensions.

    :param embed_path: path of an 'Edge_Embedd_<year>-<month>.csv' file
    :param groupby_method: 'sum' adds up the embeddings of all rows joining the same
                           pair of vertices; any other value averages them
    :return: DataFrame with one row per unordered vertex pair and the columns
             from_vertex, to_vertex (strings, in sorted order), dim_1, dim_2, dim_3
             and vertex_index ("<from_vertex>_<to_vertex>")
    """
    dim_cols = ['dim_1', 'dim_2', 'dim_3']
    # header=None: the file is written without a header line, so letting read_csv infer
    # one would consume (and silently drop) the first edge of every month.
    df = pd.read_csv(embed_path, header=None,
                     names=['index', 'from_vertex', 'to_vertex'] + dim_cols)

    # Canonical form of an undirected edge: both endpoints as strings, sorted. This replaces
    # parsing the printed form of a Python set, which breaks on non-string or missing
    # endpoints and on self-loops.
    # NOTE(review): a missing endpoint becomes the vertex id 'nan' -- confirm this is wanted.
    endpoints = np.sort(df[['from_vertex', 'to_vertex']].astype(str).to_numpy(), axis=1)
    df['from_vertex'] = endpoints[:, 0]
    df['to_vertex'] = endpoints[:, 1]

    # Aggregate the embedding dimensions only (not the row index or the endpoint ids).
    grouped = df.groupby(['from_vertex', 'to_vertex'])[dim_cols]
    if groupby_method == 'sum':
        df_new = grouped.sum().reset_index()
    else:
        df_new = grouped.mean().reset_index()
    df_new['vertex_index'] = df_new['from_vertex'] + '_' + df_new['to_vertex']

    return df_new

In [None]:
# Load the merged edge embeddings of every month into `dyngraph_info`.
for date_key in list_date_seq:
    year, month = date_key
    edge_embed_path = './Edge_Attribute/Edge_Embedd_{}-{}.csv'.format(year, month)
    edge_embed = get_edge_embed(edge_embed_path)
    dyngraph_info[date_key].update(edge_embed=edge_embed)

## 3.4 Vertex Indexing
To construct an index mapping between `entry_index` and `customer_id`.

In [None]:
for year, month in list_date_seq:
    edge_embed_t = dyngraph_info[(year, month)]['edge_embed']
    # Every vertex that appears as an endpoint of at least one edge this month.
    vertex_ids_t = set(edge_embed_t['from_vertex']) | set(edge_embed_t['to_vertex'])
    # Sort before numbering: the iteration order of a set of strings changes from one
    # Python process to the next, which made the vertex numbering (and the pickled
    # mappings derived from it) irreproducible across kernel restarts.
    ordered_ids = sorted(vertex_ids_t, key=str)
    dict_int2key = dict(enumerate(ordered_ids))
    dict_key2int = {key: idx for idx, key in enumerate(ordered_ids)}
    dyngraph_info[(year, month)]['int2key'] = dict_int2key
    dyngraph_info[(year, month)]['key2int'] = dict_key2int

In [None]:
# Persist both index mappings of every month as '<name>_<yyyy><mm>.pickle'.
for year, month in list_date_seq:
    stamp = (str(year), str(month).zfill(2))
    targets = (
        ('../data/index_mapping/int2key_{}{}.pickle', 'int2key'),
        ('../data/index_mapping/key2int_{}{}.pickle', 'key2int'),
    )
    for path_template, mapping_name in targets:
        with open(path_template.format(*stamp), 'wb') as handle:
            pickle.dump(dyngraph_info[(year, month)][mapping_name], handle, protocol=pickle.HIGHEST_PROTOCOL)

## 3.5 Load Ground Truth Labels
- `FILTER`: required for monitoring 
- `announce`: suspicious account for AML
- `Y_SAR`: AML account

In [None]:
for year, month in list_date_seq:
    # Label file of the month, named with a two-digit year and a zero-padded month.
    ctbc_path = '../data/aml_all_{}{}_y.csv'.format(str(year)[2:], str(month).zfill(2))
    ctbc_data = pd.read_csv(ctbc_path)

    dict_key2int_t = dyngraph_info[(year, month)]['key2int']
    # Graph indices of the customers flagged Y_SAR == 1; customers that are not a vertex
    # of this month's graph are skipped.
    # NOTE(review): the lookup only matches when `customerno` has the same type and
    # format as the vertex ids stored in key2int -- confirm.
    sar_customers = ctbc_data.loc[ctbc_data['Y_SAR'] == 1, 'customerno']
    pos_index = [dict_key2int_t[x] for x in sar_customers if x in dict_key2int_t]
    y_t = np.zeros((len(dict_key2int_t), 1))
    y_t[pos_index, 0] = 1
    dyngraph_info[(year, month)]['y'] = y_t

    # Vectorised .sum() calls; int(sum(y_t)) converted a one-element array to a scalar,
    # which NumPy has deprecated.
    print("({}, {}): # of FILTER = {}, # of announce = {}, # of Y_SAR = {}, # of PosVertex = {}".
          format(year, month,
                 ctbc_data['FILTER'].sum(), ctbc_data['announce'].sum(),
                 ctbc_data['Y_SAR'].sum(), int(y_t.sum())))

## 3.6 Load Graph by Month

In [None]:
def _adjacency_csr(graph, nodelist, weight='weight'):
    """Adjacency of `graph` as a scipy CSR *matrix*, on old and new NetworkX versions.

    `nx.to_scipy_sparse_matrix` was removed in NetworkX 3.0 in favour of
    `nx.to_scipy_sparse_array`. The result is wrapped in csr_matrix because the
    update code multiplies these objects with `*`, which is a matrix product for
    sparse matrices but an element-wise product for sparse arrays.
    """
    to_sparse = getattr(nx, 'to_scipy_sparse_array', None) or nx.to_scipy_sparse_matrix
    return scipy.sparse.csr_matrix(to_sparse(graph, nodelist=nodelist, weight=weight))


for year, month in list_date_seq:
    edge_embed_t = dyngraph_info[(year, month)]['edge_embed']
    key2int_t = dyngraph_info[(year, month)]['key2int']
    int2key_t = dyngraph_info[(year, month)]['int2key']
    G_t = nx.from_pandas_edgelist(edge_embed_t, 'from_vertex', 'to_vertex',
                                  edge_attr=edge_attrs,
                                  create_using=nx.MultiGraph())
    # Relabel the vertices with their integer index, then fix the row/column order of all
    # matrices to that index order (the keys of int2key).
    G_t = nx.relabel_nodes(G_t, key2int_t)
    nodelist_t = list(int2key_t)
    L_t = scipy.sparse.csr_matrix(nx.normalized_laplacian_matrix(G_t, nodelist=nodelist_t))
    A_t = _adjacency_csr(G_t, nodelist_t)
    # One weighted adjacency matrix per edge attribute.
    A_ts = [_adjacency_csr(G_t, nodelist_t, weight=attr) for attr in edge_attrs]
    dyngraph_info[(year, month)]['graph_laplacian'] = L_t
    dyngraph_info[(year, month)]['graph_adjacent'] = A_ts
    print('({}, {}): Finished graph constructuion, |V| = {}'.format(year, month, A_t.shape[0]))

## 3.7 Embedding Projection on a Dynamic graph

In [None]:
def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)) with the input clipped from below at -5.

    The clipping bounds exp(-x), so the result is never smaller than 1 / (1 + e^5).
    Works on arrays, NumPy scalars and plain Python numbers; the argument is never
    modified (the previous version clipped the caller's array in place).

    :param x: scalar or array-like of numbers
    :return: NumPy scalar or array of the same shape as `x`
    """
    clipped = np.maximum(x, -5.)
    return 1 / (1 + np.exp(-clipped))

In [None]:
def _align_neighbor_month(date_ym_other, int2key_t):
    """Return the embeddings and labels of another month, re-ordered to month t's vertices.

    Row i of both results belongs to the customer that is vertex i in month t.
    Customers that are not a vertex of the other month get an all-zero row.

    :param date_ym_other: (year, month) key of the neighbouring month
    :param int2key_t: {vertex index -> customer id} of month t
    :return: (V_aligned, y_aligned), both new arrays (not views of the stored ones)
    """
    key2int_other = dyngraph_info[date_ym_other]['key2int']
    # Look up the CUSTOMER ID of each vertex i. Iterating `int2key_t` directly yields its
    # integer keys, which are not customer ids, so the lookup missed and every vertex
    # was mapped to the zero row. -1 selects the zero row appended below.
    index_map = [key2int_other.get(int2key_t[i], -1) for i in range(len(int2key_t))]

    V_other = dyngraph_info[date_ym_other]['vertex_embed']
    V_other = np.append(V_other, np.zeros((1, V_other.shape[1])), axis=0)
    y_other = dyngraph_info[date_ym_other]['y']
    y_other = np.append(y_other, np.zeros((1, 1)), axis=0)
    return V_other[index_map], y_other[index_map]


def update_params(input_pkg):
    """Run three manual gradient steps on the embeddings and classifier of time step t.

    Reads the module-level `list_date_seq`, `seq_length`, `dyngraph_info`, `classifiers`
    and the hyper-parameters `L`, `lr`, `lambda_V`, `lambda_w`, `param_alpha`,
    `focal_alpha`, `focal_gamma`.

    :param input_pkg: ``(t, (L_t, A_ts))`` -- the time step, the graph Laplacian of that
                      month and the list of per-attribute adjacency matrices
    :return: ``(date_ym, V_t, w1, w2, we, theta)`` after the update. `V_t`, `w1`, `w2`
             and `we` are modified with in-place operators, so the objects stored in
             `dyngraph_info` / `classifiers` change as well when they are NumPy arrays.
    """
    t, hetergraph = input_pkg
    L_t, A_ts = hetergraph
    date_ym = list_date_seq[t]

    int2key_t = dyngraph_info[date_ym]['int2key']

    y_t = dyngraph_info[date_ym]['y']
    V_t = dyngraph_info[date_ym]['vertex_embed']
    w1 = classifiers['w1']
    w2 = classifiers['w2']
    we = classifiers['we']
    theta = classifiers['theta']

    # Combined adjacency: the per-attribute matrices weighted by sigmoid(we)
    A_t = scipy.sparse.csr_matrix(A_ts[0].shape)
    for index_attr in range(we.shape[0]):
        tmpA_t = A_ts[index_attr]
        A_t += sigmoid(we[index_attr,0]) * tmpA_t

    # Embeddings / labels of the next and previous month, aligned to month t's vertex order
    if (t + 1) < seq_length:
        V_t_plus, y_t_plus = _align_neighbor_month(list_date_seq[t + 1], int2key_t)
    else:
        V_t_plus = None
        y_t_plus = None

    if t > 0:
        V_t_minus, _ = _align_neighbor_month(list_date_seq[t - 1], int2key_t)
    else:
        V_t_minus = None

    # Create sparse matrix S: on the observed edges, weight * sigmoid(-<v_i, v_j>)
    vec_i, vec_j, vec_v = scipy.sparse.find(A_t)
    vec_data = vec_v * sigmoid(-np.sum(V_t[vec_i, :] * V_t[vec_j, :], axis=1))
    S_t = scipy.sparse.csr_matrix((vec_data, (vec_i, vec_j)), shape=A_t.shape)

    # Create sparse matrix R: sigmoid(<v_i, v_j>) on randomly sampled pairs plus the observed edges
    # NOTE(review): the row list holds L entries per vertex while the column list holds
    # min(L * degree, |V|) entries per vertex; zip pairs them positionally -- confirm this
    # pairing is intended.
    l = [[u]*L for u in range(A_t.shape[0])]
    smpl_i = [item for sublist in l for item in sublist]
    l = [list(np.random.choice(A_t.shape[0], min(L*A_t[u,:].nnz, A_t.shape[0]), replace=False)) for u in range(A_t.shape[0])]
    smpl_j = [item for sublist in l for item in sublist]
    smpl_index = list(set(zip(smpl_i, smpl_j)) | set(zip(vec_i, vec_j)))
    smpl_i, smpl_j = zip(*smpl_index)
    smpl_data = sigmoid(np.sum(V_t[smpl_i, :] * V_t[smpl_j, :], axis=1))
    R_t = scipy.sparse.csr_matrix((smpl_data, (smpl_i, smpl_j)), shape=A_t.shape)

    # Compute normalization term
    norm_graph = 1 / (vec_data.shape[0] + smpl_data.shape[0])
    norm_laplacian = 1 / (V_t.shape[0] * V_t.shape[1])
    norm_time = 1 / (V_t.shape[0] * V_t.shape[1])
    norm_pred = 10 / V_t.shape[0]

    # Update variables
    for itr in range(3):
        # Create vector z_t
        vec_one = np.ones((V_t.shape[0], 1))
        tmp_recur = np.cos(theta) * V_t + np.sin(theta) * V_t_minus if V_t_minus is not None else V_t
        tmp = np.dot(tmp_recur, w1) + 0.5 * np.dot(tmp_recur * tmp_recur, w2)
        pred_t = sigmoid(tmp)
        # q_t: predicted probability of the true class (1e-10 keeps log(q_t) finite)
        q_t = (vec_one - y_t) * (vec_one - pred_t) + y_t * pred_t + 1e-10
        q_t[q_t > 1] = 1.
        z_t = focal_alpha * (-focal_gamma * (1 - q_t) ** (focal_gamma - 1) * np.log(q_t) + \
                             1/q_t * (1 - q_t) ** focal_gamma) * \
              (2 * y_t - vec_one) * pred_t * (vec_one - pred_t)

        # Compute the gradient w.r.t. V_t
        grad_V_t = norm_graph * (-param_alpha * S_t * V_t + R_t * V_t) + \
                   lambda_V * V_t + \
                   norm_laplacian * L_t * L_t * V_t

        if V_t_plus is not None:
            tmp_recur = np.cos(theta) * V_t_plus + np.sin(theta) * V_t
            pred_t_plus = sigmoid(np.dot(tmp_recur, w1) + 0.5 * np.dot(tmp_recur**2, w2))
            q_t_plus = (vec_one - y_t_plus) * (vec_one - pred_t_plus) + y_t_plus * pred_t_plus + 1e-10
            q_t_plus[q_t_plus > 1] = 1.
            z_t_plus = focal_alpha * (-focal_gamma * (1 - q_t_plus) ** (focal_gamma - 1) * np.log(q_t_plus) + \
                                      1/q_t_plus * (1 - q_t_plus) ** focal_gamma) * \
                       (2 * y_t_plus - vec_one) * pred_t_plus * (vec_one - pred_t_plus)
            grad_V_t += norm_time * -(V_t_plus - V_t) + \
                        norm_pred * -(np.dot(np.cos(theta) * z_t + np.sin(theta) * z_t_plus, w1.T) +
                                      np.sin(theta) * (z_t_plus * tmp_recur * w2.T))

        if V_t_minus is not None:
            tmp_recur = np.cos(theta) * V_t + np.sin(theta) * V_t_minus
            grad_V_t += norm_time * (V_t - V_t_minus) + \
                        norm_pred * -(z_t * tmp_recur * w2.T)
        V_t -= lr * grad_V_t

        # Compute the gradient w.r.t. w1 & w2
        if V_t_minus is not None:
            tmp_recur = np.cos(theta) * V_t + np.sin(theta) * V_t_minus
            grad_w1 = -norm_pred * np.dot(tmp_recur.T, z_t) + lambda_w * w1
            w1 -= lr * grad_w1
            grad_w2 = -norm_pred * 0.5 * np.dot((tmp_recur**2).T, z_t) + lambda_w * w2
            w2 -= lr * grad_w2

        # Compute the gradient w.r.t. we
        grad_we = np.zeros(we.shape)
        for index_attr in range(we.shape[0]):
            tmpA_t = A_ts[index_attr]
            tmp_is, tmp_js, tmp_vs = scipy.sparse.find(tmpA_t)
            tmp = np.mean(np.log(sigmoid(np.sum(V_t[tmp_is,:] * V_t[tmp_js,:], axis=1))) * tmp_vs)
            tmp_grad_sigma_we = sigmoid(we[index_attr,0]) * (1 - sigmoid(we[index_attr,0]))
            grad_we[index_attr,0] = -param_alpha * tmp * tmp_grad_sigma_we
        we -= lr * grad_we

        # Compute the gradient w.r.t. theta
        # NOTE(review): when V_t_minus exists the second assignment REPLACES the -sin(theta)
        # term instead of adding to it, and this update carries no minus sign while the
        # w1 / w2 gradients above do -- confirm the intended derivative before changing it.
        grad_theta = -np.sin(theta) * (np.dot(np.dot(z_t.T, V_t), w1) + \
                                       np.dot(np.dot(z_t.T, tmp_recur * V_t), w2))
        if V_t_minus is not None:
            grad_theta = np.cos(theta) * (np.dot(np.dot(z_t.T, V_t_minus), w1) + \
                                          np.dot(np.dot(z_t.T, tmp_recur * V_t_minus), w2))
        theta -= 0.1 * lr * norm_pred * grad_theta[0][0]

        # Compute the gradient w.r.t. V_t_plus
        if V_t_plus is not None:
            tmp_recur = np.cos(theta) * V_t_plus + np.sin(theta) * V_t
            pred_t_plus = sigmoid(np.dot(tmp_recur, w1) + 0.5 * np.dot(tmp_recur**2, w2))
            # (vec_one - pred_t_plus), as in the two identical q computations above; this line
            # used to multiply by pred_t_plus, which reduced q_t_plus to pred_t_plus.
            q_t_plus = (vec_one - y_t_plus) * (vec_one - pred_t_plus) + y_t_plus * pred_t_plus + 1e-10
            q_t_plus[q_t_plus > 1] = 1.
            z_t_plus = focal_alpha * (-focal_gamma * (1 - q_t_plus) ** (focal_gamma - 1) * np.log(q_t_plus) + \
                                      1/q_t_plus * (1 - q_t_plus) ** focal_gamma) * \
                       (2 * y_t_plus - vec_one) * pred_t_plus * (vec_one - pred_t_plus)
            grad_V_t_plus = norm_graph * (-param_alpha * S_t * V_t_plus + R_t * V_t_plus) + \
                            lambda_V * V_t_plus + \
                            norm_laplacian * L_t * L_t * V_t_plus + \
                            norm_time * (V_t_plus - V_t) + \
                            norm_pred * (-z_t_plus * tmp_recur * w2.T)
            # V_t_plus is a local aligned copy: this step only influences the remaining
            # iterations of this call, it is not written back to dyngraph_info.
            V_t_plus -= lr * grad_V_t_plus

        # Compute the gradient w.r.t. V_t_minus
        if V_t_minus is not None:
            grad_V_t_minus = norm_graph * (-param_alpha * S_t * V_t_minus + R_t * V_t_minus) + \
                             lambda_V * V_t_minus + \
                             norm_laplacian * L_t * L_t * V_t_minus + \
                             norm_time * (V_t_minus - V_t)
            # Local aligned copy as well (see V_t_plus above).
            V_t_minus -= lr * grad_V_t_minus

    return date_ym, V_t, w1, w2, we, theta

In [None]:
def validate(input_pkg):
    """Evaluate the loss terms of one time step without updating any parameter.

    Parameters
    ----------
    input_pkg : tuple
        ``(t, (L_t, A_ts))`` where ``t`` indexes ``list_date_seq``, ``L_t`` is the
        value stored under ``'graph_laplacian'`` and ``A_ts`` the value stored under
        ``'graph_adjacent'`` for that month (``A_ts`` is indexed once per row of
        ``classifiers['we']``).

    Returns
    -------
    tuple
        ``(loss_t, loss_t_graph, loss_t_laplacian, loss_t_time, loss_t_pred)``;
        the first entry is the sum of the other four.

    Notes
    -----
    Reads the module-level names ``list_date_seq``, ``dyngraph_info``,
    ``classifiers``, ``seq_length``, ``sigmoid``, ``L``, ``param_alpha``,
    ``lambda_V``, ``focal_alpha`` and ``focal_gamma``.  The negative samples are
    drawn with ``np.random.choice``, so the graph loss is stochastic.
    """
    t, hetergraph = input_pkg
    L_t, A_ts = hetergraph
    date_ym = list_date_seq[t]
    
    # key2int_t is fetched but not used below; int2key_t drives the index alignment.
    key2int_t = dyngraph_info[date_ym]['key2int']
    int2key_t = dyngraph_info[date_ym]['int2key']
    
    y_t = dyngraph_info[date_ym]['y']
    V_t = dyngraph_info[date_ym]['vertex_embed']
    w1 = classifiers['w1']
    w2 = classifiers['w2']
    we = classifiers['we']
    theta = classifiers['theta']
    
    # Combine the per-attribute adjacency matrices into one weighted adjacency,
    # each weighted by sigmoid(we[k]).
    A_t = scipy.sparse.csr_matrix(A_ts[0].shape)
    for index_attr in range(we.shape[0]):
        tmpA_t = A_ts[index_attr]
        A_t += sigmoid(we[index_attr,0]) * tmpA_t
    
    if (t + 1) < seq_length:
        date_ym_plus = list_date_seq[t + 1]
        
        key2int_t_plus = dyngraph_info[date_ym_plus]['key2int']
        int2key_t_plus = dyngraph_info[date_ym_plus]['int2key']
        # Row of each month-t vertex in month t+1, or -1 when the vertex is absent.
        index_t_2_t_plus = [key2int_t_plus[x] if x in key2int_t_plus else -1 for x in int2key_t]
        
        # Get V_t_plus aligned in V_t index order.
        # A zero row is appended so that index -1 (missing vertex) selects zeros.
        V_t_plus_orig = dyngraph_info[date_ym_plus]['vertex_embed']
        V_t_plus_orig = np.append(V_t_plus_orig, np.zeros((1, V_t_plus_orig.shape[1])), axis=0)
        V_t_plus = V_t_plus_orig[index_t_2_t_plus]
        
        # Get y_t_plus aligned in y_t index order (computed but not used by the losses below)
        y_t_plus_orig = dyngraph_info[date_ym_plus]['y']
        y_t_plus_orig = np.append(y_t_plus_orig, np.zeros((1,1)), axis=0)
        y_t_plus = y_t_plus_orig[index_t_2_t_plus]
    else:
        # Last time step: there is no following month.
        V_t_plus = None
        y_t_plus = None
        key2int_t_plus = None
        int2key_t_plus = None

    if t > 0:
        date_ym_minus = list_date_seq[t - 1]
        
        key2int_t_minus = dyngraph_info[date_ym_minus]['key2int']
        int2key_t_minus = dyngraph_info[date_ym_minus]['int2key']
        # Row of each month-t vertex in month t-1, or -1 when the vertex is absent.
        index_t_2_t_minus = [key2int_t_minus[x] if x in key2int_t_minus else -1 for x in int2key_t]
        
        # Get V_t_minus aligned in V_t index order (zero row appended for index -1)
        V_t_minus_orig = dyngraph_info[date_ym_minus]['vertex_embed']
        V_t_minus_orig = np.append(V_t_minus_orig, np.zeros((1, V_t_minus_orig.shape[1])), axis=0)
        V_t_minus = V_t_minus_orig[index_t_2_t_minus]
                        
        # y_t_minus is aligned the same way but not used by the losses below.
        y_t_minus_orig = dyngraph_info[date_ym_minus]['y']
        y_t_minus_orig = np.append(y_t_minus_orig, np.zeros((1,1)), axis=0)
        y_t_minus = y_t_minus_orig[index_t_2_t_minus] 
    else:
        # First time step: there is no preceding month.
        V_t_minus = None
        y_t_minus = None
        key2int_t_minus = None
        int2key_t_minus = None
    
    # Create sparse matrix S: log-sigmoid of the embedding inner product for every
    # stored entry (i, j) of A_t.
    vec_i, vec_j, vec_v = scipy.sparse.find(A_t)
    vec_data = np.log(sigmoid(np.sum(V_t[vec_i, :] * V_t[vec_j, :], axis=1)))
    
    # Create sparse matrix R: randomly sampled vertex pairs plus the observed edges.
    # NOTE(review): smpl_i repeats every vertex L times whereas smpl_j holds
    # min(L * nnz(row u), n) draws per vertex; zip() pairs the two flattened lists
    # positionally and truncates to the shorter one, so a source vertex is not
    # necessarily paired with the draws made for it - confirm this is intended.
    l = [[u]*L for u in range(A_t.shape[0])]
    smpl_i = [item for sublist in l for item in sublist]
    l = [list(np.random.choice(A_t.shape[0], min(L*A_t[u,:].nnz, A_t.shape[0]), replace=False)) for u in range(A_t.shape[0])]
    smpl_j = [item for sublist in l for item in sublist]
    # The set union removes duplicate pairs and adds the observed edges.
    smpl_index = list(set(zip(smpl_i, smpl_j)) | set(zip(vec_i, vec_j)))
    smpl_i, smpl_j = zip(*smpl_index)
    smpl_data = np.log(sigmoid(-np.sum(V_t[smpl_i, :] * V_t[smpl_j, :], axis=1)))
    
    # Compute normalization term
    norm_graph = 1 / (vec_data.shape[0] + smpl_data.shape[0])
    norm_laplacian = 1 / (V_t.shape[0] * V_t.shape[1])
    norm_time = 1 / (V_t.shape[0] * V_t.shape[1])
    norm_pred = 10 / V_t.shape[0]
    
    # Graph reconstruction loss plus L2 penalty on the embedding.
    loss_t_graph = norm_graph * (-param_alpha * np.sum(vec_v * vec_data) - np.sum(smpl_data)) + \
                   0.5 * lambda_V * np.sum(V_t ** 2)
    
    # Laplacian smoothness term.
    # presumably L_t is a scipy sparse matrix so that `*` is a matrix product - TODO confirm
    loss_t_laplacian = 0.5 * norm_laplacian * np.sum((L_t * V_t) ** 2)
    
    # Temporal smoothness: squared distance to the aligned neighbouring months.
    loss_t_time = 0
    if V_t_plus is not None:
        loss_t_time += 0.5 * norm_time * np.sum((V_t - V_t_plus) ** 2)
    if V_t_minus is not None:
        loss_t_time += 0.5 * norm_time * np.sum((V_t - V_t_minus) ** 2)
    
    # The conditional expression covers the whole sum: the cos/sin mix of V_t and
    # V_t_minus is used when a previous month exists, otherwise V_t alone.
    tmp_recur = np.cos(theta) * V_t + np.sin(theta) * V_t_minus if V_t_minus is not None else V_t
    # Prediction from a linear term (w1) and a squared-feature term (w2).
    pred_t = sigmoid(np.dot(tmp_recur, w1) + 0.5 * np.dot(tmp_recur**2, w2))
    vec_one = np.ones((y_t.shape[0], 1))
    # q_t is the probability assigned to the true label; 1e-10 guards log(0) and the
    # clamp undoes any overshoot above 1 caused by that epsilon.
    q_t = (vec_one - y_t) * (vec_one - pred_t) + y_t * pred_t + 1e-10
    q_t[q_t > 1] = 1.
    # Focal loss on the prediction.
    loss_t_pred = -norm_pred * np.sum(focal_alpha * (vec_one - q_t)**focal_gamma * np.log(q_t))
    
    loss_t = loss_t_graph + loss_t_laplacian + loss_t_time + loss_t_pred
    
    return loss_t, loss_t_graph, loss_t_laplacian, loss_t_time, loss_t_pred

In [None]:
#
# Variables Initialization
# ---------------------------------------------------------------------------------------------------------------
# One random (num_vertices, latent_dim) embedding matrix per month.
for year, month in list_date_seq:
    num_vertex_t = len(dyngraph_info[(year, month)]['key2int'])
    dyngraph_info[(year, month)]['vertex_embed'] = np.random.rand(num_vertex_t, latent_dim)

# Parameters shared by all months: linear (w1) and squared-term (w2) classifier
# weights, one weight per edge attribute (we) and the recurrence angle (theta).
classifiers = {'w1': np.random.rand(latent_dim, 1),
               'w2': np.random.rand(latent_dim, 1),
               'we': np.random.rand(len(edge_attrs), 1),
               'theta': np.random.rand()}

#
# Model Training
# ---------------------------------------------------------------------------------------------------------------
# Work items handed to the pool workers: (t, (graph_laplacian, graph_adjacent)) per month.
list_input_pkg = [(t,
                   (dyngraph_info[date_ym]['graph_laplacian'],
                    dyngraph_info[date_ym]['graph_adjacent']))
                  for t, date_ym in enumerate(list_date_seq)]
num_cores = 9
print("Training ......")
for epoch in range(epochs):
    #                              #
    # Embedding training           #
    # ---------------------------- #
    # The pool is created inside the epoch loop, presumably so that the workers pick
    # up the embeddings/classifiers written at the end of the previous epoch - do not
    # hoist it out of the loop without checking how update_params reads its state.
    with Pool(processes=num_cores) as p:
        res_list = list(tqdm_notebook(p.imap_unordered(update_params, list_input_pkg),
                                      total=seq_length))

    # Each result is (date_ym, V_t, w1, w2, we, theta); results arrive in arbitrary
    # order, so the returned date_ym decides which month an embedding belongs to.
    date_ym_seq, V_t_seq, w1_seq, w2_seq, we_seq, theta_seq = zip(*res_list)
    for date_ym, V_t_new in zip(date_ym_seq, V_t_seq):
        dyngraph_info[date_ym]['vertex_embed'] = V_t_new

    # Shared parameters are averaged over the months ...
    classifiers['w1'] = np.mean(w1_seq, axis=0)
    classifiers['w2'] = np.mean(w2_seq, axis=0)
    # ... and the averaged edge weights are renormalised with a softmax.
    tmp_mul = np.exp(np.mean(we_seq, axis=0))
    classifiers['we'] = tmp_mul / np.sum(tmp_mul)
    classifiers['theta'] = np.mean(theta_seq, axis=0)

    #                              #
    # Validation                   #
    # ---------------------------- #
    with Pool(processes=num_cores) as p:
        res_list = list(tqdm_notebook(p.imap_unordered(validate, list_input_pkg),
                                      total=seq_length))

    # Save the checkpoint.
    # Each file must receive the embedding of ITS OWN month: index by date_ym, not by
    # the `year, month` names left over from the initialisation loop (which always
    # point at the last month of the sequence).
    for t in range(seq_length):
        date_ym = list_date_seq[t]
        np.save('../data/vertex_embeddings/{}_heter_superv_recur_focal_logisticMF_embed_{}-{}'.format(
            weight_type, date_ym[0], date_ym[1]), dyngraph_info[date_ym]['vertex_embed'])

    # Mean of (total, graph, laplacian, time, pred) losses over all months.
    vec_loss = np.mean(res_list, axis=0)
    # theta reported both raw (radians) and wrapped into [0, 360) degrees.
    degree = (classifiers['theta'] % (2*np.pi)) / (2*np.pi) * 360
    print("Epoch {} Loss => total: {:.4f} , g: {:.4f}, l: {:.4f}, t: {:.4f}, p: {:.4f}, theta: {:.3f}, theta_d: {:.3f}".
          format(epoch, vec_loss[0], vec_loss[1], vec_loss[2], vec_loss[3], vec_loss[4], classifiers['theta'], degree))

# 4 Data Partition

In [None]:
def get_embedding_by_date(year, month):
    """Load the saved vertex embedding of one month as a DataFrame.

    Parameters
    ----------
    year, month : int
        Month to load; they select both the ``.npy`` embedding file and the
        ``int2key_<YYYY><MM>.pickle`` index-mapping file.

    Returns
    -------
    pandas.DataFrame
        One row per embedding row, with columns ``embed_0 .. embed_{d-1}`` followed
        by ``customerno`` (``int2key_t[row_index]``).

    Side effects
    ------------
    Prints the shape of the loaded embedding matrix.
    """
    # Fetch graph embedding by customer ID
    embed_path = '../data/vertex_embeddings/embed_heter_superv_recur_focal_logisticMF_embed_{}-{}.npy'.format(year, month)
    E_t = np.load(embed_path)
    print(E_t.shape)
    num_rows, num_dims = E_t.shape[0], E_t.shape[1]
    embedding = pd.DataFrame(data=E_t,
                             index=range(num_rows),
                             columns=['embed_{}'.format(i) for i in range(num_dims)])
    # NOTE: pickle.load can execute arbitrary code; only load mapping files that were
    # produced by this pipeline.
    with open("../data/index_mapping/int2key_{}{}.pickle".format(str(year), str(month).zfill(2)), 'rb') as f:
        int2key_t = pickle.load(f)
    # Row i of the embedding belongs to customer int2key_t[i].  A direct lookup per
    # row index replaces the much slower row-wise DataFrame.apply.
    embedding['customerno'] = [int2key_t[row] for row in range(num_rows)]

    return embedding

def get_feature(year, month, pre_fix="", 
                use_feature_ctbc=True, use_feature_embed=True, use_feature_rankscore=True):
    """Build the per-customer feature table of one month.

    Reads the monthly AML csv (indexed by ``customerno``), left-joins the graph
    embedding returned by ``get_embedding_by_date`` and fills the embedding columns
    of customers without an embedding with 0.

    ``pre_fix`` and the ``use_feature_*`` flags are accepted but not consulted by
    this implementation.  Any ``year == 2017`` request reads the January-2018 csv.

    Returns the joined ``pandas.DataFrame``.
    """
    # Month window; evaluated as before although nothing below consumes it.
    from_date = pd.to_datetime("{}/{}/{}".format(month, 1, year))
    to_date = from_date + pd.DateOffset(months=1)
    print("Construct dataframe in {}.{}".format(year, month))

    # Get CTBC features (2017 falls back to the 2018-01 file)
    src_year, src_month = (2018, 1) if year == 2017 else (year, month)
    file_name = 'aml_all_{}{:02d}_y.csv'.format(str(src_year)[2:], src_month)
    features = pd.read_csv('../data/' + file_name).set_index('customerno')

    # Attach graph embedding onto the features
    embed = get_embedding_by_date(year, month)
    embed_col_names = list(embed.columns)
    embed_col_names.remove('customerno')
    features = features.join(embed.set_index('customerno'), on='customerno')

    # Customers missing from the embedding get an all-zero embedding.
    features.loc[:, embed_col_names] = features.loc[:, embed_col_names].fillna(0)

    return features

## 4.1 Get original features from CTBC AML table by month

In [None]:
# Month range covered by the classifier data set (inclusive bounds passed to
# list_date_tuples, which is defined elsewhere in this notebook).
from_date_ym = DateYM(2018, 1)
to_date_ym = DateYM(2019, 6)
list_date_seq = list_date_tuples(from_date_ym, to_date_ym)

# AML_data maps each month key to its feature frame with the two categorical code
# columns one-hot encoded.
AML_data = {}
# embed_col_names and pdframe_pre are initialised here but not used in this cell.
embed_col_names = []
pdframe_pre = None
for date_ym in list_date_seq:
    # `pdframe` intentionally stays a module-level name: the next cell reads the
    # columns of the LAST month's frame from it.
    pdframe = get_feature(*date_ym)
    # NOTE(review): get_dummies is applied month by month, so the dummy columns
    # depend on the categories present in each month - confirm all months yield
    # the same column set before stacking them.
    AML_data[date_ym] = pd.get_dummies(pdframe, columns=['Customer_Type_Code', 'Customer_Category_Code'])

In [None]:
# Column groups are derived from `pdframe`, i.e. the frame of the last month
# loaded by the previous cell.
column_names = list(pdframe.columns)

# Embedding columns: prefixed with 'embed', excluding any '*_diff' variant.
embed_feat = [name for name in column_names
              if name.startswith('embed') and not name.endswith('_diff')]
print('The length of embed_feat = {}'.format(len(embed_feat)))

# Tabular feature columns: prefixed with 'feature' or 'SARU'.
feature_feat = [name for name in column_names
                if name.startswith(('feature', 'SARU'))]
print('The length of feature_feat = {}'.format(len(feature_feat)))

## 4.2 MinMaxScaler

In [None]:
# Running max/min of every tabular feature across all months.
# NOTE(review): both bounds start at 0, so 'max' can never end up below 0 and 'min'
# can never end up above 0 - for strictly positive features the scaler therefore
# divides by max rather than by (max - min).  Confirm this is intended before
# changing it; downstream rescaling constants were tuned on the current output.
ScalarFrame = pd.DataFrame(index=feature_feat, columns=['max', 'min']).fillna(0)

for date, month_frame in AML_data.items():
    print (date)
    # Only max/min are needed; computing them directly avoids the full describe()
    # summary (count, mean, std, quantiles) over every column.
    month_max = month_frame[feature_feat].max()
    month_min = month_frame[feature_feat].min()
    for n in feature_feat:
        if month_max[n] > ScalarFrame.loc[n, 'max']:
            ScalarFrame.loc[n, 'max'] = month_max[n]
        if month_min[n] < ScalarFrame.loc[n, 'min']:
            ScalarFrame.loc[n, 'min'] = month_min[n]

## 4.3 Construct data by month

In [None]:
label    = {}
announce = {}
FILTER   = {}
GRUdict  = {}

# Number of consecutive months stacked per sample: the target month followed by the
# NUM_LAGS - 1 months before it.
NUM_LAGS = 4

# Scaler bounds are identical for every month, so compute them once.
feature_min = ScalarFrame.T[feature_feat].min()
feature_max = ScalarFrame.T[feature_feat].max()


def _scaled_month_frame(date_ym):
    """Return month `date_ym` without 'Y_SAR' and with min-max scaled tabular features.

    `drop` returns a new frame, so the frame stored in AML_data is left untouched.
    Embedding columns are passed through unchanged.
    """
    frame = AML_data[date_ym].drop('Y_SAR', axis=1)
    # 1e-20 avoids a division by zero for constant features.
    frame[feature_feat] = (frame[feature_feat] - feature_min) / (feature_max - feature_min + 1e-20)
    return frame


for n in range(NUM_LAGS - 1, len(list_date_seq)):
    date_ym = list_date_seq[n]
    print (date_ym)

    # Shape (rows, NUM_LAGS, columns); position 0 is the target month, position k is
    # k months earlier.
    # NOTE(review): months are stacked row by row, which assumes every month frame
    # has the same customers in the same order and the same columns - confirm.
    GRUdict[date_ym] = np.hstack([_scaled_month_frame(list_date_seq[n - lag]).values[:, np.newaxis, :]
                                  for lag in range(NUM_LAGS)])
    label[date_ym]    = AML_data[date_ym]['Y_SAR'].values
    announce[date_ym] = AML_data[date_ym]['announce'].values
    FILTER[date_ym]   = AML_data[date_ym]['FILTER'].values

In [None]:
# Persist every month that has a full history window.  The arrays are passed
# positionally, so they are stored as arr_0 (features), arr_1 (label),
# arr_2 (announce) and arr_3 (FILTER).
for year, month in list_date_seq[3:]:
    month_key = (year, month)
    npz_path = '../../user_data/CloudMile/data/data_{}_{}.npz'.format(year, month)
    np.savez(npz_path,
             GRUdict[month_key],
             label[month_key],
             announce[month_key],
             FILTER[month_key])

# 5 Proposed Model

## 5.1 Parameters Setting

In [None]:
#
# Classifier
# ---------------------
## focal loss
# alpha, gamma_pos and gamma_neg are passed positionally to FocalLoss2 when the
# loss object is created.
alpha = 1
gamma_pos = 6      # focusing exponent applied to the target == 1 term
gamma_neg = 2      # focusing exponent applied to the target == 0 term
grad_clip = 1      # presumably the max gradient norm handed to train() - TODO confirm
lambda_l1 = 0      # presumably the L1 penalty weight handed to train() - TODO confirm
weight_decay = 0   # lambda_l2

#
# VAT
# ---------------------
# Passed to VATLoss2 as xi / eps_pos / eps_neg / ip.
vat_xi = 1e-6        # scale of the probing perturbation
vat_eps_pos = 1e2    # adversarial perturbation scale for samples with y == 1
vat_eps_neg = 1e-1   # adversarial perturbation scale for samples with y == 0
vat_ip = 1           # number of iterations used to estimate the adversarial direction

#
# Training process
# ---------------------
train_batch_size = 128   # batch size of the training DataLoader
test_batch_size = 32     # batch size of the testing DataLoader

#
# Optimizer
# ---------------------
optim_type = 'rlars'       # ['adam', 'rlars']
learn_rate = 1e-4          # learning rate given to the selected optimizer
adam_beta1 = 0.9           # Adam moment coefficients (equal to torch.optim.Adam's defaults)
adam_beta2 = 0.999

max_epochs = 1600          # presumably the number of training epochs - TODO confirm against the training loop

## 5.2 Model Declaration

### 5.2.1 Conv1D

In [None]:
class ConvNet(nn.Module):
    """1-D convolutional classifier over a short feature sequence.

    The input is expected as ``(batch, in_dim, seq_len)``.  Three un-padded
    ``kernel_size=2`` convolutions each shorten the sequence by one step, and the
    first fully-connected layer expects exactly ``in_dim`` flattened features, so
    ``seq_len`` must be 4.

    Parameters
    ----------
    fc_dims : sequence of two ints
        Output sizes of the first and second fully-connected layers.
    in_dim : int
        Number of input channels (features per time step).
    out_dim : int
        Number of outputs; the final layers are ``Sigmoid`` followed by a softmax
        over the output dimension.
    """

    def __init__(self, fc_dims, in_dim=256, out_dim=1):
        super(ConvNet, self).__init__()
        
        self.in_dim = in_dim
        # Channel widths of the three convolutions: in_dim -> 2 * in_dim -> in_dim.
        self.conv1_dim = math.ceil(self.in_dim)
        self.conv2_dim = math.ceil(self.in_dim * 2)
        self.conv3_dim = math.ceil(self.in_dim)
        self.outdim_en1 = fc_dims[0]
        self.outdim_en2 = fc_dims[1]
        self.out_dim = out_dim
        
        self.model_conv = nn.Sequential(
            nn.Conv1d(in_channels=in_dim, out_channels=self.conv1_dim, kernel_size=2),
            nn.BatchNorm1d(self.conv1_dim),
            nn.ReLU(),
            nn.Conv1d(in_channels=self.conv1_dim, out_channels=self.conv2_dim, kernel_size=2),
            nn.BatchNorm1d(self.conv2_dim),
            nn.ReLU(),
            nn.Conv1d(in_channels=self.conv2_dim, out_channels=self.conv3_dim, kernel_size=2),
            nn.BatchNorm1d(self.conv3_dim),
            nn.ReLU(),
        )
        # NOTE(review): nn.Dropout takes the probability of ZEROING an element, so
        # these values drop 90% / 80% / 80% of the activations.  The `1 - x` form
        # looks like a keep-probability convention - confirm which one is intended.
        dropout_1 = 1 - 0.1
        dropout_2 = 1 - 0.2
        dropout_3 = 1 - 0.2
        self.model_fc = nn.Sequential(
            # FC 1
            nn.Dropout(dropout_1),
            nn.Linear(in_features=self.conv3_dim, out_features=self.outdim_en1),
            nn.BatchNorm1d(self.outdim_en1),
            nn.ReLU(),
            # FC 2
            nn.Dropout(dropout_2),
            nn.Linear(in_features=self.outdim_en1, out_features=self.outdim_en2),
            nn.BatchNorm1d(self.outdim_en2),
            nn.ReLU(),
            # FC 3
            nn.Dropout(dropout_3),
            nn.Linear(in_features=self.outdim_en2, out_features=self.out_dim),
            # NOTE(review): Sigmoid followed by Softmax squeezes the outputs twice;
            # kept as is because the losses were tuned against it.
            nn.Sigmoid(),
            # forward() always feeds a 2-D (batch, features) tensor, for which the
            # deprecated implicit dim resolves to 1; state it explicitly.
            nn.Softmax(dim=1)
        )
        
        print("conv_dim = [{}, {}, {}]".format(self.conv1_dim, self.conv2_dim, self.conv3_dim))
        print("fc_dim = [{}/{}, {}/{}, {}/{}, {}]".format(self.conv3_dim, dropout_1, 
                                                          self.outdim_en1, dropout_2, 
                                                          self.outdim_en2, dropout_3, self.out_dim))
        
    def forward(self, x):
        """Return the ``(batch, out_dim)`` softmax output for ``x`` of shape ``(batch, in_dim, 4)``."""
        x = self.model_conv(x)
        # Flatten (batch, conv3_dim, 1) to (batch, conv3_dim) for the linear layers.
        x = self.model_fc(x.view(x.shape[0], -1))
        return x
    
    def get_trainable_parameters(self):
        """Return a generator over the parameters that have ``requires_grad`` set."""
        return (param for param in self.parameters() if param.requires_grad)

### 5.2.2 Focal Loss

In [None]:
class FocalLoss2(nn.Module):
    """Focal loss with separate focusing exponents for positive and negative targets.

    Parameters
    ----------
    alpha : float
        Global scale applied to both terms.
    gamma_pos, gamma_neg : float
        Focusing exponents of the ``targets`` and ``1 - targets`` terms.
    logits : bool
        If True, ``inputs`` are raw logits; otherwise probabilities in [0, 1].
    reduce : bool
        If True, ``forward`` returns scalars; otherwise element-wise tensors.
    """

    def __init__(self, alpha=0.01, gamma_pos=3, gamma_neg=2, logits=False, reduce=True):
        super(FocalLoss2, self).__init__()
        self.alpha = alpha
        self.gamma_pos = gamma_pos
        self.gamma_neg = gamma_neg
        self.logits = logits
        self.reduce = reduce
    
    def forward(self, inputs, targets):
        """Compute the focal loss.

        Returns ``(mean total loss, positive loss / sum(targets),
        negative loss / sum(1 - targets))`` when ``self.reduce`` is True, otherwise
        the element-wise ``(total, positive, negative)`` tensors.  The two averaged
        values are NaN when the corresponding target sum is 0.
        """
        # Element-wise BCE; reduction='none' is the supported spelling of the
        # deprecated reduce=False.
        if self.logits:
            BCE_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction='none')
        else:
            BCE_loss = F.binary_cross_entropy(inputs, targets, reduction='none')
        # exp(-BCE) is the probability assigned to the true class.
        pt = torch.exp(-BCE_loss)
        gamma_diff = self.gamma_pos - self.gamma_neg
        F_loss_pos = self.alpha * targets * (1-pt)**self.gamma_pos * BCE_loss
        # Rescale the positive term by mean(pt)^-(gamma_pos - gamma_neg).
        F_loss_pos = torch.mean(pt)**(-gamma_diff) * F_loss_pos
        F_loss_neg = self.alpha * (1 - targets) * (1-pt)**self.gamma_neg * BCE_loss
        # The negative term is slightly down-weighted in the total.
        F_loss = 1 * F_loss_pos + 0.9 * F_loss_neg
        
        avg_F_loss_pos = torch.sum(F_loss_pos) / torch.sum(targets)
        avg_F_loss_neg = torch.sum(F_loss_neg) / torch.sum(1-targets)
        
        if self.reduce:
            return torch.mean(F_loss), avg_F_loss_pos, avg_F_loss_neg
        else:
            return F_loss, F_loss_pos, F_loss_neg

### 5.2.3 Virtual Adversarial Training

In [None]:
import contextlib


@contextlib.contextmanager
def _disable_tracking_bn_stats(model):

    def switch_attr(m):
        if hasattr(m, 'track_running_stats'):
            m.track_running_stats ^= True
            
    model.apply(switch_attr)
    yield
    model.apply(switch_attr)

    
def _l2_normalize(d):
    d_reshaped = d.view(d.shape[0], -1, *(1 for _ in range(d.dim() - 2)))
    d /= torch.norm(d_reshaped, dim=1, keepdim=True) + 1e-8
    return d


class VATLoss2(nn.Module):
    """Virtual adversarial training loss with label-dependent perturbation size."""

    def __init__(self, xi=1e-6, eps_pos=100, eps_neg=1., ip=1):
        """VAT loss
        :param xi: scale of the probing perturbation used to estimate the adversarial direction (default: 1e-6)
        :param eps_pos: adversarial perturbation scale for samples with y == 1 (default: 100)
        :param eps_neg: adversarial perturbation scale for samples with y == 0 (default: 1.0)
        :param ip: iteration times of computing adv noise (default: 1)
        """
        super(VATLoss2, self).__init__()
        self.xi = xi
        self.eps_pos = eps_pos
        self.eps_neg = eps_neg
        self.ip = ip

    def forward(self, model, x, y):
        """Return the KL divergence between the model output on ``x`` and on ``x + r_adv``.

        :param model: callable module mapping ``x`` to per-class scores
        :param x: input batch; the reshape(-1, 1, 1) below implies a 3-D tensor
        :param y: per-sample 0/1 labels, used only to pick eps_pos or eps_neg
        """
        # Reference prediction on the clean input; no gradient flows through it.
        with torch.no_grad():
            pred = F.softmax(model(x), dim=1)

        # Prepare random unit tensor
        d = torch.rand(x.shape).sub(0.5).to(x.device)
        d = _l2_normalize(d)

        with _disable_tracking_bn_stats(model):
            # Calculate adversarial direction
            for _ in range(self.ip):
                d.requires_grad_()
                pred_hat = model(x + self.xi * d)
                logp_hat = F.log_softmax(pred_hat, dim=1)
                # adv_distance = F.kl_div(logp_hat, pred, reduction='batchmean')  # for PyTorch v1.0
                adv_distance = F.kl_div(logp_hat, pred)                           # for PyTorch v0.4
                adv_distance.backward()
                # The normalised gradient w.r.t. d becomes the new direction.
                d = _l2_normalize(d.grad)
                # Discard the parameter gradients produced by the probing pass.
                model.zero_grad()
    
            # calc LDS
            # Per-sample perturbation scale: eps_pos where y == 1, eps_neg where y == 0.
            r_adv = d * (self.eps_pos * y + self.eps_neg * (1-y)).reshape(-1, 1, 1)
            pred_hat = model(x + r_adv)
            logp_hat = F.log_softmax(pred_hat, dim=1)
            # lds = F.kl_div(logp_hat, pred, reduction='batchmean')  # for PyTorch v1.0
            lds = F.kl_div(logp_hat, pred)                           # for PyTorch v0.4 (default reduction)

        return lds

## 5.3 Data Preparation

### 5.3.1 Training / Testing partition setting

In [None]:
# Train on July-December 2018, test on January-June 2019; entries are (year, month).
training_date = [(2018, month) for month in range(7, 13)]
testing_date = [(2019, month) for month in range(1, 7)]

### 5.3.2 Data loading

In [None]:
# One archive per month, keyed by (year, month).  allow_pickle=True permits pickled
# object arrays, so only load archives written by this notebook.
data = {
    ym: np.load('../../user_data/CloudMile/data/data_{}_{}.npz'.format(*ym), allow_pickle=True)
    for ym in training_date + testing_date
}

In [None]:
# Monthly archives of each partition, in chronological order.
train_data = [data[ym] for ym in training_date]
test_data = [data[ym] for ym in testing_date]

# Archive keys: arr_0 = features, arr_1 = label, arr_2 = announce, arr_3 = FILTER.
# Months are concatenated along the sample axis.
X_train_ = np.concatenate([archive['arr_0'] for archive in train_data])
y_train_ = np.concatenate([archive['arr_1'] for archive in train_data])
training_announce = np.concatenate([archive['arr_2'] for archive in train_data])
training_FILTER = np.concatenate([archive['arr_3'] for archive in train_data])

X_test_ = np.concatenate([archive['arr_0'] for archive in test_data])
y_test_ = np.concatenate([archive['arr_1'] for archive in test_data])
testing_announce = np.concatenate([archive['arr_2'] for archive in test_data])
testing_FILTER = np.concatenate([archive['arr_3'] for archive in test_data])

### 5.3.3 Extracting announced data

In [None]:
# Keep only the samples whose announce flag equals 1.
train_announced = training_announce == 1
test_announced = testing_announce == 1

X_train, y_train = X_train_[train_announced], y_train_[train_announced]
X_test, y_test = X_test_[test_announced], y_test_[test_announced]

### 5.3.4 Magical rescaling

In [None]:
# Hand-tuned per-column rescaling.
# Each entry is (column, scale, power) and means
#     X[:, :, column] <- 0.1 * log10(scale * X[:, :, column] ** power + 1e-10) + 1
_LOG_RESCALE_SPEC = (
    (0, 1e4, 3), (2, 1e4, 5), (3, 1e4, 2), (6, 1e0, 3),
    (7, 1e8, 5), (8, 1e8, 3), (9, 1e8, 6), (10, 1e8, 2),
    (12, 1e9, 3), (13, 1e9, 5), (14, 1e9, 3.5), (15, 1e9, 6),
    (16, 1e15, 3), (18, 1e15, 4), (20, 1e10, 8), (21, 1e8, 3),
    (22, 1e20, 4), (23, 1e20, 4), (24, 1e10, 5), (26, 1e10, 7),
    (27, 1e10, 3), (29, 1e10, 7), (30, 1e10, 3), (32, 1e10, 3),
    (33, 1e10, 2.5), (34, 1e10, 3), (35, 1e20, 4), (36, 1e18, 5),
    (37, 1e20, 4), (38, 1e20, 3), (39, 1e20, 3), (40, 1e20, 3),
    (41, 1e20, 3), (42, 1e20, 3), (43, 1e20, 3), (44, 1e20, 3),
    (45, 1e20, 3), (46, 1e20, 3), (47, 1e20, 3), (51, 1e20, 3),
    (52, 1e20, 3), (53, 1e20, 3), (54, 1e20, 3), (57, 1e20, 20),
    (58, 1e20, 10), (59, 1e20, 8), (60, 1e20, 6), (61, 1e20, 6),
    (62, 1e20, 5), (63, 1e20, 3), (64, 1e20, 3), (65, 1e20, 3),
    (66, 1e20, 3), (67, 1e20, 1.5), (68, 1e20, 1.5), (69, 1e20, 1.5),
    (70, 1e20, 1.5), (71, 1e20, 3), (72, 1e20, 3), (73, 1e20, 3),
    (74, 1e20, 3), (75, 1e20, 3), (76, 1e20, 3), (77, 1e15, 8),
    (78, 1e15, 8), (79, 1e20, 5), (80, 1e20, 6), (81, 1e20, 8),
    (82, 1e20, 10), (83, 1e20, 9.5), (84, 1e20, 9.5), (85, 1e20, 3),
    (86, 1e20, 13), (87, 1e20, 10), (88, 1e20, 9), (89, 1e20, 8),
    (96, 1e20, 4), (97, 1e20, 9), (98, 1e20, 8), (99, 1e20, 7),
    (100, 1e20, 7), (101, 1e20, 5), (102, 1e20, 4), (103, 1e20, 4),
    (104, 1e20, 10), (106, 1e20, 4), (107, 1e20, 8), (108, 1e20, 7),
    (109, 1e20, 7), (110, 1e20, 7), (111, 1e20, 6), (112, 1e20, 5),
    (113, 1e20, 5), (114, 1e20, 8), (115, 1e20, 7), (116, 1e20, 6),
    (117, 1e20, 6), (118, 1e20, 5.5), (119, 1e20, 4), (124, 1e20, 6),
    (125, 1e20, 7.5), (126, 1e20, 11), (127, 1e20, 8), (128, 1e20, 8),
    (129, 1e20, 8), (130, 1e20, 8), (132, 1e12, 12), (133, 1e20, 5.5),
    (140, 1e20, 4), (141, 1e20, 4), (142, 1e20, 4), (143, 1e20, 4),
    (144, 1e20, 10), (146, 1e20, 10), (147, 1e20, 9), (148, 1e20, 8.5),
    (149, 1e20, 9), (150, 1e20, 7.5), (151, 1e20, 12), (152, 1e20, 20),
    (153, 1e20, 11), (154, 1e20, 20), (155, 1e20, 16), (156, 1e20, 10),
    (157, 1e20, 11), (158, 1e20, 9), (159, 1e20, 3.5), (160, 1e20, 11),
    (166, 1e20, 11), (168, 1e20, 14), (173, 1e30, 7), (174, 1e30, 4),
    (178, 1e30, 4), (179, 1e30, 4), (181, 1e30, 7), (182, 1e30, 2),
    (183, 1e30, 2), (185, 1e30, 2), (186, 1e30, 2), (187, 1e30, 2),
    (190, 1e10, 4), (191, 1e20, 6), (192, 1e20, 4), (195, 1e20, 4),
    (196, 1e20, 6), (197, 1e20, 4), (198, 1e20, 6), (200, 1e20, 6),
    (201, 1e20, 5), (202, 1e20, 5), (203, 1e20, 5), (204, 1e30, 3),
    (205, 1e30, 3), (206, 1e20, 7), (207, 1e30, 2), (208, 1e30, 2),
    (209, 1e30, 2), (213, 1e30, 2), (214, 1e7, 2), (215, 1e15, 2),
    (216, 1e15, 2), (217, 1e15, 2), (218, 1e15, 2), (220, 1e15, 2),
)

# Columns that are only raised to a power: (column, power).
_POWER_RESCALE_SPEC = ((91, 5), (93, 2), (94, 2))


def magic_rescale(X):
    """Apply the hand-tuned column rescaling to ``X`` in place and return it.

    ``X`` is indexed as ``X[:, :, column]``, i.e. the feature column is the last of
    three axes.  Columns not listed in the two spec tables are left untouched.
    Every column is transformed from its own previous values only, so the order of
    the table entries does not matter.
    """
    for col, scale, power in _LOG_RESCALE_SPEC:
        X[:, :, col] = 0.1 * np.log10(scale * X[:, :, col] ** power + 1e-10) + 1
    for col, power in _POWER_RESCALE_SPEC:
        X[:, :, col] = X[:, :, col] ** power
    return X


X_train = magic_rescale(X_train)
# NOTE(review): only X_train is rescaled in this cell; X_test is not passed through
# the same transform here.  If no later step rescales the test features, the
# classifier is evaluated on differently scaled inputs - confirm, and if so apply
# magic_rescale(X_test) as well.

### 5.3.5 Converting Numpy tensor into PyTorch tensor

In [None]:
# Convert the numpy arrays to float32 tensors.
X_train, y_train = torch.FloatTensor(X_train), torch.FloatTensor(y_train)
X_test, y_test = torch.FloatTensor(X_test), torch.FloatTensor(y_test)

# Map-style datasets as plain lists of (sample, label) pairs; iterating a tensor
# walks its first dimension, i.e. one sample at a time.
train_data = list(zip(X_train, y_train))
test_data = list(zip(X_test, y_test))

# Training batches are reshuffled every epoch; test order is kept fixed.
train_dataloader = DataLoader(train_data, batch_size=train_batch_size, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=test_batch_size, shuffle=False)

## 5.4 Model Initialization

### 5.4.1 Classifier / Focal Loss initialization
Note that it is recommended to keep the dimensions of each layer small to reduce the risk of overfitting.

In [None]:
# Two-output classifier over the last axis of X_train (features per time step),
# placed on the GPU.
classifier = ConvNet(fc_dims=[128, 64], in_dim=X_train.shape[2], out_dim=2).cuda()

focal_loss = FocalLoss2(alpha, gamma_pos, gamma_neg)

# Only parameters that require gradients are handed to the optimizer.
trainable_params = [p for p in classifier.parameters() if p.requires_grad]
if optim_type == "adam":
    # Wire in the Adam settings declared in the parameter section instead of
    # silently falling back to the library defaults.
    optim_clsfr = optim.Adam(trainable_params,
                             lr=learn_rate,
                             betas=(adam_beta1, adam_beta2),
                             weight_decay=weight_decay)
else:
    optim_clsfr = RangerLars(trainable_params,
                             lr=learn_rate)

### 5.4.2 VAT Loss initialization

In [None]:
# VAT regulariser configured from the vat_* hyper-parameters.
vat_loss2 = VATLoss2(
    xi=vat_xi,
    eps_pos=vat_eps_pos,
    eps_neg=vat_eps_neg,
    ip=vat_ip,
)

## 5.5 Training / Testing

In [None]:
def train(epoch, dataloader, clip_grad_norm=0, lambda_l1=1e-3):
    """Train the notebook-level ``classifier`` for one pass over ``dataloader``.

    Every step minimises ``focal loss + 10 * VAT loss + lambda_l1 * L1(parameters)``
    using the notebook-level ``optim_clsfr``, ``focal_loss`` and ``vat_loss2``.

    Args:
        epoch: Index of the current epoch (accepted but not used).
        dataloader: Yields ``(data, target)`` batches. Batches whose size differs
            from ``dataloader.batch_size`` (a short final batch) are skipped.
        clip_grad_norm: When > 0, gradients are clipped to this maximum norm.
        lambda_l1: Weight of the L1 penalty summed over all classifier parameters.

    Returns:
        ``(labels, scores, clsf_loss_avg, clsf_loss_pos_avg, clsf_loss_neg_avg,
        vat_loss_avg)`` where ``labels``/``scores`` are NumPy arrays over the
        processed samples (``scores`` is the softmax probability of class 1) and
        the remaining values are per-batch averages. An average is ``nan`` when
        no batch contributed to it (e.g. no batch contained a positive sample).
    """
    def _detached(value):
        # Stored losses are only used for reporting; drop their autograd history
        # so the computation graph of every batch is not kept alive all epoch.
        return value.detach() if torch.is_tensor(value) else value

    def _mean(values):
        # Avoid ZeroDivisionError when nothing was recorded.
        return sum(values) / len(values) if values else float('nan')

    label_list = []
    pred_y_list = []

    clsf_loss_batch = []
    clsf_loss_pos_batch = []
    clsf_loss_neg_batch = []
    vat_batch = []
    for batch_idx, (data, target) in enumerate(dataloader):
        if data.size()[0] != dataloader.batch_size:
            continue
        data, target = data.cuda(), target.cuda()
        # Two-column target: column 0 = 1 - label, column 1 = label.
        tmp = target.reshape(-1, 1)
        onehot_target = torch.cat([1-tmp, tmp], dim=1)

        #
        # Update classifier on real samples
        # ------------------------------------------------------------------------------------
        optim_clsfr.zero_grad()

        # VAT Loss
        vat_kld = vat_loss2(classifier, data.permute(0, 2, 1), target)     # for Conv1D

        # Focal Loss
        pred_y = classifier(data.permute(0, 2, 1)).squeeze(-1)
        clsf_loss, clsf_loss_pos, clsf_loss_neg = focal_loss(pred_y, onehot_target)

        # L1 Regularization Loss
        regularizer_loss_l1 = 0
        for param in classifier.parameters():
            regularizer_loss_l1 += torch.sum(torch.abs(param))

        # Total Loss
        loss = clsf_loss + 10 * vat_kld + lambda_l1 * regularizer_loss_l1

        loss.backward()

        # Gradient clipping
        if clip_grad_norm > 0:
            torch.nn.utils.clip_grad_norm_(classifier.parameters(), max_norm=clip_grad_norm)

        optim_clsfr.step()

        #
        # Record the losses
        # ------------------------------------------------------------------------------------
        pred_yy = torch.nn.functional.softmax(pred_y.detach(), dim=1)[:, 1]
        vat_batch.append(_detached(vat_kld))
        clsf_loss_batch.append(_detached(clsf_loss))
        # The positive-class loss is only recorded for batches that contain a positive.
        if torch.sum(target) > 0:
            clsf_loss_pos_batch.append(_detached(clsf_loss_pos))
        clsf_loss_neg_batch.append(_detached(clsf_loss_neg))

        label_list += list(target.cpu().detach().numpy())
        pred_y_list += list(pred_yy.cpu().numpy())

    vat_loss_avg = _mean(vat_batch)
    clsf_loss_avg = _mean(clsf_loss_batch)
    clsf_loss_pos_avg = _mean(clsf_loss_pos_batch)
    clsf_loss_neg_avg = _mean(clsf_loss_neg_batch)

    return np.array(label_list), np.array(pred_y_list), clsf_loss_avg, clsf_loss_pos_avg, clsf_loss_neg_avg, vat_loss_avg

In [None]:
def infer(dataloader):
    """Score ``dataloader`` with the notebook-level ``classifier`` without training.

    Gradients are disabled for the whole pass. Batches whose size differs from
    ``dataloader.batch_size`` (a short final batch) are skipped, so those samples
    do not appear in the returned arrays.

    Args:
        dataloader: Yields ``(data, target)`` batches.

    Returns:
        ``(labels, scores, clsf_loss_avg, clsf_loss_pos_avg, clsf_loss_neg_avg)``.
        ``scores`` is the softmax probability of class 1, i.e. the same score
        that ``train`` returns, so a threshold derived from training scores can
        be applied to it directly. An average is ``nan`` when no batch
        contributed to it.
    """
    def _mean(values):
        # Avoid ZeroDivisionError when nothing was recorded.
        return sum(values) / len(values) if values else float('nan')

    label_list = []
    pred_y_list = []

    clsf_loss_batch = []
    clsf_loss_pos_batch = []
    clsf_loss_neg_batch = []
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(dataloader):
            if data.size()[0] != dataloader.batch_size:
                continue
            data, target = data.cuda(), target.cuda()
            # Two-column target: column 0 = 1 - label, column 1 = label.
            tmp = target.reshape(-1, 1)
            onehot_target = torch.cat([1-tmp, tmp], dim=1)

            pred_y = classifier(data.permute(0, 2, 1)).squeeze(-1)     # for Conv1D
            # Same scoring as train(): probability of class 1 after softmax.
            pred_y_softmax = torch.nn.functional.softmax(pred_y, dim=1)[:, 1]

            clsf_loss, clsf_loss_pos, clsf_loss_neg = focal_loss(pred_y, onehot_target)
            clsf_loss_batch.append(clsf_loss)
            # The positive-class loss is only recorded for batches that contain a positive.
            if torch.sum(target) > 0:
                clsf_loss_pos_batch.append(clsf_loss_pos)
            clsf_loss_neg_batch.append(clsf_loss_neg)

            label_list += list(target.cpu().detach().numpy())
            pred_y_list += list(pred_y_softmax.cpu().detach().numpy())

    clsf_loss_avg = _mean(clsf_loss_batch)
    clsf_loss_pos_avg = _mean(clsf_loss_pos_batch)
    clsf_loss_neg_avg = _mean(clsf_loss_neg_batch)

    return np.array(label_list), np.array(pred_y_list), clsf_loss_avg, clsf_loss_pos_avg, clsf_loss_neg_avg

In [None]:
def evaluate(y_true, y_pred):
    """Return ``(precision, recall, f1)`` of binary predictions ``y_pred`` against ``y_true``."""
    return (
        precision_score(y_true, y_pred),
        recall_score(y_true, y_pred),
        f1_score(y_true, y_pred),
    )

In [None]:
# Log the run configuration so the printed output is self-describing.
print('Parameter Setting ----------------------------------------------------------------------')
print('Model = VAT + Conv1D')
print('conv1d use activation =', True)
print('graph_emdeding = Heter_AutoEncoder')

# Focal-loss and optimisation settings
print('alpha =', alpha)
print('gamma_pos =', gamma_pos)
print('gamma_neg =', gamma_neg)
print('learn_rate =', learn_rate)
print('train_batch_size =', train_batch_size)
print('test_batch_size =', test_batch_size)
print('max_epochs =', max_epochs)

# VAT settings
print('vat_xi =', vat_xi)
print('vat_eps_pos =', vat_eps_pos)
print('vat_eps_neg =', vat_eps_neg)
print('vat_ip =', vat_ip)

# Optimizer / regularisation settings
print('optim_type =', optim_type)
print('weight_decay =', weight_decay)
print('lambda_l1 =', lambda_l1)
print('\n')

# Per-epoch history of the training classification loss and training AUC.
train_history_loss = []
train_history_auc = []
# Best test F1 (at the train-derived threshold) among checkpoints saved so far.
max_f1score = 0
for epoch in range(max_epochs):
    print('Epoch {} -------------------------------------------------------------------------'.format(epoch))
    
    #
    # Training
    # ------------------------------------------------------------------------------------
    classifier.train()
    label_train, pred_y_train, clsf_loss_train, clsf_loss_pos_train, \
        clsf_loss_neg_train, vat_loss_train = train(epoch, train_dataloader, 
                                                    clip_grad_norm=grad_clip, lambda_l1=lambda_l1)
    
    auc_train = roc_auc_score(label_train, pred_y_train)
    train_history_loss.append(clsf_loss_train)
    train_history_auc.append(auc_train)
    print('    Training => auc: {:.6f}, clsf_pos: {}, clsf_neg: {}, vat_loss: {}'.
          format(auc_train, clsf_loss_pos_train, clsf_loss_neg_train, vat_loss_train))
    # Threshold sits just below the lowest score of any positive training sample,
    # so every training positive is predicted positive.
    thres = np.min(pred_y_train[label_train==1]) - 1e-6
    print("                Threshold is set to {}".format(thres))
    print("                Min. Probailities on train is {}".format(np.min(pred_y_train)))
    
    y_predict_bin = np.array(pred_y_train > thres, dtype=int)
    prec_train, recall_train, f1_train = evaluate(label_train, y_predict_bin)
    print('                prec: {:.4f}, rec: {:.4f}, F1score: {:.4f}'.
          format(prec_train, recall_train, f1_train))
    
    # `epoch % 1` is always 0, so the test pass runs after every epoch.
    if epoch % 1 == 0:
        #
        # Testing
        # ------------------------------------------------------------------------------------        
        with torch.no_grad():
            classifier.eval()
            label_test, pred_y_test, clsf_loss_test, clsf_loss_pos_test, clsf_loss_neg_test = infer(test_dataloader)    
        
        auc = roc_auc_score(label_test, pred_y_test)
        
        print("            Min. Probailities on test set with label 1: {}".format(np.min(pred_y_test[label_test==1])))
        print("            Min. Probailities on test set             : {}".format(np.min(pred_y_test)))
        
        # Metrics on the test set using the threshold derived from the training scores.
        y_predict_bin = np.array(pred_y_test > thres, dtype=int)
        prec, recall_bytrain, f1_bytrain = evaluate(label_test, y_predict_bin)
        print('    Testing by train ==> auc: {:.6f}, prec: {:.4f}, rec: {:.4f}, F1score: {:.4f}, clsf_loss: {}'.
              format(auc, prec, recall_bytrain, f1_bytrain, clsf_loss_test))
        
        # Metrics using the lowest score among test positives as threshold
        # (this threshold is derived from the test labels themselves).
        y_predict_bin = np.array(pred_y_test >= np.min(pred_y_test[label_test==1]), dtype=int)
        prec, recall_bytest, f1_bytest = evaluate(label_test, y_predict_bin)
        print('    Testing by test ==> auc: {:.6f}, prec: {:.4f}, rec: {:.4f}, F1score: {:.4f}, clsf_loss: {}'.
              format(auc, prec, recall_bytest, f1_bytest, clsf_loss_test))
        
        # Save a checkpoint only when the positive-class training loss is below 0.005,
        # test recall at the train threshold is exactly 1, and test F1 beats the best so far.
        if clsf_loss_pos_train < 0.005 and f1_bytrain > max_f1score and recall_bytrain == 1:
            max_f1score = f1_bytrain
            # NOTE: 'loss' stores the focal_loss object itself, not a loss value.
            torch.save({'epoch': epoch,
                        'model_state_dict': classifier.state_dict(),
                        'optimizer_state_dict': optim_clsfr.state_dict(),
                        'loss': focal_loss,
                       }, 
                       '../../user_data/CloudMile/data/saved_models/' + \
                       'FeatEmbed-Conv1Dsmall_v2_eps{}{}_focal{}{}_{}_lr{}_epoch{}_Part2_{}'.
                       format(int(-math.log10(vat_eps_pos)), 
                              int(-math.log10(vat_eps_neg)), 
                              gamma_pos, gamma_neg, 
                              optim_type, int(-math.log10(learn_rate)), max_epochs, f1_bytrain))


# 6 Inference Performance

In [None]:
def plotting(pred_y_list, label_list, save_path=False, stat='Train'):
    """Draw log-scaled score histograms for negative and positive samples.

    The bin edges run from 0.400 to 0.499 in steps of 0.001.

    Args:
        pred_y_list: NumPy array of scores.
        label_list: NumPy array of labels (0 = negative, 1 = positive), aligned
            with ``pred_y_list``.
        save_path: When truthy, the figure is also written to
            ``result/<save_path>_<stat>.jpg``.
        stat: Suffix used in the saved file name.
    """
    MIN = 0.4
    MAX = 0.5
    BIN = 100
    bin_edges = [n/(10*BIN) for n in range(int(10*MIN)*BIN, int(10*MAX)*BIN)]

    plt.ylabel('Log Count')
    plt.xlabel('Score')
    plt.yscale('log')

    # Hide all four axis borders.
    axes = plt.gca()
    for side in ('right', 'top', 'left', 'bottom'):
        axes.spines[side].set_color('none')

    plt.grid(color = '#9999CC')
    # Negatives are drawn first, positives on top.
    for label_value, legend_name, colour in ((0, 'Negative', '#598987'),
                                             (1, 'Positive', '#FFD000')):
        plt.hist(pred_y_list[np.where(label_list == label_value)],
                 bins=bin_edges,
                 label=legend_name,
                 color=colour)
    plt.legend(loc='upper right')
    if save_path:
        plt.savefig("result/{}_{}.jpg".format(save_path, stat), dpi=1000, quality=100)
    plt.show()

In [None]:
def _collect_scores(model, dataloader):
    """Score every full batch of ``dataloader`` with ``model`` (no gradients).

    Batches whose size differs from ``dataloader.batch_size`` are skipped.
    Returns ``(labels, scores)`` as NumPy arrays, where ``scores`` is the softmax
    probability of class 1.
    """
    labels, scores = [], []
    with torch.no_grad():
        for batch_idx, (data, target) in tqdm_notebook(enumerate(dataloader)):
            if data.size()[0] != dataloader.batch_size:
                continue
            data, target = data.cuda(), target.cuda()
            logits = model(data.permute(0, 2, 1)).squeeze(-1)     # for Conv1D
            prob_pos = torch.nn.functional.softmax(logits, dim=1)[:, 1]

            labels += list(target.cpu().detach().numpy())
            scores += list(prob_pos.cpu().detach().numpy())
    return np.array(labels), np.array(scores)


def _print_risk_tier(tier, labels, mask):
    """Print positives, candidates and precision for the samples selected by ``mask``."""
    positives = np.sum(labels[mask])
    candidates = np.sum(mask)
    print("{} Risk Positve: {}".format(tier, positives))
    print("{} Risk Candidate: {}".format(tier, candidates))
    print("{} Risk Prec: {}".format(tier, positives / candidates))


def performance(PATH, save_path=False):
    """Load a saved classifier checkpoint and report train/test performance.

    Two thresholds are used:
      * ``thres1`` - lowest score of any positive training sample ("High Risk").
      * ``thres2`` - test score at rank ``#test positives + 5% of test samples``
        in descending order ("Medium Risk" lies between ``thres2`` and ``thres1``).

    Prints AUC, precision/recall/F1 and per-tier counts for both splits, and draws
    the score histograms through ``plotting``.

    Args:
        PATH: Path of a checkpoint containing a ``'model_state_dict'`` entry.
        save_path: Forwarded to ``plotting``; when truthy the figures are saved.
    """
    model = ConvNet(fc_dims=[128, 64], in_dim=X_train.shape[2], out_dim=2).cuda()
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    checkpoint = torch.load(PATH)
    model.load_state_dict(checkpoint['model_state_dict'])

    model.eval()

    ##### Training #####
    Training_label_list, Training_pred_y_list = _collect_scores(model, train_dataloader)
    auc_train = roc_auc_score(Training_label_list, Training_pred_y_list)

    ##### Testing #####
    Testing_label_list, Testing_pred_y_list = _collect_scores(model, test_dataloader)
    auc_test = roc_auc_score(Testing_label_list, Testing_pred_y_list)

    ### Performance
    ratio = .05
    thres1 = np.min(Training_pred_y_list[np.where(Training_label_list == 1)])
    # Clamp the rank so a small test set cannot push it past the last score.
    cut_rank = int(Testing_label_list.sum() + int(len(Testing_label_list)*ratio))
    cut_rank = min(cut_rank, len(Testing_pred_y_list) - 1)
    thres2 = sorted(Testing_pred_y_list, reverse=True)[cut_rank]

    print ('Cut1: {}'.format(thres1))
    print ('Cut2: {}'.format(thres2))

    print("Recall of Training = 1 : {}".format(thres1 > np.min(Training_pred_y_list)))
    print('Training => auc: {:.6f}'.format(auc_train))
    print ("Training Treshold: {}".format(thres1))

    y_predict_bin = np.array(Training_pred_y_list >= thres1, dtype=int)
    prec_train, recall_train, f1_train = evaluate(Training_label_list, y_predict_bin)
    print('prec: {:.4f}, rec: {:.4f}, F1score: {:.4f}'.format(prec_train, recall_train, f1_train))
    print ("Total Positve: {}".format(int(sum(Training_label_list))))
    num_cand = np.sum(Training_pred_y_list >= thres1)
    print ("Total Candidate: {}".format(num_cand))
    print('----------------------------------------------')

    ## High Risk
    _print_risk_tier("High", Training_label_list, Training_pred_y_list >= thres1)
    print('----------------------------------------------')
    ## Medium Risk
    _print_risk_tier("Medium", Training_label_list,
                     (Training_pred_y_list < thres1) & (Training_pred_y_list >= thres2))

    plotting(Training_pred_y_list, Training_label_list, save_path)
#     pd.DataFrame([Training_pred_y_list, Training_label_list, training_announce.tolist()], 
#                  index=['score', 'label', 'annouce']).T.to_csv('result/Training_result.csv', index=None)
    #### ----------------------------------------------
    print("Recall of Testing = 1 : {}".format(thres2 <= np.min(Testing_pred_y_list[Testing_label_list == 1])))
    print('Testing => auc: {:.6f}'.format(auc_test))
    print("Threshold is set to {}".format(thres2))

    y_predict_bin = np.array(Testing_pred_y_list >= thres2, dtype=int)
    prec_test, recall_test, f1_test = evaluate(Testing_label_list, y_predict_bin)
    print('prec: {:.4f}, rec: {:.4f}, F1score: {:.4f}'.format(prec_test, recall_test, f1_test))
    print ("Total Positve: {}".format(int(sum(Testing_label_list))))
    print("Total Candidate: {}".format(np.sum(Testing_pred_y_list >= thres2)))
    print('----------------------------------------------')

    ## High Risk
    _print_risk_tier("High", Testing_label_list, Testing_pred_y_list >= thres1)
    print('----------------------------------------------')
    ## Medium Risk
    _print_risk_tier("Medium", Testing_label_list,
                     (Testing_pred_y_list < thres1) & (Testing_pred_y_list >= thres2))

    plotting(Testing_pred_y_list, Testing_label_list, save_path, stat='Test')
#     pd.DataFrame([Testing_pred_y_list, Testing_label_list, testing_announce.tolist()], 
#                  index=['score', 'label', 'annouce']).T.to_csv('result/Testing_result.csv', index=None)
#     return Training_label_list, Training_pred_y_list, Testing_label_list, Testing_pred_y_list

In [None]:
# '{model path}' is a placeholder: replace it with the path of a saved checkpoint
# (a file containing 'model_state_dict') before running this cell.
PATH = '{model path}'
performance(PATH, 'FeatEmbed-Golden-LastSoftmax')

# 7 Other Model
##  7.1 XGBoost

In [None]:
# One .npz archive per (year, month) of the training and testing periods.
train_data = [
    np.load('../../user_data/CloudMile/data/data_{}_{}.npz'.format(year, month), allow_pickle=True)
    for year, month in training_date
]
test_data = [
    np.load('../../user_data/CloudMile/data/data_{}_{}.npz'.format(year,  month), allow_pickle=True)
    for year, month in testing_date
]

# arr_0..arr_3 hold features, labels, the announce flag and the FILTER array;
# each is concatenated across months along the first axis.
X_train, y_train, training_announce, training_FILTER = (
    np.concatenate([archive[key] for archive in train_data])
    for key in ('arr_0', 'arr_1', 'arr_2', 'arr_3')
)
X_test, y_test, testing_announce, testing_FILTER = (
    np.concatenate([archive[key] for archive in test_data])
    for key in ('arr_0', 'arr_1', 'arr_2', 'arr_3')
)

In [None]:
# Keep only index 0 along the second axis of each sample.
X_train, X_test = X_train[:, 0, :], X_test[:, 0, :]

## Get Feat: the first 314 columns plus the last 14 columns
X_train = np.concatenate([X_train[:, :314], X_train[:, -14:]], axis=1)
X_test = np.concatenate([X_test[:, :314], X_test[:, -14:]], axis=1)

In [None]:
### Announce Extract
# Keep only the rows whose announce flag equals 1, in features and labels alike.
X_train, y_train = X_train[training_announce == 1], y_train[training_announce == 1]
X_test, y_test = X_test[testing_announce == 1], y_test[testing_announce == 1]

##### Model initial & training 

In [None]:
# Gradient-boosted tree baseline: 100 trees of depth 5 with a fixed seed.
clf = XGBClassifier(
    n_estimators=100, learning_rate=0.1, max_depth=5,
    subsample=1, colsample_bytree=1,
    gamma=0, reg_lambda=1, max_delta_step=0, min_child_weight=1,
    seed=1000,
)

# The test split is passed as eval_set so its AUC is logged after each boosting round.
clf.fit(X=X_train, y=y_train,
        eval_set=[(X_test, y_test)],
        eval_metric='auc',
        verbose=True)

In [None]:
# Column 1 of predict_proba is kept as the score for every train and test row.
pred_y_train, pred_y_test = (clf.predict_proba(features)[:, 1] for features in (X_train, X_test))

In [None]:
def plotting(pred_y_list, label_list, save_path=False, stat='Train'):
    """Draw two log-scaled score histograms (negatives vs. positives).

    The first figure uses bin edges 0.000 to 0.995 in steps of 0.005; the second
    uses bin edges 0.000 to 0.034 in steps of 0.001.

    Args:
        pred_y_list: NumPy array of scores.
        label_list: NumPy array of labels (0 = negative, 1 = positive), aligned
            with ``pred_y_list``.
        save_path: When truthy, the figures are also written to
            ``result/<save_path>_<stat>_1.jpg`` and ``..._2.jpg``.
        stat: Suffix used in the saved file names.
    """
    def draw_histogram(bin_edges, file_template):
        # One complete figure: axes styling, both histograms, optional save, show.
        plt.ylabel('Log Count')
        plt.xlabel('Score')
        plt.yscale('log')

        axes = plt.gca()
        for side in ('right', 'top', 'left', 'bottom'):
            axes.spines[side].set_color('none')

        plt.grid(color = '#9999CC')
        for label_value, legend_name, colour in ((0, 'Negative', '#598987'),
                                                 (1, 'Positive', '#FFD000')):
            plt.hist(pred_y_list[np.where(label_list == label_value)],
                     bins=bin_edges,
                     label=legend_name,
                     color=colour)
        plt.legend(loc='upper right')
        if save_path:
            plt.savefig(file_template.format(save_path, stat), dpi=1000, quality=100)
        plt.show()

    ###########################################################
    draw_histogram([n/200 for n in range(200)], "result/{}_{}_1.jpg")
    ###########################################################
    draw_histogram([n/1000 for n in range(35)], "result/{}_{}_2.jpg")

def performance(y_train, pred_y_train, y_test, pred_y_test, save_path=False):   
    """Report train/test performance of pre-computed scores.

    NOTE: this redefines the ``performance(PATH, save_path)`` function from
    section 6; after this cell runs, the name refers to this version.

    Two thresholds are used:
      * ``thres1`` - lowest score of any positive training sample ("High Risk").
      * ``thres2`` - test score at rank ``#test positives + 5% of test samples``
        in descending order ("Medium Risk" lies between ``thres2`` and ``thres1``).

    Args:
        y_train, y_test: NumPy label arrays (1 = positive).
        pred_y_train, pred_y_test: NumPy score arrays aligned with the labels.
        save_path: Forwarded to ``plotting``; when truthy the figures are saved.
    """
    auc_train = roc_auc_score(y_train, pred_y_train)
    auc_test  = roc_auc_score(y_test, pred_y_test)
    
    thres1 = np.min(pred_y_train[np.where(y_train == 1)])
    print('Training => auc: {:.6f}'.format(auc_train))
    print("Threshold is set to {}".format(thres1))
    
    y_predict_bin = np.array(pred_y_train >= thres1, dtype=int)
    prec_train, recall_train, f1_train = evaluate(y_train, y_predict_bin)
    print('prec: {:.4f}, rec: {:.4f}, F1score: {:.4f}'.format(prec_train, recall_train, f1_train))
    
    print ("Total Positve: {}".format(len(np.where(y_train == 1)[0])))
    num_cand = np.sum(pred_y_train >= thres1)
    print ("Total Candidate: {}".format(num_cand))
    plotting(pred_y_train, y_train, save_path)
    
#     pd.DataFrame([pred_y_train, y_train, training_announce.tolist()], 
#                  index=['score', 'label', 'annouce']).T.to_csv('result/Feat_XGB_Training_res_Part2.csv', index=None)
    
    ##### Testing #####
    ratio = 0.05
    # Clamp the rank so a small test set cannot push it past the last score.
    cut_rank = int(y_test.sum() + int(len(y_test)*ratio))
    cut_rank = min(cut_rank, len(pred_y_test) - 1)
    thres2 = sorted(pred_y_test, reverse=True)[cut_rank]
    # Recall is 1 exactly when every positive test sample scores at or above thres2.
    print("Recall of Testing = 1 : {}".format(thres2 <= np.min(pred_y_test[y_test == 1])))
    print('Testing => auc: {:.6f}'.format(auc_test))
    print("Threshold is set to {}".format(thres2))
    
    y_predict_bin = np.array(pred_y_test >= thres2, dtype=int)
    prec_test, recall_test, f1_test = evaluate(y_test, y_predict_bin)
    print('prec: {:.4f}, rec: {:.4f}, F1score: {:.4f}'.format(prec_test, recall_test, f1_test))
    print("Total Positve: {}".format(np.sum(y_test)))
    print("Total Candidate: {}".format(np.sum(pred_y_test >= thres2)))
    print('----------------------------------------------')
    
    ## High Risk
    print("High Risk Positve: {}".format(np.sum(y_test[pred_y_test > thres1])))
    print("High Risk Candidate: {}".format(np.sum(pred_y_test > thres1)))
    print("High Risk Prec: {}".format(np.sum(y_test[np.where(pred_y_test > thres1)])/np.sum(pred_y_test > thres1)))
    print('----------------------------------------------')
    ## Medium Risk
    print("Medium Risk Positve: {}".format(np.sum(y_test[(pred_y_test <= thres1) & (pred_y_test > thres2)])))
    print("Medium Risk Candidate: {}".format(np.sum((pred_y_test <= thres1) & (pred_y_test > thres2))))
    print("Medium Risk Prec: {}".format(np.sum(y_test[np.where((pred_y_test <= thres1) & (pred_y_test > thres2))])/\
                                                             np.sum((pred_y_test <= thres1) & (pred_y_test > thres2))))
    plotting(pred_y_test, y_test, save_path, stat='Test')
#     pd.DataFrame([pred_y_test, y_test, testing_announce.tolist()], 
#                  index=['score', 'label', 'annouce']).T.to_csv('result/Feat_XGB_Testing_res_Part2.csv', index=None)

In [None]:
# Report with the performance() defined in the previous cell (`func` is not defined in this section).
performance(y_train, pred_y_train, y_test, pred_y_test, save_path='Feat_XGB')

##  7.2  Graph Embedding + XGBoost

In [None]:
# One .npz archive per (year, month) of the training and testing periods.
train_data = [
    np.load('../../user_data/CloudMile/data/data_{}_{}.npz'.format(year, month), allow_pickle=True)
    for year, month in training_date
]
test_data = [
    np.load('../../user_data/CloudMile/data/data_{}_{}.npz'.format(year,  month), allow_pickle=True)
    for year, month in testing_date
]

# arr_0..arr_3 hold features, labels, the announce flag and the FILTER array;
# each is concatenated across months along the first axis.
X_train, y_train, training_announce, training_FILTER = (
    np.concatenate([archive[key] for archive in train_data])
    for key in ('arr_0', 'arr_1', 'arr_2', 'arr_3')
)
X_test, y_test, testing_announce, testing_FILTER = (
    np.concatenate([archive[key] for archive in test_data])
    for key in ('arr_0', 'arr_1', 'arr_2', 'arr_3')
)

In [None]:
# Keep only index 0 along the second axis of each sample (all columns retained).
X_train, X_test = X_train[:, 0, :], X_test[:, 0, :]

In [None]:
### Announce Extract
# Keep only the rows whose announce flag equals 1, in features and labels alike.
X_train, y_train = X_train[training_announce == 1], y_train[training_announce == 1]
X_test, y_test = X_test[testing_announce == 1], y_test[testing_announce == 1]

##### Model initial & training 

In [None]:
# Gradient-boosted tree baseline: 100 trees of depth 5 with a fixed seed.
clf = XGBClassifier(
    n_estimators=100, learning_rate=0.1, max_depth=5,
    subsample=1, colsample_bytree=1,
    gamma=0, reg_lambda=1, max_delta_step=0, min_child_weight=1,
    seed=1000,
)

# The test split is passed as eval_set so its AUC is logged after each boosting round.
clf.fit(X=X_train, y=y_train,
        eval_set=[(X_test, y_test)],
        eval_metric='auc',
        verbose=True)

# Column 1 of predict_proba is kept as the score for every train and test row.
pred_y_train, pred_y_test = (clf.predict_proba(features)[:, 1] for features in (X_train, X_test))

In [None]:
# Report with the performance() defined in section 7.1 (`func` is not defined in this section).
performance(y_train, pred_y_train, y_test, pred_y_test, save_path='FeatEmbed_XGB')

##  7.3  Graph Embedding + Sequential + XGBoost

In [None]:
# One .npz archive per (year, month) of the training and testing periods.
train_data = [
    np.load('../../user_data/CloudMile/data/data_{}_{}.npz'.format(year, month), allow_pickle=True)
    for year, month in training_date
]
test_data = [
    np.load('../../user_data/CloudMile/data/data_{}_{}.npz'.format(year,  month), allow_pickle=True)
    for year, month in testing_date
]

# arr_0..arr_3 hold features, labels, the announce flag and the FILTER array;
# each is concatenated across months along the first axis.
X_train, y_train, training_announce, training_FILTER = (
    np.concatenate([archive[key] for archive in train_data])
    for key in ('arr_0', 'arr_1', 'arr_2', 'arr_3')
)
X_test, y_test, testing_announce, testing_FILTER = (
    np.concatenate([archive[key] for archive in test_data])
    for key in ('arr_0', 'arr_1', 'arr_2', 'arr_3')
)

In [None]:
# Flatten each sample into a single row so the whole sequence feeds XGBoost.
X_train, X_test = (arr.reshape(arr.shape[0], -1) for arr in (X_train, X_test))

In [None]:
### Announce Extract
# Keep only the rows whose announce flag equals 1, in features and labels alike.
X_train, y_train = X_train[training_announce == 1], y_train[training_announce == 1]
X_test, y_test = X_test[testing_announce == 1], y_test[testing_announce == 1]

##### Model initial & training 

In [None]:
# Gradient-boosted tree baseline: 100 trees of depth 5 with a fixed seed.
clf = XGBClassifier(
    n_estimators=100, learning_rate=0.1, max_depth=5,
    subsample=1, colsample_bytree=1,
    gamma=0, reg_lambda=1, max_delta_step=0, min_child_weight=1,
    seed=1000,
)

# The test split is passed as eval_set so its AUC is logged after each boosting round.
clf.fit(X=X_train, y=y_train,
        eval_set=[(X_test, y_test)],
        eval_metric='auc',
        verbose=True)

In [None]:
# Column 1 of predict_proba is kept as the score for every train and test row.
pred_y_train, pred_y_test = (clf.predict_proba(features)[:, 1] for features in (X_train, X_test))

In [None]:
# Report with the performance() defined in section 7.1 (`func` is not defined in this section).
performance(y_train, pred_y_train, y_test, pred_y_test, save_path='FeatEmbed-ts_XGB')

##  7.4   Sequential + 1D Conv
### Data loading

In [None]:
# Map every (year, month) of both periods to its loaded .npz archive.
data = {
    ym: np.load('../../user_data/CloudMile/data/data_{}_{}.npz'.format(*ym), allow_pickle=True)
    for ym in training_date + testing_date
}

In [None]:
# Pick the already-loaded archives for each period, preserving the date order.
train_data = [data[ym] for ym in training_date]
test_data = [data[ym] for ym in testing_date]

# arr_0..arr_3 hold features, labels, the announce flag and the FILTER array;
# each is concatenated across months along the first axis.
X_train_, y_train_, training_announce, training_FILTER = (
    np.concatenate([archive[key] for archive in train_data])
    for key in ('arr_0', 'arr_1', 'arr_2', 'arr_3')
)
X_test_, y_test_, testing_announce, testing_FILTER = (
    np.concatenate([archive[key] for archive in test_data])
    for key in ('arr_0', 'arr_1', 'arr_2', 'arr_3')
)

### Extracting announced data

In [None]:
# Keep only the rows whose announce flag equals 1; the unfiltered *_ arrays stay untouched.
X_train, y_train = X_train_[training_announce == 1], y_train_[training_announce == 1]
X_test, y_test = X_test_[testing_announce == 1], y_test_[testing_announce == 1]

### Magical rescaling

In [None]:
X_train[:,:,0] = 0.1 * np.log10(1e4*X_train[:,:,0]**3 + 1e-10) + 1
X_train[:,:,2] = 0.1 * np.log10(1e4*X_train[:,:,2]**5 + 1e-10) + 1
X_train[:,:,3] = 0.1 * np.log10(1e4*X_train[:,:,3]**2 + 1e-10) + 1
X_train[:,:,6] = 0.1 * np.log10(1e0*X_train[:,:,6]**3 + 1e-10) + 1
X_train[:,:,7] = 0.1 * np.log10(1e8*X_train[:,:,7]**5 + 1e-10) + 1
X_train[:,:,8] = 0.1 * np.log10(1e8*X_train[:,:,8]**3 + 1e-10) + 1
X_train[:,:,9] = 0.1 * np.log10(1e8*X_train[:,:,9]**6 + 1e-10) + 1
X_train[:,:,10] = 0.1 * np.log10(1e8*X_train[:,:,10]**2 + 1e-10) + 1
X_train[:,:,12] = 0.1 * np.log10(1e9*X_train[:,:,12]**3 + 1e-10) + 1
X_train[:,:,13] = 0.1 * np.log10(1e9*X_train[:,:,13]**5 + 1e-10) + 1
X_train[:,:,14] = 0.1 * np.log10(1e9*X_train[:,:,14]**3.5 + 1e-10) + 1
X_train[:,:,15] = 0.1 * np.log10(1e9*X_train[:,:,15]**6 + 1e-10) + 1
X_train[:,:,16] = 0.1 * np.log10(1e15*X_train[:,:,16]**3 + 1e-10) + 1
X_train[:,:,18] = 0.1 * np.log10(1e15*X_train[:,:,18]**4 + 1e-10) + 1
X_train[:,:,20] = 0.1 * np.log10(1e10*X_train[:,:,20]**8 + 1e-10) + 1
X_train[:,:,21] = 0.1 * np.log10(1e8*X_train[:,:,21]**3 + 1e-10) + 1
X_train[:,:,22] = 0.1 * np.log10(1e20*X_train[:,:,22]**4 + 1e-10) + 1
X_train[:,:,23] = 0.1 * np.log10(1e20*X_train[:,:,23]**4 + 1e-10) + 1
X_train[:,:,24] = 0.1 * np.log10(1e10*X_train[:,:,24]**5 + 1e-10) + 1
X_train[:,:,26] = 0.1 * np.log10(1e10*X_train[:,:,26]**7 + 1e-10) + 1
X_train[:,:,27] = 0.1 * np.log10(1e10*X_train[:,:,27]**3 + 1e-10) + 1
X_train[:,:,29] = 0.1 * np.log10(1e10*X_train[:,:,29]**7 + 1e-10) + 1
X_train[:,:,30] = 0.1 * np.log10(1e10*X_train[:,:,30]**3 + 1e-10) + 1
X_train[:,:,32] = 0.1 * np.log10(1e10*X_train[:,:,32]**3 + 1e-10) + 1
X_train[:,:,33] = 0.1 * np.log10(1e10*X_train[:,:,33]**2.5 + 1e-10) + 1
X_train[:,:,34] = 0.1 * np.log10(1e10*X_train[:,:,34]**3 + 1e-10) + 1
# Table-driven form of the per-column rescaling for the feature columns listed below.
# Each (column, scale, power) entry applies, in place on X_train:
#     x -> 0.1 * log10(scale * x**power + 1e-10) + 1
# Columns are independent of each other, so the order of entries does not matter.
# The scale/power literals are kept exactly as hand-tuned (ints stay ints, floats stay floats).
for _feat, _scale, _power in [
    (35, 1e20, 4), (36, 1e18, 5), (37, 1e20, 4), (38, 1e20, 3),
    (39, 1e20, 3), (40, 1e20, 3), (41, 1e20, 3), (42, 1e20, 3),
    (43, 1e20, 3), (44, 1e20, 3), (45, 1e20, 3), (46, 1e20, 3),
    (47, 1e20, 3), (51, 1e20, 3), (52, 1e20, 3), (53, 1e20, 3),
    (54, 1e20, 3), (57, 1e20, 20), (58, 1e20, 10), (59, 1e20, 8),
    (60, 1e20, 6), (61, 1e20, 6), (62, 1e20, 5), (63, 1e20, 3),
    (64, 1e20, 3), (65, 1e20, 3), (66, 1e20, 3), (67, 1e20, 1.5),
    (68, 1e20, 1.5), (69, 1e20, 1.5), (70, 1e20, 1.5), (71, 1e20, 3),
    (72, 1e20, 3), (73, 1e20, 3), (74, 1e20, 3), (75, 1e20, 3),
    (76, 1e20, 3), (77, 1e15, 8), (78, 1e15, 8), (79, 1e20, 5),
    (80, 1e20, 6), (81, 1e20, 8), (82, 1e20, 10), (83, 1e20, 9.5),
    (84, 1e20, 9.5), (85, 1e20, 3), (86, 1e20, 13), (87, 1e20, 10),
    (88, 1e20, 9), (89, 1e20, 8),
    (96, 1e20, 4), (97, 1e20, 9), (98, 1e20, 8), (99, 1e20, 7),
    (100, 1e20, 7), (101, 1e20, 5), (102, 1e20, 4), (103, 1e20, 4),
    (104, 1e20, 10), (106, 1e20, 4), (107, 1e20, 8), (108, 1e20, 7),
    (109, 1e20, 7), (110, 1e20, 7), (111, 1e20, 6), (112, 1e20, 5),
    (113, 1e20, 5), (114, 1e20, 8), (115, 1e20, 7), (116, 1e20, 6),
    (117, 1e20, 6), (118, 1e20, 5.5), (119, 1e20, 4), (124, 1e20, 6),
    (125, 1e20, 7.5), (126, 1e20, 11), (127, 1e20, 8), (128, 1e20, 8),
    (129, 1e20, 8), (130, 1e20, 8), (132, 1e12, 12), (133, 1e20, 5.5),
    (140, 1e20, 4), (141, 1e20, 4), (142, 1e20, 4), (143, 1e20, 4),
    (144, 1e20, 10), (146, 1e20, 10), (147, 1e20, 9), (148, 1e20, 8.5),
    (149, 1e20, 9), (150, 1e20, 7.5), (151, 1e20, 12), (152, 1e20, 20),
    (153, 1e20, 11), (154, 1e20, 20), (155, 1e20, 16), (156, 1e20, 10),
    (157, 1e20, 11), (158, 1e20, 9), (159, 1e20, 3.5), (160, 1e20, 11),
    (166, 1e20, 11), (168, 1e20, 14), (173, 1e30, 7), (174, 1e30, 4),
    (178, 1e30, 4), (179, 1e30, 4), (181, 1e30, 7), (182, 1e30, 2),
    (183, 1e30, 2), (185, 1e30, 2), (186, 1e30, 2), (187, 1e30, 2),
    (190, 1e10, 4), (191, 1e20, 6), (192, 1e20, 4), (195, 1e20, 4),
    (196, 1e20, 6), (197, 1e20, 4), (198, 1e20, 6), (200, 1e20, 6),
    (201, 1e20, 5), (202, 1e20, 5), (203, 1e20, 5), (204, 1e30, 3),
    (205, 1e30, 3), (206, 1e20, 7), (207, 1e30, 2), (208, 1e30, 2),
    (209, 1e30, 2), (213, 1e30, 2), (214, 1e7, 2), (215, 1e15, 2),
    (216, 1e15, 2), (217, 1e15, 2), (218, 1e15, 2), (220, 1e15, 2),
]:
    X_train[:, :, _feat] = 0.1 * np.log10(_scale * X_train[:, :, _feat] ** _power + 1e-10) + 1

# These columns get a plain power transform instead of the log rescaling.
for _feat, _power in [(91, 5), (93, 2), (94, 2)]:
    X_train[:, :, _feat] = X_train[:, :, _feat] ** _power

In [None]:
## Get Feat
# Keep the first 314 channels of the last axis plus its final 14 channels, for both splits.
# (X_train / X_test are indexed as 3-D arrays, so joining along axis 2 matches np.dstack.)
X_train = np.concatenate((X_train[:, :, :314], X_train[:, :, -14:]), axis=2)
X_test = np.concatenate((X_test[:, :, :314], X_test[:, :, -14:]), axis=2)

### Converting NumPy arrays into PyTorch tensors

In [None]:
# Cast features and labels of both splits with torch.FloatTensor.
X_train, X_test = torch.FloatTensor(X_train), torch.FloatTensor(X_test)
y_train, y_test = torch.FloatTensor(y_train), torch.FloatTensor(y_test)

# Pair every sample with its label: one (features, label) tuple per index along the first axis.
train_data = [(X_train[i], y_train[i]) for i in range(len(X_train))]
test_data = [(X_test[i], y_test[i]) for i in range(len(X_test))]

# Only the training loader shuffles; the test loader keeps the original sample order.
train_dataloader = DataLoader(train_data, batch_size=train_batch_size, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=test_batch_size, shuffle=False)

## Model Initialization

### Classifier / Focal Loss initialization
Note: keep the dimensions of each layer small to reduce the risk of overfitting.

In [None]:
# Build the classifier on the GPU; its input width is the size of X_train's last axis.
classifier = ConvNet(
    in_dim=X_train.shape[2],
    fc_dims=[128, 64],
    out_dim=2,
).cuda()

focal_loss = FocalLoss2(alpha, gamma_pos, gamma_neg)

# "adam" selects torch's Adam; any other optim_type value falls back to RangerLars.
optimizer_cls = optim.Adam if optim_type == "adam" else RangerLars
# Only parameters that still require gradients are handed to the optimizer.
optim_clsfr = optimizer_cls(
    (p for p in classifier.parameters() if p.requires_grad),
    lr=learn_rate,
)

### VAT Loss initialization

In [None]:
# VAT loss object configured from the notebook-level VAT hyper-parameters.
vat_loss2 = VATLoss2(
    xi=vat_xi,
    eps_pos=vat_eps_pos,
    eps_neg=vat_eps_neg,
    ip=vat_ip,
)

## Training / Testing

In [None]:
# Echo the run configuration so the cell output records how this run was set up.
print('Parameter Setting ----------------------------------------------------------------------')
print('Model = VAT + Conv1D')
print('conv1d use activation = {}'.format(True))
print('graph_emdeding = Heter_AutoEncoder')
for _template, _value in (
    ('alpha = {}', alpha),
    ('gamma_pos = {}', gamma_pos),
    ('gamma_neg = {}', gamma_neg),
    ('learn_rate = {}', learn_rate),
    ('train_batch_size = {}', train_batch_size),
    ('test_batch_size = {}', test_batch_size),
    ('max_epochs = {}', max_epochs),
    ('vat_xi = {}', vat_xi),
    ('vat_eps_pos = {}', vat_eps_pos),
    ('vat_eps_neg = {}', vat_eps_neg),
    ('vat_ip = {}', vat_ip),
    ('optim_type = {}', optim_type),
    ('weight_decay = {}', weight_decay),
    ('lambda_l1 = {}', lambda_l1),
):
    print(_template.format(_value))
print('\n')

train_history_loss, train_history_auc = [], []
max_f1score = 0  # best "test by train-threshold" F1 among checkpoints saved so far
for epoch in range(max_epochs):
    print('Epoch {} -------------------------------------------------------------------------'.format(epoch))

    # ---- Training pass ---------------------------------------------------------------
    classifier.train()
    (label_train, pred_y_train, clsf_loss_train,
     clsf_loss_pos_train, clsf_loss_neg_train, vat_loss_train) = train(
        epoch, train_dataloader, clip_grad_norm=grad_clip, lambda_l1=lambda_l1)

    auc_train = roc_auc_score(label_train, pred_y_train)
    train_history_loss.append(clsf_loss_train)
    train_history_auc.append(auc_train)
    print('    Training => auc: {:.6f}, clsf_pos: {}, clsf_neg: {}, vat_loss: {}'.format(
        auc_train, clsf_loss_pos_train, clsf_loss_neg_train, vat_loss_train))

    # Threshold sits just below the lowest score of any label-1 training sample,
    # so every label-1 training sample passes the strict `>` comparison below.
    thres = np.min(pred_y_train[label_train==1]) - 1e-6
    print("                Threshold is set to {}".format(thres))
    print("                Min. Probailities on train is {}".format(np.min(pred_y_train)))

    y_predict_bin = np.array(pred_y_train > thres, dtype=int)
    prec_train, recall_train, f1_train = evaluate(label_train, y_predict_bin)
    print('                prec: {:.4f}, rec: {:.4f}, F1score: {:.4f}'.format(
        prec_train, recall_train, f1_train))

    if epoch % 1 == 0:  # evaluation interval; with 1 the test pass runs every epoch
        # ---- Test pass ---------------------------------------------------------------
        classifier.eval()
        with torch.no_grad():
            (label_test, pred_y_test, clsf_loss_test,
             clsf_loss_pos_test, clsf_loss_neg_test) = infer(test_dataloader)

        auc = roc_auc_score(label_test, pred_y_test)

        # Lowest score among label-1 test samples (reported, and reused as a threshold below).
        min_pos_test = np.min(pred_y_test[label_test==1])
        print("            Min. Probailities on test set with label 1: {}".format(min_pos_test))
        print("            Min. Probailities on test set             : {}".format(np.min(pred_y_test)))

        # (a) Binarize test scores with the threshold derived from the training split.
        y_predict_bin = np.array(pred_y_test > thres, dtype=int)
        prec, recall_bytrain, f1_bytrain = evaluate(label_test, y_predict_bin)
        print('    Testing by train ==> auc: {:.6f}, prec: {:.4f}, rec: {:.4f}, F1score: {:.4f}, clsf_loss: {}'.format(
            auc, prec, recall_bytrain, f1_bytrain, clsf_loss_test))

        # (b) Binarize with a threshold picked from the test labels themselves;
        #     this uses test labels, so it is a reference figure only.
        y_predict_bin = np.array(pred_y_test >= min_pos_test, dtype=int)
        prec, recall_bytest, f1_bytest = evaluate(label_test, y_predict_bin)
        print('    Testing by test ==> auc: {:.6f}, prec: {:.4f}, rec: {:.4f}, F1score: {:.4f}, clsf_loss: {}'.format(
            auc, prec, recall_bytest, f1_bytest, clsf_loss_test))

        # Checkpoint only when the positive-class training loss is small, the train-threshold
        # test recall is exactly 1, and the train-threshold test F1 beats the best so far.
        if clsf_loss_pos_train < 0.005 and f1_bytrain > max_f1score and recall_bytrain == 1:
            max_f1score = f1_bytrain
            checkpoint = {
                'epoch': epoch,
                'model_state_dict': classifier.state_dict(),
                'optimizer_state_dict': optim_clsfr.state_dict(),
                'loss': focal_loss,  # stores the `focal_loss` object as-is
            }
            # File name encodes the VAT eps exponents, focal gammas, optimizer, lr exponent,
            # epoch budget and the achieved F1.
            checkpoint_file = 'Feat-ts-Conv1Dsmall_v2_eps{}{}_focal{}{}_{}_lr{}_epoch{}_Part2_{}'.format(
                int(-math.log10(vat_eps_pos)),
                int(-math.log10(vat_eps_neg)),
                gamma_pos, gamma_neg,
                optim_type, int(-math.log10(learn_rate)), max_epochs, f1_bytrain)
            torch.save(checkpoint, '../../user_data/CloudMile/data/saved_models/' + checkpoint_file)


In [None]:
# NOTE(review): '{model path}' reads as an unfilled placeholder — presumably it must be replaced
# with the path of a saved checkpoint file before this cell can run; confirm.
PATH = '{model path}'
# `performance` is defined elsewhere in the notebook; the second argument looks like a
# run/result tag — TODO confirm against its definition.
performance(PATH, 'Feat-Golden-LastSoftmax')

##  7.5   Proposed Model performance

In [None]:
# Keep only the samples whose announce flag equals 1, in both splits.
# Assuming NumPy arrays, boolean-mask indexing returns copies, so the in-place
# edits made to X_train afterwards do not write back into X_train_.
X_train, y_train = X_train_[training_announce == 1], y_train_[training_announce == 1]
X_test, y_test = X_test_[testing_announce == 1], y_test_[testing_announce == 1]

### Magical rescaling

In [None]:
# Hand-tuned per-column rescaling of X_train, expressed as a parameter table.
# Each (column, scale, power) entry applies, in place:
#     x -> 0.1 * log10(scale * x**power + 1e-10) + 1
# Columns are independent of each other, so the order of entries does not matter.
# The scale/power literals are kept exactly as tuned (ints stay ints, floats stay floats).
# The transform is in place and not idempotent: re-run the cell that rebuilds X_train
# before re-running this one.
# NOTE(review): only X_train is transformed in this cell; X_test is left untouched here —
# confirm the test split receives the same rescaling somewhere before evaluation.
for _feat, _scale, _power in [
    (0, 1e4, 3), (2, 1e4, 5), (3, 1e4, 2), (6, 1e0, 3),
    (7, 1e8, 5), (8, 1e8, 3), (9, 1e8, 6), (10, 1e8, 2),
    (12, 1e9, 3), (13, 1e9, 5), (14, 1e9, 3.5), (15, 1e9, 6),
    (16, 1e15, 3), (18, 1e15, 4), (20, 1e10, 8), (21, 1e8, 3),
    (22, 1e20, 4), (23, 1e20, 4), (24, 1e10, 5), (26, 1e10, 7),
    (27, 1e10, 3), (29, 1e10, 7), (30, 1e10, 3), (32, 1e10, 3),
    (33, 1e10, 2.5), (34, 1e10, 3),
    (35, 1e20, 4), (36, 1e18, 5), (37, 1e20, 4), (38, 1e20, 3),
    (39, 1e20, 3), (40, 1e20, 3), (41, 1e20, 3), (42, 1e20, 3),
    (43, 1e20, 3), (44, 1e20, 3), (45, 1e20, 3), (46, 1e20, 3),
    (47, 1e20, 3), (51, 1e20, 3), (52, 1e20, 3), (53, 1e20, 3),
    (54, 1e20, 3), (57, 1e20, 20), (58, 1e20, 10), (59, 1e20, 8),
    (60, 1e20, 6), (61, 1e20, 6), (62, 1e20, 5), (63, 1e20, 3),
    (64, 1e20, 3), (65, 1e20, 3), (66, 1e20, 3), (67, 1e20, 1.5),
    (68, 1e20, 1.5), (69, 1e20, 1.5), (70, 1e20, 1.5), (71, 1e20, 3),
    (72, 1e20, 3), (73, 1e20, 3), (74, 1e20, 3), (75, 1e20, 3),
    (76, 1e20, 3), (77, 1e15, 8), (78, 1e15, 8), (79, 1e20, 5),
    (80, 1e20, 6), (81, 1e20, 8), (82, 1e20, 10), (83, 1e20, 9.5),
    (84, 1e20, 9.5), (85, 1e20, 3), (86, 1e20, 13), (87, 1e20, 10),
    (88, 1e20, 9), (89, 1e20, 8),
    (96, 1e20, 4), (97, 1e20, 9), (98, 1e20, 8), (99, 1e20, 7),
    (100, 1e20, 7), (101, 1e20, 5), (102, 1e20, 4), (103, 1e20, 4),
    (104, 1e20, 10), (106, 1e20, 4), (107, 1e20, 8), (108, 1e20, 7),
    (109, 1e20, 7), (110, 1e20, 7), (111, 1e20, 6), (112, 1e20, 5),
    (113, 1e20, 5), (114, 1e20, 8), (115, 1e20, 7), (116, 1e20, 6),
    (117, 1e20, 6), (118, 1e20, 5.5), (119, 1e20, 4), (124, 1e20, 6),
    (125, 1e20, 7.5), (126, 1e20, 11), (127, 1e20, 8), (128, 1e20, 8),
    (129, 1e20, 8), (130, 1e20, 8), (132, 1e12, 12), (133, 1e20, 5.5),
    (140, 1e20, 4), (141, 1e20, 4), (142, 1e20, 4), (143, 1e20, 4),
    (144, 1e20, 10), (146, 1e20, 10), (147, 1e20, 9), (148, 1e20, 8.5),
    (149, 1e20, 9), (150, 1e20, 7.5), (151, 1e20, 12), (152, 1e20, 20),
    (153, 1e20, 11), (154, 1e20, 20), (155, 1e20, 16), (156, 1e20, 10),
    (157, 1e20, 11), (158, 1e20, 9), (159, 1e20, 3.5), (160, 1e20, 11),
    (166, 1e20, 11), (168, 1e20, 14), (173, 1e30, 7), (174, 1e30, 4),
    (178, 1e30, 4), (179, 1e30, 4), (181, 1e30, 7), (182, 1e30, 2),
    (183, 1e30, 2), (185, 1e30, 2), (186, 1e30, 2), (187, 1e30, 2),
    (190, 1e10, 4), (191, 1e20, 6), (192, 1e20, 4), (195, 1e20, 4),
    (196, 1e20, 6), (197, 1e20, 4), (198, 1e20, 6), (200, 1e20, 6),
    (201, 1e20, 5), (202, 1e20, 5), (203, 1e20, 5), (204, 1e30, 3),
    (205, 1e30, 3), (206, 1e20, 7), (207, 1e30, 2), (208, 1e30, 2),
    (209, 1e30, 2), (213, 1e30, 2), (214, 1e7, 2), (215, 1e15, 2),
    (216, 1e15, 2), (217, 1e15, 2), (218, 1e15, 2), (220, 1e15, 2),
]:
    X_train[:, :, _feat] = 0.1 * np.log10(_scale * X_train[:, :, _feat] ** _power + 1e-10) + 1

# These columns get a plain power transform instead of the log rescaling.
for _feat, _power in [(91, 5), (93, 2), (94, 2)]:
    X_train[:, :, _feat] = X_train[:, :, _feat] ** _power

### Converting NumPy arrays into PyTorch tensors

In [None]:
# Cast features and labels of both splits with torch.FloatTensor.
X_train, X_test = torch.FloatTensor(X_train), torch.FloatTensor(X_test)
y_train, y_test = torch.FloatTensor(y_train), torch.FloatTensor(y_test)

# Pair every sample with its label: one (features, label) tuple per index along the first axis.
train_data = [(X_train[i], y_train[i]) for i in range(len(X_train))]
test_data = [(X_test[i], y_test[i]) for i in range(len(X_test))]

# Only the training loader shuffles; the test loader keeps the original sample order.
train_dataloader = DataLoader(train_data, batch_size=train_batch_size, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=test_batch_size, shuffle=False)

## Model Initialization

### Classifier / Focal Loss initialization
Note: keep the dimensions of each layer small to reduce the risk of overfitting.

In [None]:
# Build the classifier on the GPU; its input width is the size of X_train's last axis.
classifier = ConvNet(
    in_dim=X_train.shape[2],
    fc_dims=[128, 64],
    out_dim=2,
).cuda()

focal_loss = FocalLoss2(alpha, gamma_pos, gamma_neg)

# "adam" selects torch's Adam; any other optim_type value falls back to RangerLars.
optimizer_cls = optim.Adam if optim_type == "adam" else RangerLars
# Only parameters that still require gradients are handed to the optimizer.
optim_clsfr = optimizer_cls(
    (p for p in classifier.parameters() if p.requires_grad),
    lr=learn_rate,
)

## Training / Testing

In [None]:
# Echo the run configuration so the cell output records how this run was set up.
print('Parameter Setting ----------------------------------------------------------------------')
print('Model = VAT + Conv1D')
print('conv1d use activation = {}'.format(True))
print('graph_emdeding = Heter_AutoEncoder')
for _template, _value in (
    ('alpha = {}', alpha),
    ('gamma_pos = {}', gamma_pos),
    ('gamma_neg = {}', gamma_neg),
    ('learn_rate = {}', learn_rate),
    ('train_batch_size = {}', train_batch_size),
    ('test_batch_size = {}', test_batch_size),
    ('max_epochs = {}', max_epochs),
    ('vat_xi = {}', vat_xi),
    ('vat_eps_pos = {}', vat_eps_pos),
    ('vat_eps_neg = {}', vat_eps_neg),
    ('vat_ip = {}', vat_ip),
    ('optim_type = {}', optim_type),
    ('weight_decay = {}', weight_decay),
    ('lambda_l1 = {}', lambda_l1),
):
    print(_template.format(_value))
print('\n')

train_history_loss, train_history_auc = [], []
max_f1score = 0  # best "test by train-threshold" F1 among checkpoints saved so far
for epoch in range(max_epochs):
    print('Epoch {} -------------------------------------------------------------------------'.format(epoch))

    # ---- Training pass ---------------------------------------------------------------
    classifier.train()
    (label_train, pred_y_train, clsf_loss_train,
     clsf_loss_pos_train, clsf_loss_neg_train, vat_loss_train) = train(
        epoch, train_dataloader, clip_grad_norm=grad_clip, lambda_l1=lambda_l1)

    auc_train = roc_auc_score(label_train, pred_y_train)
    train_history_loss.append(clsf_loss_train)
    train_history_auc.append(auc_train)
    print('    Training => auc: {:.6f}, clsf_pos: {}, clsf_neg: {}, vat_loss: {}'.format(
        auc_train, clsf_loss_pos_train, clsf_loss_neg_train, vat_loss_train))

    # Threshold sits just below the lowest score of any label-1 training sample,
    # so every label-1 training sample passes the strict `>` comparison below.
    thres = np.min(pred_y_train[label_train==1]) - 1e-6
    print("                Threshold is set to {}".format(thres))
    print("                Min. Probailities on train is {}".format(np.min(pred_y_train)))

    y_predict_bin = np.array(pred_y_train > thres, dtype=int)
    prec_train, recall_train, f1_train = evaluate(label_train, y_predict_bin)
    print('                prec: {:.4f}, rec: {:.4f}, F1score: {:.4f}'.format(
        prec_train, recall_train, f1_train))

    if epoch % 1 == 0:  # evaluation interval; with 1 the test pass runs every epoch
        # ---- Test pass ---------------------------------------------------------------
        classifier.eval()
        with torch.no_grad():
            (label_test, pred_y_test, clsf_loss_test,
             clsf_loss_pos_test, clsf_loss_neg_test) = infer(test_dataloader)

        auc = roc_auc_score(label_test, pred_y_test)

        # Lowest score among label-1 test samples (reported, and reused as a threshold below).
        min_pos_test = np.min(pred_y_test[label_test==1])
        print("            Min. Probailities on test set with label 1: {}".format(min_pos_test))
        print("            Min. Probailities on test set             : {}".format(np.min(pred_y_test)))

        # (a) Binarize test scores with the threshold derived from the training split.
        y_predict_bin = np.array(pred_y_test > thres, dtype=int)
        prec, recall_bytrain, f1_bytrain = evaluate(label_test, y_predict_bin)
        print('    Testing by train ==> auc: {:.6f}, prec: {:.4f}, rec: {:.4f}, F1score: {:.4f}, clsf_loss: {}'.format(
            auc, prec, recall_bytrain, f1_bytrain, clsf_loss_test))

        # (b) Binarize with a threshold picked from the test labels themselves;
        #     this uses test labels, so it is a reference figure only.
        y_predict_bin = np.array(pred_y_test >= min_pos_test, dtype=int)
        prec, recall_bytest, f1_bytest = evaluate(label_test, y_predict_bin)
        print('    Testing by test ==> auc: {:.6f}, prec: {:.4f}, rec: {:.4f}, F1score: {:.4f}, clsf_loss: {}'.format(
            auc, prec, recall_bytest, f1_bytest, clsf_loss_test))

        # Checkpoint only when the positive-class training loss is small, the train-threshold
        # test recall is exactly 1, and the train-threshold test F1 beats the best so far.
        if clsf_loss_pos_train < 0.005 and f1_bytrain > max_f1score and recall_bytrain == 1:
            max_f1score = f1_bytrain
            checkpoint = {
                'epoch': epoch,
                'model_state_dict': classifier.state_dict(),
                'optimizer_state_dict': optim_clsfr.state_dict(),
                'loss': focal_loss,  # stores the `focal_loss` object as-is
            }
            # File name encodes the VAT eps exponents, focal gammas, optimizer, lr exponent,
            # epoch budget and the achieved F1.
            checkpoint_file = 'FeatEmbed-Conv1Dsmall_v2_eps{}{}_focal{}{}_{}_lr{}_epoch{}_Part2_{}'.format(
                int(-math.log10(vat_eps_pos)),
                int(-math.log10(vat_eps_neg)),
                gamma_pos, gamma_neg,
                optim_type, int(-math.log10(learn_rate)), max_epochs, f1_bytrain)
            torch.save(checkpoint, '../../user_data/CloudMile/data/saved_models/' + checkpoint_file)


# NOTE(review): '{model path}' reads as an unfilled placeholder — presumably it must be replaced
# with the path of a saved checkpoint file before this can run; confirm.
PATH = '{model path}'
# `performance` is defined elsewhere in the notebook; the second argument looks like a
# run/result tag — TODO confirm against its definition.
performance(PATH, 'FeatEmbed-Golden-LastSoftmax')

# 8 recall@N

In [23]:
def recallN(df, N, label_col='label', score_col=None):
    """Recall@N: share of all positives that appear within the first N rows.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per scored item. Rows are used in their current order unless
        ``score_col`` is given, so by default the caller must already have
        ranked ``df`` (e.g. sorted by descending score).
    N : int
        Number of leading rows to inspect; passed straight to ``DataFrame.head``.
    label_col : str, default 'label'
        Column whose sum counts the positives (e.g. 0/1 labels).
    score_col : str, optional
        When given, ``df`` is first sorted by this column in descending order,
        which removes the hidden "already sorted" precondition.

    Returns
    -------
    float
        Positives among the top N rows divided by all positives in ``df``;
        NaN when ``df`` contains no positives (recall is undefined).
    """
    if score_col is not None:
        df = df.sort_values(score_col, ascending=False)

    total_positives = df[label_col].sum()
    # Guard the division: without it a frame with no positives yields a silent
    # 0/0 (or a ZeroDivisionError when the sum is a plain Python number).
    if total_positives == 0:
        return float('nan')
    return df.head(N)[label_col].sum() / total_positives

### example

In [27]:
# Load the scored results and rank them from highest to lowest score.
# NOTE(review): absolute, machine-specific path — this cell only runs where the
# file exists at exactly this location.
df = (
    pd.read_csv('/Users/matthewliu/AML-CTBC/Feat-ts-Training_result.csv')
    .sort_values('score', ascending=False)
)
df.head()

Unnamed: 0,score,label,annouce
355,0.523318,1.0,0.0
1057,0.523318,1.0,0.0
2034,0.523318,1.0,0.0
2035,0.523318,1.0,0.0
1091,0.523318,1.0,0.0


In [26]:
# Recall within the first 100 rows of `df`; the rows must already be ranked by descending score.
recallN(df, 100)

1.0