In [1]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import os, gc
import termcolor

import math, random
import pickle
import datetime, time
from tqdm import tqdm_notebook as tqdm

import torch 
from torch import nn
from torch import optim

from sklearn import preprocessing
from sklearn.model_selection import train_test_split, KFold
from sklearn.cluster import KMeans


RANDOM_SEED = 2020

torch.manual_seed(RANDOM_SEED)
torch.cuda.manual_seed(RANDOM_SEED)
torch.cuda.manual_seed_all(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
random.seed(RANDOM_SEED)

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
print(DEVICE)
%matplotlib inline

cpu


In [2]:
def mish(input):
    return input * torch.tanh(nn.functional.softplus(input))

class Mish(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, input):
        return mish(input)
    
class residual_conv1d(nn.Module):

    def __init__(self, in_channel):
        super(residual_conv1d, self).__init__()
        
        self.mish = Mish()
        self.layer = nn.Sequential(
            nn.Conv1d(in_channel, in_channel, 1),
            Mish(),
            nn.Conv1d(in_channel, in_channel, 1)
        )

    def forward(self, x):
        x = x+self.layer(x)
        x = self.mish(x)
        return x

class Conv_1d_Net(nn.Module):

    def __init__(self, in_channel):
        super(Conv_1d_Net, self).__init__()
        
        self.layer_1 = nn.Sequential(
            nn.Conv1d(in_channel, 2*in_channel, 1),
            nn.Dropout(0.2),
            Mish(),
            residual_conv1d(2*in_channel)
        )
        
        self.layer_2 = nn.Sequential(
            nn.Conv1d(2*in_channel, 4*in_channel, 1),
            nn.Dropout(0.2),
            Mish(),
            residual_conv1d(4*in_channel)
        )
        
        self.layer_3 = nn.Sequential(
            nn.Conv1d(4*in_channel, 8*in_channel, 1),
            nn.Dropout(0.2),
            Mish(),
            residual_conv1d(8*in_channel)
        )
       
         
        self.avgpool1d = nn.AdaptiveAvgPool1d(1)
        
        self.fc = nn.Sequential(
            nn.Linear(8*in_channel, 8*in_channel),
            nn.Dropout(0.1),
            Mish(),
            nn.Linear(8*in_channel, 16*in_channel),
            nn.Dropout(0.1),
            Mish(),
            nn.Linear(16*in_channel, 28)
        ) 

    def forward(self, x):
        #_in = x.size()[1]
        x = self.layer_1(x)
        x = self.layer_2(x)
        x = self.layer_3(x)
        #x = self.layer_4(x)
        x = self.avgpool1d(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x
    
    
from torch.optim.optimizer import Optimizer
import math

class RAdam(Optimizer):

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0):
        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
        self.buffer = [[None, None, None] for ind in range(10)]
        super(RAdam, self).__init__(params, defaults)

    def __setstate__(self, state):
        super(RAdam, self).__setstate__(state)

    def step(self, closure=None):

        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:

            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data.float()
                if grad.is_sparse:
                    raise RuntimeError('RAdam does not support sparse gradients')

                p_data_fp32 = p.data.float()

                state = self.state[p]

                if len(state) == 0:
                    state['step'] = 0
                    state['exp_avg'] = torch.zeros_like(p_data_fp32)
                    state['exp_avg_sq'] = torch.zeros_like(p_data_fp32)
                else:
                    state['exp_avg'] = state['exp_avg'].type_as(p_data_fp32)
                    state['exp_avg_sq'] = state['exp_avg_sq'].type_as(p_data_fp32)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                beta1, beta2 = group['betas']

                exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
                exp_avg.mul_(beta1).add_(1 - beta1, grad)

                state['step'] += 1
                buffered = self.buffer[int(state['step'] % 10)]
                if state['step'] == buffered[0]:
                    N_sma, step_size = buffered[1], buffered[2]
                else:
                    buffered[0] = state['step']
                    beta2_t = beta2 ** state['step']
                    N_sma_max = 2 / (1 - beta2) - 1
                    N_sma = N_sma_max - 2 * state['step'] * beta2_t / (1 - beta2_t)
                    buffered[1] = N_sma

                    # more conservative since it's an approximated value
                    if N_sma >= 5:
                        step_size = group['lr'] * math.sqrt((1 - beta2_t) * (N_sma - 4) / (N_sma_max - 4) * (N_sma - 2) / N_sma * N_sma_max / (N_sma_max - 2)) / (1 - beta1 ** state['step'])
                    else:
                        step_size = group['lr'] / (1 - beta1 ** state['step'])
                    buffered[2] = step_size

                if group['weight_decay'] != 0:
                    p_data_fp32.add_(-group['weight_decay'] * group['lr'], p_data_fp32)

                # more conservative since it's an approximated value
                if N_sma >= 5:            
                    denom = exp_avg_sq.sqrt().add_(group['eps'])
                    p_data_fp32.addcdiv_(-step_size, exp_avg, denom)
                else:
                    p_data_fp32.add_(-step_size, exp_avg)

                p.data.copy_(p_data_fp32)

        return loss

In [3]:
def do_preprocessing(d_cols, train_df, calendar_df, sell_prices_df):
    sell_prices_df['id'] = sell_prices_df['item_id'].astype('str')+'_'+sell_prices_df['store_id']+'_validation'
    
    event_type_1 = pd.get_dummies(calendar_df.event_type_1)
    event_type_1.columns = [f'{col}_event_type_1' for col in event_type_1.columns]
    event_type_2 = pd.get_dummies(calendar_df.event_type_1)
    event_type_2.columns = [f'{col}_event_type_2' for col in event_type_2.columns]
    calendar_data = pd.concat([
        calendar_df.drop(columns=['event_name_1', 'event_type_1', 'event_name_2', 'event_type_2'])[['wday', 'd','month','snap_CA', 'snap_TX', 'snap_WI']],
        event_type_1,
        event_type_2
    ], axis=1)
    calendar_data = calendar_data.set_index('d').T
    
    
    
    sell_prices_data = sell_prices_df[sell_prices_df.wm_yr_wk.isin(calendar_df.wm_yr_wk.unique())]
    sell_prices_data.reset_index(drop=True, inplace=True)
    tmp = sell_prices_data.groupby(['id'])[['wm_yr_wk', 'sell_price']].apply(lambda x: x.set_index('wm_yr_wk')['sell_price'].to_dict()).to_dict()
    d = calendar_df.d
    wm_yr_wk = calendar_df.wm_yr_wk
    price_data = {}
    for col in tqdm(train_df.id.unique()):
        price_data[col] = wm_yr_wk.map(tmp[col])
    price_data = pd.DataFrame(price_data)
    price_data.index = d
    
    
    is_sell = price_data.notnull().astype(float).T
    price_data = price_data.fillna(0)
    
    train_df = train_df.T
    train_df.columns = train_df.loc['id', :].values
    train_df = train_df.T
    id_cols = ['id', 'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id']
    
    train_df[d_cols] = train_df[d_cols].astype(float)
    item_id = train_df.groupby(['item_id'])[d_cols].transform('mean') 
    print('complete  item_id')
    item_id_state_id = train_df.groupby(['item_id', 'state_id'])[d_cols].transform('mean')
    print('complete  item_id  state_id')
    item_id_store_id  = train_df.groupby(['item_id', 'store_id'])[d_cols].transform('mean')
    print('complete  item_id  store_id')
    dept_id_store_id = train_df.groupby(['dept_id', 'store_id'])[d_cols].transform('mean')
    print('complete  dept_id  store_id')

    return train_df[id_cols+d_cols], calendar_df, calendar_data[d_cols], price_data.T[d_cols], is_sell[d_cols],\
                item_id, item_id_state_id, item_id_store_id, dept_id_store_id
                


class preprocessing():
    def __init__(self, path):
        self.path=path
        self.df = pd.read_csv(self.path+'sales_train_validation.csv')
        self.df['all_id'] = 1
        self.calendar_df = pd.read_csv(self.path+'calendar.csv')
        self.sell_prices_df = pd.read_csv(self.path+'sell_prices.csv')
        self.sample_submission_df = pd.read_csv(self.path+'sample_submission.csv')
        
        self.d_cols = self.df.columns[self.df.columns.str.startswith('d_')].values.tolist()
        self.d_cols = self.d_cols[-1200:]
        
        self.train_df, self.calendar_df, self.calendar_data, self.price_data, self.is_sell,\
        self.item_id, self.item_id_state_id, self.item_id_store_id, self.dept_id_store_id = do_preprocessing(
            self.d_cols, self.df, self.calendar_df, self.sell_prices_df
        )

In [4]:
gc.collect()

0

In [5]:
path = '/Users/kanoumotoharu/Downloads/m5-forecasting-accuracy/'
PreProcessing = preprocessing(path)

HBox(children=(IntProgress(value=0, max=30490), HTML(value='')))




KeyboardInterrupt: 

In [None]:
class Normal_Dataset_Train(torch.utils.data.Dataset):
    def __init__(self, PreProcessing, d_cols, data_size=100):
        self.PreProcessing = PreProcessing
        self.datanum = len(PreProcessing.train_df)
        self.d_cols = d_cols
        self.data_size=data_size
        
    def __len__(self):
        return self.datanum

    def __getitem__(self, idx):
        
        a = random.randint(0,len(self.d_cols)-self.data_size-28)
        
        #cols = self.d_cols[a:a+self.data_size]
        #next_cols = self.d_cols[a+28:a+self.data_size+28]
        
        #True_cols = self.d_cols[a+self.data_size:a+self.data_size+28]
        
        state = self.PreProcessing.train_df.iloc[idx,:]['state_id']
        calendar_cols = [
            'wday', 'month', f'snap_{state}', 'Cultural_event_type_1', 'National_event_type_1', 'Religious_event_type_1',
            'Sporting_event_type_1', 'Cultural_event_type_2', 'National_event_type_2', 'Religious_event_type_2', 'Sporting_event_type_2'
        ]
        
        x = np.vstack((
            self.PreProcessing.train_df.iloc[[idx],6+a:6+a+self.data_size].values.astype(float),
            self.PreProcessing.item_id.iloc[[idx],a:a+self.data_size].values.astype(float),
            self.PreProcessing.item_id_state_id.iloc[[idx],a:a+self.data_size].values.astype(float),
            self.PreProcessing.item_id_store_id.iloc[[idx],a:a+self.data_size].values.astype(float),
            self.PreProcessing.dept_id_store_id.iloc[[idx],a:a+self.data_size].values.astype(float),
            
            self.PreProcessing.price_data.iloc[[idx],a:a+self.data_size].values.astype(float),
            self.PreProcessing.price_data.iloc[[idx],a+28:a+self.data_size+28].values.astype(float),
            
            self.PreProcessing.is_sell.iloc[[idx],a:a+self.data_size].values.astype(float),
            self.PreProcessing.is_sell.iloc[[idx],a+28:a+self.data_size+28].values.astype(float),
            
            self.PreProcessing.calendar_data.loc[calendar_cols, :].values.astype(float)[:, a:a+self.data_size],
            self.PreProcessing.calendar_data.loc[calendar_cols, :].values.astype(float)[:, a+28:a+self.data_size+28]
        ))
        y = self.PreProcessing.train_df.iloc[[idx],6+a+self.data_size:6+a+self.data_size+28].values.astype(float)
        
        x = torch.FloatTensor(x)
        y = torch.FloatTensor(y)
        return x, y
    
    
    
class Normal_Dataset_Test(torch.utils.data.Dataset):
    def __init__(self, PreProcessing, d_cols, data_size=100):
        self.PreProcessing = PreProcessing
        self.datanum = len(PreProcessing.train_df)
        self.d_cols_len = len(d_cols)
        #_int = int(d_cols[-1].replace('d_', ''))
        #next_dcols = [f'd_{i}' for i in range(_int+1, _int+1+28)]
        #self.d_cols = d_cols[-100:]+next_dcols
                   
        
        self.data_size=data_size
        
    def __len__(self):
        return self.datanum

    def __getitem__(self, idx):
        
        #cols = self.d_cols[:100]
        #next_cols = self.d_cols[28:100+28]
        
        #True_cols = self.d_cols[-28:]
        
        state = self.PreProcessing.train_df.iloc[idx,:]['state_id']
        calendar_cols = [
            'wday', 'month', f'snap_{state}', 'Cultural_event_type_1', 'National_event_type_1', 'Religious_event_type_1',
            'Sporting_event_type_1', 'Cultural_event_type_2', 'National_event_type_2', 'Religious_event_type_2', 'Sporting_event_type_2'
        ]
        
        x = np.vstack((
            self.PreProcessing.train_df.iloc[[idx],6+self.d_cols_len-self.data_size:6+self.d_cols_len].values.astype(float),
            self.PreProcessing.item_id.iloc[[idx],self.d_cols_len-self.data_size:self.d_cols_len].values.astype(float),
            self.PreProcessing.item_id_state_id.iloc[[idx],self.d_cols_len-self.data_size:self.d_cols_len].values.astype(float),
            self.PreProcessing.item_id_store_id.iloc[[idx],self.d_cols_len-self.data_size:self.d_cols_len].values.astype(float),
            self.PreProcessing.dept_id_store_id.iloc[[idx],self.d_cols_len-self.data_size:self.d_cols_len].values.astype(float),
            
            self.PreProcessing.price_data.iloc[[idx],self.d_cols_len-self.data_size:self.d_cols_len].values.astype(float),
            self.PreProcessing.price_data.iloc[[idx],self.d_cols_len+28-self.data_size:self.d_cols_len+28].values.astype(float),
            
            self.PreProcessing.is_sell.iloc[[idx],self.d_cols_len-self.data_size:self.d_cols_len].values.astype(float),
            self.PreProcessing.is_sell.iloc[[idx],self.d_cols_len+28-self.data_size:self.d_cols_len+28].values.astype(float),
            
            self.PreProcessing.calendar_data.loc[calendar_cols, :].values.astype(float)[:, self.d_cols_len-self.data_size:self.d_cols_len],
            self.PreProcessing.calendar_data.loc[calendar_cols, :].values.astype(float)[:, self.d_cols_len+28-self.data_size:self.d_cols_len+28]
        ))
        y = self.PreProcessing.train_df.iloc[[idx],6+self.d_cols_len:6+self.d_cols_len+28].values.astype(float)
        
        x = torch.FloatTensor(x)
        y = torch.FloatTensor(y)
        return x, y

In [None]:
def train_model(model, PreProcesing, trn_cols):
    model = model.to(DEVICE)
    dataset_train = Normal_Dataset_Train(PreProcessing, trn_cols)
    dataloader_train = torch.utils.data.DataLoader(dataset_train, batch_size = 100, shuffle = True)

    dataset_val = Normal_Dataset_Test(PreProcessing, trn_cols)
    dataloader_val = torch.utils.data.DataLoader(dataset_val, batch_size = 100, shuffle = False)
    
    num_epochs = 40
    best_epoch = -1
    best_score = 10000
    early_stoppping_cnt = 0
    best_model = model
    
    optimizer = RAdam(model.parameters(), lr=4e-4)
    criterion = nn.MSELoss()
    
    for epoch in range(num_epochs):
        start_time = time.time()
        model.train()
        avg_loss = 0.
        
        for x_batch, y_batch in dataloader_train:
            optimizer.zero_grad()
            x_batch = x_batch.to(DEVICE)
            y_batch = y_batch.to(DEVICE)
            
            preds = model(x_batch)
            
            loss = criterion(preds.cpu(), y_batch.cpu())
            loss = torch.sqrt(loss)
            loss = loss.to(DEVICE)
            loss.backward()
            optimizer.step()
            avg_loss += loss.item() / len(dataloader_train)
            del loss; gc.collect()

        model.eval()
        avg_val_loss = 0.
        
        for x_batch, y_batch in dataloader_val:
            x_batch = x_batch.to(DEVICE)
            y_batch = y_batch.to(DEVICE)

            preds = model(x_batch)
            loss = self.criterion(preds.cpu(), y_batch.cpu())
            loss = torch.sqrt(loss)
            
            avg_val_loss += loss.item() / len(dataloader_val)
            del loss; gc.collect()


        if best_score>avg_val_loss:
            best_score = avg_val_loss
            early_stoppping_cnt=0
            best_epoch=epoch+1
            best_model = model
            elapsed = time.time() - start_time
            p_avg_val_loss = termcolor.colored(np.round(avg_val_loss, 4),"red")

            print(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {p_avg_val_loss} time: {elapsed:.0f}s')
        else:
            early_stoppping_cnt+=1
            elapsed = time.time() - start_time
            print(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f} time: {elapsed:.0f}s')

        if (epoch>8) and (early_stoppping_cnt>5):
            break

    print(f'best_score : {best_score}    best_epoch : {best_epoch}')
    
    preds = []
    for x_batch, y_batch in dataloader_val:
        x_batch = x_batch.to(DEVICE)
        y_batch = y_batch.to(DEVICE)
        preds += model(x_batch).detch().cpu().tolist()
    
    return best_model, best_score, np.array(preds)

In [None]:
trn_cols = PreProcessing.d_cols[:900]

dataset_train = Normal_Dataset_Train(PreProcessing, trn_cols)
dataloader_train = torch.utils.data.DataLoader(dataset_train, batch_size = 100, shuffle = True)

dataset_val = Normal_Dataset_Test(PreProcessing, trn_cols)
dataloader_val = torch.utils.data.DataLoader(dataset_val, batch_size = 100, shuffle = True)

c=0
for x,y in dataloader_train:
    if c<1:
        print(x.size(), y.size())
        c+=1
    else:
        break
        
c=0
for x,y in dataloader_val:
    if c<1:
        print(x.size(), y.size())
        c+=1
    else:
        break

In [None]:
del dataset_train, dataloader_train, dataset_val, dataloader_val, x,y;gc.collect()

In [None]:
model = Conv_1d_Net(31)
trn_cols = PreProcessing.d_cols[:900]
gc.collect()

In [None]:
train_model(model, PreProcessing, trn_cols)