In [1]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import os, gc
import math, random
import pickle
import datetime, time
from tqdm import tqdm_notebook as tqdm

import torch 
from torch import nn
from torch import optim

from sklearn import preprocessing
from sklearn.model_selection import train_test_split, KFold
from sklearn.cluster import KMeans

%matplotlib inline

In [2]:
def _event_type_dummies(event_types, categories, suffix):
    """One-hot encode one calendar event-type column over a fixed category list.

    Reindexing to `categories` guarantees one output column per category even
    when that category never occurs in `event_types`, so both event slots
    expose the same set of rows downstream.
    """
    dummies = pd.get_dummies(event_types).reindex(columns=categories, fill_value=0).astype(int)
    dummies.columns = [f'{category}_{suffix}' for category in categories]
    return dummies


def Preprocessing(train_df, calendar_df, sell_prices_df):
    """Build the calendar and price tables and mask sales on days without a price.

    Parameters
    ----------
    train_df : DataFrame with an `id` column and one `d_<n>` column per day.
    calendar_df : DataFrame with `d`, `wm_yr_wk`, `wday`, `month`, the three
        `snap_*` columns and the `event_name_*` / `event_type_*` columns.
    sell_prices_df : DataFrame with `item_id`, `store_id`, `wm_yr_wk` and
        `sell_price`. NOTE: an `id` column is added to this frame in place.

    Returns
    -------
    train_df : copy of the input indexed by `id`; a `d_<n>` value is NaN
        wherever the item has no price for that day.
    calendar_df : the input calendar, unchanged.
    calendar_data : features x days table (rows: `wday`, `month`, `snap_*`,
        `<type>_event_type_1`, `<type>_event_type_2`; columns: `d` labels).
    price_data : days x items table of sell prices (index: `d`, columns: ids).
    """
    sell_prices_df['id'] = sell_prices_df['item_id'].astype('str')+'_'+sell_prices_df['store_id']+'_validation'
    # Use the day columns actually present instead of assuming d_1..d_1913.
    d_cols = [col for col in train_df.columns if str(col).startswith('d_')]

    # Both event slots share one category list. The second slot must be built
    # from event_type_2 (it used to be a copy of event_type_1).
    event_categories = sorted(
        pd.concat([calendar_df.event_type_1, calendar_df.event_type_2]).dropna().unique()
    )
    event_type_1 = _event_type_dummies(calendar_df.event_type_1, event_categories, 'event_type_1')
    event_type_2 = _event_type_dummies(calendar_df.event_type_2, event_categories, 'event_type_2')
    calendar_data = pd.concat([
        calendar_df.drop(columns=['event_name_1', 'event_type_1', 'event_name_2', 'event_type_2'])[['wday', 'd','month','snap_CA', 'snap_TX', 'snap_WI']],
        event_type_1,
        event_type_2
    ], axis=1)
    calendar_data = calendar_data.set_index('d').T

    # Weekly prices -> (week x item) table, then expanded to one row per
    # calendar day. Items without any price row end up as all-NaN columns.
    in_calendar = sell_prices_df.wm_yr_wk.isin(calendar_df.wm_yr_wk.unique())
    sell_prices_data = sell_prices_df.loc[in_calendar, ['id', 'wm_yr_wk', 'sell_price']]
    # Keep the last price if an (item, week) pair is repeated.
    sell_prices_data = sell_prices_data.drop_duplicates(subset=['id', 'wm_yr_wk'], keep='last')
    weekly_price = sell_prices_data.pivot(index='wm_yr_wk', columns='id', values='sell_price')
    price_data = weekly_price.reindex(
        index=calendar_df.wm_yr_wk.values,
        columns=train_df['id'].unique(),
    )
    price_data.index = calendar_df.d
    price_data.columns.name = None

    # Blank out sales on days where the item had no price (not on sale yet),
    # aligning prices to sales by day label and item id.
    train_out = train_df.copy()
    train_out.index = train_out['id'].values
    no_price = price_data.loc[d_cols, train_out['id'].values].isna().values.T
    sales = train_out[d_cols].values.astype(float)
    sales[no_price] = np.nan
    train_out[d_cols] = sales

    return train_out, calendar_df, calendar_data, price_data


def make_calendar_data(calendar_data, train_cols):
    """Assemble the calendar feature tensor shared by every item.

    Rows are `wday`, `month` and the eight `<type>_event_type_<slot>` rows of
    `calendar_data`, followed by six shifted copies of the eight event rows.
    A row named `<event>_shift<k>` holds, for each day, the value of `<event>`
    `k` days later (negative `k` looks back). Only the `train_cols` day columns
    are kept.

    Returns a FloatTensor of shape (58, len(train_cols)).
    """
    event_rows = [
        f'{kind}_event_type_{slot}'
        for slot in (1, 2)
        for kind in ('Cultural', 'National', 'Religious', 'Sporting')
    ]
    base = calendar_data.loc[['wday', 'month'] + event_rows, :]
    events = base.loc[event_rows, :]

    pieces = [base]
    for offset in (-14, -7, 7, 14, 28, 56):
        # Shift along the day axis: transpose so days are rows, shift, transpose back.
        moved = events.T.shift(-offset).T
        moved.index = [f'{name}_shift{offset}' for name in moved.index]
        pieces.append(moved)

    stacked = pd.concat(pieces, axis=0)
    window = stacked[train_cols]
    return torch.FloatTensor(window.values.astype(float))

def make_data(train_cols, null_check_cols, state, train_df, calendar_data, price_data):
    """Build the per-item feature tensor for every item sold in `state`.

    Parameters
    ----------
    train_cols : list of `d_<n>` labels forming the time window.
    null_check_cols : list of `d_<n>` labels; an item is kept only if it has
        no missing sales in any of these columns.
    state : state code used to filter `state_id` and to pick `snap_<state>`.
    train_df : sales frame indexed by item id, with the id/meta columns and
        the day columns (as returned by `Preprocessing`).
    calendar_data : features x days table containing a `snap_<state>` row.
    price_data : days x items price table (index: day labels, columns: ids).

    Returns
    -------
    FloatTensor of shape (n_items, 15, len(train_cols)). Along dim 1:
      0      sales
      1      snap_<state>
      2-7    snap_<state> shifted by -14, -7, 7, 14, 28, 56 days
             (`_shift<k>` is the value `k` days later; negative looks back)
      8      price
      9-11   price 3, 7 and 14 days later
      12-14  price 3, 7 and 14 days earlier
    """
    def _to_tensor(frame):
        return torch.FloatTensor(frame.values.astype(float))

    data_train = train_df[['id', 'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id']+train_cols]

    # Keep only items with complete sales over the null-check window.
    null = train_df[null_check_cols].isnull().sum(axis=1)
    null = null[null==0].index
    data_train = data_train[data_train.id.isin(null)]

    train_product = data_train[data_train.state_id==state]['id'].unique()
    data = data_train.loc[train_product,train_cols]

    snap_rows = [f'snap_{state}']
    snap = calendar_data.loc[snap_rows,:]
    calendar_parts = [snap]
    for shift in [-14, -7, 7, 14, 28, 56]:
        # Transpose so days are rows, shift along days, transpose back.
        tmp_calendar = snap.T.shift(-shift).T
        tmp_calendar.index = [f'{col}_shift{shift}' for col in tmp_calendar.index]
        calendar_parts.append(tmp_calendar)
    calendar = pd.concat(calendar_parts, axis=0)[train_cols]

    state_prices = price_data.loc[:,train_product]
    price = price_data.T[train_cols].loc[train_product,:]
    price_1 = state_prices.shift(-3).T[train_cols]
    price_2 = state_prices.shift(-7).T[train_cols]
    price_3 = state_prices.shift(-14).T[train_cols]

    past_price_1 = state_prices.shift(3).T[train_cols]
    past_price_2 = state_prices.shift(7).T[train_cols]
    past_price_3 = state_prices.shift(14).T[train_cols]
    # Diagnostics: number of missing values introduced by each price shift.
    print(
        price_1.isnull().sum().sum(),
        price_2.isnull().sum().sum(),
        price_3.isnull().sum().sum())
    print(
        past_price_1.isnull().sum().sum(), 
        past_price_2.isnull().sum().sum(), 
        past_price_3.isnull().sum().sum()
    )

    data = _to_tensor(data)
    calendar = _to_tensor(calendar)
    per_item_prices = [
        _to_tensor(frame)
        for frame in (price, price_1, price_2, price_3, past_price_1, past_price_2, past_price_3)
    ]

    # Stack everything in one shot: every (n_items, T) table becomes an
    # (n_items, 1, T) slab and the shared calendar rows are broadcast to each
    # item. This replaces a per-item Python loop that round-tripped through
    # nested lists.
    n_items = data.size(0)
    slabs = [data.unsqueeze(1), calendar.unsqueeze(0).expand(n_items, -1, -1)]
    slabs.extend(table.unsqueeze(1) for table in per_item_prices)
    data_list = torch.cat(slabs, dim=1)
    return data_list  #, calendar, price, price_1, price_2, price_3, past_price_1, past_price_2, past_price_3
    

In [3]:
class Mydatasets(torch.utils.data.Dataset):
    """Dataset pairing each item's feature rows with the shared calendar rows.

    `data` is indexed as `data[idx, :, :]` and `calendar` is concatenated
    below each item's rows along dim 0. In training mode the last 28 time
    steps of row 0 are the target and everything before them is the input.
    """

    def __init__(self, data, calendar, train = True):
        self.data, self.calendar = data, calendar
        self.datanum = len(data)
        self.train = train

    def __len__(self):
        return self.datanum

    def __getitem__(self, idx):
        sample = torch.cat((self.data[idx, :, :], self.calendar), dim=0)
        if not self.train:
            # Inference mode: hand back the full window, no target split.
            return sample
        inputs = sample[:, :-28]
        target = sample[0, -28:]
        return inputs, target

In [4]:
def mish(input):
    """Mish activation, element-wise: ``input * tanh(softplus(input))``."""
    gate = torch.tanh(nn.functional.softplus(input))
    return input * gate

class Mish(nn.Module):
    """Module form of `mish`, so the activation can sit inside `nn.Sequential`."""

    def forward(self, input):
        activated = mish(input)
        return activated

In [5]:
class residual_conv1d(nn.Module):
    """Residual block of two kernel-size-1 convolutions with a Mish in between.

    Computes ``mish(x + conv(mish(conv(x))))``; the channel count is preserved.
    """

    def __init__(self, in_channel):
        super().__init__()

        self.mish = Mish()
        branch = [
            nn.Conv1d(in_channel, in_channel, 1),
            Mish(),
            nn.Conv1d(in_channel, in_channel, 1),
        ]
        self.layer = nn.Sequential(*branch)

    def forward(self, x):
        # Skip connection first, activation after the sum.
        return self.mish(x + self.layer(x))

class Conv_1d_Net(nn.Module):
    """Kernel-size-1 Conv1d network producing 28 non-negative outputs.

    Four convolutional stages widen the channels
    (in -> 2x -> 4x -> 4x -> 8x), the time axis is average-pooled to length 1,
    and a three-layer MLP ending in ReLU produces the 28 outputs.
    Input is (batch, in_channel, length); output is (batch, 28).
    """

    @staticmethod
    def _stage(channels_in, channels_out):
        """Conv1d(k=1) -> Dropout(0.2) -> Mish -> residual block."""
        return nn.Sequential(
            nn.Conv1d(channels_in, channels_out, 1),
            nn.Dropout(0.2),
            Mish(),
            residual_conv1d(channels_out)
        )

    def __init__(self, in_channel):
        super().__init__()

        c1, c2, c4, c8 = in_channel, 2*in_channel, 4*in_channel, 8*in_channel

        self.layer_1 = self._stage(c1, c2)
        self.layer_2 = self._stage(c2, c4)
        self.layer_3 = self._stage(c4, c4)
        self.layer_4 = self._stage(c4, c8)

        self.avgpool1d = nn.AdaptiveAvgPool1d(1)

        head = [
            nn.Linear(c8, 16*in_channel),
            nn.Dropout(0.1),
            Mish(),
            nn.Linear(16*in_channel, 32*in_channel),
            nn.Dropout(0.1),
            Mish(),
            nn.Linear(32*in_channel, 28),
            # Final ReLU clamps the outputs at zero.
            nn.ReLU(),
        ]
        self.fc = nn.Sequential(*head)

    def forward(self, x):
        for stage in (self.layer_1, self.layer_2, self.layer_3, self.layer_4):
            x = stage(x)
        pooled = self.avgpool1d(x)
        flat = torch.flatten(pooled, 1)
        return self.fc(flat)

In [6]:
from torch.optim.optimizer import Optimizer
import math

class RAdam(Optimizer):
    """Rectified Adam optimizer.

    Keeps Adam's first/second moment estimates, but only applies the adaptive
    (second-moment) scaling once the approximated SMA length `N_sma` reaches 5;
    before that the update is a bias-corrected momentum step.

    Args:
        params: iterable of parameters or parameter-group dicts.
        lr: learning rate.
        betas: coefficients for the running averages of the gradient and its square.
        eps: term added to the denominator of the adaptive step.
        weight_decay: decoupled-style decay applied as ``p -= weight_decay * lr * p``.
    """

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0):
        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
        # Small ring of cached (key, N_sma, step_size) entries, indexed by step % 10.
        self.buffer = [[None, None, None] for ind in range(10)]
        super(RAdam, self).__init__(params, defaults)

    def __setstate__(self, state):
        super(RAdam, self).__setstate__(state)

    def step(self, closure=None):
        """Perform one optimization step.

        Args:
            closure: optional callable that re-evaluates the model and returns the loss.

        Returns:
            The closure's loss, or None when no closure is given.

        Raises:
            RuntimeError: if a parameter has a sparse gradient.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            beta1, beta2 = group['betas']

            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data.float()
                if grad.is_sparse:
                    raise RuntimeError('RAdam does not support sparse gradients')

                # All arithmetic is done on an fp32 view/copy and written back at the end.
                p_data_fp32 = p.data.float()

                state = self.state[p]

                if len(state) == 0:
                    state['step'] = 0
                    state['exp_avg'] = torch.zeros_like(p_data_fp32)
                    state['exp_avg_sq'] = torch.zeros_like(p_data_fp32)
                else:
                    state['exp_avg'] = state['exp_avg'].type_as(p_data_fp32)
                    state['exp_avg_sq'] = state['exp_avg_sq'].type_as(p_data_fp32)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']

                # Keyword `value=` / `alpha=` forms replace the deprecated
                # positional-scalar overloads.
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)

                state['step'] += 1
                # The cached step size depends on lr and betas, so they are part
                # of the key; keying on the step alone leaked one group's step
                # size into the next group.
                cache_key = (state['step'], group['lr'], beta1, beta2)
                buffered = self.buffer[int(state['step'] % 10)]
                if buffered[0] == cache_key:
                    N_sma, step_size = buffered[1], buffered[2]
                else:
                    buffered[0] = cache_key
                    beta2_t = beta2 ** state['step']
                    N_sma_max = 2 / (1 - beta2) - 1
                    N_sma = N_sma_max - 2 * state['step'] * beta2_t / (1 - beta2_t)
                    buffered[1] = N_sma

                    # more conservative since it's an approximated value
                    if N_sma >= 5:
                        step_size = group['lr'] * math.sqrt((1 - beta2_t) * (N_sma - 4) / (N_sma_max - 4) * (N_sma - 2) / N_sma * N_sma_max / (N_sma_max - 2)) / (1 - beta1 ** state['step'])
                    else:
                        step_size = group['lr'] / (1 - beta1 ** state['step'])
                    buffered[2] = step_size

                if group['weight_decay'] != 0:
                    p_data_fp32.add_(p_data_fp32, alpha=-group['weight_decay'] * group['lr'])

                # more conservative since it's an approximated value
                if N_sma >= 5:
                    denom = exp_avg_sq.sqrt().add_(group['eps'])
                    p_data_fp32.addcdiv_(exp_avg, denom, value=-step_size)
                else:
                    p_data_fp32.add_(exp_avg, alpha=-step_size)

                p.data.copy_(p_data_fp32)

        return loss

In [30]:
def train_model(model, train_loader, test_loader, num_epochs=40, lr=1e-4,
                eta_min=1e-3, t_max=10, device=None, checkpoint_path='net.pt'):
    """Train `model` with RAdam on an RMSE loss and return the best-scoring weights.

    Args:
        model: network mapping a batch of inputs to (batch, 28) predictions.
        train_loader: iterable of (x_batch, y_batch) used for optimisation.
        test_loader: iterable of (x_batch, y_batch) used for validation.
        num_epochs: maximum number of epochs.
        lr: base learning rate handed to RAdam.
        eta_min: `eta_min` of the cosine schedule.
        t_max: `T_max` of the cosine schedule (in scheduler steps, i.e. batches).
        device: device to train on; defaults to CUDA when available, else CPU.
        checkpoint_path: file the returned model's `state_dict` is saved to.

    Returns:
        The model, loaded with the weights of the epoch that had the lowest
        average validation loss.
    """
    if device is None:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'

    model = model.to(device)
    criterion = nn.MSELoss()
    optimizer = RAdam(params=model.parameters(), lr=lr)
    # NOTE(review): the scheduler is stepped once per batch, and with the
    # defaults eta_min (1e-3) is larger than lr (1e-4), so the rate cycles
    # between 1e-4 and 1e-3 every 2 * t_max batches rather than decaying.
    # Kept as-is; confirm this is intended.
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=t_max, eta_min=eta_min)

    best_epoch = -1
    best_score = 10000
    best_state = None
    epochs_without_improvement = 0

    for epoch in range(num_epochs):
        print('epoch', epoch)
        start_time = time.time()

        model.train()
        avg_loss = 0.
        for x_batch, y_batch in tqdm(train_loader):
            optimizer.zero_grad()
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)

            preds = model(x_batch)
            loss = torch.sqrt(criterion(preds.squeeze(1), y_batch))

            loss.backward()
            optimizer.step()
            scheduler.step()

            avg_loss += loss.item() / len(train_loader)

        model.eval()
        avg_val_loss = 0.
        # No gradients are needed for validation; skipping the graph saves memory.
        with torch.no_grad():
            for x_batch, y_batch in test_loader:
                x_batch = x_batch.to(device)
                y_batch = y_batch.to(device)

                preds = model(x_batch)
                loss = torch.sqrt(criterion(preds.squeeze(1), y_batch))

                avg_val_loss += loss.item() / len(test_loader)

        elapsed = time.time() - start_time
        print(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f} time: {elapsed:.0f}s')

        if best_score>avg_val_loss:
            best_score = avg_val_loss
            epochs_without_improvement = 0
            best_epoch = epoch
            # Snapshot the weights so the reported best score matches what is
            # saved and returned (previously the last epoch's weights were kept).
            best_state = {
                name: tensor.detach().cpu().clone()
                for name, tensor in model.state_dict().items()
            }
        else:
            epochs_without_improvement += 1
        # Early stopping only kicks in once the best epoch is past epoch 20.
        if (best_epoch>20) and (epochs_without_improvement>7):
            break

    print(f'best_score : {best_score}    best_epoch : {best_epoch}')
    if best_state is not None:
        model.load_state_dict(best_state)
    torch.save(model.state_dict(), checkpoint_path)

    return model

In [8]:
# Directory holding the M5 competition CSVs. Set the M5_DATA_DIR environment
# variable to point somewhere else instead of editing this cell.
path = os.environ.get('M5_DATA_DIR', '/Users/kanoumotoharu/Downloads/m5-forecasting-accuracy/')

original_train_df = pd.read_csv(os.path.join(path, 'sales_train_validation.csv'))
calendar_df = pd.read_csv(os.path.join(path, 'calendar.csv'))
sell_prices_df = pd.read_csv(os.path.join(path, 'sell_prices.csv'))
sample_submission_df = pd.read_csv(os.path.join(path, 'sample_submission.csv'))

In [9]:
%%time

train_df, calendar_df, calendar_data, price_data = Preprocessing(original_train_df, calendar_df, sell_prices_df)
d_cols = [f'd_{i}' for i in range(1,1914)]

n = 58
train_cols = d_cols[-n:]
null_check_cols = d_cols[-(n+14):]

#'snap_CA', 'snap_TX', 'snap_WI'

state='CA'
data_ca = make_data(train_cols, null_check_cols, state, train_df, calendar_data, price_data)
state='TX'
data_tx = make_data(train_cols, null_check_cols, state, train_df, calendar_data, price_data)
state='WI'
data_wi = make_data(train_cols, null_check_cols, state, train_df, calendar_data, price_data)
calendar = make_calendar_data(calendar_data, train_cols)

HBox(children=(IntProgress(value=0, max=30490), HTML(value='')))


0 0 0
0 0 0


HBox(children=(IntProgress(value=0, max=12196), HTML(value='')))


0 0 0
0 0 0


HBox(children=(IntProgress(value=0, max=9147), HTML(value='')))


0 0 0
0 0 0


HBox(children=(IntProgress(value=0, max=9147), HTML(value='')))


CPU times: user 1min 45s, sys: 15.1 s, total: 2min
Wall time: 2min 4s


In [10]:
# Stack the per-state tensors along the item axis, then release the originals.
data = torch.cat([data_ca, data_tx, data_wi], 0)

del data_ca
del data_tx
del data_wi
gc.collect()

108

In [11]:
# Hold out 30% of the items for validation. The split is seeded so validation
# scores are comparable between runs.
trn_indx, val_indx = train_test_split(
    list(range(data.size()[0])), test_size=0.3, random_state=42
)
trn_data = data[trn_indx, :, :]
val_data = data[val_indx, :, :]

trn_data_set = Mydatasets(trn_data, calendar, train=True)
trn_loader = torch.utils.data.DataLoader(trn_data_set, batch_size=100, shuffle=True)

# Validation batches are not shuffled: the metric is a mean of per-batch RMSEs,
# so a fixed batch composition keeps it stable from epoch to epoch.
val_data_set = Mydatasets(val_data, calendar, train=True)
val_loader = torch.utils.data.DataLoader(val_data_set, batch_size=50, shuffle=False)

In [25]:
# Prefer the GPU when PyTorch can see one; otherwise run on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

'cpu'

In [26]:
# Model input channels = per-item feature rows + shared calendar rows; both are
# concatenated along dim 0 in Mydatasets.__getitem__. calendar.size()[1] is the
# number of time steps and only matched by coincidence (58 rows, 58 days).
trn_data.size()[1] + calendar.size()[0]

73

## Cross-validation

In [27]:
# Seeded so every call to k.split() yields the same five folds across runs.
k = KFold(n_splits=5, shuffle=True, random_state=42)

In [28]:
# Scratch check: walk the folds once without training. Leaves `i` / `v` bound to
# the train / validation indices of the last fold.
for i, v in k.split(np.arange(data.size(0))):
    pass

In [29]:
# Input channels = per-item feature rows + shared calendar rows (both are
# concatenated along dim 0 in Mydatasets.__getitem__); derived rather than
# hard-coded so it follows any change to the feature set.
in_channels = data.size()[1] + calendar.size()[0]

for fold, (trn_indx, val_indx) in enumerate(k.split(list(range(data.size()[0])))):
    trn_data = data[trn_indx, :, :]
    val_data = data[val_indx, :, :]

    trn_data_set = Mydatasets(trn_data, calendar, train=True)
    trn_loader = torch.utils.data.DataLoader(trn_data_set, batch_size=100, shuffle=True)

    # Validation batches are kept in a fixed order so the per-batch RMSE average
    # is comparable between epochs.
    val_data_set = Mydatasets(val_data, calendar, train=True)
    val_loader = torch.utils.data.DataLoader(val_data_set, batch_size=50, shuffle=False)

    model = Conv_1d_Net(in_channels)
    model = train_model(model, trn_loader, val_loader)

    # train_model writes to 'net.pt' on every call, so keep a per-fold copy
    # instead of letting each fold overwrite the previous one.
    torch.save(model.state_dict(), f'net_fold{fold}.pt')

epoch 0


HBox(children=(IntProgress(value=0, max=244), HTML(value='')))

KeyboardInterrupt: 