# Library

In [207]:
import warnings
warnings.filterwarnings("ignore")


import numpy as np
import pandas as pd
import datetime
from catboost import CatBoostClassifier
import lightgbm as lgb
from time import time
from tqdm import tqdm_notebook as tqdm
from collections import Counter
from scipy import stats
import gc, pickle
import ast
import math, random

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import StratifiedKFold, KFold, RepeatedKFold,TimeSeriesSplit, GroupKFold, GridSearchCV, train_test_split, TimeSeriesSplit
from sklearn import metrics
from sklearn.metrics import classification_report, confusion_matrix, mean_squared_error, log_loss
from sklearn.linear_model import Ridge,Lasso, BayesianRidge
from sklearn.svm import LinearSVR
from sklearn.preprocessing import minmax_scale

import torch 
from torch import nn
from torch import optim

# Single seed shared by every random number generator used in this notebook.
RANDOM_SEED = 2020

# Seed PyTorch (CPU, current CUDA device, all CUDA devices), NumPy and the
# stdlib `random` module with the same value.
torch.manual_seed(RANDOM_SEED)
torch.cuda.manual_seed(RANDOM_SEED)
torch.cuda.manual_seed_all(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
random.seed(RANDOM_SEED)

# Device string used when moving the model and batches: 'cuda' when available, else 'cpu'.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

%matplotlib inline

In [408]:
# Scratch cell: displays the current Unix timestamp (`time` is `time.time`, imported above).
time()

1593256436.351873

In [208]:
%matplotlib inline

# Preprocessing

In [209]:
def create_is_sell_data(sell_prices_df, calendar_df, train_df):
    """Build per-item daily price and availability tables.

    An ``id`` column (``item_id`` + ``_`` + ``store_id`` + ``_evaluation``) is
    added to ``sell_prices_df`` in place. Price rows whose ``wm_yr_wk`` appears
    in ``calendar_df`` are kept, and every calendar day is mapped to the price
    listed for its week, for each id in ``train_df``.

    Parameters
    ----------
    sell_prices_df : DataFrame with ``item_id``, ``store_id``, ``wm_yr_wk``
        and ``sell_price`` columns. Mutated: gains an ``id`` column.
    calendar_df : DataFrame with ``d`` and ``wm_yr_wk`` columns.
    train_df : DataFrame with the six id columns; it is concatenated
        column-wise with frames indexed by id, so it presumably needs to be
        indexed by ``id`` as well -- verify against the caller.

    Returns
    -------
    (price_data, is_sell)
        Both frames hold the six id columns followed by one column per value
        of ``calendar_df.d``. ``price_data`` contains the price (0 where no
        price is listed); ``is_sell`` contains 1.0 where a price is listed and
        0.0 otherwise.
    """
    id_cols = ['id', 'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id']

    sell_prices_df['id'] = sell_prices_df['item_id'].astype('str')+'_'+sell_prices_df['store_id']+'_evaluation'
    in_calendar = sell_prices_df.wm_yr_wk.isin(calendar_df.wm_yr_wk.unique())
    sell_prices_data = sell_prices_df[in_calendar]
    sell_prices_data.reset_index(drop=True, inplace=True)

    # id -> {wm_yr_wk: sell_price}
    week_price_by_id = sell_prices_data.groupby(['id'])[['wm_yr_wk', 'sell_price']].apply(
        lambda grp: grp.set_index('wm_yr_wk')['sell_price'].to_dict()
    ).to_dict()

    # One column per item: the price of the week each calendar day falls in.
    calendar_weeks = calendar_df.wm_yr_wk
    price_data = pd.DataFrame({
        item: calendar_weeks.map(week_price_by_id[item])
        for item in tqdm(train_df.id.unique())
    })
    price_data.index = calendar_df.d

    # Transpose so that rows are items and columns are days.
    is_sell = price_data.notnull().astype(float).T
    price_data = price_data.fillna(0).T

    item_info = train_df[id_cols]
    is_sell = pd.concat([item_info, is_sell], axis=1)
    price_data = pd.concat([train_df[id_cols], price_data], axis=1)

    return price_data, is_sell

def set_index(df, name):
    """Guess a role for each column of ``df`` from the values in its first row.

    Parameters
    ----------
    df : DataFrame whose first row is inspected.
    name : label assigned to columns whose first value does not support the
        ``in`` operator (for example plain numbers).

    Returns
    -------
    dict
        ``{column: role}`` where role is ``'id'`` if the first value contains
        ``'_evaluation'``, ``'d'`` if it contains ``'d_'`` (this wins when both
        match), or ``name`` for values that cannot be searched. Columns that
        match none of these are left out of the mapping.
    """
    roles = {}
    for col, value in df.iloc[0,:].items():
        try:
            if '_evaluation' in value:
                roles[col] = 'id'
            if 'd_' in value:
                roles[col] = 'd'
        except TypeError:
            # Only the "value is not searchable" failure is expected here; a
            # bare `except:` would also hide KeyboardInterrupt and real bugs.
            if not isinstance(value, str):
                roles[col] = name
    return roles

def dcol2int(col):
    """Turn a day label such as ``'d_12'`` into the integer ``12``.

    Labels that do not start with ``'d_'`` are returned unchanged.
    """
    if col[:2] != 'd_':
        return col
    day_number = col.replace('d_', '')
    return int(day_number)
    
def str_category_2_int(data):
    """Integer-encode every object-dtype column of ``data`` except ``id`` and ``d``.

    Columns are overwritten in place with their ``pd.factorize`` codes; the
    code ``-1`` (which factorize uses for missing values) is replaced by NaN.
    The same frame is returned.
    """
    object_cols = [c for c in data.columns if data[c].dtype==object]
    for c in object_cols:
        if c in ('id', 'd'):
            continue
        codes, _ = pd.factorize(data[c])
        data[c] = codes
        data[c] = data[c].replace(-1, np.nan)
    return data

def select_near_event(x, event_name):
    """Join the entries of ``x`` that occur in ``event_name``.

    Each kept entry is followed by an underscore (e.g. ``'a_c_'``). NaN is
    returned when nothing is kept.
    """
    joined = ''.join(y+'_' for y in x if y in event_name)
    return joined if len(joined) != 0 else np.nan
    
def sort_d_cols(d_cols):
    """Return the ``d_<n>`` labels ordered by their numeric part.

    Labels are rebuilt from the parsed integers, so ``'d_01'`` comes back as
    ``'d_1'``.
    """
    day_numbers = sorted(int(label.replace('d_','')) for label in d_cols)
    return [f'd_{n}' for n in day_numbers]

In [379]:
def preprocessing(path, d_cols, train_d_cols):
    """Load the M5 CSV files under ``path`` and build the model input tables.

    Parameters
    ----------
    path : str
        Directory prefix (with trailing separator); file names are appended
        to it directly.
    d_cols : list of str
        Every day label to carry through the sales/price product. Labels
        missing from the sales file become all-NaN columns via ``reindex``.
    train_d_cols : list of str
        Day labels kept in the returned tables.

    Returns
    -------
    tuple
        ``(train_df, price_data, is_sell_data, is_sell_dept_store,
        is_sell_cat_store, calendar_T)`` where

        * ``train_df`` -- id columns plus, per day, units sold multiplied by
          that day's price (a sales amount, not a unit count);
        * ``price_data`` -- per-item price for each training day;
        * ``is_sell_data`` -- 1.0/0.0 flag: a price is listed for that day;
        * ``is_sell_dept_store`` / ``is_sell_cat_store`` -- the flag summed
          over all items sharing the same (dept, store) / (cat, store),
          broadcast back to every item row;
        * ``calendar_T`` -- calendar features transposed so that rows are
          features and columns are the training days.
    """
    id_cols = ['id', 'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id']

    # sample_submission.csv used to be read here as well; it was never used.
    train_df = pd.read_csv(path+'sales_train_evaluation.csv')
    calendar_df = pd.read_csv(path+'calendar.csv')
    sell_prices_df = pd.read_csv(path+'sell_prices.csv')

    train_df.index = train_df.id
    calendar_df['date']=pd.to_datetime(calendar_df.date)
    calendar_df.index = calendar_df.d
    price_data, is_sell = create_is_sell_data(sell_prices_df, calendar_df, train_df)

    # Keep the id-like columns and exactly the requested day columns.
    str_cols = [col for col in train_df.columns if 'id' in str(col)]
    train_df = train_df.reindex(columns=str_cols+d_cols)

    # Units sold * price per day.
    train_df = pd.concat([
        train_df[id_cols],
        train_df.loc[train_df.index,d_cols]*price_data.loc[train_df.index,d_cols]
    ], axis=1)
    train_df = train_df[id_cols+train_d_cols]

    is_sell_data = is_sell[train_d_cols]
    is_sell_dept_store = is_sell.groupby(['dept_id', 'store_id'])[train_d_cols].transform('sum')
    is_sell_cat_store = is_sell.groupby(['cat_id', 'store_id'])[train_d_cols].transform('sum')

    price_data = price_data[train_d_cols]

    # Despite its name, 'quarter' is day-of-month // 7 (values 0..4).
    calendar_df['quarter'] = calendar_df['date'].dt.day // 7
    # Shift these features 28 rows up so that the row for day t carries the
    # values of day t+28; the last 28 rows become NaN.
    cols = ['quarter', 'wday', 'event_type_1', 'snap_CA', 'snap_TX', 'snap_WI']
    calendar_df[cols] = calendar_df[cols].shift(-28)

    # One-hot encode weekday (NaN from the shift is encoded as '0') and event type.
    calendar_df = pd.concat([
        calendar_df.drop(columns=['wday', 'event_type_1']),
        pd.get_dummies(calendar_df[['wday']].replace(np.nan, 0).astype(int).astype(str)),
        pd.get_dummies(calendar_df[['event_type_1']])
    ], axis=1)

    calendar_df = calendar_df[calendar_df.d.isin(train_d_cols)]
    return train_df, price_data, is_sell_data, is_sell_dept_store, is_sell_cat_store, calendar_df.T

### reduce_mem_usage

In [380]:
def reduce_mem_usage(df, verbose=True):
    """Downcast the numeric columns of ``df`` in place to smaller dtypes.

    Integer columns are cast to the first of int8/int16/int32/int64 whose
    range strictly contains the column's min and max (no cast if none does).
    Other numeric columns are cast to float16 or float32 under the same
    strict test, and to float64 otherwise. Columns whose dtype is not one of
    int16/32/64 or float16/32/64 are left alone.

    When ``verbose`` is true the resulting memory usage is printed. The same
    frame is returned.
    """
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    int_candidates = (np.int8, np.int16, np.int32, np.int64)
    float_candidates = (np.float16, np.float32)

    start_mem = df.memory_usage().sum() / 1024**2
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type not in numerics:
            continue
        c_min = df[col].min()
        c_max = df[col].max()
        if str(col_type)[:3] == 'int':
            for candidate in int_candidates:
                limits = np.iinfo(candidate)
                if c_min > limits.min and c_max < limits.max:
                    df[col] = df[col].astype(candidate)
                    break
        else:
            for candidate in float_candidates:
                limits = np.finfo(candidate)
                if c_min > limits.min and c_max < limits.max:
                    df[col] = df[col].astype(candidate)
                    break
            else:
                # Neither float16 nor float32 fits (or min/max are NaN).
                df[col] = df[col].astype(np.float64)
    end_mem = df.memory_usage().sum() / 1024**2
    if verbose:
        print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, 100 * (start_mem - end_mem) / start_mem))
    return df

In [381]:
#pd.get_dummies(train_df[[ 'dept_id', 'store_id']])

In [402]:
class My_Dataset(torch.utils.data.Dataset):
    """Random-window dataset over per-item daily series.

    Each item of the dataset is one row (series) of ``train_df``. Every access
    draws a random 112-day window: the first 84 days form the input and the
    last 28 days form the target.

    All calendar features are taken from the ``calendar_df`` passed to the
    constructor (rows = features, columns = day labels); no notebook-level
    globals are read.
    """

    WINDOW_DAYS = 112   # length of the sampled window
    TARGET_DAYS = 28    # trailing days of the window used as the target

    # Calendar rows stacked into the sequence input (after the per-state snap row).
    SEQ_CALENDAR_ROWS = ['quarter', 'event_type_1_Cultural', 'event_type_1_National',
                         'event_type_1_Religious', 'event_type_1_Sporting']
    # Calendar rows read at the last input day and appended to the static input.
    AUX_CALENDAR_ROWS = ['month', 'year', 'wday_1', 'wday_2', 'wday_3', 'wday_4',
                         'wday_5', 'wday_6', 'wday_7']

    def __init__(self, train_df, price_data, is_sell_data, is_sell_dept_store, is_sell_cat_store, calendar_df):
        """Store float copies of every table, restricted to the day columns of ``train_df``.

        ``train_df`` must carry ``dept_id`` and ``store_id`` columns (they are
        one-hot encoded) and its index labels must contain the state code
        (``CA``, ``TX`` or ``WI``). The other item tables are aligned to
        ``train_df.index``.
        """
        self.d_cols = sort_d_cols([d for d in train_df.columns if  'd_' in d])
        self.data = train_df[self.d_cols].astype(float)
        self.index = train_df.index
        self.cat_data = pd.get_dummies(train_df[[ 'dept_id', 'store_id']]).astype(float)
        self.price_data = price_data.loc[train_df.index, self.d_cols].astype(float)
        self.is_sell_data = is_sell_data.loc[train_df.index, self.d_cols].astype(float)
        self.is_sell_dept_store = is_sell_dept_store.loc[train_df.index, self.d_cols].astype(float)
        self.is_sell_cat_store = is_sell_cat_store.loc[train_df.index, self.d_cols].astype(float)
        # Keep every calendar row __getitem__ needs, restricted to this
        # dataset's own day columns (not a global list of days).
        use_cols = (['snap_CA', 'snap_WI', 'snap_TX']
                    + self.SEQ_CALENDAR_ROWS + self.AUX_CALENDAR_ROWS)
        self.calendar_df = calendar_df.loc[use_cols, self.d_cols].astype(float)

    def __len__(self):
        """Number of series (rows of ``train_df``)."""
        return len(self.data)

    def __getitem__(self, idx):
        """Sample one random window for series ``idx``.

        Returns
        -------
        (x_1, x_2, y)
            ``x_1`` -- float tensor with 11 rows (value, price, is_sell,
            dept/store availability count, cat/store availability count, the
            state's snap flag and five calendar rows) by 84 input days;
            ``x_2`` -- float tensor of the one-hot dept/store encoding followed
            by nine calendar values of the last input day;
            ``y`` -- float tensor with the 28 target-day values.
        """
        # Random start day; the slice leaves room for a full window.
        start = int(random.choice(self.d_cols[:-self.WINDOW_DAYS]).replace('d_', ''))
        window_cols = [f'd_{start+i}' for i in range(self.WINDOW_DAYS)]
        trn_d_cols = window_cols[:-self.TARGET_DAYS]
        val_d_cols = window_cols[-self.TARGET_DAYS:]

        _id = self.index[idx]
        for s in ['CA', 'TX', 'WI']:
            if s in _id:
                state = s
                break
        else:
            raise ValueError(f'no state code (CA/TX/WI) found in id {_id!r}')

        calendar_cols = [f'snap_{state}'] + self.SEQ_CALENDAR_ROWS
        x_1 = pd.concat([
            self.data.iloc[[idx],:][trn_d_cols],
            self.price_data.iloc[[idx],:][trn_d_cols],
            self.is_sell_data.iloc[[idx],:][trn_d_cols],
            self.is_sell_dept_store.iloc[[idx],:][trn_d_cols],
            self.is_sell_cat_store.iloc[[idx],:][trn_d_cols],
            self.calendar_df.loc[calendar_cols,trn_d_cols]
        ],axis=0)

        last_input_day = trn_d_cols[-1]
        x_2 = (self.cat_data.iloc[idx,:].values.tolist()
               + self.calendar_df.loc[self.AUX_CALENDAR_ROWS, last_input_day].values.tolist())

        x_1 = torch.FloatTensor(x_1.values)
        x_2 = torch.FloatTensor(x_2)

        y = torch.FloatTensor(self.data.loc[_id,:][val_d_cols].values.astype(float))

        return x_1, x_2, y

In [390]:
def mish(input):
    """Mish activation: ``input * tanh(softplus(input))``."""
    gate = torch.tanh(nn.functional.softplus(input))
    return input * gate

class Mish(nn.Module):
    """Parameter-free module that applies :func:`mish` to its input."""

    def forward(self, input):
        """Return ``mish(input)``."""
        return mish(input)
    
from torch.optim.optimizer import Optimizer
import math
class RAdam(Optimizer):
    """Rectified Adam optimizer.

    Parameters
    ----------
    params : iterable of parameters or of param-group dicts.
    lr : learning rate.
    betas : coefficients of the running averages of the gradient and of its
        square.
    eps : added to the denominator of the adaptive update.
    weight_decay : decay factor applied as ``p -= weight_decay * lr * p``
        before the gradient update.

    While the approximated SMA length ``N_sma`` is below 5 the update is a
    bias-corrected momentum step without the adaptive denominator; from then
    on the rectified adaptive step is used.
    """

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0):
        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
        # Small ring cache of [key, N_sma, step_size], indexed by step % 10.
        self.buffer = [[None, None, None] for _ in range(10)]
        super(RAdam, self).__init__(params, defaults)

    def __setstate__(self, state):
        super(RAdam, self).__setstate__(state)

    def step(self, closure=None):
        """Perform one optimization step.

        ``closure``, when given, is called first and its return value is
        returned; otherwise ``None`` is returned.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:

            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data.float()
                if grad.is_sparse:
                    raise RuntimeError('RAdam does not support sparse gradients')

                # All arithmetic is done on an fp32 copy that is written back at the end.
                p_data_fp32 = p.data.float()

                state = self.state[p]

                if len(state) == 0:
                    state['step'] = 0
                    state['exp_avg'] = torch.zeros_like(p_data_fp32)
                    state['exp_avg_sq'] = torch.zeros_like(p_data_fp32)
                else:
                    state['exp_avg'] = state['exp_avg'].type_as(p_data_fp32)
                    state['exp_avg_sq'] = state['exp_avg_sq'].type_as(p_data_fp32)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                beta1, beta2 = group['betas']

                # Keyword forms: the positional-scalar overloads are deprecated.
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)

                state['step'] += 1
                # The cached step size depends on lr and betas as well as on
                # the step count, so all of them are part of the cache key;
                # otherwise a param group with different hyper-parameters
                # would reuse another group's step size.
                cache_key = (state['step'], group['lr'], beta1, beta2)
                buffered = self.buffer[int(state['step'] % 10)]
                if buffered[0] == cache_key:
                    N_sma, step_size = buffered[1], buffered[2]
                else:
                    buffered[0] = cache_key
                    beta2_t = beta2 ** state['step']
                    N_sma_max = 2 / (1 - beta2) - 1
                    N_sma = N_sma_max - 2 * state['step'] * beta2_t / (1 - beta2_t)
                    buffered[1] = N_sma

                    # more conservative since it's an approximated value
                    if N_sma >= 5:
                        step_size = group['lr'] * math.sqrt((1 - beta2_t) * (N_sma - 4) / (N_sma_max - 4) * (N_sma - 2) / N_sma * N_sma_max / (N_sma_max - 2)) / (1 - beta1 ** state['step'])
                    else:
                        step_size = group['lr'] / (1 - beta1 ** state['step'])
                    buffered[2] = step_size

                if group['weight_decay'] != 0:
                    p_data_fp32.add_(p_data_fp32, alpha=-group['weight_decay'] * group['lr'])

                # more conservative since it's an approximated value
                if N_sma >= 5:
                    denom = exp_avg_sq.sqrt().add_(group['eps'])
                    p_data_fp32.addcdiv_(exp_avg, denom, value=-step_size)
                else:
                    p_data_fp32.add_(exp_avg, alpha=-step_size)

                p.data.copy_(p_data_fp32)

        return loss

In [397]:
class My_rgrssor(nn.Module):
    """1-D convolutional encoder over the sequence input plus a dense head.

    Parameters
    ----------
    in_size : number of channels (rows) of the sequence input ``x_1``.
    aux_size : number of features of the static input ``x_2`` (default 26).
    out_size : number of predicted values per sample (default 28).

    The three convolutions (kernel sizes 2, 4 and 8, no padding) shorten the
    sequence by 11 steps and the result is average-pooled to 5 steps, so the
    flattened encoder output has ``in_size * 4 * 5`` features. With the
    defaults and ``in_size=11`` the head input width is 220 + 26.
    """

    POOLED_LEN = 5  # output length of the adaptive average pooling

    def __init__(self, in_size, aux_size=26, out_size=28):
        super(My_rgrssor, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv1d(in_size, in_size*2, kernel_size=2),
            Mish(),
            nn.Conv1d(in_size*2, in_size*2, kernel_size=4),
            Mish(),
            nn.Conv1d(in_size*2, in_size*4, kernel_size=8),
            nn.AdaptiveAvgPool1d(self.POOLED_LEN)
        )
        # Flattened conv features + static features; derived from the
        # arguments instead of a literal that is only right for in_size=11.
        in_2 = in_size*4*self.POOLED_LEN + aux_size
        self.lin = nn.Sequential(
            nn.Linear(in_2, in_2*2),
            Mish(),
            nn.Linear(in_2*2, out_size)
        )

    def forward(self, x_1, x_2):
        """Return strictly positive predictions of shape ``(batch, out_size)``.

        ``x_1`` is ``(batch, in_size, length)`` and ``x_2`` is
        ``(batch, aux_size)``.
        """
        features = self.conv(x_1).flatten(1)
        features = torch.cat((features, x_2), dim=1)
        out = self.lin(features)
        # ReLU plus a small offset keeps the output > 0 (the Tweedie loss used
        # for training raises predictions to a negative power).
        return torch.relu(out)+1e-3

In [392]:
class TweedieLoss(nn.Module):
    """Tweedie loss with power parameter ``p``.

    Per element: ``-true * preds**(1-p) / (1-p) + preds**(2-p) / (2-p)``,
    averaged over dimension 1 and then over the remaining values.
    """

    def __init__(self, p):
        super().__init__()
        self.p = p

    def forward(self, preds, true):
        """Return the scalar mean loss for predictions ``preds`` and targets ``true``."""
        one_minus_p = 1-self.p
        two_minus_p = 2- self.p
        likelihood_term = true * ((preds**one_minus_p)/ one_minus_p)
        normalizer_term = (preds**two_minus_p)/ two_minus_p
        per_element = normalizer_term - likelihood_term
        per_row = per_element.mean(1)
        return per_row.mean()

In [386]:
%%time
# Directory holding the M5 CSV files. `preprocessing` appends file names to
# this string directly, so the trailing slash is required.
# NOTE(review): hard-coded absolute local path -- must be edited per machine.
path = '/Users/kanoumotoharu/Downloads/m5-forecasting-accuracy/'
#path = '/Users/abcdm/Downloads/m5-forecasting-accuracy/'
#path = '../input/m5-forecasting-accuracy/'

# Day labels d_1 .. d_1969.
d_cols=[f'd_{i+1}' for i in range(1969)]
# Drop the last 28 days, then keep the most recent 730 of the rest (d_1212 .. d_1941).
trn_d_cols = d_cols[:-28]
trn_d_cols = trn_d_cols[-730:]

# Load and transform everything; timed by the %%time cell magic above.
train_df, price_data, is_sell_data, is_sell_dept_store, is_sell_cat_store, calendar_df = preprocessing(path, d_cols, trn_d_cols)

HBox(children=(IntProgress(value=0, max=30490), HTML(value='')))


CPU times: user 1min 9s, sys: 10.9 s, total: 1min 20s
Wall time: 1min 22s


In [413]:
def train_model(model, data_loader, num_epochs=40, lr=4e-4):
    """Train ``model`` on ``data_loader`` with RAdam and a Tweedie loss.

    Parameters
    ----------
    model : module called as ``model(x_1, x_2)``; moved to ``DEVICE``.
    data_loader : iterable of ``(x_1, x_2, y)`` batches with a length.
    num_epochs : number of passes over ``data_loader`` (default 40).
    lr : learning rate passed to RAdam (default 4e-4).

    Returns
    -------
    The trained model. A ``KeyboardInterrupt`` stops training early and the
    model is returned in its current state; any other exception propagates.

    The value printed per epoch is the mean over batches of the batch RMSE,
    not the Tweedie training loss.
    """
    model = model.to(DEVICE)

    optimizer = RAdam(model.parameters(), lr=lr)
    criterion = TweedieLoss(1.15)
    eval_loss = nn.MSELoss()

    try:
        for epoch in range(num_epochs):
            start_time = time()
            model.train()
            avg_loss = 0.

            for x_1, x_2, y in tqdm(data_loader):
                optimizer.zero_grad()
                x_1 = x_1.to(DEVICE)
                x_2 = x_2.to(DEVICE)
                y = y.to(DEVICE)

                preds = model(x_1, x_2)

                loss = criterion(preds, y)
                loss.backward()
                optimizer.step()

                # Monitoring only: computed on detached predictions.
                tmp_eval_loss = torch.sqrt(eval_loss(preds.detach(), y))
                avg_loss += tmp_eval_loss.item() / len(data_loader)

            # `time` is the function imported via `from time import time`;
            # the former `time.time()` raised AttributeError here, and the
            # bare `except:` hid it, ending training after the first epoch.
            elapsed = time() - start_time
            print(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  time: {elapsed:.0f}s')

    except KeyboardInterrupt:
        # Manual stop: fall through and return the model trained so far.
        pass

    return model

In [394]:
# Wrap the preprocessed tables in the dataset and batch it (500 series per batch, shuffled).
my_data_set= My_Dataset(train_df, price_data, is_sell_data, is_sell_dept_store, is_sell_cat_store, calendar_df)
data_loader = torch.utils.data.DataLoader(my_data_set, batch_size=500, shuffle=True)
# Pull only the first batch; x_1, x_2 and y stay defined for the cells below.
for x_1,x_2 ,y in tqdm(data_loader):
    break

HBox(children=(IntProgress(value=0, max=61), HTML(value='')))

In [398]:
# Smoke test on the single batch fetched above.
# 11 = number of rows My_Dataset.__getitem__ stacks into x_1 (5 item rows + 6 calendar rows).
model = My_rgrssor(11)
out = model(x_1,x_2)
# Not the last expression of the cell, so this size is computed but not displayed.
model(x_1,x_2).size()
# NOTE(review): power 1.1 here, while train_model uses TweedieLoss(1.15).
loss = TweedieLoss(1.1)
loss(out, y)

torch.Size([500, 28])

In [None]:
# Train the model built above on the full loader; train_model returns the model.
train_model(model, data_loader)

HBox(children=(IntProgress(value=0, max=61), HTML(value='')))

tensor(73.4361, grad_fn=<MeanBackward0>)