In [1]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import os, gc
import termcolor

import math, random
import pickle
import datetime, time
from tqdm import tqdm_notebook as tqdm

import torch 
from torch import nn
from torch import optim

from sklearn import preprocessing
from sklearn.model_selection import train_test_split, KFold
from sklearn.cluster import KMeans


RANDOM_SEED = 2020

torch.manual_seed(RANDOM_SEED)
torch.cuda.manual_seed(RANDOM_SEED)
torch.cuda.manual_seed_all(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
random.seed(RANDOM_SEED)

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

%matplotlib inline

In [2]:
def mish(input):
    return input * torch.tanh(nn.functional.softplus(input))

class Mish(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, input):
        return mish(input)
    
class residual_conv1d(nn.Module):

    def __init__(self, in_channel):
        super(residual_conv1d, self).__init__()
        
        self.mish = Mish()
        self.layer = nn.Sequential(
            nn.Conv1d(in_channel, in_channel, 1),
            Mish(),
            nn.Conv1d(in_channel, in_channel, 1)
        )

    def forward(self, x):
        x = x+self.layer(x)
        x = self.mish(x)
        return x

class Conv_1d_Net(nn.Module):

    def __init__(self, in_channel):
        super(Conv_1d_Net, self).__init__()
        
        self.layer_1 = nn.Sequential(
            nn.Conv1d(in_channel, 2*in_channel, 1),
            nn.Dropout(0.2),
            Mish(),
            residual_conv1d(2*in_channel)
        )
        
        self.layer_2 = nn.Sequential(
            nn.Conv1d(2*in_channel, 4*in_channel, 1),
            nn.Dropout(0.2),
            Mish(),
            residual_conv1d(4*in_channel)
        )
        
        self.layer_3 = nn.Sequential(
            nn.Conv1d(4*in_channel, 8*in_channel, 1),
            nn.Dropout(0.2),
            Mish(),
            residual_conv1d(8*in_channel)
        )
       
         
        self.avgpool1d = nn.AdaptiveAvgPool1d(1)
        
        self.fc = nn.Sequential(
            nn.Linear(8*in_channel, 8*in_channel),
            nn.Dropout(0.1),
            Mish(),
            nn.Linear(8*in_channel, 16*in_channel),
            nn.Dropout(0.1),
            Mish(),
            nn.Linear(16*in_channel, 28)
        ) 

    def forward(self, x):
        #_in = x.size()[1]
        x = self.layer_1(x)
        x = self.layer_2(x)
        x = self.layer_3(x)
        #x = self.layer_4(x)
        x = self.avgpool1d(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x
    
    
from torch.optim.optimizer import Optimizer
import math

class RAdam(Optimizer):

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0):
        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
        self.buffer = [[None, None, None] for ind in range(10)]
        super(RAdam, self).__init__(params, defaults)

    def __setstate__(self, state):
        super(RAdam, self).__setstate__(state)

    def step(self, closure=None):

        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:

            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data.float()
                if grad.is_sparse:
                    raise RuntimeError('RAdam does not support sparse gradients')

                p_data_fp32 = p.data.float()

                state = self.state[p]

                if len(state) == 0:
                    state['step'] = 0
                    state['exp_avg'] = torch.zeros_like(p_data_fp32)
                    state['exp_avg_sq'] = torch.zeros_like(p_data_fp32)
                else:
                    state['exp_avg'] = state['exp_avg'].type_as(p_data_fp32)
                    state['exp_avg_sq'] = state['exp_avg_sq'].type_as(p_data_fp32)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                beta1, beta2 = group['betas']

                exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
                exp_avg.mul_(beta1).add_(1 - beta1, grad)

                state['step'] += 1
                buffered = self.buffer[int(state['step'] % 10)]
                if state['step'] == buffered[0]:
                    N_sma, step_size = buffered[1], buffered[2]
                else:
                    buffered[0] = state['step']
                    beta2_t = beta2 ** state['step']
                    N_sma_max = 2 / (1 - beta2) - 1
                    N_sma = N_sma_max - 2 * state['step'] * beta2_t / (1 - beta2_t)
                    buffered[1] = N_sma

                    # more conservative since it's an approximated value
                    if N_sma >= 5:
                        step_size = group['lr'] * math.sqrt((1 - beta2_t) * (N_sma - 4) / (N_sma_max - 4) * (N_sma - 2) / N_sma * N_sma_max / (N_sma_max - 2)) / (1 - beta1 ** state['step'])
                    else:
                        step_size = group['lr'] / (1 - beta1 ** state['step'])
                    buffered[2] = step_size

                if group['weight_decay'] != 0:
                    p_data_fp32.add_(-group['weight_decay'] * group['lr'], p_data_fp32)

                # more conservative since it's an approximated value
                if N_sma >= 5:            
                    denom = exp_avg_sq.sqrt().add_(group['eps'])
                    p_data_fp32.addcdiv_(-step_size, exp_avg, denom)
                else:
                    p_data_fp32.add_(-step_size, exp_avg)

                p.data.copy_(p_data_fp32)

        return loss

In [3]:
def split_X_y(x_batch):
    y_batch = x_batch[:,0,-28:]
    x_batch = x_batch[:,:,:-28]
    return x_batch, y_batch

In [4]:
def preprocessing(train_df, calendar_df, sell_prices_df):
    sell_prices_df['id'] = sell_prices_df['item_id'].astype('str')+'_'+sell_prices_df['store_id']+'_validation'
    d_cols = [f'd_{i}' for i in range(1,1914)]
    
    event_type_1 = pd.get_dummies(calendar_df.event_type_1)
    event_type_1.columns = [f'{col}_event_type_1' for col in event_type_1.columns]
    event_type_2 = pd.get_dummies(calendar_df.event_type_1)
    event_type_2.columns = [f'{col}_event_type_2' for col in event_type_2.columns]
    calendar_data = pd.concat([
        calendar_df.drop(columns=['event_name_1', 'event_type_1', 'event_name_2', 'event_type_2'])[['wday', 'd','month','snap_CA', 'snap_TX', 'snap_WI']],
        event_type_1,
        event_type_2
    ], axis=1)
    calendar_data = calendar_data.set_index('d').T
    
    
    
    sell_prices_data = sell_prices_df[sell_prices_df.wm_yr_wk.isin(calendar_df.wm_yr_wk.unique())]
    sell_prices_data.reset_index(drop=True, inplace=True)
    tmp = sell_prices_data.groupby(['id'])[['wm_yr_wk', 'sell_price']].apply(lambda x: x.set_index('wm_yr_wk')['sell_price'].to_dict()).to_dict()
    d = calendar_df.d
    wm_yr_wk = calendar_df.wm_yr_wk
    price_data = {}
    for col in tqdm(train_df.id.unique()):
        price_data[col] = wm_yr_wk.map(tmp[col])
    price_data = pd.DataFrame(price_data)
    price_data.index = d
    
    
    is_sell = price_data.notnull().astype(float).T
    price_data = price_data.fillna(0)
    
    train_df = train_df.T
    train_df.columns = train_df.loc['id', :].values
    train_df = train_df.T
    
    return train_df, calendar_df, calendar_data, price_data, is_sell


def make_calendar_data(calendar_data, train_cols):
    calendar_index = [
        'wday', 'month',
        'Cultural_event_type_1', 'National_event_type_1', 'Religious_event_type_1', 'Sporting_event_type_1',
        'Cultural_event_type_2', 'National_event_type_2', 'Religious_event_type_2', 'Sporting_event_type_2'
    ]
    calendar = calendar_data.loc[calendar_index,:]
    event_index = [
        'Cultural_event_type_1', 'National_event_type_1', 'Religious_event_type_1', 'Sporting_event_type_1',
        'Cultural_event_type_2', 'National_event_type_2', 'Religious_event_type_2', 'Sporting_event_type_2'
    ]
    for shift in [28, -28]:
        tmp_calendar = calendar.loc[event_index, :]
        tmp_calendar = tmp_calendar.T.shift(-shift).T
        tmp_calendar.index = [f'{col}_shift{shift}' for col in tmp_calendar.index]
        calendar = pd.concat([
            calendar,
            tmp_calendar
        ], axis=0)
    calendar = calendar[train_cols]
    calendar = torch.FloatTensor(calendar.values.astype(float))
    return calendar

In [5]:
class item_id_store_id_Dataset(torch.utils.data.Dataset):
    def __init__(self, data, calendar):
        self.data = data
        self.calendar = calendar
        self.datanum = len(data)

    def __len__(self):
        return self.datanum

    def __getitem__(self, idx):
        _data = self.data[idx, :, :]
        x = torch.cat((_data, self.calendar), dim=0)
        return x, idx

class indicate_index(torch.utils.data.Dataset):
    def __init__(self, index):
        self.index = index

    def __len__(self):
        return len(self.index)

    def __getitem__(self, idx):
        return idx
    
def _create_batch_data(index, data, calendar):
    _data = data[index, :, :]
    x = torch.tensor([])
    for tmp_x in _data:
        tmp_x = torch.cat((tmp_x, calendar),dim=0)
        x = torch.cat((x,tmp_x.unsqueeze(0)), dim=0)
    return x

class Loss_func_item_id_store_id_(nn.Module):
    def __init__(self, df, cols):
        super(Loss_func_item_id_store_id_, self).__init__()
        last_d = int(cols[-1].replace('d_', ''))
        d_cols = df.columns[df.columns.str.startswith('d_')]
        train_d_cols = last_d-28*2
        self.train_d_cols = d_cols[:train_d_cols]
        test_d_cols = last_d-28
        self.test_d_cols = d_cols[:test_d_cols]
        self._create_denominator(df)
        
    def _create_denominator(self, df):
        
        train_value = df[self.train_d_cols]
        train_value = train_value.values
        train_value = train_value[:,1:]-train_value[:,:-1]
        train_value = train_value**2
        train_value = train_value.mean(1)
        train_value[train_value==0]=1
        self.train_value = torch.FloatTensor(train_value)
        
        test_value = df[self.test_d_cols]
        test_value = test_value.values
        test_value = test_value[:,1:]-test_value[:,:-1]
        test_value = test_value**2
        test_value = test_value.mean(1)
        test_value[test_value==0]=1
        self.test_value = torch.FloatTensor(test_value)
        
    def forward(self, preds, true, idx, train):
        loss = (preds-true)**2
        loss = loss.mean(1)
        loss = loss.squeeze()
        if train:
            loss = loss/self.train_value[idx]
        else:
            loss = loss/self.test_value[idx]
        loss = torch.sqrt(loss)
        loss = loss.mean()
        return loss

class Loss_func_groupid(nn.Module):
    def __init__(self, df, cols, index_index, group_id):
        super(Loss_func_groupid, self).__init__()
        
        self.index_index = index_index
        last_d = int(cols[-1].replace('d_', ''))
        d_cols = df.columns[df.columns.str.startswith('d_')]
        train_d_cols = last_d-28*2
        self.train_d_cols = d_cols[:train_d_cols]
        test_d_cols = last_d-28
        self.test_d_cols = d_cols[:test_d_cols]
        self._create_denominator(df, group_id)
        
    def _create_denominator(self, df, group_id):
        g_df = df.groupby(group_id)#[d_cols].sum()
        
        train_value = g_df[self.train_d_cols].sum()
        train_value = train_value.loc[self.index_index,:]
        train_value = train_value.values
        train_value = train_value[:,1:]-train_value[:,:-1]
        train_value = train_value**2
        train_value = train_value.mean(1)
        train_value[train_value==0]=1
        self.train_value = torch.FloatTensor(train_value)
        
        test_value = g_df[self.test_d_cols].sum()
        test_value = test_value.loc[self.index_index,:]
        test_value = test_value.values
        test_value = test_value[:,1:]-test_value[:,:-1]
        test_value = test_value**2
        test_value = test_value.mean(1)
        test_value[test_value==0]=1
        self.test_value = torch.FloatTensor(test_value)
        
    def forward(self, preds, true, idx, length, train):
        a1=0
        a2=0
        Loss=0
        for i, _len in enumerate(length):
            _idx = idx[i]
            a2=a1+_len
            _preds = preds[a1:a2].sum(0)
            _true = true[a1:a2].sum(0)
            loss = (_preds -_true)**2
            loss = loss.mean()
            loss = loss.squeeze()
            if train:
                loss = loss/self.train_value[_idx]
            else:
                loss = loss/self.test_value[_idx]
            loss = torch.sqrt(loss)
            Loss+=loss/len(length)
            a1=a2
        return Loss

In [6]:
class PreProcessing():
    def __init__(self, path, group_id=None):
        if group_id is None:
            self.is_Group=False
        else:
            self.is_Group=True
        self.group_id = group_id
        self.path=path
        self.df = pd.read_csv(self.path+'sales_train_validation.csv')
        self.calendar_df = pd.read_csv(self.path+'calendar.csv')
        self.sell_prices_df = pd.read_csv(self.path+'sell_prices.csv')
        self.sample_submission_df = pd.read_csv(self.path+'sample_submission.csv')
        
        self.d_cols = self.df.columns[self.df.columns.str.startswith('d_')].values.tolist()
        
        self.train_df, self.calendar_df, self.calendar_data, self.price_data, self.is_sell = preprocessing(
            self.df, self.calendar_df, self.sell_prices_df
        )
        
        
    def make_data_loader(self, cols):
        self.cols = cols
        if self.is_Group:
            self.group_sum_df = self.train_df.groupby(self.group_id)[cols].transform('sum')
        
        state='CA'
        data_ca = self.make_data_g(cols, state)
        state='TX'
        data_tx = self.make_data_g(cols, state)
        state='WI'
        data_wi = self.make_data_g(cols, state)

        data = torch.cat(
            (data_ca, data_tx, data_wi),
            dim=0
        )
        calendar = make_calendar_data(self.calendar_data, cols)
        del data_ca, data_tx, data_wi; gc.collect()
        
        self.in_size=data.size()[1]+calendar.size()[0]
        self.data = data
        self.calendar = calendar

    def make_data_g(self, train_cols, state):
        
        data_train = self.train_df[['id', 'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id']+train_cols]
        train_product = self.sample_submission_df[
            (self.sample_submission_df.id.str.contains(state))&(self.sample_submission_df.id.str.contains('_validation'))
            ].id.values
        
        data = data_train.loc[train_product,train_cols]
        
        
        calendar_index = [ f'snap_{state}']
        event_index = [ f'snap_{state}']
        calendar = self.calendar_data.loc[calendar_index,:]
        for shift in [28, -28]:
            tmp_calendar = calendar.loc[event_index, :]
            tmp_calendar = tmp_calendar.T.shift(shift).T
            tmp_calendar.index = [f'{col}_shift{shift}' for col in tmp_calendar.index]
            calendar = pd.concat([
                calendar,
                tmp_calendar
            ], axis=0)
        calendar = calendar[train_cols]
        
        price = self.price_data.T[train_cols].loc[train_product,:]
        past_price_1 = self.price_data.loc[:,train_product].shift(28).T[train_cols]
        past_price_2 = self.price_data.loc[:,train_product].shift(-28).T[train_cols]
        
        
        is_sell = self.is_sell[train_cols].loc[train_product,:]
        past_is_sell_1 = self.is_sell.T.shift(28).T.loc[train_product, train_cols]
        past_is_sell_2 = self.is_sell.T.shift(-28).T.loc[train_product, train_cols]

        data = torch.FloatTensor(data.values.astype(float))
        
        if self.is_Group:
            group_sum_df = self.group_sum_df.loc[train_product, :]
            group_sum_df = torch.FloatTensor(group_sum_df.values.astype(float))
            
        calendar = torch.FloatTensor(calendar.values.astype(float))
        
        price = torch.FloatTensor(price.values.astype(float))
        
        past_price_1 = torch.FloatTensor(past_price_1.values.astype(float))
        past_price_2 = torch.FloatTensor(past_price_2.values.astype(float))
        
        is_sell = torch.FloatTensor(is_sell.values.astype(float))
        past_is_sell_1 = torch.FloatTensor(past_is_sell_1.values.astype(float))
        past_is_sell_2 = torch.FloatTensor(past_is_sell_2.values.astype(float))
        
        data_list = []
        for idx in range(len(data)):
            if self.is_Group:
                _data = data[[idx],:]
                _group_sum_data = group_sum_df[[idx],:]
                _price = price[[idx],:]

                _past_price_1 = past_price_1[[idx],:]
                _past_price_2 = past_price_2[[idx],:]

                _is_sell = is_sell[[idx],:]

                _past_is_sell_1 = past_is_sell_1[[idx],:]
                _past_is_sell_2 = past_is_sell_2[[idx],:]

                x = torch.cat((
                    _data, _group_sum_data,
                    calendar,
                    _price,
                    _past_price_1, _past_price_2,
                    _is_sell,
                    _past_is_sell_1, _past_is_sell_2,
                ), dim=0)
            else:
                _data = data[[idx],:]
                #_group_sum_data = group_sum_df[[idx],:]
                _price = price[[idx],:]

                _past_price_1 = past_price_1[[idx],:]
                _past_price_2 = past_price_2[[idx],:]

                _is_sell = is_sell[[idx],:]

                _past_is_sell_1 = past_is_sell_1[[idx],:]
                _past_is_sell_2 = past_is_sell_2[[idx],:]

                x = torch.cat((
                    _data,
                    calendar,
                    _price,
                    _past_price_1, _past_price_2,
                    _is_sell,
                    _past_is_sell_1, _past_is_sell_2,
                ), dim=0)
            data_list.append(x.tolist())
        data_list = torch.FloatTensor(data_list)
        return data_list

In [12]:
class Train_Model():        
    def prepare_training_MIX(self, model, Preprocesing_agent, index_index):
        lr = 4e-4
        eta_min = 1e-3
        t_max = 10
        model = model.to(DEVICE)
        criterion_2 = Loss_func_groupid(
            cols=Preprocesing_agent.cols, df=Preprocesing_agent.df, index_index=index_index, group_id=Preprocesing_agent.group_id
        )
        criterion_1 = Loss_func_item_id_store_id_(df=Preprocesing_agent.df, cols=Preprocesing_agent.cols)
        optimizer = RAdam(params=model.parameters(), lr=lr)
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=t_max, eta_min=eta_min)
        return model, criterion_1, criterion_2, optimizer, scheduler

    def train_model_MIX(self, model, Preprocesing_agent):
        Preprocesing_agent.df['index'] = Preprocesing_agent.df.index
        index_df = pd.concat([
            Preprocesing_agent.df.groupby(Preprocesing_agent.group_id)['index'].unique(),
            Preprocesing_agent.df.groupby(Preprocesing_agent.group_id)['index'].nunique()
        ], axis=1)
        index_df.columns=['index', 'length']
        index_df['index'] = index_df['index'].apply(lambda x: x.tolist())

        index_index = index_df.index
        index_df.reset_index(drop=True, inplace=True)
        
        data_set=indicate_index(index_df)
        
        batch_size = int(150/index_df['length'].mean())
        
        data_loader = torch.utils.data.DataLoader(data_set, batch_size = batch_size, shuffle = True)
        
        model, criterion_, criterion_g, optimizer, scheduler = self.prepare_training_MIX(
            model, Preprocesing_agent, index_index
        )
        
        num_epochs = 40
        best_epoch = -1
        best_score = 10000
        early_stoppping_cnt = 0
        best_model = model
        
        
        for epoch in range(num_epochs):
            start_time = time.time()
            
            model.train()
            avg_loss = 0.
            
            for idx in tqdm(data_loader):
                optimizer.zero_grad()
                
                index = sum(index_df.iloc[idx]['index'].values.tolist(),[])
                length = index_df.iloc[idx]['length'].values.tolist()
                x_batch = _create_batch_data(index, Preprocesing_agent.data, Preprocesing_agent.calendar)
                x_batch = x_batch[:,:,:-28]
                gc.collect()
                
                x_batch, y_batch = split_X_y(x_batch)
                x_batch = x_batch.to(DEVICE)
                y_batch = y_batch.to(DEVICE)
                
                preds = model(x_batch)
                
                loss_ = criterion_(preds.cpu(), y_batch.cpu(), index, train=True)
                loss_g = criterion_g(preds.cpu(), y_batch.cpu(), idx, length, train=True)
                loss = 0.45*loss_+0.55*loss_g
                del loss_; gc.collect()
                loss = loss.to(DEVICE)
                
                loss.backward()
                optimizer.step()
                scheduler.step()

                avg_loss += loss_g.item() / len(data_loader)
                del loss_g,loss; gc.collect()
            
            model.eval()
            avg_val_loss = 0.
            
            for idx in data_loader:
                index = sum(index_df.iloc[idx]['index'].values.tolist(),[])
                length = index_df.iloc[idx]['length'].values.tolist()

                x_batch = _create_batch_data(index, Preprocesing_agent.data, Preprocesing_agent.calendar)
                
                x_batch, y_batch = split_X_y(x_batch)
                x_batch = x_batch.to(DEVICE)
                y_batch = y_batch.to(DEVICE)
                
                preds = model(x_batch)
                #loss_ = criterion_(preds.cpu(), y_batch.cpu(), index, train=False)
                loss_g = criterion_g(preds.cpu(), y_batch.cpu(), idx, length, train=False)
                
                avg_val_loss += loss_g.item() / len(data_loader)
                del loss_g; gc.collect()
                
                
            if best_score>avg_val_loss:
                best_score = avg_val_loss
                early_stoppping_cnt=0
                best_epoch=epoch
                best_model = model
                elapsed = time.time() - start_time
                p_avg_val_loss = termcolor.colored(np.round(avg_val_loss, 4),"red")
                
                print(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {p_avg_val_loss} time: {elapsed:.0f}s')
            else:
                early_stoppping_cnt+=1
                elapsed = time.time() - start_time
                print(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f} time: {elapsed:.0f}s')
            
            if (epoch>10) and (early_stoppping_cnt>5):
                    break
        
        print(f'best_score : {best_score}    best_epoch : {best_epoch}')
        #torch.save(best_score.state_dict(), 'net.pt')
        
        return best_model, best_score
    
    
    def prepare_training_item_id_store_id_(self, model, Preprocesing_agent):
        lr = 7e-4
        eta_min = 1e-3
        t_max = 10
        model = model.to(DEVICE)
        criterion = Loss_func_item_id_store_id_(cols=Preprocesing_agent.cols, df=Preprocesing_agent.df)
        optimizer = RAdam(params=model.parameters(), lr=lr)
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=t_max, eta_min=eta_min)
        return model, criterion, optimizer, scheduler
    
    def train_model_item_id_store_id_(self, model, Preprocesing_agent):
        data_set = item_id_store_id_Dataset(Preprocesing_agent.data, Preprocesing_agent.calendar)
        data_loader = torch.utils.data.DataLoader(data_set, batch_size = 150, shuffle = True)
        
        model, criterion, optimizer, scheduler = self.prepare_training_item_id_store_id_(model, Preprocesing_agent)

        num_epochs = 40
        best_epoch = -1
        best_score = 10000
        early_stoppping_cnt = 0
        best_model = model

        for epoch in range(num_epochs):
            start_time = time.time()

            model.train()
            avg_loss = 0.
            #
            for x_batch, idx in tqdm(data_loader):
                optimizer.zero_grad()
                x_batch = x_batch[:,:,:-28]; gc.collect()

                x_batch, y_batch = split_X_y(x_batch)
                x_batch = x_batch.to(DEVICE)
                y_batch = y_batch.to(DEVICE)

                preds = model(x_batch)

                loss = criterion(preds.cpu(), y_batch.cpu(), idx, train=True)
                loss = loss.to(DEVICE)

                loss.backward()
                optimizer.step()
                #scheduler.step()

                avg_loss += loss.item() / len(data_loader)
                del loss; gc.collect()

            model.eval()
            avg_val_loss = 0.

            for x_batch, idx in data_loader:
                x_batch, y_batch = split_X_y(x_batch)
                x_batch = x_batch.to(DEVICE)
                y_batch = y_batch.to(DEVICE)

                preds = model(x_batch)
                loss = criterion(preds.cpu(), y_batch.cpu(), idx, train=False)

                avg_val_loss += loss.item() / len(data_loader)
                del loss; gc.collect()


            if best_score>avg_val_loss:
                best_score = avg_val_loss
                early_stoppping_cnt=0
                best_epoch=epoch+1
                best_model = model
                elapsed = time.time() - start_time
                p_avg_val_loss = termcolor.colored(np.round(avg_val_loss, 4),"red")

                print(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {p_avg_val_loss} time: {elapsed:.0f}s')
            else:
                early_stoppping_cnt+=1
                elapsed = time.time() - start_time
                print(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f} time: {elapsed:.0f}s')

            if (epoch>10) and (early_stoppping_cnt>7):
                    break

        print(f'best_score : {best_score}    best_epoch : {best_epoch}')
        #torch.save(best_score.state_dict(), 'net.pt')
        return best_model, best_score

In [9]:
#path = '/Users/kanoumotoharu/Downloads/m5-forecasting-accuracy/'
path = '/Users/abcdm/Downloads/m5-forecasting-accuracy/'
#path = '../input/m5-forecasting-accuracy/'

groupId = ['item_id', 'state_id']
#groupId = ['item_id', 'dept_id']

Preprocessing_agent = PreProcessing(path=path, group_id=groupId)
d_cols = Preprocessing_agent.d_cols
cols = d_cols[-150:]
Preprocessing_agent.make_data_loader(cols)
print(Preprocessing_agent.data.size(), Preprocessing_agent.calendar.size())

HBox(children=(FloatProgress(value=0.0, max=30490.0), HTML(value='')))


torch.Size([30490, 11, 150]) torch.Size([26, 150])


NameError: name 'Train_Model' is not defined

In [13]:
model = Conv_1d_Net(Preprocessing_agent.in_size)
Train_model = Train_Model()
best_model, best_score = Train_model.train_model_MIX(model, Preprocessing_agent)

HBox(children=(FloatProgress(value=0.0, max=204.0), HTML(value='')))


Epoch 1 - avg_train_loss: 1.1549  avg_val_loss: [31m1.0257[0m time: 67s


HBox(children=(FloatProgress(value=0.0, max=204.0), HTML(value='')))


Epoch 2 - avg_train_loss: 0.9702  avg_val_loss: [31m0.9502[0m time: 65s


HBox(children=(FloatProgress(value=0.0, max=204.0), HTML(value='')))

KeyboardInterrupt: 

In [14]:
#path = '/Users/kanoumotoharu/Downloads/m5-forecasting-accuracy/'
path = '/Users/abcdm/Downloads/m5-forecasting-accuracy/'
#path = '../input/m5-forecasting-accuracy/'

#groupId = ['item_id', 'state_id']
#groupId = ['item_id', 'dept_id']

Preprocessing_agent = PreProcessing(path=path)
d_cols = Preprocessing_agent.d_cols
cols = d_cols[-150:]
Preprocessing_agent.make_data_loader(cols)
print(Preprocessing_agent.data.size(), Preprocessing_agent.calendar.size())

HBox(children=(FloatProgress(value=0.0, max=30490.0), HTML(value='')))


torch.Size([30490, 10, 150]) torch.Size([26, 150])


In [15]:
model = Conv_1d_Net(Preprocessing_agent.in_size)
Train_model = Train_Model()
best_model, best_score = Train_model.train_model_item_id_store_id_(model, Preprocessing_agent)

HBox(children=(FloatProgress(value=0.0, max=204.0), HTML(value='')))


Epoch 1 - avg_train_loss: 1.0147  avg_val_loss: [31m0.9224[0m time: 31s


HBox(children=(FloatProgress(value=0.0, max=204.0), HTML(value='')))


Epoch 2 - avg_train_loss: 0.9134  avg_val_loss: [31m0.9145[0m time: 32s


HBox(children=(FloatProgress(value=0.0, max=204.0), HTML(value='')))


Epoch 3 - avg_train_loss: 0.9102  avg_val_loss: 0.9155 time: 32s


HBox(children=(FloatProgress(value=0.0, max=204.0), HTML(value='')))


Epoch 4 - avg_train_loss: 0.9068  avg_val_loss: 0.9148 time: 32s


HBox(children=(FloatProgress(value=0.0, max=204.0), HTML(value='')))


Epoch 5 - avg_train_loss: 0.9056  avg_val_loss: [31m0.9065[0m time: 32s


HBox(children=(FloatProgress(value=0.0, max=204.0), HTML(value='')))


Epoch 6 - avg_train_loss: 0.9037  avg_val_loss: [31m0.9057[0m time: 32s


HBox(children=(FloatProgress(value=0.0, max=204.0), HTML(value='')))


Epoch 7 - avg_train_loss: 0.9033  avg_val_loss: 0.9105 time: 32s


HBox(children=(FloatProgress(value=0.0, max=204.0), HTML(value='')))


Epoch 8 - avg_train_loss: 0.9030  avg_val_loss: 0.9099 time: 32s


HBox(children=(FloatProgress(value=0.0, max=204.0), HTML(value='')))


Epoch 9 - avg_train_loss: 0.9016  avg_val_loss: 0.9057 time: 32s


HBox(children=(FloatProgress(value=0.0, max=204.0), HTML(value='')))


Epoch 10 - avg_train_loss: 0.9003  avg_val_loss: [31m0.9014[0m time: 32s


HBox(children=(FloatProgress(value=0.0, max=204.0), HTML(value='')))


Epoch 11 - avg_train_loss: 0.8984  avg_val_loss: 0.9177 time: 32s


HBox(children=(FloatProgress(value=0.0, max=204.0), HTML(value='')))


Epoch 12 - avg_train_loss: 0.8961  avg_val_loss: 0.9424 time: 32s


HBox(children=(FloatProgress(value=0.0, max=204.0), HTML(value='')))


Epoch 13 - avg_train_loss: 0.8961  avg_val_loss: 0.9557 time: 32s


HBox(children=(FloatProgress(value=0.0, max=204.0), HTML(value='')))


Epoch 14 - avg_train_loss: 0.8953  avg_val_loss: 0.9561 time: 32s


HBox(children=(FloatProgress(value=0.0, max=204.0), HTML(value='')))


Epoch 15 - avg_train_loss: 0.8943  avg_val_loss: 0.9340 time: 32s


HBox(children=(FloatProgress(value=0.0, max=204.0), HTML(value='')))


Epoch 16 - avg_train_loss: 0.8946  avg_val_loss: 0.9463 time: 32s


HBox(children=(FloatProgress(value=0.0, max=204.0), HTML(value='')))

KeyboardInterrupt: 