In [1]:
#path = '/Users/kanoumotoharu/Downloads/m5-forecasting-accuracy/'
path = '/Users/abcdm/Downloads/m5-forecasting-accuracy/'

In [2]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import os, gc
import math, random
import pickle
import datetime, time
from tqdm import tqdm_notebook as tqdm

import torch 
from torch import nn
from torch import optim

from sklearn import preprocessing
from sklearn.model_selection import train_test_split, KFold
from sklearn.cluster import KMeans

%matplotlib inline

In [3]:
def Preprocessing(train_df, calendar_df, sell_prices_df):
    sell_prices_df['id'] = sell_prices_df['item_id'].astype('str')+'_'+sell_prices_df['store_id']+'_validation'
    d_cols = [f'd_{i}' for i in range(1,1914)]
    
    event_type_1 = pd.get_dummies(calendar_df.event_type_1)
    event_type_1.columns = [f'{col}_event_type_1' for col in event_type_1.columns]
    event_type_2 = pd.get_dummies(calendar_df.event_type_1)
    event_type_2.columns = [f'{col}_event_type_2' for col in event_type_2.columns]
    calendar_data = pd.concat([
        calendar_df.drop(columns=['event_name_1', 'event_type_1', 'event_name_2', 'event_type_2'])[['wday', 'd','month','snap_CA', 'snap_TX', 'snap_WI']],
        event_type_1,
        event_type_2
    ], axis=1)
    calendar_data = calendar_data.set_index('d').T
    
    
    
    sell_prices_data = sell_prices_df[sell_prices_df.wm_yr_wk.isin(calendar_df.wm_yr_wk.unique())]
    sell_prices_data.reset_index(drop=True, inplace=True)
    tmp = sell_prices_data.groupby(['id'])[['wm_yr_wk', 'sell_price']].apply(lambda x: x.set_index('wm_yr_wk')['sell_price'].to_dict()).to_dict()
    d = calendar_df.d
    wm_yr_wk = calendar_df.wm_yr_wk
    price_data = {}
    for col in tqdm(train_df.id.unique()):
        price_data[col] = wm_yr_wk.map(tmp[col])
    price_data = pd.DataFrame(price_data)
    price_data.index = d
    
    
    is_sell = price_data.notnull().astype(float).T
    price_data = price_data.fillna(0)
    
    train_df = train_df.T
    train_df.columns = train_df.loc['id', :].values
    train_df = train_df.T
    
    return train_df, calendar_df, calendar_data, price_data, is_sell


def make_calendar_data(calendar_data, train_cols):
    calendar_index = [
        'wday', 'month',
        'Cultural_event_type_1', 'National_event_type_1', 'Religious_event_type_1', 'Sporting_event_type_1',
        'Cultural_event_type_2', 'National_event_type_2', 'Religious_event_type_2', 'Sporting_event_type_2'
    ]
    calendar = calendar_data.loc[calendar_index,:]
    event_index = [
        'Cultural_event_type_1', 'National_event_type_1', 'Religious_event_type_1', 'Sporting_event_type_1',
        'Cultural_event_type_2', 'National_event_type_2', 'Religious_event_type_2', 'Sporting_event_type_2'
    ]
    for shift in [-14, -7, 7, 14, 28, 56]:
        tmp_calendar = calendar.loc[event_index, :]
        tmp_calendar = tmp_calendar.T.shift(-shift).T
        tmp_calendar.index = [f'{col}_shift{shift}' for col in tmp_calendar.index]
        calendar = pd.concat([
            calendar,
            tmp_calendar
        ], axis=0)
    calendar = calendar[train_cols]
    calendar = torch.FloatTensor(calendar.values.astype(float))
    return calendar

def make_data(train_cols, state, train_df, calendar_data, price_data, is_sell_data):
    data_train = train_df[['id', 'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id']+train_cols]
    """
    null = train_df[null_check_cols].isnull().sum(axis=1)
    null = null[null==0].index
    data_train = data_train[data_train.id.isin(null)]
    """
    
    train_product = data_train[data_train.state_id==state]['id'].unique()
    print(len(train_product))
    
    data = data_train.loc[train_product,train_cols]
    
    calendar_index = [ f'snap_{state}']
    event_index = [ f'snap_{state}']
    calendar = calendar_data.loc[calendar_index,:]
    for shift in [-14, -7, 7, 14, 28, 56]:
        tmp_calendar = calendar.loc[event_index, :]
        tmp_calendar = tmp_calendar.T.shift(-shift).T
        tmp_calendar.index = [f'{col}_shift{shift}' for col in tmp_calendar.index]
        calendar = pd.concat([
            calendar,
            tmp_calendar
        ], axis=0)
    calendar = calendar[train_cols]
    
    price = price_data.T[train_cols].loc[train_product,:]
    price_1 = price_data.loc[:,train_product].shift(-3).T[train_cols]
    price_2 = price_data.loc[:,train_product].shift(-7).T[train_cols]
    price_3 = price_data.loc[:,train_product].shift(-14).T[train_cols]

    past_price_1 = price_data.loc[:,train_product].shift(3).T[train_cols]
    past_price_2 = price_data.loc[:,train_product].shift(7).T[train_cols]
    past_price_3 = price_data.loc[:,train_product].shift(14).T[train_cols]
    
    
    is_sell = is_sell_data[train_cols].loc[train_product,:]
    is_sell_1 = is_sell_data.T.shift(-3).T.loc[train_product, train_cols]
    past_is_sell_1 = is_sell_data.T.shift(3).T.loc[train_product, train_cols]
    
    print(
        price_1.isnull().sum().sum(),
        price_2.isnull().sum().sum(),
        price_3.isnull().sum().sum())
    print(
        past_price_1.isnull().sum().sum(), 
        past_price_2.isnull().sum().sum(), 
        past_price_3.isnull().sum().sum()
    )
    print(
        is_sell_1.isnull().sum().sum(), 
        past_is_sell_1.isnull().sum().sum()
    )
    
    data = torch.FloatTensor(data.values.astype(float))
    
    calendar = torch.FloatTensor(calendar.values.astype(float))
    
    price = torch.FloatTensor(price.values.astype(float))
    
    price_1 = torch.FloatTensor(price_1.values.astype(float))
    price_2 = torch.FloatTensor(price_2.values.astype(float))
    price_3= torch.FloatTensor(price_3.values.astype(float))
    
    past_price_1 = torch.FloatTensor(past_price_1.values.astype(float))
    past_price_2 = torch.FloatTensor(past_price_2.values.astype(float))
    past_price_3 = torch.FloatTensor(past_price_3.values.astype(float))
    
    is_sell = torch.FloatTensor(is_sell.values.astype(float))
    is_sell_1 = torch.FloatTensor(is_sell_1.values.astype(float))
    past_is_sell_1 = torch.FloatTensor(past_is_sell_1.values.astype(float))
    
    data_list = []
    for idx in tqdm(range(len(data))):
        _data = data[[idx],:]
        _price = price[[idx],:]
        
        _price_1 = price_1[[idx],:]
        _price_2 = price_2[[idx],:]
        _price_3 = price_3[[idx],:]
        
        _past_price_1 = past_price_1[[idx],:]
        _past_price_2 = past_price_2[[idx],:]
        _past_price_3 = past_price_3[[idx],:]
        
        _is_sell = is_sell[[idx],:]
        _is_sell_1 = is_sell_1[[idx],:]
        _past_is_sell_1 = past_is_sell_1[[idx],:]
        
        x = torch.cat((
            _data, calendar, _price, _price_1, _price_2, _price_3,
            _past_price_1, _past_price_2, _past_price_3,
            _is_sell, _is_sell_1, _past_is_sell_1
        ), dim=0)
        data_list.append(x.tolist())
    data_list = torch.FloatTensor(data_list)
    return data_list  #, calendar, price, price_1, price_2, price_3, past_price_1, past_price_2, past_price_3

In [4]:
class Mydatasets(torch.utils.data.Dataset):
    def __init__(self, data, calendar, train = True):
        self.data = data
        self.calendar = calendar
        self.datanum = len(data)
        self.train = train

    def __len__(self):
        return self.datanum

    def __getitem__(self, idx):
        _data = self.data[idx, :, :]
        x = torch.cat((_data, self.calendar), dim=0)
        if self.train:
            return x[:,:-28], x[0,-28:]
        else:
            return x

In [5]:
def mish(input):
    return input * torch.tanh(nn.functional.softplus(input))

class Mish(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, input):
        return mish(input)

In [6]:
class residual_conv1d(nn.Module):

    def __init__(self, in_channel):
        super(residual_conv1d, self).__init__()
        
        self.mish = Mish()
        self.layer = nn.Sequential(
            nn.Conv1d(in_channel, in_channel, 1),
            Mish(),
            nn.Conv1d(in_channel, in_channel, 1)
        )

    def forward(self, x):
        x = x+self.layer(x)
        x = self.mish(x)
        return x

class Conv_1d_Net(nn.Module):

    def __init__(self, in_channel):
        super(Conv_1d_Net, self).__init__()
        
        self.layer_1 = nn.Sequential(
            nn.Conv1d(in_channel, 2*in_channel, 1),
            nn.Dropout(0.2),
            Mish(),
            residual_conv1d(2*in_channel)
        )
        
        self.layer_2 = nn.Sequential(
            nn.Conv1d(2*in_channel, 4*in_channel, 1),
            nn.Dropout(0.2),
            Mish(),
            residual_conv1d(4*in_channel)
        )
        
        self.layer_3 = nn.Sequential(
            nn.Conv1d(4*in_channel, 8*in_channel, 1),
            nn.Dropout(0.2),
            Mish(),
            residual_conv1d(8*in_channel)
        )
       
         
        self.avgpool1d = nn.AdaptiveAvgPool1d(1)
        
        self.fc = nn.Sequential(
            nn.Linear(8*in_channel, 8*in_channel),
            nn.Dropout(0.1),
            Mish(),
            nn.Linear(8*in_channel, 16*in_channel),
            nn.Dropout(0.1),
            Mish(),
            nn.Linear(16*in_channel, 28)
        ) 

    def forward(self, x):
        #_in = x.size()[1]
        x = self.layer_1(x)
        x = self.layer_2(x)
        x = self.layer_3(x)
        #x = self.layer_4(x)
        x = self.avgpool1d(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

In [7]:
from torch.optim.optimizer import Optimizer
import math

class RAdam(Optimizer):

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0):
        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
        self.buffer = [[None, None, None] for ind in range(10)]
        super(RAdam, self).__init__(params, defaults)

    def __setstate__(self, state):
        super(RAdam, self).__setstate__(state)

    def step(self, closure=None):

        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:

            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data.float()
                if grad.is_sparse:
                    raise RuntimeError('RAdam does not support sparse gradients')

                p_data_fp32 = p.data.float()

                state = self.state[p]

                if len(state) == 0:
                    state['step'] = 0
                    state['exp_avg'] = torch.zeros_like(p_data_fp32)
                    state['exp_avg_sq'] = torch.zeros_like(p_data_fp32)
                else:
                    state['exp_avg'] = state['exp_avg'].type_as(p_data_fp32)
                    state['exp_avg_sq'] = state['exp_avg_sq'].type_as(p_data_fp32)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                beta1, beta2 = group['betas']

                exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
                exp_avg.mul_(beta1).add_(1 - beta1, grad)

                state['step'] += 1
                buffered = self.buffer[int(state['step'] % 10)]
                if state['step'] == buffered[0]:
                    N_sma, step_size = buffered[1], buffered[2]
                else:
                    buffered[0] = state['step']
                    beta2_t = beta2 ** state['step']
                    N_sma_max = 2 / (1 - beta2) - 1
                    N_sma = N_sma_max - 2 * state['step'] * beta2_t / (1 - beta2_t)
                    buffered[1] = N_sma

                    # more conservative since it's an approximated value
                    if N_sma >= 5:
                        step_size = group['lr'] * math.sqrt((1 - beta2_t) * (N_sma - 4) / (N_sma_max - 4) * (N_sma - 2) / N_sma * N_sma_max / (N_sma_max - 2)) / (1 - beta1 ** state['step'])
                    else:
                        step_size = group['lr'] / (1 - beta1 ** state['step'])
                    buffered[2] = step_size

                if group['weight_decay'] != 0:
                    p_data_fp32.add_(-group['weight_decay'] * group['lr'], p_data_fp32)

                # more conservative since it's an approximated value
                if N_sma >= 5:            
                    denom = exp_avg_sq.sqrt().add_(group['eps'])
                    p_data_fp32.addcdiv_(-step_size, exp_avg, denom)
                else:
                    p_data_fp32.add_(-step_size, exp_avg)

                p.data.copy_(p_data_fp32)

        return loss

In [8]:
def train_model(model, train_loader, test_loader):
    num_epochs = 40
    lr = 1e-4
    eta_min = 1e-3
    t_max = 10
    numclass = 5
    
    model = model.to(device)
    criterion = nn.MSELoss()
    optimizer = RAdam(params=model.parameters(), lr=lr)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=t_max, eta_min=eta_min)


    best_epoch = -1
    best_score = 10000
    early_stoppping_cnt = 0
    best_model = model

    for epoch in range(num_epochs):
        print('epoch', epoch+1)
        start_time = time.time()
        # change model to be train_mode 
        model.train()
        avg_loss = 0.
        #for x_batch, y_batch in progress_bar(train_loader, parent=mb):
        for x_batch, y_batch in tqdm(train_loader):
            optimizer.zero_grad()
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)
            
            preds = model(x_batch)
            loss = torch.sqrt(criterion(preds.squeeze(1), y_batch))
            
            
            loss.backward()
            optimizer.step()
            scheduler.step()

            avg_loss += loss.item() / len(train_loader)

            
        # change model to be validation_mode
        model.eval()
        avg_val_loss = 0.

        for i, (x_batch, y_batch) in enumerate(test_loader):
            
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)
            #print('test')
            
            preds = model(x_batch)
            loss = torch.sqrt(criterion(preds.squeeze(1), y_batch))

            avg_val_loss += loss.item() / len(test_loader)
        
        if (epoch + 1) % 1 == 0:
            elapsed = time.time() - start_time
            print(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f} time: {elapsed:.0f}s')
            
        if best_score>avg_val_loss:
            best_score = avg_val_loss
            early_stoppping_cnt=0
            best_epoch=epoch
            best_model = model
        else:
            early_stoppping_cnt+=1
        if (epoch>10) and (early_stoppping_cnt>7):
                break
    
    print(f'best_score : {best_score}    best_epoch : {best_epoch}')
    torch.save(model.state_dict(), 'net.pt')
    
    return best_model, best_score

In [9]:
original_train_df = pd.read_csv(path+'sales_train_validation.csv')
calendar_df = pd.read_csv(path+'calendar.csv')
sell_prices_df = pd.read_csv(path+'sell_prices.csv')
sample_submission_df = pd.read_csv(path+'sample_submission.csv')

In [10]:
%%time
train_df, calendar_df, calendar_data, price_data, is_sell = Preprocessing(original_train_df, calendar_df, sell_prices_df)

HBox(children=(FloatProgress(value=0.0, max=30490.0), HTML(value='')))


Wall time: 33.1 s


In [11]:
%%time
d_cols = [f'd_{i}' for i in range(1,1914)]

n = 200
train_cols = d_cols[-n:]

#'snap_CA', 'snap_TX', 'snap_WI'

state='CA'
data_ca = make_data(train_cols, state, train_df, calendar_data, price_data, is_sell)
state='TX'
data_tx = make_data(train_cols, state, train_df, calendar_data, price_data, is_sell)
state='WI'
data_wi = make_data(train_cols, state, train_df, calendar_data, price_data, is_sell)
calendar = make_calendar_data(calendar_data, train_cols)

12196
0 0 0
0 0 0
0 0


HBox(children=(FloatProgress(value=0.0, max=12196.0), HTML(value='')))


9147
0 0 0
0 0 0
0 0


HBox(children=(FloatProgress(value=0.0, max=9147.0), HTML(value='')))


9147
0 0 0
0 0 0
0 0


HBox(children=(FloatProgress(value=0.0, max=9147.0), HTML(value='')))


Wall time: 14.5 s


In [12]:
data = torch.cat(
    (data_ca, data_tx, data_wi),
    dim=0
)

del data_ca, data_tx, data_wi
gc.collect()

23

In [19]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [15]:
data.size()

torch.Size([30490, 18, 200])

## cv

In [21]:
in_size = data.size()[1]+ calendar.size()[0]
in_size

76

In [17]:
k = KFold(5, shuffle=True, random_state=2020)

In [23]:
cv_score = 0
for trn_indx, val_indx in k.split([i for i in range(data.size()[0])]):
    trn_data = data[trn_indx, :, :]
    val_data = data[val_indx, :, :]

    trn_data_set=Mydatasets(trn_data, calendar, train = True)
    trn_loader = torch.utils.data.DataLoader(trn_data_set, batch_size = 200, shuffle = True)

    val_data_set=Mydatasets(val_data, calendar, train = True)
    val_loader = torch.utils.data.DataLoader(val_data_set, batch_size = 50, shuffle = False)
    model = Conv_1d_Net(in_size)
    best_model, best_score = train_model(model, trn_loader, val_loader)
    cv_score+=best_score/5
    gc.collect()
print(cv_score)

epoch 0


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 1 - avg_train_loss: 3.0179  avg_val_loss: 2.1720 time: 32s
epoch 1


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 2 - avg_train_loss: 2.2570  avg_val_loss: 2.0354 time: 33s
epoch 2


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 3 - avg_train_loss: 2.2399  avg_val_loss: 2.0421 time: 33s
epoch 3


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 4 - avg_train_loss: 2.2460  avg_val_loss: 2.0017 time: 33s
epoch 4


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 5 - avg_train_loss: 2.2324  avg_val_loss: 1.9922 time: 33s
epoch 5


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 6 - avg_train_loss: 2.2899  avg_val_loss: 2.0048 time: 33s
epoch 6


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 7 - avg_train_loss: 2.2576  avg_val_loss: 2.0230 time: 33s
epoch 7


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 8 - avg_train_loss: 2.2202  avg_val_loss: 2.0123 time: 33s
epoch 8


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 9 - avg_train_loss: 2.2325  avg_val_loss: 2.0386 time: 33s
epoch 9


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 10 - avg_train_loss: 2.1912  avg_val_loss: 2.0003 time: 33s
epoch 10


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 11 - avg_train_loss: 2.2237  avg_val_loss: 2.0709 time: 33s
epoch 11


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 12 - avg_train_loss: 2.1929  avg_val_loss: 2.0524 time: 33s
epoch 12


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 13 - avg_train_loss: 2.2165  avg_val_loss: 2.0217 time: 33s
epoch 13


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 14 - avg_train_loss: 2.1940  avg_val_loss: 2.0543 time: 33s
best_score : 1.9921816714474407    best_epoch : 4
epoch 0


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 1 - avg_train_loss: 3.0072  avg_val_loss: 2.1291 time: 33s
epoch 1


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 2 - avg_train_loss: 2.2528  avg_val_loss: 2.1107 time: 33s
epoch 2


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 3 - avg_train_loss: 2.2150  avg_val_loss: 2.0558 time: 33s
epoch 3


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 4 - avg_train_loss: 2.2369  avg_val_loss: 2.0327 time: 33s
epoch 4


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 5 - avg_train_loss: 2.2165  avg_val_loss: 2.1219 time: 33s
epoch 5


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 6 - avg_train_loss: 2.2111  avg_val_loss: 2.0349 time: 33s
epoch 6


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 7 - avg_train_loss: 2.2007  avg_val_loss: 2.1272 time: 33s
epoch 7


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 8 - avg_train_loss: 2.2463  avg_val_loss: 2.2263 time: 33s
epoch 8


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 9 - avg_train_loss: 2.2647  avg_val_loss: 2.1362 time: 33s
epoch 9


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 10 - avg_train_loss: 2.1936  avg_val_loss: 2.0477 time: 33s
epoch 10


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 11 - avg_train_loss: 2.1921  avg_val_loss: 2.0252 time: 33s
epoch 11


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 12 - avg_train_loss: 2.2039  avg_val_loss: 2.0440 time: 33s
epoch 12


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 13 - avg_train_loss: 2.1867  avg_val_loss: 2.0034 time: 33s
epoch 13


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 14 - avg_train_loss: 2.1783  avg_val_loss: 2.0676 time: 33s
epoch 14


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 15 - avg_train_loss: 2.2269  avg_val_loss: 2.1669 time: 33s
epoch 15


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 16 - avg_train_loss: 2.2244  avg_val_loss: 1.9904 time: 33s
epoch 16


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 17 - avg_train_loss: 2.1861  avg_val_loss: 2.0684 time: 33s
epoch 17


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 18 - avg_train_loss: 2.1843  avg_val_loss: 2.1902 time: 33s
epoch 18


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 19 - avg_train_loss: 2.2297  avg_val_loss: 2.0292 time: 33s
epoch 19


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 20 - avg_train_loss: 2.1828  avg_val_loss: 1.9993 time: 33s
epoch 20


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 21 - avg_train_loss: 2.1666  avg_val_loss: 1.9984 time: 33s
epoch 21


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 22 - avg_train_loss: 2.1919  avg_val_loss: 2.0833 time: 33s
epoch 22


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 23 - avg_train_loss: 2.1659  avg_val_loss: 1.9983 time: 33s
epoch 23


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 24 - avg_train_loss: 2.1579  avg_val_loss: 2.0429 time: 33s
epoch 24


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 25 - avg_train_loss: 2.1728  avg_val_loss: 2.0918 time: 33s
best_score : 1.9903566563715696    best_epoch : 15
epoch 0


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 1 - avg_train_loss: 3.0766  avg_val_loss: 2.2236 time: 33s
epoch 1


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 2 - avg_train_loss: 2.2405  avg_val_loss: 2.1116 time: 33s
epoch 2


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 3 - avg_train_loss: 2.2527  avg_val_loss: 2.1033 time: 33s
epoch 3


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 4 - avg_train_loss: 2.1999  avg_val_loss: 2.0824 time: 33s
epoch 4


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 5 - avg_train_loss: 2.2465  avg_val_loss: 2.1406 time: 33s
epoch 5


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 6 - avg_train_loss: 2.1784  avg_val_loss: 2.1264 time: 33s
epoch 6


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 7 - avg_train_loss: 2.1999  avg_val_loss: 2.1779 time: 33s
epoch 7


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 8 - avg_train_loss: 2.2020  avg_val_loss: 2.0750 time: 33s
epoch 8


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 9 - avg_train_loss: 2.1754  avg_val_loss: 2.1650 time: 33s
epoch 9


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 10 - avg_train_loss: 2.1755  avg_val_loss: 2.1160 time: 33s
epoch 10


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 11 - avg_train_loss: 2.1886  avg_val_loss: 2.1076 time: 33s
epoch 11


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 12 - avg_train_loss: 2.1795  avg_val_loss: 2.0904 time: 34s
epoch 12


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 13 - avg_train_loss: 2.1832  avg_val_loss: 2.1654 time: 33s
epoch 13


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 14 - avg_train_loss: 2.1772  avg_val_loss: 2.2291 time: 33s
epoch 14


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 15 - avg_train_loss: 2.1971  avg_val_loss: 2.2338 time: 33s
epoch 15


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 16 - avg_train_loss: 2.1788  avg_val_loss: 2.1697 time: 33s
epoch 16


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 17 - avg_train_loss: 2.1758  avg_val_loss: 2.0973 time: 33s
best_score : 2.0750006056222756    best_epoch : 7
epoch 0


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 1 - avg_train_loss: 3.0108  avg_val_loss: 2.2175 time: 33s
epoch 1


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 2 - avg_train_loss: 2.1933  avg_val_loss: 2.2580 time: 33s
epoch 2


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 3 - avg_train_loss: 2.1635  avg_val_loss: 2.1988 time: 33s
epoch 3


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 4 - avg_train_loss: 2.1749  avg_val_loss: 2.1474 time: 33s
epoch 4


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 5 - avg_train_loss: 2.1627  avg_val_loss: 2.1966 time: 33s
epoch 5


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 6 - avg_train_loss: 2.2105  avg_val_loss: 2.1726 time: 33s
epoch 6


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 7 - avg_train_loss: 2.2198  avg_val_loss: 2.2863 time: 33s
epoch 7


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 8 - avg_train_loss: 2.1973  avg_val_loss: 2.2212 time: 33s
epoch 8


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 9 - avg_train_loss: 2.1855  avg_val_loss: 2.2612 time: 33s
epoch 9


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 10 - avg_train_loss: 2.1687  avg_val_loss: 2.2324 time: 33s
epoch 10


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 11 - avg_train_loss: 2.1477  avg_val_loss: 2.1725 time: 33s
epoch 11


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 12 - avg_train_loss: 2.1272  avg_val_loss: 2.1602 time: 33s
epoch 12


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 13 - avg_train_loss: 2.1255  avg_val_loss: 2.1913 time: 33s
best_score : 2.147375897305911    best_epoch : 3
epoch 0


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 1 - avg_train_loss: 2.9035  avg_val_loss: 2.2059 time: 33s
epoch 1


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 2 - avg_train_loss: 2.1999  avg_val_loss: 2.2140 time: 33s
epoch 2


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 3 - avg_train_loss: 2.1887  avg_val_loss: 2.1903 time: 33s
epoch 3


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 4 - avg_train_loss: 2.2251  avg_val_loss: 2.1808 time: 33s
epoch 4


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 5 - avg_train_loss: 2.2197  avg_val_loss: 2.4548 time: 33s
epoch 5


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 6 - avg_train_loss: 2.1886  avg_val_loss: 2.2277 time: 33s
epoch 6


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 7 - avg_train_loss: 2.1714  avg_val_loss: 2.1846 time: 33s
epoch 7


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 8 - avg_train_loss: 2.1873  avg_val_loss: 2.1628 time: 33s
epoch 8


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 9 - avg_train_loss: 2.1561  avg_val_loss: 2.1522 time: 33s
epoch 9


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 10 - avg_train_loss: 2.1632  avg_val_loss: 2.1708 time: 33s
epoch 10


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 11 - avg_train_loss: 2.1711  avg_val_loss: 2.4923 time: 33s
epoch 11


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 12 - avg_train_loss: 2.1703  avg_val_loss: 2.2516 time: 33s
epoch 12


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 13 - avg_train_loss: 2.1724  avg_val_loss: 2.1426 time: 33s
epoch 13


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 14 - avg_train_loss: 2.1485  avg_val_loss: 2.1298 time: 33s
epoch 14


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 15 - avg_train_loss: 2.1417  avg_val_loss: 2.1782 time: 33s
epoch 15


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 16 - avg_train_loss: 2.1497  avg_val_loss: 2.1538 time: 33s
epoch 16


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 17 - avg_train_loss: 2.1380  avg_val_loss: 2.1346 time: 33s
epoch 17


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 18 - avg_train_loss: 2.1282  avg_val_loss: 2.1577 time: 33s
epoch 18


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 19 - avg_train_loss: 2.1492  avg_val_loss: 2.1771 time: 33s
epoch 19


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 20 - avg_train_loss: 2.1579  avg_val_loss: 2.2465 time: 33s
epoch 20


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 21 - avg_train_loss: 2.1576  avg_val_loss: 2.1844 time: 33s
epoch 21


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 22 - avg_train_loss: 2.1333  avg_val_loss: 2.1098 time: 33s
epoch 22


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 23 - avg_train_loss: 2.1595  avg_val_loss: 2.1649 time: 34s
epoch 23


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 24 - avg_train_loss: 2.1236  avg_val_loss: 2.1211 time: 34s
epoch 24


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 25 - avg_train_loss: 2.1202  avg_val_loss: 2.1299 time: 33s
epoch 25


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 26 - avg_train_loss: 2.1122  avg_val_loss: 2.2182 time: 33s
epoch 26


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 27 - avg_train_loss: 2.1102  avg_val_loss: 2.1769 time: 33s
epoch 27


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 28 - avg_train_loss: 2.0991  avg_val_loss: 2.1555 time: 33s
epoch 28


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 29 - avg_train_loss: 2.0925  avg_val_loss: 2.1395 time: 33s
epoch 29


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 30 - avg_train_loss: 2.0997  avg_val_loss: 2.1579 time: 33s
epoch 30


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 31 - avg_train_loss: 2.0930  avg_val_loss: 2.1061 time: 33s
epoch 31


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 32 - avg_train_loss: 2.0949  avg_val_loss: 2.2221 time: 33s
epoch 32


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 33 - avg_train_loss: 2.0915  avg_val_loss: 2.2690 time: 33s
epoch 33


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 34 - avg_train_loss: 2.0719  avg_val_loss: 2.1575 time: 33s
epoch 34


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 35 - avg_train_loss: 2.0819  avg_val_loss: 2.1289 time: 33s
epoch 35


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 36 - avg_train_loss: 2.0787  avg_val_loss: 2.2089 time: 33s
epoch 36


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 37 - avg_train_loss: 2.0854  avg_val_loss: 2.3088 time: 33s
epoch 37


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 38 - avg_train_loss: 2.0836  avg_val_loss: 2.1567 time: 33s
epoch 38


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 39 - avg_train_loss: 2.2479  avg_val_loss: 2.2978 time: 33s
epoch 39


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 40 - avg_train_loss: 2.1200  avg_val_loss: 2.1138 time: 33s
best_score : 2.1060540627260678    best_epoch : 30
2.062193778694653


In [24]:
%%time
d_cols = [f'd_{i}' for i in range(1,1914)]
train_cols = d_cols[-400:-200]

#'snap_CA', 'snap_TX', 'snap_WI'

state='CA'
data_ca = make_data(train_cols, state, train_df, calendar_data, price_data, is_sell)
state='TX'
data_tx = make_data(train_cols, state, train_df, calendar_data, price_data, is_sell)
state='WI'
data_wi = make_data(train_cols, state, train_df, calendar_data, price_data, is_sell)
calendar = make_calendar_data(calendar_data, train_cols)

data = torch.cat(
    (data_ca, data_tx, data_wi),
    dim=0
)

del data_ca, data_tx, data_wi
gc.collect()

cv_score = 0
for trn_indx, val_indx in k.split([i for i in range(data.size()[0])]):
    trn_data = data[trn_indx, :, :]
    val_data = data[val_indx, :, :]

    trn_data_set=Mydatasets(trn_data, calendar, train = True)
    trn_loader = torch.utils.data.DataLoader(trn_data_set, batch_size = 200, shuffle = True)

    val_data_set=Mydatasets(val_data, calendar, train = True)
    val_loader = torch.utils.data.DataLoader(val_data_set, batch_size = 50, shuffle = False)
    model = Conv_1d_Net(in_size)
    best_model, best_score = train_model(model, trn_loader, val_loader)
    cv_score+=best_score/5
    gc.collect()
print(cv_score)

12196
0 0 0
0 0 0
0 0


HBox(children=(FloatProgress(value=0.0, max=12196.0), HTML(value='')))


9147
0 0 0
0 0 0
0 0


HBox(children=(FloatProgress(value=0.0, max=9147.0), HTML(value='')))


9147
0 0 0
0 0 0
0 0


HBox(children=(FloatProgress(value=0.0, max=9147.0), HTML(value='')))


epoch 0


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 1 - avg_train_loss: 3.1295  avg_val_loss: 2.1259 time: 33s
epoch 1


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 2 - avg_train_loss: 2.2703  avg_val_loss: 2.0890 time: 33s
epoch 2


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 3 - avg_train_loss: 2.2551  avg_val_loss: 2.2817 time: 33s
epoch 3


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 4 - avg_train_loss: 2.3325  avg_val_loss: 2.0413 time: 33s
epoch 4


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 5 - avg_train_loss: 2.2818  avg_val_loss: 2.0559 time: 33s
epoch 5


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 6 - avg_train_loss: 2.2944  avg_val_loss: 2.1165 time: 33s
epoch 6


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 7 - avg_train_loss: 2.2684  avg_val_loss: 2.0508 time: 33s
epoch 7


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 8 - avg_train_loss: 2.2897  avg_val_loss: 2.0686 time: 33s
epoch 8


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 9 - avg_train_loss: 2.2403  avg_val_loss: 2.0379 time: 33s
epoch 9


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 10 - avg_train_loss: 2.2696  avg_val_loss: 2.0474 time: 33s
epoch 10


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 11 - avg_train_loss: 2.2028  avg_val_loss: 2.0879 time: 33s
epoch 11


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 12 - avg_train_loss: 2.2474  avg_val_loss: 2.0569 time: 33s
epoch 12


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 13 - avg_train_loss: 2.2235  avg_val_loss: 2.0625 time: 33s
epoch 13


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 14 - avg_train_loss: 2.2401  avg_val_loss: 2.0453 time: 33s
epoch 14


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 15 - avg_train_loss: 2.2500  avg_val_loss: 2.0474 time: 33s
epoch 15


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 16 - avg_train_loss: 2.1993  avg_val_loss: 2.1218 time: 33s
epoch 16


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 17 - avg_train_loss: 2.2358  avg_val_loss: 2.0557 time: 33s
epoch 17


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 18 - avg_train_loss: 2.2150  avg_val_loss: 2.2183 time: 33s
best_score : 2.0378685349323704    best_epoch : 8
epoch 0


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 1 - avg_train_loss: 3.0050  avg_val_loss: 2.2497 time: 33s
epoch 1


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 2 - avg_train_loss: 2.2888  avg_val_loss: 2.1598 time: 33s
epoch 2


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 3 - avg_train_loss: 2.2863  avg_val_loss: 2.1296 time: 33s
epoch 3


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 4 - avg_train_loss: 2.2623  avg_val_loss: 2.5893 time: 33s
epoch 4


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 5 - avg_train_loss: 2.2957  avg_val_loss: 2.3421 time: 33s
epoch 5


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 6 - avg_train_loss: 2.2360  avg_val_loss: 2.4133 time: 33s
epoch 6


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 7 - avg_train_loss: 2.2770  avg_val_loss: 2.1481 time: 33s
epoch 7


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 8 - avg_train_loss: 2.2360  avg_val_loss: 2.1775 time: 33s
epoch 8


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 9 - avg_train_loss: 2.2204  avg_val_loss: 2.2372 time: 33s
epoch 9


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 10 - avg_train_loss: 2.2050  avg_val_loss: 2.1518 time: 33s
epoch 10


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 11 - avg_train_loss: 2.1969  avg_val_loss: 2.1465 time: 33s
epoch 11


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 12 - avg_train_loss: 2.1890  avg_val_loss: 2.1598 time: 33s
best_score : 2.1296333289537275    best_epoch : 2
epoch 0


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 1 - avg_train_loss: 3.0871  avg_val_loss: 2.1136 time: 33s
epoch 1


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 2 - avg_train_loss: 2.2869  avg_val_loss: 2.1081 time: 33s
epoch 2


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 3 - avg_train_loss: 2.2463  avg_val_loss: 2.1254 time: 33s
epoch 3


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 4 - avg_train_loss: 2.2674  avg_val_loss: 2.0872 time: 33s
epoch 4


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 5 - avg_train_loss: 2.2276  avg_val_loss: 2.0840 time: 33s
epoch 5


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 6 - avg_train_loss: 2.3416  avg_val_loss: 2.0740 time: 33s
epoch 6


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 7 - avg_train_loss: 2.2273  avg_val_loss: 2.1252 time: 33s
epoch 7


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 8 - avg_train_loss: 2.2224  avg_val_loss: 2.0668 time: 33s
epoch 8


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 9 - avg_train_loss: 2.2459  avg_val_loss: 2.1361 time: 34s
epoch 9


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 10 - avg_train_loss: 2.2676  avg_val_loss: 2.0936 time: 34s
epoch 10


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 11 - avg_train_loss: 2.2299  avg_val_loss: 2.1028 time: 34s
epoch 11


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 12 - avg_train_loss: 2.2367  avg_val_loss: 2.0843 time: 34s
epoch 12


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 13 - avg_train_loss: 2.2350  avg_val_loss: 2.0740 time: 34s
epoch 13


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 14 - avg_train_loss: 2.2119  avg_val_loss: 2.1638 time: 33s
epoch 14


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 15 - avg_train_loss: 2.1868  avg_val_loss: 2.1485 time: 33s
epoch 15


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 16 - avg_train_loss: 2.1752  avg_val_loss: 2.0598 time: 33s
epoch 16


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 17 - avg_train_loss: 2.1859  avg_val_loss: 2.1382 time: 33s
epoch 17


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 18 - avg_train_loss: 2.2118  avg_val_loss: 2.0867 time: 34s
epoch 18


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 19 - avg_train_loss: 2.1771  avg_val_loss: 2.0856 time: 33s
epoch 19


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 20 - avg_train_loss: 2.2079  avg_val_loss: 2.0870 time: 34s
epoch 20


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 21 - avg_train_loss: 2.2086  avg_val_loss: 2.1276 time: 33s
epoch 21


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 22 - avg_train_loss: 2.2064  avg_val_loss: 2.0877 time: 33s
epoch 22


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 23 - avg_train_loss: 2.2251  avg_val_loss: 2.0866 time: 33s
epoch 23


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 24 - avg_train_loss: 2.2115  avg_val_loss: 2.1768 time: 33s
epoch 24


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 25 - avg_train_loss: 2.2048  avg_val_loss: 2.1065 time: 33s
best_score : 2.059828351755611    best_epoch : 15
epoch 0


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 1 - avg_train_loss: 2.9849  avg_val_loss: 2.2240 time: 33s
epoch 1


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 2 - avg_train_loss: 2.2303  avg_val_loss: 2.1223 time: 33s
epoch 2


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 3 - avg_train_loss: 2.2448  avg_val_loss: 2.1602 time: 33s
epoch 3


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 4 - avg_train_loss: 2.2553  avg_val_loss: 2.3056 time: 33s
epoch 4


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 5 - avg_train_loss: 2.2183  avg_val_loss: 2.2247 time: 33s
epoch 5


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 6 - avg_train_loss: 2.2424  avg_val_loss: 2.1327 time: 33s
epoch 6


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 7 - avg_train_loss: 2.2133  avg_val_loss: 2.2971 time: 33s
epoch 7


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 8 - avg_train_loss: 2.2409  avg_val_loss: 2.1901 time: 33s
epoch 8


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 9 - avg_train_loss: 2.2771  avg_val_loss: 2.1272 time: 33s
epoch 9


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 10 - avg_train_loss: 2.2195  avg_val_loss: 2.3170 time: 33s
epoch 10


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 11 - avg_train_loss: 2.2654  avg_val_loss: 2.1328 time: 33s
epoch 11


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 12 - avg_train_loss: 2.2220  avg_val_loss: 2.1309 time: 33s
best_score : 2.1223174245631102    best_epoch : 1
epoch 0


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 1 - avg_train_loss: 2.9663  avg_val_loss: 2.3500 time: 33s
epoch 1


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 2 - avg_train_loss: 2.1966  avg_val_loss: 2.3374 time: 33s
epoch 2


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 3 - avg_train_loss: 2.1989  avg_val_loss: 2.2442 time: 34s
epoch 3


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 4 - avg_train_loss: 2.1831  avg_val_loss: 2.2958 time: 34s
epoch 4


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 5 - avg_train_loss: 2.2197  avg_val_loss: 2.2848 time: 33s
epoch 5


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 6 - avg_train_loss: 2.1574  avg_val_loss: 2.2657 time: 33s
epoch 6


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 7 - avg_train_loss: 2.1622  avg_val_loss: 2.2889 time: 33s
epoch 7


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 8 - avg_train_loss: 2.1724  avg_val_loss: 2.2741 time: 33s
epoch 8


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 9 - avg_train_loss: 2.1832  avg_val_loss: 2.3078 time: 34s
epoch 9


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 10 - avg_train_loss: 2.1807  avg_val_loss: 2.3318 time: 33s
epoch 10


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 11 - avg_train_loss: 2.1373  avg_val_loss: 2.2650 time: 33s
epoch 11


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 12 - avg_train_loss: 2.1481  avg_val_loss: 2.2835 time: 33s
best_score : 2.2442482651257127    best_epoch : 2
2.1187791810661065
Wall time: 43min 45s


In [25]:
%%time
d_cols = [f'd_{i}' for i in range(1,1914)]
train_cols = d_cols[-600:-400]

#'snap_CA', 'snap_TX', 'snap_WI'

state='CA'
data_ca = make_data(train_cols, state, train_df, calendar_data, price_data, is_sell)
state='TX'
data_tx = make_data(train_cols, state, train_df, calendar_data, price_data, is_sell)
state='WI'
data_wi = make_data(train_cols, state, train_df, calendar_data, price_data, is_sell)
calendar = make_calendar_data(calendar_data, train_cols)

data = torch.cat(
    (data_ca, data_tx, data_wi),
    dim=0
)

del data_ca, data_tx, data_wi
gc.collect()

cv_score = 0
for trn_indx, val_indx in k.split([i for i in range(data.size()[0])]):
    trn_data = data[trn_indx, :, :]
    val_data = data[val_indx, :, :]

    trn_data_set=Mydatasets(trn_data, calendar, train = True)
    trn_loader = torch.utils.data.DataLoader(trn_data_set, batch_size = 200, shuffle = True)

    val_data_set=Mydatasets(val_data, calendar, train = True)
    val_loader = torch.utils.data.DataLoader(val_data_set, batch_size = 50, shuffle = False)
    model = Conv_1d_Net(in_size)
    best_model, best_score = train_model(model, trn_loader, val_loader)
    cv_score+=best_score/5
    gc.collect()
print(cv_score)

12196
0 0 0
0 0 0
0 0


HBox(children=(FloatProgress(value=0.0, max=12196.0), HTML(value='')))


9147
0 0 0
0 0 0
0 0


HBox(children=(FloatProgress(value=0.0, max=9147.0), HTML(value='')))


9147
0 0 0
0 0 0
0 0


HBox(children=(FloatProgress(value=0.0, max=9147.0), HTML(value='')))


epoch 0


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 1 - avg_train_loss: 2.8548  avg_val_loss: 2.0650 time: 33s
epoch 1


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 2 - avg_train_loss: 2.3133  avg_val_loss: 2.0571 time: 33s
epoch 2


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 3 - avg_train_loss: 2.2865  avg_val_loss: 1.9935 time: 33s
epoch 3


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 4 - avg_train_loss: 2.3637  avg_val_loss: 1.9923 time: 33s
epoch 4


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 5 - avg_train_loss: 2.2917  avg_val_loss: 2.0793 time: 33s
epoch 5


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 6 - avg_train_loss: 2.2795  avg_val_loss: 2.0592 time: 33s
epoch 6


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 7 - avg_train_loss: 2.2859  avg_val_loss: 1.9952 time: 33s
epoch 7


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 8 - avg_train_loss: 2.2762  avg_val_loss: 2.0238 time: 33s
epoch 8


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 9 - avg_train_loss: 2.2612  avg_val_loss: 2.0131 time: 33s
epoch 9


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 10 - avg_train_loss: 2.2632  avg_val_loss: 1.9870 time: 33s
epoch 10


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 11 - avg_train_loss: 2.2568  avg_val_loss: 1.9966 time: 34s
epoch 11


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 12 - avg_train_loss: 2.2781  avg_val_loss: 2.0036 time: 33s
epoch 12


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 13 - avg_train_loss: 2.2845  avg_val_loss: 1.9796 time: 34s
epoch 13


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 14 - avg_train_loss: 2.2543  avg_val_loss: 2.0013 time: 33s
epoch 14


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 15 - avg_train_loss: 2.2591  avg_val_loss: 1.9920 time: 33s
epoch 15


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 16 - avg_train_loss: 2.2856  avg_val_loss: 1.9629 time: 33s
epoch 16


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 17 - avg_train_loss: 2.2811  avg_val_loss: 2.0463 time: 33s
epoch 17


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 18 - avg_train_loss: 2.2732  avg_val_loss: 2.0207 time: 33s
epoch 18


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))


Epoch 19 - avg_train_loss: 2.2432  avg_val_loss: 1.9705 time: 34s
epoch 19


HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))

KeyboardInterrupt: 

In [26]:
%%time
d_cols = [f'd_{i}' for i in range(1,1914)]
train_cols = d_cols[-800:-600]

#'snap_CA', 'snap_TX', 'snap_WI'

state='CA'
data_ca = make_data(train_cols, state, train_df, calendar_data, price_data, is_sell)
state='TX'
data_tx = make_data(train_cols, state, train_df, calendar_data, price_data, is_sell)
state='WI'
data_wi = make_data(train_cols, state, train_df, calendar_data, price_data, is_sell)
calendar = make_calendar_data(calendar_data, train_cols)

data = torch.cat(
    (data_ca, data_tx, data_wi),
    dim=0
)

del data_ca, data_tx, data_wi
gc.collect()

cv_score = 0
for trn_indx, val_indx in k.split([i for i in range(data.size()[0])]):
    trn_data = data[trn_indx, :, :]
    val_data = data[val_indx, :, :]

    trn_data_set=Mydatasets(trn_data, calendar, train = True)
    trn_loader = torch.utils.data.DataLoader(trn_data_set, batch_size = 200, shuffle = True)

    val_data_set=Mydatasets(val_data, calendar, train = True)
    val_loader = torch.utils.data.DataLoader(val_data_set, batch_size = 50, shuffle = Falselse)
    model = Conv_1d_Net(in_size)
    best_model, best_score = train_model(model, trn_loader, val_loader)
    cv_score+=best_score/5
    gc.collect()
print(cv_score)

12196
0 0 0
0 0 0
0 0


HBox(children=(FloatProgress(value=0.0, max=12196.0), HTML(value='')))


9147
0 0 0
0 0 0
0 0


HBox(children=(FloatProgress(value=0.0, max=9147.0), HTML(value='')))




KeyboardInterrupt: 