In [5]:
import warnings
warnings.filterwarnings("ignore")


import numpy as np
import pandas as pd
import datetime, random, math
from catboost import CatBoostClassifier
import lightgbm as lgb
from time import time
from tqdm import tqdm
from collections import Counter
from scipy import stats
import gc, pickle
import ast
from typing import Union

import torch
from torch import nn
from torch import optim

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import StratifiedKFold, KFold, RepeatedKFold, GroupKFold, GridSearchCV, train_test_split, TimeSeriesSplit
from sklearn import metrics
from sklearn.metrics import classification_report, confusion_matrix, mean_squared_error, log_loss
from sklearn.linear_model import Ridge,Lasso, BayesianRidge
from sklearn.svm import LinearSVR
from sklearn.preprocessing import minmax_scale
from sklearn.cluster import KMeans
%matplotlib inline

In [2]:
def create_is_sell_data(sell_prices_df, calendar_df, train_df):
    sell_prices_df['id'] = sell_prices_df['item_id'].astype('str')+'_'+sell_prices_df['store_id']+'_evaluation'
    sell_prices_data = sell_prices_df[sell_prices_df.wm_yr_wk.isin(calendar_df.wm_yr_wk.unique())]
    sell_prices_data.reset_index(drop=True, inplace=True)
    tmp = sell_prices_data.groupby(['id'])[['wm_yr_wk', 'sell_price']].apply(
        lambda x: x.set_index('wm_yr_wk')['sell_price'].to_dict()
    ).to_dict()
    d = calendar_df.d
    wm_yr_wk = calendar_df.wm_yr_wk
    price_data = {}
    for col in tqdm(train_df.id.unique()):
        price_data[col] = wm_yr_wk.map(tmp[col])
    price_data = pd.DataFrame(price_data)
    price_data.index = d
    is_sell = price_data.notnull().astype(float).T
    price_data = price_data.fillna(0).T
    
    is_sell = pd.concat([
        train_df[['id', 'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id']], is_sell
    ], axis=1)
    price_data = pd.concat([
        train_df[['id', 'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id']], price_data
    ], axis=1)
    
    return price_data, is_sell

def set_index(df, name):
    d = {}
    for col, value in df.iloc[0,:].items():
        try:
            if '_evaluation' in value:
                d[col] = 'id'
            if 'd_' in value:
                d[col] = 'd'
        except:
            if type(value)!=str:
                d[col]=name
    return d

def dcol2int(col):
    if col[:2]=='d_':
        return int(col.replace('d_', ''))
    else:
        return col
    
def str_category_2_int(data):
    categories = [c for c in data.columns if data[c].dtype==object]
    for c in categories:
        if c=='id' or c=='d':
            pass
        else:
            data[c] = pd.factorize(data[c])[0]
            data[c] = data[c].replace(-1, np.nan)
    return data

def select_near_event(x, event_name):
    z = ''
    for y in x:
        if y in event_name:
            z+=y+'_'
    if len(z)==0:
        return np.nan
    else:
        return z
    
def sort_d_cols(d_cols):
    d_cols = [int(d.replace('d_','')) for d in d_cols]
    d_cols = sorted(d_cols)
    d_cols = [f'd_{d}' for d in d_cols]
    return d_cols

In [3]:
def preprocessing(path, d_cols, train_d_cols):
    train_df = pd.read_csv(path+'sales_train_evaluation.csv')
    calendar_df = pd.read_csv(path+'calendar.csv')
    sell_prices_df = pd.read_csv(path+'sell_prices.csv')
    sample_submission_df = pd.read_csv(path+'sample_submission.csv')
    
    train_df.index = train_df.id
    calendar_df['date']=pd.to_datetime(calendar_df.date)
    calendar_df.index = calendar_df.d
    price_data, is_sell = create_is_sell_data(sell_prices_df, calendar_df, train_df)
    
    str_cols = [ col for col in train_df.columns if 'id' in str(col)]
    new_columns = str_cols+d_cols
    train_df = train_df.reindex(columns=new_columns)
    
    train_df = pd.concat([
        train_df[['id', 'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id']],
        train_df.loc[train_df.index,d_cols]*price_data.loc[train_df.index,d_cols]
    ], axis=1)
    
    
    df = train_df.loc[:,d_cols].T.astype(float)
    a = df.loc[d_cols[28:-140]].rolling(28, min_periods=1).sum().replace(0,np.nan)+df.loc[d_cols[28:-140]][::-1].rolling(28, min_periods=1).sum()[::-1].replace(0,np.nan)
    a[a.notnull()]=0
    df.loc[d_cols[28:-140]] += a
    df = df.loc[d_cols,:].T.astype(float)
    del a;gc.collect()
    
    
    data = train_df[train_d_cols].stack(dropna=False).reset_index()
    data = data.rename(columns=set_index(data, 'TARGET'))
    data.reset_index(drop=True, inplace=True)
    
    is_sell_data = is_sell[train_d_cols].stack(dropna=False).reset_index()
    is_sell_data = is_sell_data.rename(columns=set_index(is_sell_data, 'is_sell'))
    data = pd.merge(data, is_sell_data, on=['id', 'd'], how='left')
    for groups in [['dept_id', 'store_id'], ['cat_id', 'store_id']]:
        _id = '_'.join(groups)
        is_sell_data = is_sell.groupby(groups)[train_d_cols].transform('sum').stack(dropna=False).reset_index()
        is_sell_data = is_sell_data.rename(columns=set_index(is_sell_data, f'is_sell_cnt_{_id}'))
        data = pd.merge(data, is_sell_data, on=['id', 'd'], how='left')
    del is_sell, is_sell_data;gc.collect()
    
    data = reduce_mem_usage(data)
    
    for key, value in train_df[['dept_id', 'cat_id', 'state_id', 'store_id']].to_dict().items():
        data[key] = data.id.map(value)
    
    #snap_data
    snap_data = calendar_df[['snap_CA', 'snap_WI', 'snap_TX', 'd']]
    snap_data.set_index('d', inplace=True)
    data[f'snap']=0
    for key, value in snap_data.to_dict().items():
        k = key.replace('snap_', '')
        data.loc[data.state_id==k,'snap'] = data.loc[data.state_id==k, 'd'].map(value).fillna(0)
    
    #dept_id_price
    dept_id_price = price_data[train_d_cols]/price_data.groupby(['dept_id', 'store_id'])[train_d_cols].transform('mean')
    dept_id_price = dept_id_price.T.astype(float)
    dept_id_price = dept_id_price.replace(0,np.nan)
    
    #cat_id_price
    cat_id_price = price_data[train_d_cols]/price_data.groupby(['cat_id', 'store_id'])[train_d_cols].transform('mean')
    cat_id_price = cat_id_price.T.astype(float)
    cat_id_price = cat_id_price.replace(0,np.nan)
    
    #price_data
    price_data = price_data[train_d_cols].T.astype(float)
    price_data.replace(0,np.nan, inplace=True)
    
    dept_id_price = dept_id_price.stack(dropna=False).reset_index()
    cat_id_price = cat_id_price.stack(dropna=False).reset_index()
    price_data = price_data.stack(dropna=False).reset_index()
    
    dept_id_price.rename(columns=set_index(dept_id_price, 'dept_id_price'), inplace=True)
    cat_id_price.rename(columns=set_index(cat_id_price, 'cat_id_price'), inplace=True)
    price_data.rename(columns=set_index(price_data, 'price'), inplace=True)

    data = pd.merge(data, dept_id_price, on=['d', 'id'], how='left')
    data = pd.merge(data, cat_id_price, on=['d', 'id'], how='left')
    data = pd.merge(data, price_data, on=['d', 'id'], how='left')
    
    event_name = ['SuperBowl', 'ValentinesDay', 'PresidentsDay', 'LentStart', 'LentWeek2', 'StPatricksDay', 'Purim End', 
              'OrthodoxEaster', 'Pesach End', 'Cinco De Mayo', "Mother's day", 'MemorialDay', 'NBAFinalsStart', 'NBAFinalsEnd',
              "Father's day", 'IndependenceDay', 'Ramadan starts', 'Eid al-Fitr', 'LaborDay', 'ColumbusDay', 'Halloween', 
              'EidAlAdha', 'VeteransDay', 'Thanksgiving', 'Christmas', 'Chanukah End', 'NewYear', 'OrthodoxChristmas', 
              'MartinLutherKingDay', 'Easter']
    event_type = ['Sporting', 'Cultural', 'National', 'Religious']
    event_names = {'event_name_1':event_name, 'event_type_1':event_type}
    for event, event_name in event_names.items():
        for w in [4]:
            calendar_df[f'new_{event}_{w}']=''
            for i in range(-1,-(w+1),-1):
                calendar_df[f'new_{event}_{w}'] += calendar_df[event].shift(i).astype(str)+'|'
            calendar_df[f'new_{event}_{w}'] = calendar_df[f'new_{event}_{w}'].apply(lambda x: x.split('|'))
            calendar_df[f'new_{event}_{w}'] = calendar_df[f'new_{event}_{w}'].apply(lambda x: select_near_event(x, event_name))

    #calendar_dict
    cols = ['new_event_name_1_4', 'new_event_type_1_4', 'wday', 'month', 'year', 'event_name_1','event_type_1']
    for key, value in calendar_df[cols].to_dict().items():
        data[key] = data.d.map(value)
    for shift in [-1,1]:
        data[f'snap_{shift}'] = data.groupby(['id'])['snap'].shift(shift)
    
    return data

In [4]:
def reduce_mem_usage(df, verbose=True):
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    start_mem = df.memory_usage().sum() / 1024**2    
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type in numerics:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                       df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)  
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)    
    end_mem = df.memory_usage().sum() / 1024**2
    if verbose: print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, 100 * (start_mem - end_mem) / start_mem))
    return df

In [282]:
class Mydatasets(torch.utils.data.Dataset):
    def __init__(self, data, price, price_3, price_7, price_14, past_price_3, past_price_7, past_price_10, calendar, train = True):
        self.data = data
        self.price = price
        
        self.price_3 = price_3
        self.price_7 = price_7
        self.price_14 = price_14
        
        self.past_price_3 = past_price_3
        self.past_price_7 = past_price_7
        self.past_price_10 = past_price_10
        
        
        self.calendar = calendar
        self.datanum = len(data)
        self.train = train

    def __len__(self):
        return self.datanum

    def __getitem__(self, idx):
        _data = self.data[[idx],:-28]
        _price = self.price[[idx],:-28]
        
        _price_3 = self.price_3[[idx],:-28]
        _price_7 = self.price_7[[idx],:-28]
        _price_14 = self.price_14[[idx],:-28]
        
        _past_price_3 = self.past_price_3[[idx],:-28]
        _past_price_7 = self.past_price_7[[idx],:-28]
        _past_price_10 = self.past_price_10[[idx],:-28]
        
        
        x = torch.cat((_data, self.calendar, _price, _price_3, _price_7, _price_14, _past_price_3, _past_price_7, _past_price_10), dim=0)
        if self.train:
            label = self.data[idx, -28:]
            return x, label
        else:
            return x

In [285]:
data_set=Mydatasets(data, price, price_3, price_7, price_14, past_price_3, past_price_7, past_price_10, calendar, train = True)
trainloader = torch.utils.data.DataLoader(data_set, batch_size = 100, shuffle = True)
a = 0
for x, y in tqdm(trainloader):
    if a<4:
        print(x.size(), y.size())
    a+=1

HBox(children=(IntProgress(value=0, max=122), HTML(value='')))

torch.Size([100, 51, 172]) torch.Size([100, 28])
torch.Size([100, 51, 172]) torch.Size([100, 28])
torch.Size([100, 51, 172]) torch.Size([100, 28])
torch.Size([100, 51, 172]) torch.Size([100, 28])



In [286]:
x.size()

torch.Size([57, 51, 172])

In [44]:
def mish(input):
    return input * torch.tanh(nn.functional.softplus(input))

class Mish(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, input):
        return mish(input)

In [62]:
em = nn.Embedding(10,10, padding_idx=0)
a = torch.randint(0,10, (10,10))
a = em(a)
a.size()

torch.Size([10, 10, 10])

In [50]:
lstm = nn.GRU(input_size=10,hidden_size=30,num_layers=2,batch_first=True, bidirectional=True)

In [61]:
b = lstm(a)[0]
b.flatten(1).size()

torch.Size([10, 600])

torch.Size([10, 600])

In [None]:
class MyNet(nn.Module):

    def __init__(self, in_channel):
        super(Conv_1d_Net, self).__init__()
        
        self.conv_layer = nn.Sequential(
            nn.Conv1d(1, 5, 1),
            Mish(),
            nn.Conv1d(5, 10, 1),
            Mish(),
            nn.Conv1d(10, 20, 1)
        )
        
        self.event_lstm()
        
       
        self.layer_4 = nn.Sequential(
            nn.Conv1d(4*in_channel, 8*in_channel, 1),
            nn.Dropout(0.2),
            Mish(),
            residual_conv1d(8*in_channel)
        )
       
         
        self.avgpool1d = nn.AdaptiveAvgPool1d(1)
        
        self.fc = nn.Sequential(
            nn.Linear(8*in_channel, 16*in_channel),
            nn.Dropout(0.1),
            Mish(),
            nn.Linear(16*in_channel, 32*in_channel),
            nn.Dropout(0.1),
            Mish(),
            nn.Linear(32*in_channel, 28),
            
            
            nn.ReLU()
        ) 

    def forward(self, x):
        #_in = x.size()[1]
        x = self.layer_1(x)
        x = self.layer_2(x)
        x = self.layer_3(x)
        x = self.layer_4(x)
        x = self.avgpool1d(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

In [289]:
class Conv_1d_Net(nn.Module):

    def __init__(self, in_channel):
        super(Conv_1d_Net, self).__init__()
        
        self.layer_1 = nn.Sequential(
            nn.Conv1d(in_channel, 2*in_channel, 1),
            nn.Dropout(0.2),
            Mish(),
            residual_conv1d(2*in_channel)
        )
        
        self.layer_2 = nn.Sequential(
            nn.Conv1d(2*in_channel, 4*in_channel, 1),
            nn.Dropout(0.2),
            Mish(),
            residual_conv1d(4*in_channel)
        )
        
        self.layer_3 = nn.Sequential(
            nn.Conv1d(4*in_channel, 4*in_channel, 1),
            nn.Dropout(0.2),
            Mish(),
            residual_conv1d(4*in_channel)
        )
       
        self.layer_4 = nn.Sequential(
            nn.Conv1d(4*in_channel, 8*in_channel, 1),
            nn.Dropout(0.2),
            Mish(),
            residual_conv1d(8*in_channel)
        )
       
         
        self.avgpool1d = nn.AdaptiveAvgPool1d(1)
        
        self.fc = nn.Sequential(
            nn.Linear(8*in_channel, 16*in_channel),
            nn.Dropout(0.1),
            Mish(),
            nn.Linear(16*in_channel, 32*in_channel),
            nn.Dropout(0.1),
            Mish(),
            nn.Linear(32*in_channel, 28),
            
            
            nn.ReLU()
        ) 

    def forward(self, x):
        #_in = x.size()[1]
        x = self.layer_1(x)
        x = self.layer_2(x)
        x = self.layer_3(x)
        x = self.layer_4(x)
        x = self.avgpool1d(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

In [290]:
from torch.optim.optimizer import Optimizer
import math

class RAdam(Optimizer):

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0):
        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
        self.buffer = [[None, None, None] for ind in range(10)]
        super(RAdam, self).__init__(params, defaults)

    def __setstate__(self, state):
        super(RAdam, self).__setstate__(state)

    def step(self, closure=None):

        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:

            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data.float()
                if grad.is_sparse:
                    raise RuntimeError('RAdam does not support sparse gradients')

                p_data_fp32 = p.data.float()

                state = self.state[p]

                if len(state) == 0:
                    state['step'] = 0
                    state['exp_avg'] = torch.zeros_like(p_data_fp32)
                    state['exp_avg_sq'] = torch.zeros_like(p_data_fp32)
                else:
                    state['exp_avg'] = state['exp_avg'].type_as(p_data_fp32)
                    state['exp_avg_sq'] = state['exp_avg_sq'].type_as(p_data_fp32)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                beta1, beta2 = group['betas']

                exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
                exp_avg.mul_(beta1).add_(1 - beta1, grad)

                state['step'] += 1
                buffered = self.buffer[int(state['step'] % 10)]
                if state['step'] == buffered[0]:
                    N_sma, step_size = buffered[1], buffered[2]
                else:
                    buffered[0] = state['step']
                    beta2_t = beta2 ** state['step']
                    N_sma_max = 2 / (1 - beta2) - 1
                    N_sma = N_sma_max - 2 * state['step'] * beta2_t / (1 - beta2_t)
                    buffered[1] = N_sma

                    # more conservative since it's an approximated value
                    if N_sma >= 5:
                        step_size = group['lr'] * math.sqrt((1 - beta2_t) * (N_sma - 4) / (N_sma_max - 4) * (N_sma - 2) / N_sma * N_sma_max / (N_sma_max - 2)) / (1 - beta1 ** state['step'])
                    else:
                        step_size = group['lr'] / (1 - beta1 ** state['step'])
                    buffered[2] = step_size

                if group['weight_decay'] != 0:
                    p_data_fp32.add_(-group['weight_decay'] * group['lr'], p_data_fp32)

                # more conservative since it's an approximated value
                if N_sma >= 5:            
                    denom = exp_avg_sq.sqrt().add_(group['eps'])
                    p_data_fp32.addcdiv_(-step_size, exp_avg, denom)
                else:
                    p_data_fp32.add_(-step_size, exp_avg)

                p.data.copy_(p_data_fp32)

        return loss

In [291]:
def train_model(model, train_loader, test_loader):
    num_epochs = 40
    lr = 1e-4
    eta_min = 1e-3
    t_max = 10
    numclass = 5
    
    model = model.to(device)
    criterion = nn.MSELoss()
    optimizer = RAdam(params=model.parameters(), lr=lr)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=t_max, eta_min=eta_min)


    best_epoch = -1

    for epoch in range(num_epochs):
        print('epoch', epoch)
        start_time = time.time()
        # change model to be train_mode 
        model.train()
        avg_loss = 0.
        #for x_batch, y_batch in progress_bar(train_loader, parent=mb):
        for x_batch, y_batch in tqdm(train_loader):
            optimizer.zero_grad()
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)
            
            preds = model(x_batch)
            loss = torch.sqrt(criterion(preds.squeeze(1), y_batch))
            
            
            loss.backward()
            optimizer.step()
            scheduler.step()

            avg_loss += loss.item() / len(train_loader)

            
        # change model to be validation_mode
        model.eval()
        avg_val_loss = 0.

        for i, (x_batch, y_batch) in enumerate(test_loader):
            
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)
            #print('test')
            
            preds = model(x_batch)
            loss = torch.sqrt(criterion(preds.squeeze(1), y_batch))

            avg_val_loss += loss.item() / len(test_loader)

        if (epoch + 1) % 1 == 0:
            elapsed = time.time() - start_time
            print(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f} time: {elapsed:.0f}s')
    
    
    torch.save(model.state_dict(), 'net.pt')
    
    return model

In [294]:
trn_indx, val_indx = train_test_split([i for i in range(data.size()[0])], test_size=0.3)

In [295]:
#data = torch.log1p(data)

In [297]:
trn_data = data[trn_indx,:]
trn_price = price[trn_indx,:]
trn_price_3 = price_3[trn_indx,:]
trn_price_7 = price_7[trn_indx,:]
trn_price_14 = price_14[trn_indx,:]
trn_past_price_3 = past_price_3[trn_indx,:]
trn_past_price_7 = past_price_7[trn_indx,:]
trn_past_price_10 = past_price_10[trn_indx,:]


val_data = data[val_indx,:]
val_price = price[val_indx,:]
val_price_3 = price_3[val_indx,:]
val_price_7 = price_7[val_indx,:]
val_price_14 = price_14[val_indx,:]
val_past_price_3 = past_price_3[val_indx,:]
val_past_price_7 = past_price_7[val_indx,:]
val_past_price_10 = past_price_10[val_indx,:]

In [299]:
del data, price, price_14, price_28, price_7, past_price_3, past_price_7, past_price_10

NameError: name 'data' is not defined

In [300]:
gc.collect()

166

In [307]:
trn_data_set=Mydatasets(trn_data, trn_price, trn_price_3, trn_price_7, trn_price_14, 
                        trn_past_price_3, trn_past_price_7, trn_past_price_10, calendar, train = True)
trn_loader = torch.utils.data.DataLoader(trn_data_set, batch_size = 100, shuffle = True)

val_data_set=Mydatasets(val_data, val_price, val_price_3,  val_price_7, val_price_14, 
                        val_past_price_3, val_past_price_7, val_past_price_10, calendar, train = True)
val_loader = torch.utils.data.DataLoader(val_data_set, batch_size = 50, shuffle = True)

In [308]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cpu'

In [309]:
gc.collect()

8

In [310]:
model = Conv_1d_Net(51)
model(x).size()
y.size()

torch.Size([57, 28])

In [311]:
model = train_model(model, trn_loader, val_loader)

epoch 0


HBox(children=(IntProgress(value=0, max=86), HTML(value='')))

KeyboardInterrupt: 

In [256]:
model = train_model(model, trn_loader, val_loader)

epoch 0


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 1 - avg_train_loss: 2.1203  avg_val_loss: 2.1032 time: 5s
epoch 1


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 2 - avg_train_loss: 2.1318  avg_val_loss: 2.1211 time: 5s
epoch 2


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 3 - avg_train_loss: 2.0964  avg_val_loss: 2.1512 time: 5s
epoch 3


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 4 - avg_train_loss: 2.1211  avg_val_loss: 2.1076 time: 5s
epoch 4


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 5 - avg_train_loss: 2.1115  avg_val_loss: 2.1282 time: 5s
epoch 5


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 6 - avg_train_loss: 2.1619  avg_val_loss: 2.1404 time: 5s
epoch 6


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 7 - avg_train_loss: 2.1773  avg_val_loss: 2.1353 time: 5s
epoch 7


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 8 - avg_train_loss: 2.1250  avg_val_loss: 2.1122 time: 5s
epoch 8


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 9 - avg_train_loss: 2.1526  avg_val_loss: 2.1456 time: 5s
epoch 9


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 10 - avg_train_loss: 2.1336  avg_val_loss: 2.1213 time: 5s
epoch 10


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 11 - avg_train_loss: 2.1516  avg_val_loss: 2.1633 time: 5s
epoch 11


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 12 - avg_train_loss: 2.1312  avg_val_loss: 2.1044 time: 5s
epoch 12


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 13 - avg_train_loss: 2.1465  avg_val_loss: 2.1850 time: 5s
epoch 13


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 14 - avg_train_loss: 2.1497  avg_val_loss: 2.1517 time: 5s
epoch 14


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 15 - avg_train_loss: 2.1298  avg_val_loss: 2.0950 time: 5s
epoch 15


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 16 - avg_train_loss: 2.1300  avg_val_loss: 2.1377 time: 5s
epoch 16


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 17 - avg_train_loss: 2.1140  avg_val_loss: 2.0829 time: 5s
epoch 17


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 18 - avg_train_loss: 2.1511  avg_val_loss: 2.1445 time: 5s
epoch 18


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 19 - avg_train_loss: 2.1123  avg_val_loss: 2.1728 time: 5s
epoch 19


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 20 - avg_train_loss: 2.1323  avg_val_loss: 2.1651 time: 5s
epoch 20


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 21 - avg_train_loss: 2.1109  avg_val_loss: 2.0819 time: 5s
epoch 21


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 22 - avg_train_loss: 2.1213  avg_val_loss: 2.2155 time: 5s
epoch 22


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 23 - avg_train_loss: 2.1169  avg_val_loss: 2.1592 time: 5s
epoch 23


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 24 - avg_train_loss: 2.1202  avg_val_loss: 2.1458 time: 5s
epoch 24


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 25 - avg_train_loss: 2.1313  avg_val_loss: 2.1328 time: 5s
epoch 25


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 26 - avg_train_loss: 2.1576  avg_val_loss: 2.1006 time: 5s
epoch 26


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 27 - avg_train_loss: 2.1172  avg_val_loss: 2.1809 time: 5s
epoch 27


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 28 - avg_train_loss: 2.1501  avg_val_loss: 2.1678 time: 5s
epoch 28


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 29 - avg_train_loss: 2.1274  avg_val_loss: 2.1778 time: 5s
epoch 29


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 30 - avg_train_loss: 2.1475  avg_val_loss: 2.2173 time: 5s
epoch 30


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 31 - avg_train_loss: 2.1217  avg_val_loss: 2.1851 time: 5s
epoch 31


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 32 - avg_train_loss: 2.1302  avg_val_loss: 2.0814 time: 5s
epoch 32


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 33 - avg_train_loss: 2.1178  avg_val_loss: 2.2346 time: 5s
epoch 33


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 34 - avg_train_loss: 2.1308  avg_val_loss: 2.2164 time: 5s
epoch 34


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 35 - avg_train_loss: 2.1627  avg_val_loss: 2.1959 time: 5s
epoch 35


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 36 - avg_train_loss: 2.1414  avg_val_loss: 2.1569 time: 5s
epoch 36


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 37 - avg_train_loss: 2.1729  avg_val_loss: 2.1529 time: 5s
epoch 37


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 38 - avg_train_loss: 2.1314  avg_val_loss: 2.1827 time: 5s
epoch 38


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 39 - avg_train_loss: 2.1931  avg_val_loss: 2.1199 time: 5s
epoch 39


HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))


Epoch 40 - avg_train_loss: 2.1369  avg_val_loss: 2.1757 time: 5s
