In [2]:
from IPython.core.display import display, HTML

import pandas as pd
import numpy as np
from scipy import stats
import random
import glob
import os
import gc

from joblib import Parallel, delayed

from sklearn import preprocessing, model_selection
from tqdm import tqdm

import matplotlib.pyplot as plt
import seaborn as sns

path_root = './'
data_dir ='../input/optiver-realized-volatility-prediction/'
path_submissions = '/'

target_name = 'target'

DEBUG = False


In [3]:
def log_return(list_stock_prices):
    return np.log(list_stock_prices).diff() 

def realized_volatility(series_log_return):
    return np.sqrt(np.sum(series_log_return**2))

def realized_mad(series_log_return):
    return np.mean(np.absolute(series_log_return - np.mean(series_log_return)))

def realized_median_abs_dev(series_log_return):
    return stats.median_absolute_deviation(series_log_return, nan_policy='omit')

def rmspe(y_true, y_pred):
    return  (np.sqrt(np.mean(np.square((y_true - y_pred) / y_true))))

def calc_wap(df):
    wap = (df['bid_price1'] * df['ask_size1'] + df['ask_price1'] * df['bid_size1'])/(df['bid_size1'] + df['ask_size1'])
    return wap

def calc_wap2(df):
    wap = (df['bid_price2'] * df['ask_size2'] + df['ask_price2'] * df['bid_size2'])/(df['bid_size2'] + df['ask_size2'])
    return wap

def count_unique(series):
    return len(np.unique(series))

In [4]:
def preprocessor_book(file_path):
    df = pd.read_parquet(file_path)
    
    df['wap'] = calc_wap(df)
    df['log_return'] = df.groupby('time_id')['wap'].apply(log_return)
    
    df['wap2'] = calc_wap(df)
    df['log_return2'] = df.groupby('time_id')['wap2'].apply(log_return)
    
    df['wap_imbalance'] = abs(df['wap'] - df['wap2'])
    
    df['spread'] = (df['ask_price1'] - df['bid_price1']) / ((df['ask_price1'] + df['bid_price1'])/2)
    
    df['bid_spread'] = df['bid_price1'] - df['bid_price2']
    df['ask_spread'] = df['ask_price1'] - df['ask_price2']
    df['total_volume'] = (df['ask_size1'] + df['ask_size2']) + (df['bid_size1'] + df['bid_size2'])
    df['volume_imbalance'] = abs((df['ask_size1'] + df['ask_size2']) - (df['bid_size1'] + df['bid_size2']))
    
    agg_dict = {
        'log_return':[realized_volatility,realized_mad,realized_median_abs_dev],
        'log_return2':[realized_volatility,realized_mad,realized_median_abs_dev],
        'wap_imbalance':[np.mean],
        'spread':[np.mean],
        'bid_spread':[np.mean],
        'ask_spread':[np.mean],
        'volume_imbalance':[np.mean],
        'total_volume':[np.mean],
        'wap':[np.mean],
    }
    
    
    df_feature = pd.DataFrame(df.groupby(['time_id']).agg(agg_dict)).reset_index()
    
    df_feature.columns = ['_'.join(col) for col in df_feature.columns] #time_id is changed to time_id_
        
    #create row_id
    stock_id = file_path.split('=')[1]
    df_feature['row_id'] = df_feature['time_id_'].apply(lambda x:f'{stock_id}-{x}')
    df_feature = df_feature.drop(['time_id_'],axis=1)
    
    return df_feature
    

In [5]:
def preprocessor_trade(file_path):
    df = pd.read_parquet(file_path)
    df['log_return'] = df.groupby('time_id')['price'].apply(log_return)
    df['dollar_volume'] = df['price'] * df['size']
    
    
    agg_dict = {
        'log_return':[realized_volatility,realized_mad,realized_median_abs_dev],
        'seconds_in_bucket':[count_unique],
        'size':[np.sum],
        'order_count':[np.mean],
        'dollar_volume':[np.sum],
    }
    
    df_feature = df.groupby('time_id').agg(agg_dict).reset_index()
    
    df_feature.columns = ['_'.join(col) for col in df_feature.columns]

    
    df_feature = df_feature.add_prefix('trade_')
    stock_id = file_path.split('=')[1]
    df_feature['row_id'] = df_feature['trade_time_id_'].apply(lambda x:f'{stock_id}-{x}')
    df_feature = df_feature.drop(['trade_time_id_'],axis=1)
    
    return df_feature

In [6]:
def preprocessor(list_stock_ids, is_train = True):
    from joblib import Parallel, delayed # parallel computing to save time
    df = pd.DataFrame()
    
    def for_joblib(stock_id):
        if is_train:
            file_path_book = data_dir + "book_train.parquet/stock_id=" + str(stock_id)
            file_path_trade = data_dir + "trade_train.parquet/stock_id=" + str(stock_id)
        else:
            file_path_book = data_dir + "book_test.parquet/stock_id=" + str(stock_id)
            file_path_trade = data_dir + "trade_test.parquet/stock_id=" + str(stock_id)
            
        df_tmp = pd.merge(preprocessor_book(file_path_book),preprocessor_trade(file_path_trade),on='row_id',how='left')
     
        return pd.concat([df,df_tmp])
    
    df = Parallel(n_jobs=-1, verbose=1)(
        delayed(for_joblib)(stock_id) for stock_id in list_stock_ids
        )

    df =  pd.concat(df,ignore_index = True)
    return df

In [7]:
train = pd.read_csv(os.path.join(data_dir,'train.csv'))

In [8]:
df_train = pd.read_csv('../input/filtered-train-data/train_processed.csv')

In [9]:
test = pd.read_csv(os.path.join(data_dir,'test.csv'))
test_ids = test.stock_id.unique()

In [10]:
%%time
df_test = preprocessor(list_stock_ids=test_ids, is_train=False)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.


CPU times: user 43.6 ms, sys: 23.1 ms, total: 66.7 ms
Wall time: 1.26 s


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    1.2s finished


In [11]:
df_test = test.merge(df_test, on=['row_id'], how='left')

In [12]:
df_train['stock_id'] = df_train['row_id'].apply(lambda x:x.split('-')[0])
df_test['stock_id'] = df_test['row_id'].apply(lambda x:x.split('-')[0])
df_train['time_id'] = df_train['row_id'].apply(lambda x:x.split('-')[1])
df_test['time_id'] = df_test['row_id'].apply(lambda x:x.split('-')[1])

In [13]:
#PyTorch 

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import torch.nn.functional as F
from torch.utils import data

In [14]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

In [15]:
class OptiveDataset(Dataset):
    def __init__(self, X, Y, emb_cols=['stock_id', 'time_id']):
        X = X.copy()
        self.X1 = X.loc[:,emb_cols].copy().values.astype(np.int64) #categorical columns
        self.X2 = X.drop(columns=emb_cols).copy().values.astype(np.float32) #numerical columns
        self.y = Y
        
    def __len__(self):
        return len(self.y)
    
    def __getitem__(self, idx):
        return (self.X1[idx], self.X2[idx]), self.y[idx]
    
class OptiveDatasetTest(Dataset):
    def __init__(self, X, emb_cols=['stock_id', 'time_id']):
        X = X.copy()
        self.X1 = X.loc[:,emb_cols].copy().values.astype(np.int64) #categorical columns
        self.X2 = X.drop(columns=emb_cols).copy().values.astype(np.float32) #numerical columns
        
    def __len__(self):
        return len(self.X1)
    
    def __getitem__(self, idx):
        return (self.X1[idx], self.X2[idx])

In [16]:
df_train = df_train.fillna(0)
df_test = df_test.fillna(0)

In [17]:
train_dataset = OptiveDataset(df_train.drop(['target', 'time_id','row_id'], axis=1), df_train['target'], emb_cols=['stock_id'])
train_dl = DataLoader(train_dataset, batch_size=4, shuffle=True)

#test the dataset class
for (emb, count), target in train_dl:
    print((emb.shape, count.shape), target.shape)
    break;

(torch.Size([4, 1]), torch.Size([4, 20])) torch.Size([4])


In [18]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# device = torch.device('cpu')

def RMSELoss(yhat,y):
    return torch.sqrt(torch.mean((yhat-y)**2))

def RMSPELoss(y_pred, y_true):
    return torch.sqrt(torch.mean( ((y_true - y_pred) / y_true) ** 2 ))

def train_epoch(train_dl, valid_dl, model, loss_fn, opt, sch, epoch, fold, device=device):
    # taining loop
    model.train()
    running_loss_ = 0
    
    pbar = tqdm(enumerate(train_dl), total=len(train_dl))
    for i, ((cats, counts), targets) in pbar:
        cats, counts, targets = cats.to(device), counts.to(device), targets.unsqueeze(1).to(device)
        
        opt.zero_grad()
        y_pred = model(cats, counts)
        loss = loss_fn(y_pred.float(), targets.float())
        
        loss.backward()
        opt.step()
        
        running_loss_ += loss.item()
        if (i+1) % 100 == 0:
            pbar.set_description(f"running loss:{running_loss_ / (i+1): 0.6f}")
    
    sch.step(loss)

    epoch_loss = running_loss_ / len(train_dl)
    #print(f'==> Epoch {epoch} TRAIN loss: {epoch_loss:.6f}')
    
    # Validation loop
    model.eval()
    valid_loss = 0
    best_loss = np.inf
    
    for i, ((cats, counts), targets) in enumerate(valid_dl):
        cats, counts, targets = cats.to(device), counts.to(device), targets.unsqueeze(1).to(device)
        
        with torch.no_grad():
            y_pred = model(cats, counts)
            val_loss = loss_fn(y_pred.float(), targets.float())
            
        valid_loss += val_loss.item() * targets.shape[0]
    sch.step(valid_loss)
    
    valid_epoch_loss = valid_loss / len(valid_dl)
    print(f'==>F{fold}, Epoch {epoch} VALID loss: {valid_epoch_loss:.8f}')
    
    if valid_epoch_loss < best_loss:
        best_loss = valid_epoch_loss
        torch.save(model.state_dict(), f'FOLD{fold}_optive_model.pth')
    
    model.train()
    return model, epoch_loss, valid_epoch_loss

In [19]:
def perpare_dataset(train, valid, test=None, batch_size=64, drop_cols=['target', 'time_id', 'row_id'], emb_cols=['stock_id']):
    train_dataset = OptiveDataset(train.drop(drop_cols, axis=1), train['target'], emb_cols=emb_cols)
    valid_dataset = OptiveDataset(valid.drop(drop_cols, axis=1), valid['target'], emb_cols=emb_cols)    
    
    train_dl = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    valid_dl = DataLoader(valid_dataset, batch_size=batch_size, shuffle=True)
    
    return train_dl, valid_dl

In [20]:
torch.cuda.get_device_name()

'Tesla P100-PCIE-16GB'

In [21]:
class OptiverModel(nn.Module):
    def __init__(self, embedding_sizes=16, num_embeddings=max(df_train['stock_id'].astype(np.int8))+1):
        super().__init__()
        self.emb = nn.Embedding(num_embeddings, embedding_sizes)
        self.emb_drop = nn.Dropout(0.25)
        
        self.bn1 = nn.BatchNorm1d(20)
        self.lin1 = nn.Linear(embedding_sizes+20, 32)
        self.lin2 = nn.Linear(32, 16)
        self.lin3 = nn.Linear(16, 8)
        self.lin4 = nn.Linear(8, 4)
        self.lin5 = nn.Linear(4, 1)
        
        

    def forward(self, x_cat, x_cont):
        x1 = self.emb(x_cat)
        x1 = torch.flatten(x1, end_dim=1)
        x1 = self.emb_drop(x1)
        x2 = self.bn1(x_cont)
        x = torch.cat([x1, x2], 1)
        x = F.relu(self.lin1(x))
        x = F.relu(self.lin2(x))
        x = F.relu(self.lin3(x))
        x = F.relu(self.lin4(x))
        x = self.lin5(x)
        x = torch.sigmoid(x)
        
        
        return x

In [25]:
n_folds = 10
epochs = 10

kf = model_selection.KFold(n_splits=n_folds, shuffle=True, random_state=42)
seed_everything(46)

for fold_idx, (dev_index, val_index) in enumerate(kf.split(range(len(df_train)))):
    
    if fold_idx > 4:
        break #train 5 folds
        
    train_ = df_train.loc[dev_index,].reset_index(drop=True)
    valid_ = df_train.loc[val_index, ].reset_index(drop=True)
    
    train_dl, valid_dl = perpare_dataset(train_, valid_)
    
    model = OptiverModel(embedding_sizes=29,).to(device)
    loss_fn = RMSELoss
    
    opt = optim.Adam(model.parameters(), lr=0.01)
    sch = optim.lr_scheduler.ReduceLROnPlateau(opt, factor=0.2, patience=3)
    
    counter = 0
    for epoch in range(epochs):
        model, epoch_loss, valid_epoch_loss = train_epoch(train_dl, valid_dl, 
                                                                   model, loss_fn, opt, 
                                                                   sch, epoch, fold_idx, device=device)

running loss: 0.006054: 100%|██████████| 5411/5411 [00:26<00:00, 203.39it/s]
  0%|          | 18/5411 [00:00<00:30, 175.31it/s]

==>F0, Epoch 0 VALID loss: 0.31272788


running loss: 0.004926: 100%|██████████| 5411/5411 [00:25<00:00, 210.12it/s]
  0%|          | 17/5411 [00:00<00:33, 162.08it/s]

==>F0, Epoch 1 VALID loss: 0.31289477


running loss: 0.004925: 100%|██████████| 5411/5411 [00:27<00:00, 199.98it/s]
  0%|          | 16/5411 [00:00<00:34, 156.17it/s]

==>F0, Epoch 2 VALID loss: 0.31288310


running loss: 0.004925: 100%|██████████| 5411/5411 [00:26<00:00, 200.68it/s]
  0%|          | 18/5411 [00:00<00:30, 176.06it/s]

==>F0, Epoch 3 VALID loss: 0.31288704


running loss: 0.004925: 100%|██████████| 5411/5411 [00:25<00:00, 211.93it/s]
  0%|          | 18/5411 [00:00<00:30, 178.42it/s]

==>F0, Epoch 4 VALID loss: 0.31274601


running loss: 0.004926: 100%|██████████| 5411/5411 [00:26<00:00, 206.49it/s]
  0%|          | 16/5411 [00:00<00:33, 159.18it/s]

==>F0, Epoch 5 VALID loss: 0.31278289


running loss: 0.004926: 100%|██████████| 5411/5411 [00:25<00:00, 211.74it/s]
  0%|          | 18/5411 [00:00<00:30, 177.66it/s]

==>F0, Epoch 6 VALID loss: 0.31283621


running loss: 0.004925: 100%|██████████| 5411/5411 [00:25<00:00, 210.62it/s]
  0%|          | 16/5411 [00:00<00:34, 155.26it/s]

==>F0, Epoch 7 VALID loss: 0.31273349


running loss: 0.004925: 100%|██████████| 5411/5411 [00:26<00:00, 200.83it/s]
  0%|          | 18/5411 [00:00<00:30, 173.99it/s]

==>F0, Epoch 8 VALID loss: 0.31293240


running loss: 0.004925: 100%|██████████| 5411/5411 [00:25<00:00, 214.22it/s]


==>F0, Epoch 9 VALID loss: 0.31312236


running loss: 0.005506: 100%|██████████| 5411/5411 [00:27<00:00, 197.96it/s]
  0%|          | 16/5411 [00:00<00:33, 159.97it/s]

==>F1, Epoch 0 VALID loss: 0.31533655


running loss: 0.004919: 100%|██████████| 5411/5411 [00:25<00:00, 214.36it/s]
  0%|          | 18/5411 [00:00<00:30, 175.08it/s]

==>F1, Epoch 1 VALID loss: 0.31526186


running loss: 0.004925: 100%|██████████| 5411/5411 [00:25<00:00, 211.50it/s]
  0%|          | 10/5411 [00:00<00:54, 98.69it/s]

==>F1, Epoch 2 VALID loss: 0.31559533


running loss: 0.004232: 100%|██████████| 5411/5411 [00:26<00:00, 206.19it/s]
  0%|          | 18/5411 [00:00<00:30, 176.16it/s]

==>F1, Epoch 3 VALID loss: 0.18996361


running loss: 0.002970: 100%|██████████| 5411/5411 [00:25<00:00, 213.97it/s]
  0%|          | 18/5411 [00:00<00:30, 174.57it/s]

==>F1, Epoch 4 VALID loss: 0.19276436


running loss: 0.002965: 100%|██████████| 5411/5411 [00:27<00:00, 198.67it/s]
  0%|          | 18/5411 [00:00<00:30, 177.89it/s]

==>F1, Epoch 5 VALID loss: 0.18934036


running loss: 0.002953: 100%|██████████| 5411/5411 [00:26<00:00, 203.55it/s]
  0%|          | 13/5411 [00:00<00:44, 122.07it/s]

==>F1, Epoch 6 VALID loss: 0.18898710


running loss: 0.002954: 100%|██████████| 5411/5411 [00:25<00:00, 208.74it/s]
  0%|          | 18/5411 [00:00<00:30, 175.13it/s]

==>F1, Epoch 7 VALID loss: 0.18972304


running loss: 0.002956: 100%|██████████| 5411/5411 [00:25<00:00, 212.90it/s]
  0%|          | 17/5411 [00:00<00:32, 165.26it/s]

==>F1, Epoch 8 VALID loss: 0.18947493


running loss: 0.002951: 100%|██████████| 5411/5411 [00:26<00:00, 203.27it/s]


==>F1, Epoch 9 VALID loss: 0.18972270


running loss: 0.006056: 100%|██████████| 5411/5411 [00:25<00:00, 211.21it/s]
  0%|          | 18/5411 [00:00<00:30, 175.15it/s]

==>F2, Epoch 0 VALID loss: 0.31352211


running loss: 0.004931: 100%|██████████| 5411/5411 [00:27<00:00, 199.59it/s]
  0%|          | 18/5411 [00:00<00:31, 172.74it/s]

==>F2, Epoch 1 VALID loss: 0.31384844


running loss: 0.004925: 100%|██████████| 5411/5411 [00:25<00:00, 214.83it/s]
  0%|          | 18/5411 [00:00<00:30, 178.26it/s]

==>F2, Epoch 2 VALID loss: 0.31369827


running loss: 0.004924: 100%|██████████| 5411/5411 [00:25<00:00, 208.12it/s]
  0%|          | 17/5411 [00:00<00:32, 164.42it/s]

==>F2, Epoch 3 VALID loss: 0.31363245


running loss: 0.004924: 100%|██████████| 5411/5411 [00:26<00:00, 207.28it/s]
  0%|          | 18/5411 [00:00<00:30, 177.11it/s]

==>F2, Epoch 4 VALID loss: 0.31344115


running loss: 0.004924: 100%|██████████| 5411/5411 [00:24<00:00, 217.94it/s]
  0%|          | 17/5411 [00:00<00:32, 164.34it/s]

==>F2, Epoch 5 VALID loss: 0.31382417


running loss: 0.004924: 100%|██████████| 5411/5411 [00:27<00:00, 196.00it/s]
  0%|          | 18/5411 [00:00<00:30, 174.94it/s]

==>F2, Epoch 6 VALID loss: 0.31362505


running loss: 0.004924: 100%|██████████| 5411/5411 [00:26<00:00, 201.02it/s]
  0%|          | 17/5411 [00:00<00:32, 164.11it/s]

==>F2, Epoch 7 VALID loss: 0.31356664


running loss: 0.004923: 100%|██████████| 5411/5411 [00:25<00:00, 213.88it/s]
  0%|          | 18/5411 [00:00<00:30, 178.31it/s]

==>F2, Epoch 8 VALID loss: 0.31339034


running loss: 0.004923: 100%|██████████| 5411/5411 [00:25<00:00, 209.00it/s]


==>F2, Epoch 9 VALID loss: 0.31376311


running loss: 0.006914: 100%|██████████| 5411/5411 [00:26<00:00, 206.89it/s]
  0%|          | 18/5411 [00:00<00:30, 176.11it/s]

==>F3, Epoch 0 VALID loss: 0.31569184


running loss: 0.004921: 100%|██████████| 5411/5411 [00:24<00:00, 218.33it/s]
  0%|          | 18/5411 [00:00<00:30, 179.23it/s]

==>F3, Epoch 1 VALID loss: 0.31536866


running loss: 0.004920: 100%|██████████| 5411/5411 [00:27<00:00, 200.09it/s]
  0%|          | 18/5411 [00:00<00:30, 177.27it/s]

==>F3, Epoch 2 VALID loss: 0.31550756


running loss: 0.004920: 100%|██████████| 5411/5411 [00:25<00:00, 211.10it/s]
  0%|          | 17/5411 [00:00<00:31, 169.68it/s]

==>F3, Epoch 3 VALID loss: 0.31590868


running loss: 0.004924: 100%|██████████| 5411/5411 [00:26<00:00, 205.34it/s]
  0%|          | 17/5411 [00:00<00:32, 164.48it/s]

==>F3, Epoch 4 VALID loss: 0.31560720


running loss: 0.004928: 100%|██████████| 5411/5411 [00:25<00:00, 208.13it/s]
  0%|          | 18/5411 [00:00<00:30, 175.56it/s]

==>F3, Epoch 5 VALID loss: 0.31541571


running loss: 0.004015: 100%|██████████| 5411/5411 [00:25<00:00, 213.58it/s]
  0%|          | 17/5411 [00:00<00:32, 164.25it/s]

==>F3, Epoch 6 VALID loss: 0.09246248


running loss: 0.001449: 100%|██████████| 5411/5411 [00:26<00:00, 202.39it/s]
  0%|          | 18/5411 [00:00<00:30, 176.28it/s]

==>F3, Epoch 7 VALID loss: 0.09237162


running loss: 0.001405: 100%|██████████| 5411/5411 [00:25<00:00, 211.86it/s]
  0%|          | 17/5411 [00:00<00:32, 165.12it/s]

==>F3, Epoch 8 VALID loss: 0.08613199


running loss: 0.001379: 100%|██████████| 5411/5411 [00:26<00:00, 202.99it/s]


==>F3, Epoch 9 VALID loss: 0.08535180


running loss: 0.005547: 100%|██████████| 5411/5411 [00:26<00:00, 201.44it/s]
  0%|          | 17/5411 [00:00<00:32, 164.97it/s]

==>F4, Epoch 0 VALID loss: 0.09654556


running loss: 0.001498: 100%|██████████| 5411/5411 [00:26<00:00, 208.11it/s]
  0%|          | 18/5411 [00:00<00:30, 179.33it/s]

==>F4, Epoch 1 VALID loss: 0.09240182


running loss: 0.001436: 100%|██████████| 5411/5411 [00:25<00:00, 212.61it/s]
  0%|          | 17/5411 [00:00<00:33, 163.43it/s]

==>F4, Epoch 2 VALID loss: 0.08483667


running loss: 0.001418: 100%|██████████| 5411/5411 [00:26<00:00, 201.38it/s]
  0%|          | 18/5411 [00:00<00:29, 179.81it/s]

==>F4, Epoch 3 VALID loss: 0.09008417


running loss: 0.001354: 100%|██████████| 5411/5411 [00:24<00:00, 217.04it/s]
  0%|          | 18/5411 [00:00<00:30, 177.61it/s]

==>F4, Epoch 4 VALID loss: 0.08368601


running loss: 0.001350: 100%|██████████| 5411/5411 [00:26<00:00, 206.64it/s]
  0%|          | 17/5411 [00:00<00:32, 167.09it/s]

==>F4, Epoch 5 VALID loss: 0.08483293


running loss: 0.001347: 100%|██████████| 5411/5411 [00:25<00:00, 209.72it/s]
  0%|          | 18/5411 [00:00<00:29, 179.94it/s]

==>F4, Epoch 6 VALID loss: 0.08351940


running loss: 0.001333: 100%|██████████| 5411/5411 [00:25<00:00, 210.38it/s]
  0%|          | 17/5411 [00:00<00:32, 164.20it/s]

==>F4, Epoch 7 VALID loss: 0.08288474


running loss: 0.001334: 100%|██████████| 5411/5411 [00:26<00:00, 201.01it/s]
  0%|          | 18/5411 [00:00<00:30, 176.55it/s]

==>F4, Epoch 8 VALID loss: 0.08284059


running loss: 0.001330: 100%|██████████| 5411/5411 [00:25<00:00, 214.78it/s]


==>F4, Epoch 9 VALID loss: 0.08282846
