In [1]:
#Import the required lib
import os
import numpy as np
import pandas as pd 
from utils import pct_change, preprocess_and_split_df, convert_to_dataset, data_preparation, plot_data, create_bollinger_limits
import matplotlib.pyplot as plt
plt.style.use('seaborn')
import random
import torch
import statistics
from LSTM_model_future_change import LSTM_FutureChangeGeneral, LSTM_FutureChange

import warnings
warnings.filterwarnings('ignore')

In [2]:
# create the data dict again 
CRYPTO_DICT = { 
            "ADA": {}, 
            "BCH": {},
            "BNB": {}, 
            "BTC": {},
            "ETH": {},
            "LTC": {}, 
            "NEO": {},
            "TRX": {},
            "XRP": {},
}

# upload the data 

for Crypto_code in CRYPTO_DICT:
    CRYPTO_DICT[Crypto_code]["df"]  = pd.read_pickle(f"Training_Data/Data_preprocessed/{Crypto_code}/{Crypto_code}-main_df.pkl")
    CRYPTO_DICT[Crypto_code]["df_train"]  = pd.read_pickle(f"Training_Data/Data_preprocessed/{Crypto_code}/{Crypto_code}-train_df.pkl")
    CRYPTO_DICT[Crypto_code]["df_val"]  = pd.read_pickle(f"Training_Data/Data_preprocessed/{Crypto_code}/{Crypto_code}-val_df.pkl")
    CRYPTO_DICT[Crypto_code]["df_test"]  = pd.read_pickle(f"Training_Data/Data_preprocessed/{Crypto_code}/{Crypto_code}-test_df.pkl")

In [3]:
CRYPTO_DICT["BTC"]["df_train"].columns

Index(['unix', 'date', 'symbol', 'close', 'mid_price_1', 'mid_price_2',
       'Volume USDT', 'SMA', 'EMA0', 'EMA1', 'EMA2', 'RSI', 'upper', 'lower',
       'upper_check', 'lower_check', 'future_price', 'future_change'],
      dtype='object', name=0)

In [4]:
# Dataset for future price prediction 
future_price_columns =  ['close', 'mid_price_1', 'mid_price_2',
       'Volume USDT', 'SMA', 'EMA0', 'EMA1', 'EMA2', 'RSI', 'upper', 'lower',
       'upper_check', 'lower_check', 'future_price']

future_price_length = (len(future_price_columns) - 1)

# Dataset for future change prediction 
future_change_columns = ['close', 'mid_price_1', 'mid_price_2',
       'Volume USDT', 'SMA', 'EMA0', 'EMA1', 'EMA2', 'RSI', 'upper', 'lower',
       'upper_check', 'lower_check','future_change']
future_change_length = (len(future_change_columns) -  1)

In [5]:
datasets = [["future_price_dataset", future_price_columns, future_price_length]]
for dataset in datasets: 
    print(f"{datasets[0][0]} start converting for all coins")
    for Crypto_code in CRYPTO_DICT:
        if "datasets" not in list(CRYPTO_DICT[Crypto_code].keys()):
            CRYPTO_DICT[Crypto_code]["datasets"] = {}
        for df in CRYPTO_DICT[Crypto_code]: 
            if df == "df" or df == "datasets": 
                pass
            else:
                if dataset[0] not in list(CRYPTO_DICT[Crypto_code]["datasets"].keys()):
                    CRYPTO_DICT[Crypto_code]["datasets"][dataset[0]] = {}
                CRYPTO_DICT[Crypto_code]["datasets"][dataset[0]][f"{df}_dataset"] = convert_to_dataset(CRYPTO_DICT[Crypto_code][df], dataset[1], dataset[2])
    print(f"{datasets[0][0]} finished converting for all coins")

future_price_dataset start converting for all coins
future_price_dataset finished converting for all coins


In [6]:
CRYPTO_DICT["ADA"]["df_train"].head(5)

Unnamed: 0,unix,date,symbol,close,mid_price_1,mid_price_2,Volume USDT,SMA,EMA0,EMA1,EMA2,RSI,upper,lower,upper_check,lower_check,future_price,future_change
10,1605534120,11/16/2020,ADA/USDT,0.569916,0.104354,0.573263,-0.503231,0.874844,0.173305,1.473872,0.27849,98.371336,1.328975,-0.188765,1,0,-0.523333,0
11,1605534180,11/16/2020,ADA/USDT,0.569443,0.466574,0.58219,-0.608049,1.030657,-0.351334,0.772922,0.720969,98.507463,1.571223,-0.228248,1,0,0.911994,1
12,1605534240,11/16/2020,ADA/USDT,-0.281825,0.46201,0.359231,-0.585942,1.154065,0.584498,-0.366869,0.457446,92.559524,1.001169,0.474485,1,0,1.504723,1
13,1605534300,11/16/2020,ADA/USDT,-0.501578,-0.42132,-0.577299,-0.595586,0.92245,1.547995,0.10891,0.64827,83.557951,0.607406,0.565131,1,0,-0.406365,1
14,1605534360,11/16/2020,ADA/USDT,0.858088,0.048246,0.247989,-0.590355,1.429333,0.432349,0.662095,0.192357,83.172414,0.516682,1.284834,1,0,-1.339432,0


In [7]:
# Create 1 list for all Crypto_coin data
train_dataset_all = []
val_dataset_all = []
test_dataset_all = []

for Crypto_code in CRYPTO_DICT:
  train_dataset_all += CRYPTO_DICT[Crypto_code]["datasets"]['future_price_dataset']['df_train_dataset']
  del CRYPTO_DICT[Crypto_code]["datasets"]['future_price_dataset']['df_train_dataset']
  val_dataset_all += CRYPTO_DICT[Crypto_code]["datasets"]['future_price_dataset']['df_val_dataset']
  del CRYPTO_DICT[Crypto_code]["datasets"]['future_price_dataset']['df_val_dataset']
  test_dataset_all += CRYPTO_DICT[Crypto_code]["datasets"]['future_price_dataset']['df_test_dataset']
  del CRYPTO_DICT[Crypto_code]["datasets"]['future_price_dataset']['df_test_dataset']

In [8]:
del CRYPTO_DICT

In [9]:
print(f"len of train dataset: {len(train_dataset_all)}")
print(f"len of val dataset: {len(val_dataset_all)}")
print(f"len of test dataset: {len(test_dataset_all)}")


len of train dataset: 1393341
len of val dataset: 173696
len of test dataset: 173689


In [10]:
train_dataset_all[0][0]

tensor([[ 5.6992e-01,  1.0435e-01,  5.7326e-01, -5.0323e-01,  8.7484e-01,
          1.7331e-01,  1.4739e+00,  2.7849e-01,  9.8371e+01,  1.3290e+00,
         -1.8876e-01,  1.0000e+00,  0.0000e+00],
        [ 5.6944e-01,  4.6657e-01,  5.8219e-01, -6.0805e-01,  1.0307e+00,
         -3.5133e-01,  7.7292e-01,  7.2097e-01,  9.8507e+01,  1.5712e+00,
         -2.2825e-01,  1.0000e+00,  0.0000e+00],
        [-2.8182e-01,  4.6201e-01,  3.5923e-01, -5.8594e-01,  1.1541e+00,
          5.8450e-01, -3.6687e-01,  4.5745e-01,  9.2560e+01,  1.0012e+00,
          4.7448e-01,  1.0000e+00,  0.0000e+00],
        [-5.0158e-01, -4.2132e-01, -5.7730e-01, -5.9559e-01,  9.2245e-01,
          1.5480e+00,  1.0891e-01,  6.4827e-01,  8.3558e+01,  6.0741e-01,
          5.6513e-01,  1.0000e+00,  0.0000e+00],
        [ 8.5809e-01,  4.8246e-02,  2.4799e-01, -5.9036e-01,  1.4293e+00,
          4.3235e-01,  6.6209e-01,  1.9236e-01,  8.3172e+01,  5.1668e-01,
          1.2848e+00,  1.0000e+00,  0.0000e+00],
        [ 1.419

## Modeling 

In [11]:
# Import model creation lib
import torch
from torch import nn
import pandas as pd
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter  # to print to tensorboard

In [12]:
# Check for GPU
train_on_gpu = torch.cuda.is_available()
if(train_on_gpu):
    print('Training on GPU!')
else: 
    print('No GPU available, training on CPU.')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

No GPU available, training on CPU.


In [13]:
train_on_gpu

False

In [14]:
# num_batches
batch_size = 64

# Create Dataloaders 
training_dataloader = torch.utils.data.DataLoader(train_dataset_all, batch_size=batch_size, shuffle=True, drop_last=True)
validation_dataloader = torch.utils.data.DataLoader(val_dataset_all, batch_size=batch_size, shuffle=True, drop_last=True)
testing_dataloader = torch.utils.data.DataLoader(test_dataset_all, batch_size=batch_size, shuffle=True, drop_last=True)
print(f"Number of Training batches: {len(training_dataloader)}")
print(f"Number of validation batches: {len(validation_dataloader)}")
print(f"Number of Testing batches: {len(testing_dataloader)}")

Number of Training batches: 21770
Number of validation batches: 2714
Number of Testing batches: 2713


In [64]:
import torch.nn as nn
import torch.nn.functional as F


class Time2Vector(nn.Module):
    def __init__(self, input_size, activation="sin", dropout_ratio=0.4):
        super(Time2Vector, self).__init__()
        
        # input size 
        self.input_size = input_size

        
        # non-periodic/linear vector 
        self.fc1 = nn.Linear(input_size, input_size)
        
        # periodic/linear vector 
        self.fc2 = nn.Linear(input_size, input_size)
        
        # dropout layer 
        self.dropout = nn.Dropout(dropout_ratio)
        
        # activation function 
        if activation == "sin": 
            self.activation = torch.sin
        else:
            self.activation = torch.cos
            
    def forward(self, x): 
        # periodic layer
        out_periodic = self.fc1(x)
        out_periodic = self.dropout(out_periodic)
        
        # non-periodic layer 
        out_nonperiodic = self.activation(self.fc2(x))
        out_nonperiodic = self.dropout(out_nonperiodic)
        
        # output 
        out = torch.cat([out_periodic, out_nonperiodic], -1)
        
        return out 
        
        

In [65]:
model = Time2Vector(13)

In [17]:
# training hyperparameters 
num_epoch = 8

# model hyperparameters 
input_size = len(train_dataset_all[0][0][0])
output_size = len(train_dataset_all[0][1])
hidden_size = 256
drop_out = 0.4
num_layers = 2

In [104]:
import torch.nn as nn
import torch.nn.functional as F
import torch


class Time2Vector(nn.Module):
    def __init__(self, input_size, activation="sin", dropout_ratio=0.4):
        super(Time2Vector, self).__init__()

        # input size
        self.input_size = input_size


        # non-periodic/linear vector
        self.fc1 = nn.Linear(input_size, input_size)

        # periodic/linear vector
        self.fc2 = nn.Linear(input_size, input_size)

        # dropout layer
        self.dropout = nn.Dropout(dropout_ratio)

        # activation function
        if activation == "sin":
            self.activation = torch.sin
        else:
            self.activation = torch.cos

    def forward(self, x):
        # periodic layer
        out_periodic = self.fc1(x)
        out_periodic = self.dropout(out_periodic)

        # non-periodic layer
        out_nonperiodic = self.activation(self.fc2(x))
        out_nonperiodic = self.dropout(out_nonperiodic)

        # output
        out = torch.cat([out_periodic, out_nonperiodic], -1)

        return out


class CNN_BiLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, dropout_ratio, output_size, kernel_size=1):
        super(CNN_BiLSTM, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.input_size = input_size
        self.output_size = output_size

        # dropout layer
        self.dropout = nn.Dropout(dropout_ratio)

        # Time2Vector
        self.t2v = Time2Vector(self.input_size)


        # Conv1
        self.conv1 = nn.Conv1d(self.input_size*3, (self.input_size*3)*4, kernel_size=kernel_size, bias=False)
        self.conv2 = nn.Conv1d((self.input_size*3)*4, (self.input_size * 3) * 4, kernel_size=kernel_size, bias=False)


        # LSTM layer
        self.lstm = nn.LSTM((self.input_size*3)*4, self.hidden_size, num_layers=self.num_layers, dropout=dropout_ratio,
                            batch_first=True, bidirectional=True, bias=False)

        # fully connected layer
        self.fc1 = nn.Linear(hidden_size * 2, 128, bias=False)
        self.fc2 = nn.Linear(128, self.output_size)

        # # BATCH_NORMALIZATION layer
        self.batch1d1_conv1 = nn.BatchNorm1d((self.input_size*3)*4)
        self.batch1d1_conv2 = nn.BatchNorm1d((self.input_size*3)*4)
        self.batch1d2 = nn.BatchNorm1d(hidden_size * 2)
        self.batch1d3 = nn.BatchNorm1d(128)

    def forward(self, x, hidden):

        # time_embeddings
        out_embed = self.t2v(x)

        # combine with input
        x = torch.cat([x, out_embed], -1)

        # get size of input to conv
        batch_size, seq_len, features = x.size()

        # reshape input to conv layer
        x = x.view(batch_size * seq_len, features)
        x = x.unsqueeze(-1)
        
        # go throw conv layers
        x = F.leaky_relu(self.conv1(x))

        x = self.dropout(x)
        x = self.batch1d1_conv1(x)

        x = F.leaky_relu(self.conv2(x))

        x = self.dropout(x)
        x = self.batch1d1_conv2(x)

        # reshape x for LSTM layer
        x = x.view(batch_size, seq_len, -1)

        # lstm layer x[B, seq_len, input_size]
        x, hidden = self.lstm(x, hidden)

        # get the last output
        x = x[:, -1, :]

        # drop out and batch_norm
        x = self.dropout(x)
        x = self.batch1d2(x)

        # first fc layer
        x = F.leaky_relu(self.fc1(x))

        # drop out and batch_norm
        x = self.dropout(x)
        x = self.batch1d3(x)

        # first fc layer
        x = self.fc2(x)

        return x, hidden

    def init_hidden(self, batch_size, train_on_gpu):
        '''
        Initializes hidden state
        '''
        # Create two new tensors with sizes n_layers x batch_size x hidden_dim,
        # initialized to zero, for hidden state and cell state of LSTM
        weight = next(self.parameters()).data

        if (train_on_gpu):
            hidden = (weight.new(self.num_layers*2, batch_size, self.hidden_size).zero_().cuda(),
                      weight.new(self.num_layers*2, batch_size, self.hidden_size).zero_().cuda())
        else:
            hidden = (weight.new(self.num_layers*2, batch_size, self.hidden_size).zero_(),
                      weight.new(self.num_layers*2, batch_size, self.hidden_size).zero_())

        return hidden

In [18]:
import torch.nn as nn
import torch.nn.functional as F
import torch


class Time2Vector(nn.Module):
    def __init__(self, input_size, activation="sin", dropout_ratio=0.4):
        super(Time2Vector, self).__init__()

        # input size
        self.input_size = input_size


        # non-periodic/linear vector
        self.fc1 = nn.Linear(input_size, input_size)

        # periodic/linear vector
        self.fc2 = nn.Linear(input_size, input_size)

        # dropout layer
        self.dropout = nn.Dropout(dropout_ratio)

        # activation function
        if activation == "sin":
            self.activation = torch.sin
        else:
            self.activation = torch.cos

    def forward(self, x):
        # periodic layer
        out_periodic = self.fc1(x)
        out_periodic = self.dropout(out_periodic)

        # non-periodic layer
        out_nonperiodic = self.activation(self.fc2(x))
        out_nonperiodic = self.dropout(out_nonperiodic)

        # output
        out = torch.cat([out_periodic, out_nonperiodic], -1)

        return out


class CNN_BiLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, dropout_ratio, output_size, kernel_size=1):
        super(CNN_BiLSTM, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.input_size = input_size
        self.output_size = output_size

        # dropout layer
        self.dropout = nn.Dropout(dropout_ratio)

        # Time2Vector
        self.t2v = Time2Vector(self.input_size)


        # Conv1
        self.conv1 = nn.Conv1d(self.input_size*3, (self.input_size*3)*4, kernel_size=kernel_size, bias=False)
        self.conv2 = nn.Conv1d((self.input_size*3)*4, (self.input_size * 3) * 4, kernel_size=kernel_size, bias=False)


        # LSTM layer
        self.lstm = nn.LSTM((self.input_size*3)*4, self.hidden_size, num_layers=self.num_layers, dropout=dropout_ratio,
                            batch_first=True, bidirectional=True, bias=False)

        # fully connected layer
        self.fc1 = nn.Linear(hidden_size * 2, 128, bias=False)
        self.fc2 = nn.Linear(128, self.output_size)

        # # BATCH_NORMALIZATION layer
        self.batch1d1_conv1 = nn.BatchNorm1d((self.input_size*3)*4)
        self.batch1d1_conv2 = nn.BatchNorm1d((self.input_size*3)*4)
        self.batch1d2 = nn.BatchNorm1d(hidden_size * 2)
        self.batch1d3 = nn.BatchNorm1d(128)

    def forward(self, x, hidden):

        # time_embeddings
        out_embed = self.t2v(x)

        # combine with input
        x = torch.cat([x, out_embed], -1)

        # get size of input to conv
        batch_size, seq_len, features = x.size()

        # reshape input to conv layer
        x = x.view(batch_size * seq_len, features)
        x = x.unsqueeze(-1)
        
        # go throw conv layers
        x = F.leaky_relu(self.conv1(x))

        x = self.dropout(x)
        x = self.batch1d1_conv1(x)

        x = F.leaky_relu(self.conv2(x))

        x = self.dropout(x)
        x = self.batch1d1_conv2(x)

        # reshape x for LSTM layer
        x = x.view(batch_size, seq_len, -1)

        # lstm layer x[B, seq_len, input_size]
        x, hidden = self.lstm(x, hidden)

        # get the last output
        x = x[:, -1, :]

        # drop out and batch_norm
        x = self.dropout(x)
        x = self.batch1d2(x)

        # first fc layer
        x = F.leaky_relu(self.fc1(x))

        # drop out and batch_norm
        x = self.dropout(x)
        x = self.batch1d3(x)

        # first fc layer
        x = self.fc2(x)

        return x, hidden

    def init_hidden(self, batch_size, train_on_gpu):
        '''
        Initializes hidden state
        '''
        # Create two new tensors with sizes n_layers x batch_size x hidden_dim,
        # initialized to zero, for hidden state and cell state of LSTM
        weight = next(self.parameters()).data

        if (train_on_gpu):
            hidden = (weight.new(self.num_layers*2, batch_size, self.hidden_size).zero_().cuda(),
                      weight.new(self.num_layers*2, batch_size, self.hidden_size).zero_().cuda())
        else:
            hidden = (weight.new(self.num_layers*2, batch_size, self.hidden_size).zero_(),
                      weight.new(self.num_layers*2, batch_size, self.hidden_size).zero_())

        return hidden

In [19]:
# Building the network
model = CNN_BiLSTM(input_size, hidden_size, num_layers, drop_out, output_size)

criterion = nn.MSELoss()

model = model.float()
model

CNN_BiLSTM(
  (dropout): Dropout(p=0.4, inplace=False)
  (t2v): Time2Vector(
    (fc1): Linear(in_features=13, out_features=13, bias=True)
    (fc2): Linear(in_features=13, out_features=13, bias=True)
    (dropout): Dropout(p=0.4, inplace=False)
  )
  (conv1): Conv1d(39, 156, kernel_size=(1,), stride=(1,), bias=False)
  (conv2): Conv1d(156, 156, kernel_size=(1,), stride=(1,), bias=False)
  (lstm): LSTM(156, 256, num_layers=2, bias=False, batch_first=True, dropout=0.4, bidirectional=True)
  (fc1): Linear(in_features=512, out_features=128, bias=False)
  (fc2): Linear(in_features=128, out_features=1, bias=True)
  (batch1d1_conv1): BatchNorm1d(156, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batch1d1_conv2): BatchNorm1d(156, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batch1d2): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batch1d3): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stat

In [106]:
model.init_hidden(64, True)[0].shape

AssertionError: Torch not compiled with CUDA enabled

In [113]:
def train(model, training_dataloader, validation_dataloader, epochs=10, clip=2, print_every=200, learning_rate_decay=1800):
    step = 0 
    model.train()
    if(train_on_gpu):
        model.cuda()
    counter = 0 
    # start at inf for val_loss
    valid_loss_min = np.Inf
    
    # lr set at the beginning to be high at 0.1
    lr = 0.01
    opt = optim.SGD(model.parameters(), lr=lr)
    print(f"learning_rate: {lr}")
    
    # start training loop
    for e in range(epochs):
        # initialize hidden state
        h = model.init_hidden(batch_size, train_on_gpu)
        train_losses = []
        for inputs, targets in training_dataloader:
            counter += 1
            
            # decrease the learning rate every 1500 step 
            if counter % learning_rate_decay == 0: 
                # decrease the lr by 0.25 each 1500 step to help model converge 
                lr= lr * 0.775
                opt = optim.SGD(model.parameters(), lr=lr)
                print(f"learning_rate decreased to: {lr}")
                
            
            # move to GPU if available   
            if(train_on_gpu):
                inputs, targets = inputs.cuda(), targets.cuda()
                
            # Creating new variables for the hidden state, otherwise
            # we'd backprop through the entire training history
            h = tuple([each.data for each in h])
            
            # zero accumulated gradients
            model.zero_grad()
            
            # get network output
            output, h = model(inputs, h)
            # find loss and back propogate 
            loss = criterion(output, targets.float())
            loss.backward()
            train_losses.append(loss.item())
            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            nn.utils.clip_grad_norm_(model.parameters(), clip)
            opt.step()
            # loss stats
            if counter % print_every == 0:
                # Get validation loss
                val_h = model.init_hidden(batch_size, train_on_gpu)
                val_losses = []
                model.eval()
                for inputs, targets in validation_dataloader: 
                    
                    # move to GPU
                    if(train_on_gpu):
                        inputs, targets = inputs.cuda(), targets.cuda()
                    
                    # create new hidden varaibles 
                    val_h = tuple([each.data for each in val_h])
                    
                    # forward 
                    output, val_h = model(inputs, val_h)
                    
                    # calculate val_batch accuracy 
                    
                    # loss
                    val_loss = criterion(output, targets.float())
                    val_losses.append(val_loss.item())
                
                model.train()
                
                # average val loss over all batches 
                avg_val_loss = np.mean(val_losses)
                
                
                
                
                print("Epoch: {}/{}...".format(e+1, epochs),
                  "Step: {}...".format(counter),
                  "Loss: {:.6f}...".format(np.mean(train_losses)),
                  "Val Loss: {:.6f}".format(np.mean(val_losses)))
                
                train_losses = []
                
                # save model if validation loss has decreased
                if avg_val_loss <= valid_loss_min:
                    print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
                    valid_loss_min,
                    avg_val_loss))
                    torch.save(model.state_dict(), "General_Model_price")
                    valid_loss_min = avg_val_loss

In [20]:
train(model, training_dataloader, validation_dataloader, epochs= num_epoch, print_every=200)

NameError: name 'train' is not defined

In [21]:
model.load_state_dict(torch.load("CNN_BiLSTM_Model_price.pt", map_location=device))

<All keys matched successfully>

In [None]:
val_h = model.init_hidden(batch_size, train_on_gpu)
val_losses = []
model.eval()
output_list, targets_list = [], []
for inputs, targets in validation_dataloader: 

    # move to GPU
    if(train_on_gpu):
        inputs, targets = inputs.cuda(), targets.cuda()

    # create new hidden varaibles 
    val_h = tuple([each.data for each in val_h])

    # forward 
    output, val_h = model(inputs, val_h)

    # calculate val_batch accuracy 
    output_list.append(output)
    targets_list.append(targets)
    # loss
    val_loss = criterion(output, targets.float())
    val_losses.append(val_loss.item())

model.train()

# average val loss over all batches 
avg_val_loss = np.mean(val_losses)
print(avg_val_loss)