In [1]:
#IMPORTS
import os
import re
import random
import numpy as np
import pandas as pd
from tqdm import tqdm
from enum import Enum
from functools import reduce
from typing import Any, List
from collections import namedtuple

import torch
from torch import nn
import torch.optim as optim
import torch.nn.functional as F

import matplotlib.pyplot as plt
from prettytable import PrettyTable as PrettyTable

In [23]:
random.seed(42)

In [3]:
import pandas as pd
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')
# Define the file path
file_path = '/content/drive/My Drive/all_data_loaded.csv'

# Read the CSV file into a pandas DataFrame
df = pd.read_csv(file_path)

# Display the first few rows of the DataFrame to verify it was loaded correctly
print(df.head())

Mounted at /content/drive
              datetime        time fsym tsym       close        high  \
0  2015-01-01 00:00:00  1420070400  BTC  USD  320.999986  320.999986   
1  2015-01-01 00:01:00  1420070460  BTC  USD  320.999986  320.999986   
2  2015-01-01 00:02:00  1420070520  BTC  USD  320.999986  320.999986   
3  2015-01-01 00:03:00  1420070580  BTC  USD  320.999986  320.999986   
4  2015-01-01 00:04:00  1420070640  BTC  USD  320.969986  320.999986   

          low        open  volumefrom  volumeto  
0  320.999986  320.999986    1.736972    557.57  
1  320.999986  320.999986    0.000000      0.00  
2  320.999986  320.999986    1.413333    453.68  
3  320.999986  320.999986    2.000000    642.00  
4  320.969986  320.999986    2.843880    912.71  


In [4]:
df.datetime = pd.to_datetime(df.datetime)
df.set_index(df.datetime, inplace = True)
df.drop(columns=["time","fsym","tsym"])

# Resample to 5 minutes
df_5min = df.resample('5T').agg({
    'open': 'first',
    'high': 'max',
    'low': 'min',
    'close': 'last',
    'volumefrom': 'sum',
    'volumeto' : 'sum'
})


# Resample to 15 minutes
df_15min = df.resample('15T').agg({
    'open': 'first',
    'high': 'max',
    'low': 'min',
    'close': 'last',
    'volumefrom': 'sum',
    'volumeto' : 'sum'
})


# Resample to 1 hour
df_1h = df.resample('1H').agg({
    'open': 'first',
    'high': 'max',
    'low': 'min',
    'close': 'last',
    'volumefrom': 'sum',
    'volumeto' : 'sum'
})



# Resample to 6 hours
df_6h = df.resample('6H').agg({
    'open': 'first',
    'high': 'max',
    'low': 'min',
    'close': 'last',
    'volumefrom': 'sum',
    'volumeto' : 'sum'
})


# Resample to 6 hours
df_1d = df.resample('1D').agg({
    'open': 'first',
    'high': 'max',
    'low': 'min',
    'close': 'last',
    'volumefrom': 'sum',
    'volumeto' : 'sum'
})

# Now df_5min, df_15min, df_20min, df_1h, df_3h, df_6h are your aggregated DataFrames
df_list = [df_5min, df_15min, df_1h, df_6h, df_1d]

In [5]:
#GLOBALS :

market_impact = 0.015

class Actions(int, Enum):
    DOUBLE_SELL = 0
    SELL = 1
    HOLD = 2
    BUY = 3
    DOUBLE_BUY = 4
    CLOSE_ALL = 5
    COMBO_BUY = 6


class Positions:
    SHORT = -1
    FLAT = 0
    LONG = 1
    COUNTER = 0


class Ledgers:
    ACTIVE_LONG = {
        "Entry Price" : [],
        "Current Price" : [],
        "Dollar Profit" : [],
        "% Return" : []
    }
    ACTIVE_SHORT = {
        "Entry Price" : [],
        "Current Price" : [],
        "Dollar Profit" : [],
        "% Return" : []
    }
    HIST = {
        "Entry Price" : [],
        "Action Type" : [],
        "Position Type" : [],
        "Dollar Profit, Realized"   : 0,
        "Dollar Profit, Unrealized" : 0,
        "Time Index" : []
    }



In [6]:
# Baseline Models

class RandomWalk():
    def __init__(self):
        self.action_idx_lst = []

    def random_selection(self):
        if Positions.COUNTER == 0:
            self.action_idx_lst = [0, 1, 3, 4, 6]
            return random.choice(self.action_idx_lst)
        elif abs(Positions.COUNTER) > 8:
            self.action_idx_lst = 5
            return self.action_idx_lst
        else:
            return random.choice(self.action_idx_lst)


class MomentumFollowing():
    def __init__(self):
        self.action_idx_lst = []

    def trend_selection(self, data, cut_out):
        if abs(Positions.COUNTER) > 10:
            return 5 #close all
        # if data[0][0][0] > data[0][0][cut_out]:
        curr_price = data[0][0][-1]
        threshold = np.mean(float(data[0][0][-cut_out]) + float(data[0][0][-(cut_out+1)]) +
                            float(data[0][0][-(cut_out+2)]) + float(data[0][0][-(cut_out+3)]))
        if curr_price > threshold:   #if the latest data is bigger than 5 timestamps old data
            if Positions.COUNTER < 0 :
                return 5
            else:
                self.action_idx_lst = [3,4,6]
                return random.choice(self.action_idx_lst)
        elif curr_price < threshold:
            if Positions.COUNTER > 0:
                return 5
            else:
                self.action_idx_lst = [0, 1]
                return random.choice(self.action_idx_lst)
        else:
            return 2

In [7]:
# Convolutional DQN
class ConvDQN(nn.Module):
    def __init__(self, seq_len_in, actions_n, kernel_size=8):
        super(ConvDQN, self).__init__()
        n_filters = 64
        max_pool_kernel = 2
        self.conv1 = nn.Conv1d(1, n_filters, kernel_size)
        self.maxPool = nn.MaxPool1d(max_pool_kernel, stride=1)
        self.LRelu = nn.LeakyReLU()
        self.conv2 = nn.Conv1d(n_filters, n_filters, kernel_size // 2)

        self.hidden_dim = n_filters * (
                            (
                                (
                                    (seq_len_in - kernel_size + 1) -
                                    max_pool_kernel + 1) -
                                    kernel_size // 2 + 1) -
                                    max_pool_kernel + 1)

        self.out_layer = nn.Linear(self.hidden_dim, actions_n)

    def forward(self, x):
        c1_out = self.conv1(x)
        max_pool_1 = self.maxPool(self.LRelu(c1_out))
        c2_out = self.conv2(max_pool_1)
        max_pool_2 = self.maxPool(self.LRelu(c2_out))
        # print("c1_out:\t%s"%str(c1_out.shape))
        # print("max_pool_1:\t%s"%str(max_pool_1.shape))
        # print("c2_out:\t%s"%str(c2_out.shape))
        # print("max_pool_2:\t%s"%str(max_pool_2.shape))
        # print(self.hidden_dim)
        max_pool_2 = max_pool_2.view(-1, self.hidden_dim)
        # print("max_pool_2_view:\t%s"%str(max_pool_2.shape))

        return self.LRelu(self.out_layer(max_pool_2))



In [8]:
class ConvDuelingDQN(nn.Module):
    def __init__(self, seq_len_in, actions_n, kernel_size=8):
        super(ConvDuelingDQN, self).__init__()
        n_filters = 64
        max_pool_kernel = 2
        self.conv1 = nn.Conv1d(1, n_filters, kernel_size)
        self.maxPool = nn.MaxPool1d(max_pool_kernel, stride=1)
        self.LRelu = nn.LeakyReLU()
        self.conv2 = nn.Conv1d(n_filters, n_filters, kernel_size // 2)

        self.advantage_hidden_dim = n_filters * (
                            (
                                (
                                    (seq_len_in - kernel_size + 1) -
                                    max_pool_kernel + 1) -
                                    kernel_size // 2 + 1) -
                                    max_pool_kernel + 1)

        self.value_hidden_dim = n_filters * (
                            (
                                (
                                    (seq_len_in - kernel_size + 1) -
                                    max_pool_kernel + 1) -
                                    kernel_size // 2 + 1) -
                                    max_pool_kernel + 1)

        self.advantage_layer = nn.Linear(self.advantage_hidden_dim, actions_n)
        self.value_layer = nn.Linear(self.value_hidden_dim, 1)

    def forward(self, x):
        c1_out = self.conv1(x)
        max_pool_1 = self.maxPool(self.LRelu(c1_out))
        c2_out = self.conv2(max_pool_1)
        max_pool_2 = self.maxPool(self.LRelu(c2_out))
        max_pool_2 = max_pool_2.view(-1, self.advantage_hidden_dim)

        advantage = self.advantage_layer(max_pool_2)
        value = self.value_layer(max_pool_2).expand_as(advantage)
        q_values = value + (advantage - advantage.mean(1, keepdim=True))
        #print(q_values)

        return q_values


In [9]:
#Linear Dueling
class LinearDuelingDQN(nn.Module):
    def __init__(self, input_dim, num_actions):
        super(LinearDuelingDQN, self).__init__()
        self.num_actions = num_actions

        '''self.feature = nn.Sequential(
            nn.Linear(input_dim, 120),
            nn.LeakyReLU(),
            nn.Linear(120, 120),
            nn.LeakyReLU()
        )

        self.advantage_stream = nn.Sequential(
            nn.Linear(120, 120),
            nn.LeakyReLU(),
            nn.Linear(120, num_actions)
        )

        self.value_stream = nn.Sequential(
            nn.Linear(120, 120),
            nn.ReLU(),
            nn.Linear(120, 1)
        )'''
        self.fc1 = nn.Linear(input_dim, 64)
        self.relu = nn.ReLU()
        self.fc_value = nn.Linear(64, 256)
        self.fc_adv = nn.Linear(64, 256)

        self.value = nn.Linear(256, 1)
        self.adv = nn.Linear(256, self.num_actions)

    def forward(self, x):
        '''x = self.feature(x)
        advantage = self.advantage_stream(x)
        value = self.value_stream(x)
        # Ensure the shapes are compatible for addition and mean calculation
        return value + advantage - advantage.mean(dim=1, keepdim=True)'''

        y = self.relu(self.fc1(x))
        value = self.relu(self.fc_value(y))
        adv = self.relu(self.fc_adv(y))

        value = self.value(value)
        adv = self.adv(adv)

        advAverage = torch.mean(adv, dim=1, keepdim=True)
        Q = value + adv - advAverage
        #print(Q.squeeze(1))

        return Q.squeeze(1)


In [10]:
#UTIL FUNCTIONS

Transition = namedtuple('Transition', ('state', 'action', 'next_state', 'reward') )


class ReplayMemory(object):

    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []
        self.position = 0

    def push(self, *args):
        """Saves a transition."""
        if len(self.memory) < self.capacity:
            self.memory.append(None)
        self.memory[self.position] = Transition(*args)
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)


def print_stats(model, c_return, t):
    c_return = np.array(c_return).flatten()
    t.add_row([str(model), "%.2f" % np.mean(c_return), "%.2f" % np.amax(c_return), "%.2f" % np.amin(c_return),
               "%.2f" % np.std(c_return)])

def plot_pnl(name, cum_returns, slice):
    """ NB. cum_returns must be 2-dim """
    # Mean
    M = np.mean(np.array(cum_returns), axis=0)
    # std dev
    S = np.std(np.array(cum_returns), axis=0)
    # upper and lower limit of confidence intervals
    LL = M - 0.95 * S
    UL = M + 0.95 * S

    fig, axs = plt.subplots(1, 2, figsize=(20, 5))  # 1 row, 2 columns

    axs[0].plot(range(len(M)), M, linewidth=2)  # Plot the mean curve on the first subplot
    axs[0].fill_between(range(len(M)), LL, UL, color='b', alpha=.2)  # Fill between for the first subplot
    axs[0].grid(True)
    axs[0].set_xlabel("Trading Instant (h)")
    axs[0].set_ylabel("Return")
    axs[0].legend(['Cumulative Average Return (%)'], loc='upper left')

    # axs[1].plot(x=slice.index, y = slice)
    axs[1].plot(slice)

    plt.tight_layout()
    plt.show()

In [24]:
def transform(position: Positions, action: int, state : float, index) -> Any: #Returns : Position, Realized Profits, Unrealized Profits, Closing (False)/Opening(True) a Trade

    """
    If the 'portfolio' (Ledger) is long more than 1 one unit
    We use FIFO (first in first out) to remove the earliest Long in the Active_Long ledger / short in the Active_Short ledger
    """

    # fees = 0.0033 * state
    fees = 5
    realized_profit = 0
    realized_profit_0 = 0
    realized_profit_1 = 0

    #Updating Dollar Profits for each Ledger:
    #1. Update the State:
    Ledgers.ACTIVE_LONG["Current Price"] = [state] * len(Ledgers.ACTIVE_LONG["Entry Price"])
    Ledgers.ACTIVE_SHORT["Current Price"] = [state] * len(Ledgers.ACTIVE_SHORT["Entry Price"])
    #2. Update Profit Figures:
    Ledgers.ACTIVE_LONG["Dollar Profit"] = [current - entry for entry, current in zip(Ledgers.ACTIVE_LONG["Entry Price"], Ledgers.ACTIVE_LONG["Current Price"])]
    Ledgers.ACTIVE_SHORT["Dollar Profit"] = [entry - current for entry, current in zip(Ledgers.ACTIVE_SHORT["Entry Price"], Ledgers.ACTIVE_SHORT["Current Price"])]
    Ledgers.ACTIVE_LONG["% Return"] = [((current - entry)/entry+1) for entry, current in zip(Ledgers.ACTIVE_LONG["Entry Price"], Ledgers.ACTIVE_LONG["Current Price"])]
    Ledgers.ACTIVE_SHORT["% Return"] = [(1-(current - entry)/entry) for entry, current in zip(Ledgers.ACTIVE_SHORT["Entry Price"], Ledgers.ACTIVE_SHORT["Current Price"])]

    if action == Actions.HOLD:
        Ledgers.HIST['Entry Price'].append(state)
        Ledgers.HIST['Action Type'].append(int(action))
        Ledgers.HIST['Position Type'].append(position)
        Ledgers.HIST['Dollar Profit, Unrealized'] = sum(Ledgers.ACTIVE_LONG['Dollar Profit']) + sum(Ledgers.ACTIVE_SHORT['Dollar Profit'])
        Ledgers.HIST['Time Index'].append(index)
        return position, Ledgers.HIST['Dollar Profit, Realized'], Ledgers.HIST['Dollar Profit, Unrealized'], False

    elif action == Actions.BUY:
        Ledgers.HIST['Time Index'].append(index)
        #Update the counter
        Positions.COUNTER += 1
        if position == Positions.SHORT:  #CLOSING A POSITION BECAUSE WE ARE IN THE SHORT
            #Save the profit of the earliest Long position & update profits positions
            realized_profit = Ledgers.ACTIVE_SHORT['Dollar Profit'][0]
            Ledgers.HIST['Dollar Profit, Realized'] += realized_profit - fees

            #Remove the earliest position : first in, first out
            Ledgers.ACTIVE_SHORT = {key: value[1:] for key, value in Ledgers.ACTIVE_SHORT.items()}

            #Update Unrealized Profits
            Ledgers.HIST['Dollar Profit, Unrealized'] = sum(Ledgers.ACTIVE_LONG['Dollar Profit']) + sum(Ledgers.ACTIVE_SHORT['Dollar Profit'])

            if Positions.COUNTER < 0:
                Ledgers.HIST['Entry Price'].append(state)
                Ledgers.HIST['Action Type'].append(int(action))
                Ledgers.HIST['Position Type'].append(Positions.SHORT)

                # print(Ledgers.ACTIVE_SHORT['Entry Price'])
                assert len(Ledgers.ACTIVE_SHORT['Entry Price']) > 0 #check

                return Positions.SHORT, Ledgers.HIST['Dollar Profit, Realized'], Ledgers.HIST['Dollar Profit, Unrealized'], False

            elif Positions.COUNTER == 0:
                Ledgers.HIST['Entry Price'].append(state)
                Ledgers.HIST['Action Type'].append(int(action))
                Ledgers.HIST['Position Type'].append(Positions.FLAT)

                # print(Ledgers.ACTIVE_SHORT['Entry Price'])
                assert len(Ledgers.ACTIVE_SHORT['Entry Price']) == 0 #check

                return Positions.FLAT, Ledgers.HIST['Dollar Profit, Realized'], Ledgers.HIST['Dollar Profit, Unrealized'], False

        elif position == Positions.LONG or position == Positions.FLAT:  #OPENING A POSITION BECAUSE WE WERE LONG OR FLAT
            Ledgers.ACTIVE_LONG["Entry Price"].append(state)
            Ledgers.ACTIVE_LONG["Current Price"].append(state)
            Ledgers.ACTIVE_LONG["Dollar Profit"].append(0)
            Ledgers.ACTIVE_LONG["% Return"].append(1)

            Ledgers.HIST['Entry Price'].append(state)
            Ledgers.HIST['Action Type'].append(int(action))
            Ledgers.HIST['Position Type'].append(Positions.LONG)
            Ledgers.HIST['Dollar Profit, Realized'] -= fees
            Ledgers.HIST['Dollar Profit, Unrealized'] = sum(Ledgers.ACTIVE_LONG['Dollar Profit']) + sum(Ledgers.ACTIVE_SHORT['Dollar Profit'])


            # print(Ledgers.ACTIVE_SHORT['Entry Price'])
            assert len(Ledgers.ACTIVE_SHORT['Entry Price']) == 0

            return Positions.LONG, Ledgers.HIST['Dollar Profit, Realized'], Ledgers.HIST['Dollar Profit, Unrealized'], True

    elif action == Actions.SELL:
        Ledgers.HIST['Time Index'].append(index)
        #Update the counter
        Positions.COUNTER -= 1
        if position == Positions.LONG:  #CLOSING A POSITION BECAUSE WE ARE IN THE LONG
            #Save the profit of the earliest Long position & update profits positions
            realized_profits = Ledgers.ACTIVE_LONG['Dollar Profit'][0]
            Ledgers.HIST['Dollar Profit, Realized'] += realized_profits - fees

            #Remove the earliest position : first in, first out
            Ledgers.ACTIVE_LONG = {key: value[1:] for key, value in Ledgers.ACTIVE_LONG.items()}

            #Update Unrealized Profits & Ledgers.HIST
            Ledgers.HIST['Dollar Profit, Unrealized'] = sum(Ledgers.ACTIVE_LONG['Dollar Profit']) + sum(Ledgers.ACTIVE_SHORT['Dollar Profit'])
            Ledgers.HIST['Entry Price'].append(state)
            Ledgers.HIST['Action Type'].append(int(action))

            if Positions.COUNTER > 0:
                Ledgers.HIST['Position Type'].append(Positions.LONG)
                assert len(Ledgers.ACTIVE_LONG['Entry Price']) > 0 #check

                return Positions.LONG, Ledgers.HIST['Dollar Profit, Realized'], Ledgers.HIST['Dollar Profit, Unrealized'], False

            elif Positions.COUNTER == 0:
                Ledgers.HIST['Position Type'].append(Positions.FLAT)
                assert len(Ledgers.ACTIVE_LONG['Entry Price']) == 0 #check

                return Positions.FLAT, Ledgers.HIST['Dollar Profit, Realized'], Ledgers.HIST['Dollar Profit, Unrealized'], True

        elif position == Positions.SHORT or position == Positions.FLAT:  #OPENING A POSITION BECAUSE WE WERE SHORT OR FLAT
            Ledgers.ACTIVE_SHORT["Entry Price"].append(state)
            Ledgers.ACTIVE_SHORT["Current Price"].append(state)
            Ledgers.ACTIVE_SHORT["Dollar Profit"].append(0)
            Ledgers.ACTIVE_SHORT["% Return"].append(1)


            Ledgers.HIST['Entry Price'].append(state)
            Ledgers.HIST['Action Type'].append(int(action))
            Ledgers.HIST['Position Type'].append(Positions.SHORT)
            Ledgers.HIST['Dollar Profit, Realized'] -= fees

            assert len(Ledgers.ACTIVE_LONG['Entry Price']) == 0

            return Positions.SHORT, Ledgers.HIST['Dollar Profit, Realized'], Ledgers.HIST['Dollar Profit, Unrealized'], True

    elif action == Actions.DOUBLE_BUY:
        Ledgers.HIST['Time Index'].append(index)
        #Update the counter
        Positions.COUNTER += 2
        if position == Positions.SHORT and Positions.COUNTER <= 0:  #CLOSING TWO POSITIONS BECAUSE WE ARE IN THE SHORT
            #Save the profit of the earliest Long position & update profits positions
            realized_profit_0 = Ledgers.ACTIVE_SHORT['Dollar Profit'][0]
            realized_profit_1 = Ledgers.ACTIVE_SHORT['Dollar Profit'][1]
            Ledgers.HIST['Dollar Profit, Realized'] += realized_profit_0 + realized_profit_1 - fees * 2

            #Remove the earliest position : first in, first out
            Ledgers.ACTIVE_SHORT = {key: value[2:] for key, value in Ledgers.ACTIVE_SHORT.items()}

            #Update Unrealized Profits
            Ledgers.HIST['Dollar Profit, Unrealized'] = sum(Ledgers.ACTIVE_LONG['Dollar Profit']) + sum(Ledgers.ACTIVE_SHORT['Dollar Profit'])


            if Positions.COUNTER < 0:
                #Updating Hist Ledger
                Ledgers.HIST['Entry Price'].append(state)
                Ledgers.HIST['Action Type'].append(int(action))
                Ledgers.HIST['Position Type'].append(Positions.SHORT)

                # print(Ledgers.ACTIVE_SHORT['Entry Price'])
                assert len(Ledgers.ACTIVE_SHORT['Entry Price']) > 0 #check

                return Positions.SHORT, Ledgers.HIST['Dollar Profit, Realized'], Ledgers.HIST['Dollar Profit, Unrealized'], False

            elif Positions.COUNTER == 0:
                #Updating Hist Ledger
                Ledgers.HIST['Entry Price'].append(state)
                Ledgers.HIST['Action Type'].append(int(action))
                Ledgers.HIST['Position Type'].append(Positions.FLAT)

                # print(Ledgers.ACTIVE_SHORT['Entry Price'])
                assert len(Ledgers.ACTIVE_SHORT['Entry Price']) == 0 #check

                return Positions.FLAT, Ledgers.HIST['Dollar Profit, Realized'], Ledgers.HIST['Dollar Profit, Unrealized'], False

        else:  #OPENING A POSITION BECAUSE WE WERE LONG OR FLAT

            if Positions.COUNTER - 2 == -1: # that means we were Short 1 unit before and must match it / close it with a Long in FIFO mode
                assert len(Ledgers.ACTIVE_SHORT['Entry Price']) == 1
                #Closing the last Short position:
                #Saving the earliest profit
                realized_profit = Ledgers.ACTIVE_SHORT['Dollar Profit'][0]
                Ledgers.HIST['Dollar Profit, Realized'] += realized_profit - fees
                #Remove the earliest position : first in, first out
                Ledgers.ACTIVE_SHORT = {key: value[1:] for key, value in Ledgers.ACTIVE_SHORT.items()}

            else: #Opening the first Long
                Ledgers.ACTIVE_LONG["Entry Price"].append(state)
                Ledgers.ACTIVE_LONG["Current Price"].append(state)
                Ledgers.ACTIVE_LONG["Dollar Profit"].append(0)
                Ledgers.ACTIVE_LONG["% Return"].append(1)

            #Open the second Long
            Ledgers.ACTIVE_LONG["Entry Price"].append(state)
            Ledgers.ACTIVE_LONG["Current Price"].append(state)
            Ledgers.ACTIVE_LONG["Dollar Profit"].append(0)
            Ledgers.ACTIVE_LONG["% Return"].append(1)

            Ledgers.HIST['Entry Price'].append(state)
            Ledgers.HIST['Action Type'].append(int(action))
            Ledgers.HIST['Position Type'].append(Positions.LONG)
            Ledgers.HIST['Dollar Profit, Realized'] -= fees * 2
            Ledgers.HIST['Dollar Profit, Unrealized'] = sum(Ledgers.ACTIVE_LONG['Dollar Profit']) + sum(Ledgers.ACTIVE_SHORT['Dollar Profit'])

            # print(Ledgers.ACTIVE_SHORT['Entry Price'])
            assert(len(Ledgers.ACTIVE_SHORT['Entry Price'])) == 0

            return Positions.LONG, Ledgers.HIST['Dollar Profit, Realized'], Ledgers.HIST['Dollar Profit, Unrealized'], True

    elif action == Actions.DOUBLE_SELL:
        Ledgers.HIST['Time Index'].append(index)
        #Update the counter
        Positions.COUNTER -= 2
        if position == Positions.LONG and Positions.COUNTER >= 0:  #CLOSING A POSITION BECAUSE WE ARE IN THE LONG
            #Save the profit of the earliest Long position & update profits positions
            realized_profit_0 = Ledgers.ACTIVE_LONG['Dollar Profit'][0]
            realized_profit_1 = Ledgers.ACTIVE_LONG['Dollar Profit'][1]
            Ledgers.HIST['Dollar Profit, Realized'] += realized_profit_0 + realized_profit_1 - fees * 2

            #Remove the earliest position : first in, first out
            Ledgers.ACTIVE_LONG = {key: value[2:] for key, value in Ledgers.ACTIVE_LONG.items()}

            #Update Unrealized Profits
            Ledgers.HIST['Dollar Profit, Unrealized'] = sum(Ledgers.ACTIVE_LONG['Dollar Profit']) + sum(Ledgers.ACTIVE_SHORT['Dollar Profit'])
            Ledgers.HIST['Entry Price'].append(state)
            Ledgers.HIST['Action Type'].append(int(action))

            if Positions.COUNTER > 0:
                Ledgers.HIST['Position Type'].append(Positions.LONG)
                assert len(Ledgers.ACTIVE_LONG['Entry Price']) > 0 #check
                return Positions.LONG, Ledgers.HIST['Dollar Profit, Realized'], Ledgers.HIST['Dollar Profit, Unrealized'], False

            elif Positions.COUNTER == 0:
                Ledgers.HIST['Position Type'].append(Positions.FLAT)
                assert len(Ledgers.ACTIVE_LONG['Entry Price']) == 0 #check
                return Positions.FLAT, Ledgers.HIST['Dollar Profit, Realized'], Ledgers.HIST['Dollar Profit, Unrealized'], False

        else:  #OPENING A POSITION BECAUSE WE WERE SHORT OR FLAT
            if Positions.COUNTER + 2 == 1: # that means we were Long 1 unit before and must match it / close it with a Short FIFO mode
                #First closing:
                #Remove the earliest position : first in, first out
                realized_profit = Ledgers.ACTIVE_LONG['Dollar Profit'][0]
                Ledgers.HIST['Dollar Profit, Realized'] += realized_profit - fees
                Ledgers.ACTIVE_LONG = {key: value[1:] for key, value in Ledgers.ACTIVE_LONG.items()}

            else: #First Shorting
                Ledgers.ACTIVE_SHORT["Entry Price"].append(state)
                Ledgers.ACTIVE_SHORT["Current Price"].append(state)
                Ledgers.ACTIVE_SHORT["Dollar Profit"].append(0)
                Ledgers.ACTIVE_SHORT["% Return"].append(1)

            #Second Shorting
            Ledgers.ACTIVE_SHORT["Entry Price"].append(state)
            Ledgers.ACTIVE_SHORT["Current Price"].append(state)
            Ledgers.ACTIVE_SHORT["Dollar Profit"].append(0)
            Ledgers.ACTIVE_SHORT["% Return"].append(1)

            Ledgers.HIST['Entry Price'].append(state)
            Ledgers.HIST['Action Type'].append(int(action))
            Ledgers.HIST['Position Type'].append(Positions.SHORT)
            Ledgers.HIST['Dollar Profit, Realized'] -= fees * 2
            Ledgers.HIST['Dollar Profit, Unrealized'] = sum(Ledgers.ACTIVE_LONG['Dollar Profit']) + sum(Ledgers.ACTIVE_SHORT['Dollar Profit'])

            assert len(Ledgers.ACTIVE_LONG['Current Price']) == 0

            return Positions.SHORT, Ledgers.HIST['Dollar Profit, Realized'], Ledgers.HIST['Dollar Profit, Unrealized'], True

    elif action == Actions.COMBO_BUY:
        Ledgers.HIST['Time Index'].append(index)
        #Update the counter
        Positions.COUNTER += 6
        if position == Positions.SHORT and Positions.COUNTER <= 0:  #CLOSING x POSITIONS BECAUSE WE ARE IN THE SHORT
            #Save the profit of the earliest Long position & update profits positions
            realized_profit_temp = 0
            for i in range(6):
                realized_profit_temp =+ Ledgers.ACTIVE_SHORT['Dollar Profit'][i]

            Ledgers.HIST['Dollar Profit, Realized'] += realized_profit_temp - fees * 6

            #Remove the earliest position : first in, first out
            # print(Ledgers.ACTIVE_LONG['Current Price'])
            Ledgers.ACTIVE_SHORT = {key: value[6:] for key, value in Ledgers.ACTIVE_SHORT.items()}

            #Update Unrealized Profits
            Ledgers.HIST['Dollar Profit, Unrealized'] = sum(Ledgers.ACTIVE_LONG['Dollar Profit']) + sum(Ledgers.ACTIVE_SHORT['Dollar Profit'])


            if Positions.COUNTER < 0:
                #Updating Hist Ledger
                Ledgers.HIST['Entry Price'].append(state)
                Ledgers.HIST['Action Type'].append(int(action))
                Ledgers.HIST['Position Type'].append(Positions.SHORT)

                # print(Ledgers.ACTIVE_SHORT['Entry Price'])
                assert len(Ledgers.ACTIVE_SHORT['Entry Price']) > 0 #check

                return Positions.SHORT, Ledgers.HIST['Dollar Profit, Realized'], Ledgers.HIST['Dollar Profit, Unrealized'], False

            elif Positions.COUNTER == 0:
                #Updating Hist Ledger
                Ledgers.HIST['Entry Price'].append(state)
                Ledgers.HIST['Action Type'].append(int(action))
                Ledgers.HIST['Position Type'].append(Positions.FLAT)

                # print(Ledgers.ACTIVE_SHORT['Entry Price'])
                assert len(Ledgers.ACTIVE_SHORT['Entry Price']) == 0 #check

                return Positions.FLAT, Ledgers.HIST['Dollar Profit, Realized'], Ledgers.HIST['Dollar Profit, Unrealized'], False

        else:  #OPENING A POSITION BECAUSE WE WERE LONG OR FLAT
            if 0 < Positions.COUNTER < 6: # that means we were Short Position.Count - 6 units before and must match it / close it with a Long in FIFO mode
                #Closing the last Short position:
                realized_profit_temp = 0
                indexer = abs(Positions.COUNTER - 6)
                for i in range(indexer):
                    realized_profit_temp += Ledgers.ACTIVE_SHORT['Dollar Profit'][i]

                Ledgers.HIST['Dollar Profit, Realized'] += realized_profit_temp - fees * indexer
                #Remove the earliest position : first in, first out
                Ledgers.ACTIVE_SHORT = {key: value[indexer:] for key, value in Ledgers.ACTIVE_SHORT.items()}

                #Open the rest of the longs
                for i in range(6 - indexer):
                    Ledgers.ACTIVE_LONG["Entry Price"].append(state)
                    Ledgers.ACTIVE_LONG["Current Price"].append(state)
                    Ledgers.ACTIVE_LONG["Dollar Profit"].append(0)
                    Ledgers.ACTIVE_LONG["% Return"].append(1)

            else: #Updating Long positions
                #to improve : simulate order book thinening
                for i in range(6):
                    Ledgers.ACTIVE_LONG["Entry Price"].append(state)
                    Ledgers.ACTIVE_LONG["Current Price"].append(state)
                    Ledgers.ACTIVE_LONG["Dollar Profit"].append(0)
                    Ledgers.ACTIVE_LONG["% Return"].append(1)


            Ledgers.HIST['Entry Price'].append(state)
            Ledgers.HIST['Action Type'].append(int(action))
            Ledgers.HIST['Position Type'].append(Positions.LONG)
            Ledgers.HIST['Dollar Profit, Realized'] -= fees * 2
            Ledgers.HIST['Dollar Profit, Unrealized'] = sum(Ledgers.ACTIVE_LONG['Dollar Profit']) + sum(Ledgers.ACTIVE_SHORT['Dollar Profit'])

            # print(Ledgers.ACTIVE_SHORT['Entry Price'])
            assert(len(Ledgers.ACTIVE_SHORT['Entry Price'])) == 0

            return Positions.LONG, Ledgers.HIST['Dollar Profit, Realized'], Ledgers.HIST['Dollar Profit, Unrealized'], True

    elif action == Actions.CLOSE_ALL:
        Ledgers.HIST['Time Index'].append(index)
        Ledgers.HIST['Entry Price'].append(state)
        Ledgers.HIST['Action Type'].append(int(action))
        Ledgers.HIST['Position Type'].append(Positions.FLAT)
        Ledgers.HIST['Dollar Profit, Unrealized'] = sum(Ledgers.ACTIVE_LONG['Dollar Profit']) + sum(Ledgers.ACTIVE_SHORT['Dollar Profit'])
        Ledgers.HIST['Dollar Profit, Realized'] += (Ledgers.HIST['Dollar Profit, Unrealized']
                                                    - fees * (abs(Positions.COUNTER)))

        #Penalty to simulate cost of market impact from market/panic selling:
        if Ledgers.HIST['Dollar Profit, Realized'] > 0:
            Ledgers.HIST['Dollar Profit, Realized'] *= (1 - market_impact)
        else:
            Ledgers.HIST['Dollar Profit, Realized'] *= (1 + market_impact)

        Ledgers.HIST['Dollar Profit, Unrealized'] = 0

        #Resetting:
        Ledgers.ACTIVE_LONG = {
            "Entry Price" : [],
            "Current Price" : [],
            "Dollar Profit" : [],
            "% Return" : []
        }
        Ledgers.ACTIVE_SHORT = {
            "Entry Price" : [],
            "Current Price" : [],
            "Dollar Profit" : [],
            "% Return" : []
        }

        Positions.COUNTER = 0

        return Positions.FLAT, Ledgers.HIST['Dollar Profit, Realized'], Ledgers.HIST['Dollar Profit, Unrealized'], False



In [30]:
#ENVIRONMENT

class Environment:
    def __init__(self, data, aggressive = False):
        self.data = data
        self.reward_f = "profit"
        self.aggressive = aggressive
        self.reset()

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def reset(self):
        self.t = 13
        self.done = False
        self.aggressive = False
        self.agent_pos = Positions.FLAT
        Positions.COUNTER = 0
        self.init_price = self.data.iloc[0, :]['close']


        Ledgers.ACTIVE_LONG = {
            "Entry Price" : [],
            "Current Price" : [],
            "Dollar Profit" : [],
            "% Return" : []
        }

        Ledgers.ACTIVE_SHORT = {
            "Entry Price" : [],
            "Current Price" : [],
            "Dollar Profit" : [],
            "% Return" : []
        }

        Ledgers.HIST = {
            "Entry Price" : [],
            "Action Type" : [],
            "Position Type" : [],
            "Dollar Profit, Realized"   : 0,
            "Dollar Profit, Unrealized" : 0,
            "Time Index" : []
        }

        self.agent_init_pos_real = Ledgers.HIST['Dollar Profit, Realized']
        self.agent_init_pos_unreal = Ledgers.HIST['Dollar Profit, Realized']
        self.agent_pos_total = 0 # realized + unrealized

        self.profits            = [0 for e in range(len(self.data))]
        self.cumulative_return  = [1 for e in range(len(self.data))]

    def get_state(self):
        if not self.done:
            return torch.tensor([price for price in self.data.iloc[self.t - 13:self.t + 1, :]['close']], device=self.device,
                                dtype=torch.float)
        else:
            return None

    def step(self, act):

        reward = 0

        # GET CURRENT STATE
        state = self.data.iloc[self.t, :]['close']
        data_idx = self.data.index
        index = data_idx[self.t]

        # NEW ACTIONS :
        # print()
        # print()
        # print(f'Agent Position, before       : {self.agent_pos}')
        # print(f'Position Counter, before     : {Positions.COUNTER}')
        # print(f'Action Chosen                : {act}')
        self.agent_pos, _realized_profits, _unrealized_profits, _ = transform(self.agent_pos, act, state, index)
        # print(f'Agent Position               : {self.agent_pos}')
        # print(f'Position Counter             : {Positions.COUNTER}')
        # print(f'Agent Realized Profits       : {_realized_profits}')
        # print(f'Agent Unrealized Profits     : {_unrealized_profits}')
        # print(f'Ledger Hist $ Prof Realized  : {Ledgers.HIST["Dollar Profit, Realized"]}')
        # print(f'Ledger Hist $ Prof Unrealized: {Ledgers.HIST["Dollar Profit, Unrealized"]}')
        # print(f'Ledger Hist Entry Price      : {Ledgers.HIST["Entry Price"][::-1]}')
        # print(f'Ledger Hist Action Type      : {Ledgers.HIST["Action Type"][::-1]}')
        # print(f'Ledger Hist Position Type    : {Ledgers.HIST["Position Type"][::-1]}')
        # print(f'Ledger Active Long CP        : {Ledgers.ACTIVE_LONG["Current Price"][::-1]}')
        # print(f'Ledger Active Long EP        : {Ledgers.ACTIVE_LONG["Entry Price"][::-1]}')
        # print(f'Ledger Active Short CP       : {Ledgers.ACTIVE_SHORT["Current Price"][::-1]}')
        # print(f'Ledger Active Short EP       : {Ledgers.ACTIVE_SHORT["Entry Price"][::-1]}')

        self.profits[self.t] = _realized_profits + _unrealized_profits

        self.agent_pos_total += Ledgers.HIST['Dollar Profit, Realized'] + Ledgers.HIST['Dollar Profit, Realized']


        self.cumulative_return[self.t] += (reduce(lambda x, y : x * y , Ledgers.ACTIVE_LONG['% Return'], 1)
                                           * reduce(lambda x , y : x * y, Ledgers.ACTIVE_SHORT['% Return'], 1))

        # COLLECT THE REWARD
        reward = 0
        risk_free_rate = 0.03
        annual_factor = 252

        if self.reward_f == "profit":
            curr_profits = self.profits[self.t]
            if curr_profits > 0:
                reward = 10
            elif curr_profits < 0:
                reward = -10
            elif curr_profits == 0:
                if self.aggressive:
                    reward = -2
                else:
                    reward = 0

        if self.agent_pos == Positions.FLAT and (reward > -5): #penalize not trying to improve
            reward = -5

        # UPDATE THE STATE
        self.t += 1

        if (self.t == len(self.data) - 1):
            self.done = True

        return torch.tensor([reward], device=self.device, dtype=torch.float), self.done, torch.tensor([state],
                                                                                                 dtype=torch.float)  # reward, done, current_state

In [13]:
#AGENT

class Agent:

    def __init__(self,
                 ACTION_NUMBER=len(list(Actions)),
                 REPLAY_MEM_SIZE=100,
                 BATCH_SIZE=40,
                 DISCOUNT=0.98,
                 EPS_START=1,
                 EPS_END=0.12,
                 EPS_STEPS=300,
                 LEARNING_RATE=0.001,
                 INPUT_DIM=14,
                 HIDDEN_DIM=120,
                 TARGET_UPDATE=10,
                 MODEL = 'ConvDQN'):

        self.ACTION_NUMBER = ACTION_NUMBER
        self.REPLAY_MEM_SIZE = REPLAY_MEM_SIZE
        self.BATCH_SIZE = BATCH_SIZE
        self.DISCOUNT = DISCOUNT
        self.EPS_START = EPS_START
        self.EPS_END = EPS_END
        self.EPS_STEPS = EPS_STEPS
        self.LEARNING_RATE = LEARNING_RATE
        self.INPUT_DIM = INPUT_DIM
        self.HIDDEN_DIM = HIDDEN_DIM
        self.TARGET_UPDATE = TARGET_UPDATE
        self.MODEL = MODEL
        self.DOUBLE = False

        if self.MODEL == 'LinearDuelingDQN' or self.MODEL == 'ConvDuelingDQN':
          self.DOUBLE = True

        self.ACTION_LIST = list(Actions)
        self.TRAINING = True  # to do not pick random actions during testing

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print("Agent is using device:\t" + str(self.device))

        if self.MODEL == 'RandomWalk' :
            self.policy_net = RandomWalk()
            self.model_name = '_RandomWalk_'

        elif self.MODEL == 'MomentumFollowing':
            self.policy_net = MomentumFollowing()
            self.model_name = '_MomentumFollowing_'

        else:
            if self.MODEL == 'ConvDQN':
                self.policy_net = ConvDQN(self.INPUT_DIM, self.ACTION_NUMBER).to(self.device)
                self.target_net = ConvDQN(self.INPUT_DIM, self.ACTION_NUMBER).to(self.device)
                self.model_name = '_ConvDQN_'
            elif self.MODEL == 'ConvDuelingDQN':
                self.policy_net = ConvDuelingDQN(self.INPUT_DIM, self.ACTION_NUMBER).to(self.device)
                self.target_net = ConvDuelingDQN(self.INPUT_DIM, self.ACTION_NUMBER).to(self.device)
                self.model_name = '_ConvDuelingDQN_'
            elif self.MODEL == 'LinearDuelingDQN':
                self.policy_net = LinearDuelingDQN(self.INPUT_DIM, self.ACTION_NUMBER).to(self.device)
                self.target_net = LinearDuelingDQN(self.INPUT_DIM, self.ACTION_NUMBER).to(self.device)
                self.model_name = '_LinearDuelingDQN_'



            self.target_net.load_state_dict(self.policy_net.state_dict())
            self.target_net.eval()

            self.optimizer = optim.Adam(self.policy_net.parameters(), lr=self.LEARNING_RATE)

        self.memory = ReplayMemory(self.REPLAY_MEM_SIZE)
        self.steps_done = 0
        self.training_cumulative_reward = []

    def select_action(self, state):
        """ the epsilon-greedy action selection"""
        state = state.unsqueeze(0).unsqueeze(1)
        # print(f'State shape : {state.shape}')
        sample = random.random()
        if self.TRAINING:
            if self.steps_done > self.EPS_STEPS:
                eps_threshold = self.EPS_END
            else:
                eps_threshold = self.EPS_START
        else:
            eps_threshold = self.EPS_END

        self.steps_done += 1

        if self.MODEL == 'ConvDQN' or self.MODEL == 'ConvDuelingDQN' or self.MODEL == 'LinearDuelingDQN':
            # [Exploitation] pick the best action according to current Q approx.
            if sample > eps_threshold:
                with torch.no_grad():
                    return torch.tensor([self.policy_net(state).argmax()], device=self.device, dtype=torch.long)

            # [Exploration]  pick a random action from the action space
            else:
                return torch.tensor([random.choice(self.ACTION_LIST)], device=self.device, dtype=torch.long)

        else:
            if self.MODEL == 'RandomWalk':
                temp = self.policy_net.random_selection()
                return torch.tensor([temp], device=self.device, dtype=torch.long)
                # return self.policy_net.random_selection()
            elif self.MODEL == 'MomentumFollowing':
                temp = self.policy_net.trend_selection(state, cut_out = 5)
                return torch.tensor([temp], device=self.device, dtype=torch.long)
                # return self.policy_net.trend_selection(state, cut_out = 5)

    def optimize_model(self):
        if len(self.memory) < self.BATCH_SIZE:
            # it will return without doing nothing if we have not enough data to sample
            return
        transitions = self.memory.sample(self.BATCH_SIZE)

        batch = Transition(*zip(*transitions))

        # Compute a mask of non-final states and concatenate the batch elements
        non_final_mask = torch.tensor(tuple(map(lambda s: s is not None, batch.next_state)), device=self.device,
                                      dtype=torch.bool)
        nfns = [s for s in batch.next_state if s is not None]
        # nfns = []
        # for s in batch.next_state:
        #     if s is not None:
        #         print(s.shape)
        #         nfns.append(s)

        non_final_next_states = torch.cat(nfns).view(len(nfns), -1)
        non_final_next_states = non_final_next_states.unsqueeze(1)

        state_batch = torch.cat(batch.state).view(self.BATCH_SIZE, -1)
        state_batch = state_batch.unsqueeze(1)
        action_batch = torch.cat(batch.action).view(self.BATCH_SIZE, -1)
        reward_batch = torch.cat(batch.reward).view(self.BATCH_SIZE, -1)

        # Compute Q(s_t, a)
        #print(state_batch.shape)
        #print(action_batch.shape)
        state_action_values = self.policy_net(state_batch).gather(1, action_batch)

        if self.DOUBLE: #for Dueling networks
            _, next_state_action = self.policy_net(state_batch).max(1, keepdim=True)

            next_state_values = torch.zeros(self.BATCH_SIZE, device=self.device).view(self.BATCH_SIZE, -1)

            out = self.target_net(non_final_next_states)
            next_state_values[non_final_mask] = out.gather(1, next_state_action[non_final_mask])

        if not self.DOUBLE:
            # Compute V(s_{t+1}) for all next states.
            next_state_values = torch.zeros(self.BATCH_SIZE, device=self.device)
            next_state_values[non_final_mask] = self.target_net(non_final_next_states).max(1)[0].detach()
            next_state_values = next_state_values.view(self.BATCH_SIZE, -1)

        # Compute V(s_{t+1}) for all next states.
        next_state_values = torch.zeros(self.BATCH_SIZE, device=self.device)
        next_state_values[non_final_mask] = self.target_net(non_final_next_states).max(1)[0].detach()
        next_state_values = next_state_values.view(self.BATCH_SIZE, -1)

        # Compute the expected Q values
        expected_state_action_values = (next_state_values * self.DISCOUNT) + reward_batch

        # Compute MSE loss
        loss = F.mse_loss(state_action_values,
                          expected_state_action_values)

        # Optimize model
        self.optimizer.zero_grad()
        loss.backward()
        for param in self.policy_net.parameters():
            param.grad.data.clamp_(-1, 1)
        self.optimizer.step()

    def train(self, env, path, num_episodes=30):
        self.TRAINING = True
        cumulative_reward = [0 for t in range(num_episodes)]
        print("Training:")
        if (self.MODEL != 'RandomWalk') and (self.MODEL != 'MomentumFollowing'):
            for i_episode in tqdm(range(num_episodes)):
                # Initialize the environment and state
                env.reset()  # reset the env st it is set at the beginning of the time serie
                self.steps_done = 0
                state = env.get_state()
                for t in range(len(env.data)):  # while not env.done
                    # Select and perform an action
                    action = self.select_action(state)
                    reward, done, _ = env.step(action)

                    cumulative_reward[i_episode] += reward.item()

                    if done:
                        break

                    else:
                        next_state = env.get_state()

                        # Store the transition in memory
                        self.memory.push(state, action, next_state, reward)

                        # Move to the next state
                        state = next_state

                        self.optimize_model()

                # Update the target network, copying all weights and biases of policy_net
                if i_episode % self.TARGET_UPDATE == 0:
                    self.target_net.load_state_dict(self.policy_net.state_dict())

            # save the model
            model_name = env.reward_f + self.model_name
            count = 0
            while os.path.exists(path + model_name):  # avoid overrinding models
                count += 1
                model_name = model_name + "_" + str(count)

            torch.save(self.policy_net.state_dict(), path + model_name)

        else: #Baseline Models
            for i_episode in tqdm(range(num_episodes)):
                # Initialize the environment and state
                env.reset()  # reset the env st it is set at the beginning of the time serie
                self.steps_done = 0
                state = env.get_state()

                for t in range(len(env.data)):  # while not env.done
                    # Select and perform an action
                    action = self.select_action(state)
                    reward, done, _ = env.step(action)

                    cumulative_reward[i_episode] += reward.item()

                    if done:
                        break

                    else:
                        next_state = env.get_state()

                        # Store the transition in memory
                        self.memory.push(state, action, next_state, reward)

                        # Move to the next state
                        state = next_state

        return cumulative_reward

    def test(self, env_test, path=None):
        self.TRAINING = False
        cumulative_reward = [0 for _ in range(len(env_test.data))]
        reward_list = [0 for _ in range(len(env_test.data))]

        if (self.MODEL != 'RandomWalk') and (self.MODEL != 'MomentumFollowing'):
            if self.model_name is None:
                pass
            elif path is not None:
                if re.match(".*_ConvDQN_.*", self.model_name) or re.match(".*ConvDuelingDQN.*", self.model_name) or re.match(".*LinearDuelingDQN.*", self.model_name):
                  if re.match(".*_ConvDQN_.*", self.model_name):
                      self.policy_net = ConvDQN(self.INPUT_DIM, self.ACTION_NUMBER).to(self.device)
                  elif re.match(".*ConvDuelingDQN.*", self.model_name):
                      self.policy_net = ConvDuelingDQN(self.INPUT_DIM, self.ACTION_NUMBER).to(self.device)
                  elif re.match(".*LinearDuelingDQN.*", self.model_name):
                      self.policy_net = LinearDuelingDQN(self.INPUT_DIM, self.ACTION_NUMBER).to(self.device)


                  if str(self.device) == "cuda":
                      self.policy_net.load_state_dict(torch.load(path + "profit" + self.model_name))
                  else:
                      self.policy_net.load_state_dict(torch.load(path + "profit" + self.model_name, map_location=torch.device('cpu')))
                else:
                      raise RuntimeError("Please Provide a valid model name or valid path.")
            else:
                raise RuntimeError('Path can not be None if model Name is not None.')

        env_test.reset()
        state = env_test.get_state()
        for t in tqdm(range(len(env_test.data))):  # while not env.done

            # Select and perform an action
            action = self.select_action(state)

            reward, done, _ = env_test.step(action)

            cumulative_reward[t] += reward.item() + cumulative_reward[t - 1 if t - 1 > 0 else 0]
            reward_list[t] = reward

            next_state = env_test.get_state()
            state = next_state

            if done:
                break

        return cumulative_reward, reward_list


In [33]:
analytic_dic = {
    "5MIN" : {
        'LinearDuelingDQN' : {
            "Time Index" : None,
            "Price" : None,
            "Profit" : None,
            "Action Type" : None,
            "Position Type" : None,
            "Time Action Idx" : None
        },
        'ConvDuelingDQN' : {
            "Time Index" : None,
            "Price" : None,
            "Profit" : None,
            "Action Type" : None,
            "Position Type" : None,
            "Time Action Idx" : None
        },
        'MomentumFollowing' : {
            "Time Index" : None,
            "Price" : None,
            "Profit" : None,
            "Action Type" : None,
            "Position Type" : None,
            "Time Action Idx" : None
        },
        'RandomWalk' : {
            "Time Index" : None,
            "Price" : None,
            "Profit" : None,
            "Action Type" : None,
            "Position Type" : None,
            "Time Action Idx" : None
        },
        'ConvDQN' : {
            "Time Index" : None,
            "Price" : None,
            "Profit" : None,
            "Action Type" : None,
            "Position Type" : None,
            "Time Action Idx" : None
        },
    },
    "15MIN" : {
        'LinearDuelingDQN' : {
            "Time Index" : None,
            "Price" : None,
            "Profit" : None,
            "Action Type" : None,
            "Position Type" : None,
            "Time Action Idx" : None
        },
        'ConvDuelingDQN' : {
            "Time Index" : None,
            "Price" : None,
            "Profit" : None,
            "Action Type" : None,
            "Position Type" : None,
            "Time Action Idx" : None
        },
        'MomentumFollowing' : {
            "Time Index" : None,
            "Price" : None,
            "Profit" : None,
            "Action Type" : None,
            "Position Type" : None,
            "Time Action Idx" : None
        },
        'RandomWalk' : {
            "Time Index" : None,
            "Price" : None,
            "Profit" : None,
            "Action Type" : None,
            "Position Type" : None,
            "Time Action Idx" : None
        },
        'ConvDQN' : {
            "Time Index" : None,
            "Price" : None,
            "Profit" : None,
            "Action Type" : None,
            "Position Type" : None,
            "Time Action Idx" : None
        },
    },
    "1H" : {
        'LinearDuelingDQN' : {
            "Time Index" : None,
            "Price" : None,
            "Profit" : None,
            "Action Type" : None,
            "Position Type" : None,
            "Time Action Idx" : None
        },
        'ConvDuelingDQN' : {
            "Time Index" : None,
            "Price" : None,
            "Profit" : None,
            "Action Type" : None,
            "Position Type" : None,
            "Time Action Idx" : None
        },
        'MomentumFollowing' : {
            "Time Index" : None,
            "Price" : None,
            "Profit" : None,
            "Action Type" : None,
            "Position Type" : None,
            "Time Action Idx" : None
        },
        'RandomWalk' : {
            "Time Index" : None,
            "Price" : None,
            "Profit" : None,
            "Action Type" : None,
            "Position Type" : None,
            "Time Action Idx" : None
        },
        'ConvDQN' : {
            "Time Index" : None,
            "Price" : None,
            "Profit" : None,
            "Action Type" : None,
            "Position Type" : None,
            "Time Action Idx" : None
        },
    },
    "6H" : {
        'LinearDuelingDQN' : {
            "Time Index" : None,
            "Price" : None,
            "Profit" : None,
            "Action Type" : None,
            "Position Type" : None,
            "Time Action Idx" : None
        },
        'ConvDuelingDQN' : {
            "Time Index" : None,
            "Price" : None,
            "Profit" : None,
            "Action Type" : None,
            "Position Type" : None,
            "Time Action Idx" : None
        },
        'MomentumFollowing' : {
            "Time Index" : None,
            "Price" : None,
            "Profit" : None,
            "Action Type" : None,
            "Position Type" : None,
            "Time Action Idx" : None
        },
        'RandomWalk' : {
            "Time Index" : None,
            "Price" : None,
            "Profit" : None,
            "Action Type" : None,
            "Position Type" : None,
            "Time Action Idx" : None
        },
        'ConvDQN' : {
            "Time Index" : None,
            "Price" : None,
            "Profit" : None,
            "Action Type" : None,
            "Position Type" : None,
            "Time Action Idx" : None
        },
    },
    "1D" : {
        'LinearDuelingDQN' : {
            "Time Index" : None,
            "Price" : None,
            "Profit" : None,
            "Action Type" : None,
            "Position Type" : None,
            "Time Action Idx" : None
        },
        'ConvDuelingDQN' : {
            "Time Index" : None,
            "Price" : None,
            "Profit" : None,
            "Action Type" : None,
            "Position Type" : None,
            "Time Action Idx" : None
        },
        'MomentumFollowing' : {
            "Time Index" : None,
            "Price" : None,
            "Profit" : None,
            "Action Type" : None,
            "Position Type" : None,
            "Time Action Idx" : None
        },
        'RandomWalk' : {
            "Time Index" : None,
            "Price" : None,
            "Profit" : None,
            "Action Type" : None,
            "Position Type" : None,
            "Time Action Idx" : None
        },
        'ConvDQN' : {
            "Time Index" : None,
            "Price" : None,
            "Profit" : None,
            "Action Type" : None,
            "Position Type" : None,
            "Time Action Idx" : None
        },
    },
}

def main_():
    global analytic_dic
    #----------------------------- LOAD DATA ---------------------------------------------------------------------------
    path = './'


    # ----------------------------- AGENTS INPUT --------------------------------
    # ACTION_NUMBER = len(list(Actions))
    ACTION_NUMBER = len(list(Actions))
    REPLAY_MEM_SIZE = 100
    BATCH_SIZE = 10
    DISCOUNT = 0.98
    EPS_START = 1
    EPS_END = 0.12
    EPS_STEPS = 100
    LEARNING_RATE = 0.001
    INPUT_DIM = 14
    HIDDEN_DIM = 120
    TARGET_UPDATE = 10
    N_TEST = 1
    TRADING_PERIOD = 1000
    MODEL_LIST = ['LinearDuelingDQN','ConvDuelingDQN','MomentumFollowing', 'RandomWalk', 'ConvDQN']

        # "Action Type" : [],
        # "Position Type" : [],


    df_list_names = ["5MIN","15MIN","1H","6H","1D"]
    for i, df_ in enumerate(df_list):
      print()
      print()
      print(df_list_names[i])
      index = random.randrange(len(df_) - TRADING_PERIOD - 1)
      for MODEL in MODEL_LIST:
          dqn_agent = Agent(ACTION_NUMBER,
                          REPLAY_MEM_SIZE,
                          BATCH_SIZE,
                          DISCOUNT,
                          EPS_START,
                          EPS_END,
                          EPS_STEPS,
                          LEARNING_RATE,
                          INPUT_DIM,
                          HIDDEN_DIM,
                          TARGET_UPDATE,
                          MODEL)

          train_size = int(TRADING_PERIOD * 0.8)
          profit_dqn_return = []

          profit_train_env = Environment(df_[index:index + train_size], "profit")

          # Profit Double DQN
          cr_profit_dqn = dqn_agent.train(profit_train_env, path)
          profit_train_env.reset()
          print()
          j = 0
          while j < N_TEST:
              print("Test nr. %s" % str(i+1))
              # index = random.randrange(len(df) - TRADING_PERIOD - 1)

              profit_test_env = Environment(df_[index + train_size:index + TRADING_PERIOD], "profit")

              # Profit Double DQN
              cr_profit_dqn_test, _ = dqn_agent.test(profit_test_env , path=path)
              profit_dqn_return.append(profit_test_env.profits)
              # profit_dqn_return = profit_dqn_return[0][:-1]
              del profit_dqn_return[0][-1]

              slice_df = df_[index + train_size:index + TRADING_PERIOD]

              analytic_dic[df_list_names[i]][MODEL]["Time Index"] = slice_df.index
              analytic_dic[df_list_names[i]][MODEL]["Price"] = slice_df.close
              analytic_dic[df_list_names[i]][MODEL]["Profit"] = profit_dqn_return
              analytic_dic[df_list_names[i]][MODEL]["Action Type"] = list(Ledgers.HIST['Action Type'])
              analytic_dic[df_list_names[i]][MODEL]["Entry Price"] = list(Ledgers.HIST['Entry Price'])
              analytic_dic[df_list_names[i]][MODEL]["Position Type"] = list(Ledgers.HIST['Position Type'])
              analytic_dic[df_list_names[i]][MODEL]["Time Action Idx"] = list(Ledgers.HIST['Time Index'])

              profit_test_env.reset()


              #--------------------------------------- Print Test Stats ---------------------------------------------------------
              t = PrettyTable(["Trading System", "Avg. Return ($)", "Max Return ($)", "Min Return ($)", "Std. Dev."])
              print_stats(f'Profit {MODEL}', profit_dqn_return, t)

              print(t)


              # plot_pnl("Profit C-DQN", profit_dqn_return, slice)
              # print(len(temp_lst))

              j += 1




if __name__ == "__main__":
    main_()



5MIN
Agent is using device:	cuda
Training:


100%|██████████| 30/30 [02:05<00:00,  4.17s/it]



Test nr. 1


 92%|█████████▎| 185/200 [00:00<00:00, 855.01it/s]


+-------------------------+-----------------+----------------+----------------+-----------+
|      Trading System     | Avg. Return ($) | Max Return ($) | Min Return ($) | Std. Dev. |
+-------------------------+-----------------+----------------+----------------+-----------+
| Profit LinearDuelingDQN |      432.18     |    3899.05     |    -2658.94    |  1767.08  |
+-------------------------+-----------------+----------------+----------------+-----------+
Agent is using device:	cuda
Training:


100%|██████████| 30/30 [02:18<00:00,  4.63s/it]



Test nr. 1


 92%|█████████▎| 185/200 [00:00<00:00, 865.17it/s]


+-----------------------+-----------------+----------------+----------------+-----------+
|     Trading System    | Avg. Return ($) | Max Return ($) | Min Return ($) | Std. Dev. |
+-----------------------+-----------------+----------------+----------------+-----------+
| Profit ConvDuelingDQN |     -524.48     |     335.53     |    -2208.30    |   574.75  |
+-----------------------+-----------------+----------------+----------------+-----------+
Agent is using device:	cuda
Training:


100%|██████████| 30/30 [00:22<00:00,  1.33it/s]



Test nr. 1


 92%|█████████▎| 185/200 [00:00<00:00, 800.80it/s]


+--------------------------+-----------------+----------------+----------------+-----------+
|      Trading System      | Avg. Return ($) | Max Return ($) | Min Return ($) | Std. Dev. |
+--------------------------+-----------------+----------------+----------------+-----------+
| Profit MomentumFollowing |     -904.53     |     193.70     |    -1919.31    |   660.89  |
+--------------------------+-----------------+----------------+----------------+-----------+
Agent is using device:	cuda
Training:


100%|██████████| 30/30 [00:17<00:00,  1.72it/s]



Test nr. 1


 92%|█████████▎| 185/200 [00:00<00:00, 1433.61it/s]


+-------------------+-----------------+----------------+----------------+-----------+
|   Trading System  | Avg. Return ($) | Max Return ($) | Min Return ($) | Std. Dev. |
+-------------------+-----------------+----------------+----------------+-----------+
| Profit RandomWalk |     -1379.95    |      0.00      |    -2579.59    |   705.57  |
+-------------------+-----------------+----------------+----------------+-----------+
Agent is using device:	cuda
Training:


100%|██████████| 30/30 [01:47<00:00,  3.59s/it]



Test nr. 1


 92%|█████████▎| 185/200 [00:00<00:00, 709.15it/s]


+----------------+-----------------+----------------+----------------+-----------+
| Trading System | Avg. Return ($) | Max Return ($) | Min Return ($) | Std. Dev. |
+----------------+-----------------+----------------+----------------+-----------+
| Profit ConvDQN |     -145.04     |     241.97     |    -1494.78    |   244.70  |
+----------------+-----------------+----------------+----------------+-----------+


15MIN
Agent is using device:	cuda
Training:


100%|██████████| 30/30 [02:03<00:00,  4.13s/it]



Test nr. 2


 92%|█████████▎| 185/200 [00:00<00:00, 939.30it/s]


+-------------------------+-----------------+----------------+----------------+-----------+
|      Trading System     | Avg. Return ($) | Max Return ($) | Min Return ($) | Std. Dev. |
+-------------------------+-----------------+----------------+----------------+-----------+
| Profit LinearDuelingDQN |     -2294.37    |    2602.00     |   -10407.15    |  2461.71  |
+-------------------------+-----------------+----------------+----------------+-----------+
Agent is using device:	cuda
Training:


100%|██████████| 30/30 [02:17<00:00,  4.59s/it]



Test nr. 2


 92%|█████████▎| 185/200 [00:00<00:00, 875.90it/s]


+-----------------------+-----------------+----------------+----------------+-----------+
|     Trading System    | Avg. Return ($) | Max Return ($) | Min Return ($) | Std. Dev. |
+-----------------------+-----------------+----------------+----------------+-----------+
| Profit ConvDuelingDQN |     2325.23     |    10267.58    |    -1662.92    |  2197.45  |
+-----------------------+-----------------+----------------+----------------+-----------+
Agent is using device:	cuda
Training:


100%|██████████| 30/30 [00:22<00:00,  1.31it/s]



Test nr. 2


 92%|█████████▎| 185/200 [00:00<00:00, 1049.57it/s]


+--------------------------+-----------------+----------------+----------------+-----------+
|      Trading System      | Avg. Return ($) | Max Return ($) | Min Return ($) | Std. Dev. |
+--------------------------+-----------------+----------------+----------------+-----------+
| Profit MomentumFollowing |     -2532.45    |     135.64     |    -5658.37    |  1537.16  |
+--------------------------+-----------------+----------------+----------------+-----------+
Agent is using device:	cuda
Training:


100%|██████████| 30/30 [00:17<00:00,  1.75it/s]



Test nr. 2


 92%|█████████▎| 185/200 [00:00<00:00, 1402.15it/s]


+-------------------+-----------------+----------------+----------------+-----------+
|   Trading System  | Avg. Return ($) | Max Return ($) | Min Return ($) | Std. Dev. |
+-------------------+-----------------+----------------+----------------+-----------+
| Profit RandomWalk |     -1218.42    |     456.13     |    -2753.69    |   912.68  |
+-------------------+-----------------+----------------+----------------+-----------+
Agent is using device:	cuda
Training:


100%|██████████| 30/30 [01:48<00:00,  3.62s/it]



Test nr. 2


 92%|█████████▎| 185/200 [00:00<00:00, 934.27it/s]


+----------------+-----------------+----------------+----------------+-----------+
| Trading System | Avg. Return ($) | Max Return ($) | Min Return ($) | Std. Dev. |
+----------------+-----------------+----------------+----------------+-----------+
| Profit ConvDQN |     -1687.49    |     746.31     |    -5410.55    |  2331.87  |
+----------------+-----------------+----------------+----------------+-----------+


1H
Agent is using device:	cuda
Training:


100%|██████████| 30/30 [02:03<00:00,  4.13s/it]



Test nr. 3


 92%|█████████▎| 185/200 [00:00<00:00, 838.26it/s]


+-------------------------+-----------------+----------------+----------------+-----------+
|      Trading System     | Avg. Return ($) | Max Return ($) | Min Return ($) | Std. Dev. |
+-------------------------+-----------------+----------------+----------------+-----------+
| Profit LinearDuelingDQN |     -1761.18    |      0.00      |    -4746.83    |  1234.43  |
+-------------------------+-----------------+----------------+----------------+-----------+
Agent is using device:	cuda
Training:


100%|██████████| 30/30 [02:17<00:00,  4.58s/it]



Test nr. 3


 92%|█████████▎| 185/200 [00:00<00:00, 813.71it/s]


+-----------------------+-----------------+----------------+----------------+-----------+
|     Trading System    | Avg. Return ($) | Max Return ($) | Min Return ($) | Std. Dev. |
+-----------------------+-----------------+----------------+----------------+-----------+
| Profit ConvDuelingDQN |     -963.35     |      0.00      |    -2390.98    |   694.70  |
+-----------------------+-----------------+----------------+----------------+-----------+
Agent is using device:	cuda
Training:


100%|██████████| 30/30 [00:22<00:00,  1.34it/s]



Test nr. 3


 92%|█████████▎| 185/200 [00:00<00:00, 764.28it/s]


+--------------------------+-----------------+----------------+----------------+-----------+
|      Trading System      | Avg. Return ($) | Max Return ($) | Min Return ($) | Std. Dev. |
+--------------------------+-----------------+----------------+----------------+-----------+
| Profit MomentumFollowing |     -1519.11    |      0.00      |    -3238.61    |   993.33  |
+--------------------------+-----------------+----------------+----------------+-----------+
Agent is using device:	cuda
Training:


100%|██████████| 30/30 [00:17<00:00,  1.72it/s]



Test nr. 3


 92%|█████████▎| 185/200 [00:00<00:00, 1460.95it/s]


+-------------------+-----------------+----------------+----------------+-----------+
|   Trading System  | Avg. Return ($) | Max Return ($) | Min Return ($) | Std. Dev. |
+-------------------+-----------------+----------------+----------------+-----------+
| Profit RandomWalk |     -1228.47    |      0.00      |    -2725.21    |   883.73  |
+-------------------+-----------------+----------------+----------------+-----------+
Agent is using device:	cuda
Training:


100%|██████████| 30/30 [01:47<00:00,  3.57s/it]



Test nr. 3


 92%|█████████▎| 185/200 [00:00<00:00, 938.98it/s]


+----------------+-----------------+----------------+----------------+-----------+
| Trading System | Avg. Return ($) | Max Return ($) | Min Return ($) | Std. Dev. |
+----------------+-----------------+----------------+----------------+-----------+
| Profit ConvDQN |     -1083.14    |      0.00      |    -2293.60    |   770.53  |
+----------------+-----------------+----------------+----------------+-----------+


6H
Agent is using device:	cuda
Training:


100%|██████████| 30/30 [02:03<00:00,  4.13s/it]



Test nr. 4


 92%|█████████▎| 185/200 [00:00<00:00, 921.46it/s]


+-------------------------+-----------------+----------------+----------------+-----------+
|      Trading System     | Avg. Return ($) | Max Return ($) | Min Return ($) | Std. Dev. |
+-------------------------+-----------------+----------------+----------------+-----------+
| Profit LinearDuelingDQN |     -338.84     |     539.51     |    -1732.46    |   509.73  |
+-------------------------+-----------------+----------------+----------------+-----------+
Agent is using device:	cuda
Training:


100%|██████████| 30/30 [02:17<00:00,  4.59s/it]



Test nr. 4


 92%|█████████▎| 185/200 [00:00<00:00, 801.00it/s]


+-----------------------+-----------------+----------------+----------------+-----------+
|     Trading System    | Avg. Return ($) | Max Return ($) | Min Return ($) | Std. Dev. |
+-----------------------+-----------------+----------------+----------------+-----------+
| Profit ConvDuelingDQN |     -1127.94    |      0.00      |    -2660.31    |   678.71  |
+-----------------------+-----------------+----------------+----------------+-----------+
Agent is using device:	cuda
Training:


100%|██████████| 30/30 [00:23<00:00,  1.30it/s]



Test nr. 4


 92%|█████████▎| 185/200 [00:00<00:00, 1067.83it/s]


+--------------------------+-----------------+----------------+----------------+-----------+
|      Trading System      | Avg. Return ($) | Max Return ($) | Min Return ($) | Std. Dev. |
+--------------------------+-----------------+----------------+----------------+-----------+
| Profit MomentumFollowing |     -453.38     |     404.93     |    -1648.13    |   474.40  |
+--------------------------+-----------------+----------------+----------------+-----------+
Agent is using device:	cuda
Training:


100%|██████████| 30/30 [00:17<00:00,  1.74it/s]



Test nr. 4


 92%|█████████▎| 185/200 [00:00<00:00, 1382.25it/s]


+-------------------+-----------------+----------------+----------------+-----------+
|   Trading System  | Avg. Return ($) | Max Return ($) | Min Return ($) | Std. Dev. |
+-------------------+-----------------+----------------+----------------+-----------+
| Profit RandomWalk |     -1730.07    |      0.00      |    -3565.08    |  1089.33  |
+-------------------+-----------------+----------------+----------------+-----------+
Agent is using device:	cuda
Training:


100%|██████████| 30/30 [01:48<00:00,  3.61s/it]



Test nr. 4


 92%|█████████▎| 185/200 [00:00<00:00, 897.47it/s]


+----------------+-----------------+----------------+----------------+-----------+
| Trading System | Avg. Return ($) | Max Return ($) | Min Return ($) | Std. Dev. |
+----------------+-----------------+----------------+----------------+-----------+
| Profit ConvDQN |      15.35      |     802.51     |    -1821.20    |   524.18  |
+----------------+-----------------+----------------+----------------+-----------+


1D
Agent is using device:	cuda
Training:


100%|██████████| 30/30 [02:04<00:00,  4.15s/it]



Test nr. 5


 92%|█████████▎| 185/200 [00:00<00:00, 910.28it/s]


+-------------------------+-----------------+----------------+----------------+-----------+
|      Trading System     | Avg. Return ($) | Max Return ($) | Min Return ($) | Std. Dev. |
+-------------------------+-----------------+----------------+----------------+-----------+
| Profit LinearDuelingDQN |    -17239.02    |    23179.56    |   -88673.97    |  22796.36 |
+-------------------------+-----------------+----------------+----------------+-----------+
Agent is using device:	cuda
Training:


100%|██████████| 30/30 [02:17<00:00,  4.59s/it]



Test nr. 5


 92%|█████████▎| 185/200 [00:00<00:00, 774.83it/s]


+-----------------------+-----------------+----------------+----------------+-----------+
|     Trading System    | Avg. Return ($) | Max Return ($) | Min Return ($) | Std. Dev. |
+-----------------------+-----------------+----------------+----------------+-----------+
| Profit ConvDuelingDQN |    -43162.95    |   391430.81    |   -700466.11   | 244463.84 |
+-----------------------+-----------------+----------------+----------------+-----------+
Agent is using device:	cuda
Training:


100%|██████████| 30/30 [00:22<00:00,  1.35it/s]



Test nr. 5


 92%|█████████▎| 185/200 [00:00<00:00, 753.38it/s]


+--------------------------+-----------------+----------------+----------------+-----------+
|      Trading System      | Avg. Return ($) | Max Return ($) | Min Return ($) | Std. Dev. |
+--------------------------+-----------------+----------------+----------------+-----------+
| Profit MomentumFollowing |     7107.32     |    28188.10    |   -21889.11    |  12228.84 |
+--------------------------+-----------------+----------------+----------------+-----------+
Agent is using device:	cuda
Training:


100%|██████████| 30/30 [00:17<00:00,  1.69it/s]



Test nr. 5


 92%|█████████▎| 185/200 [00:00<00:00, 1371.72it/s]


+-------------------+-----------------+----------------+----------------+-----------+
|   Trading System  | Avg. Return ($) | Max Return ($) | Min Return ($) | Std. Dev. |
+-------------------+-----------------+----------------+----------------+-----------+
| Profit RandomWalk |    -12340.76    |    8631.09     |   -25933.46    |  7714.79  |
+-------------------+-----------------+----------------+----------------+-----------+
Agent is using device:	cuda
Training:


100%|██████████| 30/30 [01:53<00:00,  3.78s/it]



Test nr. 5


 92%|█████████▎| 185/200 [00:00<00:00, 878.97it/s]

+----------------+-----------------+----------------+----------------+-----------+
| Trading System | Avg. Return ($) | Max Return ($) | Min Return ($) | Std. Dev. |
+----------------+-----------------+----------------+----------------+-----------+
| Profit ConvDQN |    116148.74    |   362024.94    |   -12534.56    |  73734.17 |
+----------------+-----------------+----------------+----------------+-----------+





In [None]:
analytic_dic

In [40]:
import pickle

# Save dictionary to a file using pickle
with open('analytic_dic.pkl', 'wb') as pickle_file:
    pickle.dump(analytic_dic, pickle_file)


In [51]:
import pickle

# Load dictionary from the pickle file
with open('analytic_dic.pkl', 'rb') as pickle_file:
    reloaded_dict = pickle.load(pickle_file)

# Print the reloaded dictionary
print(reloaded_dict["5MIN"]["LinearDuelingDQN"]["Action Type"])


[1, 0, 0, 0, 6, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 5, 1, 0, 0, 0, 0, 5, 0, 0, 0, 1, 0, 1, 0, 0, 6, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 5, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 6, 0, 5, 0, 0, 0, 4, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 5, 4]
