In [39]:
from gym_trading_env.downloader import download
from datetime import datetime
import pandas as pd
import numpy as np
from ta.trend import IchimokuIndicator
from sklearn.preprocessing import StandardScaler
import gymnasium as gym
import gym_trading_env
import tensorflow as tf
from keras import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam
from keras.regularizers import l2
from collections import deque
from random import sample
from time import time

In [12]:
# download 1h timeframe btcusd historical data starting from 2020 Jan 1
download(exchange_names = ["binance"],
    symbols= ["BTC/USDT"],
    timeframe= "1h",
    dir = "data",
    since= datetime(year= 2020, month= 1, day=1),
)

BTC/USDT downloaded from binance and stored at data/binance-BTCUSDT-1h.pkl


In [13]:

# Import your data using pandas
df = pd.read_pickle("./data/binance-BTCUSDT-1h.pkl")

In [5]:
def add_epislon_to_zero(series: pd.Series):
  series[series.where(series == 0).index] += np.finfo(float).eps
  return series

  # Ocean Theory
def ocean_index(df,close_property, index_number, skip_log_difference = False):
  ocean_indices = pd.Series(np.zeros(len(df)))
  current_price = add_epislon_to_zero(df[close_property][index_number:])
  historical_price = add_epislon_to_zero(df.shift(index_number)[close_property][index_number:])
  
  if skip_log_difference:
    log_return = historical_price - current_price
  else:
    log_return =  np.log(historical_price) - np.log(current_price)

  ocean_indices.iloc[index_number:] = log_return / np.sqrt(index_number)
  return ocean_indices

# Natural Market Mirror
def natural_market_mirror(df,close_property, reachback):
    cumulative_indices = pd.Series(np.zeros(len(df)))
    nma = pd.Series(np.zeros(len(df)))
    for i in range(1, reachback+1):
      cumulative_indices += ocean_index(df,close_property,i)
    nma[reachback:] = cumulative_indices[reachback:] / reachback
    return nma

# Natural Market River
def natural_market_river(df,close_property, reachback, skip_log_difference = False):
  cumulative_indices = pd.Series(np.zeros(len(df)))
  nmr = pd.Series(np.zeros(len(df)))
  for i in range(1, reachback+1):
    cumulative_indices += (np.sqrt(i)-np.sqrt(i-1))*ocean_index(df,close_property,i, skip_log_difference)
  nmr[reachback:] = cumulative_indices[reachback:] / reachback
  return nmr

def exponential_moving_average(signal, points, exponential_const):
    """
    Calculate the N-point exponential moving average of a signal

    Inputs:
        signal: numpy array -   A sequence of price points in time
        points:      int    -   The size of the moving average
        exponential_const: numpy array    -   The smoothing factor

    Outputs:
        ma:     numpy array -   The moving average at each point in the signal
    """
    ema = np.zeros(len(signal))
    ema[0] = signal[0]

    for i in range(1, len(signal)):
      ema[i] = (signal[i] * exponential_const[i]) + (ema[i - 1] * (1 - exponential_const[i]))

    return pd.Series(ema)

def natural_moving_average(df, price_property, periods, skip_log_difference = False):
  nma = pd.Series(np.zeros(len(df)))
  o1_over_periods = ocean_index(df,price_property, 1, skip_log_difference).abs().rolling(min_periods=periods, window=periods).sum()[periods:]
  natural_market_river_o1 = natural_market_river(df, price_property,1, skip_log_difference).abs()[periods:]
  exponential_constant = natural_market_river_o1.divide(o1_over_periods)
  nma[periods:] = exponential_moving_average(df[price_property].iloc[periods:].to_numpy(),periods, exponential_constant.to_numpy())
  return nma

def smooth_function(df, property_name, start_component):
  smoothed_function = pd.Series(np.zeros(len(df)))
  x = df.index
  y = df[property_name]
  rft = np.fft.rfft(y) # perform real fourier transform
  rft[start_component:] = 0   # When to start removing components
  y_smooth = pd.Series(np.fft.irfft(rft)) # perform inverse fourier
  print(y_smooth)
  smoothed_function[1:] = y_smooth
  
  return smoothed_function


def print_nan_indices(series: pd.Series):
  nan_series = series[series.isnull()]
  print(nan_series.index)

def print_zero_indices(series: pd.Series):
  zero_series = series[series == 0]
  print(zero_series.index)

In [6]:
ichimoku_indicator = IchimokuIndicator(high=df.high, low=df.low)

In [7]:
df['natural_market_river'] = natural_market_river(df,'close', 10).values
df['natural_market_mirror'] = natural_market_mirror(df, 'close', 10).values

df['natural_market_mirror_nma'] = natural_moving_average(df, 'natural_market_mirror',20, True).values
df['natural_market_river_nma'] = natural_moving_average(df, 'natural_market_river',20, True).values

df['natural_market_mirror_nma_diff'] = df.natural_market_mirror_nma.diff().values
df['natural_market_river_nma_diff'] =  df.natural_market_river_nma.diff().values

df['senkou_a'] = ichimoku_indicator.ichimoku_a()
df['senkou_b'] = ichimoku_indicator.ichimoku_b()
df['kijun'] = ichimoku_indicator.ichimoku_base_line()
df['tenkan'] = ichimoku_indicator.ichimoku_conversion_line()

In [8]:
scaler = StandardScaler()
df[['open', 
    'high',
    'low',
    'close',
    'volume',
    'natural_market_river',
    'natural_market_mirror',
    'natural_market_mirror_nma',
    'natural_market_river_nma',
    'natural_market_mirror_nma_diff',
    'natural_market_river_nma_diff',
    'senkou_a',
    'senkou_b',
    'kijun',
    'tenkan',]] = scaler.fit_transform(df[['open', 
                      'high',
                      'low',
                      'close',
                      'volume',
                      'natural_market_river',
                      'natural_market_mirror',
                      'natural_market_mirror_nma',
                      'natural_market_river_nma',
                      'natural_market_mirror_nma_diff',
                      'natural_market_river_nma_diff',
                      'senkou_a',
                      'senkou_b',
                      'kijun',
                      'tenkan',]
                    ])


In [9]:
df.dropna(inplace=True)

In [65]:
env = gym.make("TradingEnv",
        name= "BTCUSD",
        df = df, # Your dataset with your custom features
        positions = [ -1, 0, 1], # -1 (=SHORT), 0(=OUT), +1 (=LONG)
        trading_fees = 0.01/100, # 0.01% per stock buy / sell (Binance fees)
        borrow_interest_rate= 0.0003/100, # 0.0003% per timestep (one timestep = 1h here)
        max_episode_steps=1000
    )

env.seed(42)

UserWarning: [33mWARN: env.seed to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.seed` for environment variables or `env.get_wrapper_attr('seed')` that will search the reminding wrappers.[0m

In [66]:
class TradingAgent:
    def __init__(self, state_dimensions,
                 num_actions,
                 learning_rate,
                 gamma,
                 epsilon_start,
                 epsilon_end,
                 epsilon_decay_steps,
                 epsilon_exponential_decay,
                 replay_capacity,
                 l2_reg,
                 tau,
                 batch_size):
        
        self.state_dimensions = state_dimensions
        self.num_actions = num_actions
        self.experience = deque([], maxlen=replay_capacity)
        self.learning_rate = learning_rate
        self.gamma = gamma
        self.l2_reg = l2_reg

        # double DQN 2 models, 1 for predicting the values, another for defining the targets
        self.online_model = self.build_model()
        self.target_model = self.build_model(trainable=False)
        self.update_target_model()

        self.epsilon = epsilon_start
        self.epsilon_decay_steps = epsilon_decay_steps
        self.epsilon_decay = (epsilon_start - epsilon_end) / epsilon_decay_steps
        self.epsilon_exponential_decay = epsilon_exponential_decay
        self.epsilon_history = []

        self.total_steps = self.train_steps = 0
        self.episodes = self.episode_length = self.train_episodes = 0
        self.steps_per_episode = []
        self.episode_reward = 0
        self.rewards_history = []

        self.batch_size = batch_size
        self.tau = tau
        self.losses = []
        self.idx = tf.range(batch_size)
        self.train = True


    def build_model(self, trainable = True):
        model = Sequential([
            Dense(units=265, input_dim=self.state_dimensions, activation='relu', kernel_regularizer=l2(self.l2_reg), name=f'Dense_1', trainable=trainable),
            Dense(units=265, activation='relu', kernel_regularizer=l2(self.l2_reg), name=f'Dense_2', trainable=trainable),
            Dropout(.1),
            Dense(units=self.num_actions, trainable=trainable, name='Output')
        ])

        model.compile(loss='mean_squared-error', optimizer=Adam(learning_rate=self.learning_rate))
        return model
    
    def update_target_model(self):
        self.target_model.set_weights(self.online_model.get_weights())

    def epsilon_greedy_policy(self, state):
        self.total_steps +=1
        if np.random.rand() <= self.epsilon:
            return np.random.choice(self.num_actions)
        
        q_values = self.online_model.predict(state)
        return np.argmax(q_values, axis=1).squeeze()

    def experience_replay(self):
        if self.batch_size > len(self.experience):
            return
        
        minibatch = map(np.array, zip(*sample(self.experience, self.batch_size)))
        states, actions, rewards, next_states, not_done = minibatch

        next_q_values = self.online_model.predict_on_batch(next_states)
        best_actions = tf.argmax(next_q_values, axis=1)

        next_q_values_target = self.target_network.predict_on_batch(next_states)
        target_q_values = tf.gather_nd(next_q_values_target, tf.stack((self.idx, tf.cast(best_actions, tf.int32)), axis=1))

        targets = rewards + not_done * self.gamma * target_q_values

        q_values = self.online_network.predict_on_batch(states)
        q_values[[self.idx, actions]] = targets

        loss = self.online_network.train_on_batch(x=states, y=q_values)
        self.losses.append(loss)

        if self.total_steps % self.tau == 0:
            self.update_target()
    
    def memorize_transition(self, s, a, r, s_prime, not_done):
        if not_done:
            self.episode_reward += r
            self.episode_length += 1
        else:
            if self.train:
                if self.episodes < self.epsilon_decay_steps:
                    self.epsilon -= self.epsilon_decay
                else:
                    self.epsilon *= self.epsilon_exponential_decay

            self.episodes += 1
            self.rewards_history.append(self.episode_reward)
            self.steps_per_episode.append(self.episode_length)
            self.episode_reward, self.episode_length = 0, 0

        self.experience.append((s, a, r, s_prime, not_done))

        

In [67]:
# discount factor
gamma = .99
# update frequency between online model and target model
tau =100
# Adam learning reate
learning_rate=0.0001
 # L2 regularization using norm 2 euclidian distance
l2_reg = 1e-6
# size of the prioritized replay buffer
replay_capacity = int(1e6)
# batch size to fetch from replay buffer
batch_size=4096
# epsilon greedy policy parameters 
epsilon_start = 1.0
epsilon_end = .01
epsilon_decay_steps = 250
epsilon_exponential_decay = .99

In [68]:
tf.keras.backend.clear_session()

In [69]:
state_dimensions = env.observation_space.shape[0]
num_actions=env.action_space.n

In [70]:
trading_agent = TradingAgent(state_dimensions=state_dimensions,
                 num_actions=num_actions,
                 learning_rate=learning_rate,
                 gamma=gamma,
                 epsilon_start=epsilon_start,
                 epsilon_end=epsilon_end,
                 epsilon_decay_steps=epsilon_decay_steps,
                 epsilon_exponential_decay=epsilon_exponential_decay,
                 replay_capacity=replay_capacity,
                 l2_reg=l2_reg,
                 tau=tau,
                 batch_size=batch_size)

In [71]:
trading_agent.online_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Dense_1 (Dense)             (None, 265)               795       
                                                                 
 Dense_2 (Dense)             (None, 265)               70490     
                                                                 
 dropout (Dropout)           (None, 265)               0         
                                                                 
 Output (Dense)              (None, 3)                 798       
                                                                 
Total params: 72083 (281.57 KB)
Trainable params: 72083 (281.57 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [73]:
total_steps = 0
max_episodes = 1000

In [74]:
episode_time, navs, market_navs, diffs, episode_eps = [], [], [], [], []

In [75]:
def format_time(t):
    m_, s = divmod(t, 60)
    h, m = divmod(m_, 60)
    return '{:02.0f}:{:02.0f}:{:02.0f}'.format(h, m, s)

In [76]:
def track_results(episode, nav_ma_100, nav_ma_10,
                  market_nav_100, market_nav_10,
                  win_ratio, total, epsilon):
    time_ma = np.mean([episode_time[-100:]])
    T = np.sum(episode_time)
    
    template = '{:>4d} | {} | Agent: {:>6.1%} ({:>6.1%}) | '
    template += 'Market: {:>6.1%} ({:>6.1%}) | '
    template += 'Wins: {:>5.1%} | eps: {:>6.3f}'
    print(template.format(episode, format_time(total), 
                          nav_ma_100-1, nav_ma_10-1, 
                          market_nav_100-1, market_nav_10-1, 
                          win_ratio, epsilon))

In [78]:
start = time()
results = []
for episode in range(1, max_episodes + 1):
    this_state = env.reset()
    print(this_state)
    for episode_step in range(env.spec.max_episode_steps):
        action = trading_agent.epsilon_greedy_policy(this_state.reshape(-1, state_dimensions))
        next_state, reward, done, _ = env.step(action)
    
        trading_agent.memorize_transition(this_state, 
                                 action, 
                                 reward, 
                                 next_state, 
                                 0.0 if done else 1.0)
        if trading_agent.train:
            trading_agent.experience_replay()
        if done:
            break
        this_state = next_state

    # get DataFrame with seqence of actions, returns and nav values
    result = env.env.simulator.result()
    
    # get results of last step
    final = result.iloc[-1]

    # apply return (net of cost) of last action to last starting nav 
    nav = final.nav * (1 + final.strategy_return)
    navs.append(nav)

    # market nav 
    market_nav = final.market_nav
    market_navs.append(market_nav)

    # track difference between agent an market NAV results
    diff = nav - market_nav
    diffs.append(diff)
    
    if episode % 10 == 0:
        track_results(episode, 
                      # show mov. average results for 100 (10) periods
                      np.mean(navs[-100:]), 
                      np.mean(navs[-10:]), 
                      np.mean(market_navs[-100:]), 
                      np.mean(market_navs[-10:]), 
                      # share of agent wins, defined as higher ending nav
                      np.sum([s > 0 for s in diffs[-100:]])/min(len(diffs), 100), 
                      time() - start, trading_agent.epsilon)
    if len(diffs) > 25 and all([r > 0 for r in diffs[-25:]]):
        print(result.tail())
        break

env.close()

(array([0., 0.], dtype=float32), {'idx': 0, 'step': 0, 'date': numpy.datetime64('2020-01-01T00:00:00.000000000'), 'position_index': 1, 'position': 0, 'real_position': 0, 'data_volume': 511.814901, 'data_close': 7177.02, 'data_date_close': Timestamp('2020-01-01 01:00:00'), 'data_low': 7175.46, 'data_open': 7195.24, 'data_high': 7196.25, 'portfolio_valuation': 1000.0, 'portfolio_distribution_asset': 0, 'portfolio_distribution_fiat': 1000.0, 'portfolio_distribution_borrowed_asset': 0, 'portfolio_distribution_borrowed_fiat': 0, 'portfolio_distribution_interest_asset': 0, 'portfolio_distribution_interest_fiat': 0, 'reward': 0})


AttributeError: 'tuple' object has no attribute 'reshape'

In [64]:
print(env.env.)

AttributeError: 'TradingEnv' object has no attribute 'max_episode_steps'