In [1]:
from utils import in_notebook, utcToChi, getChiTimeNow
from Initial_Combination_Data import getCombDF
from Data_Preparation import locateConfigDir, processData


In [2]:
# Look-back windows used when building features.
# NOTE(review): these are not referenced in the visible cells; presumably consumed by the data-preparation helpers — confirm.
day_chg_incs = [1, 3, 5] # number of days to calculate for day_cols
minute_incs = [1, 5, 15, 30, 60] # number of minutes in the past to calculate for minute cols

In [3]:
# modeled after https://github.com/pskrunner14/trading-bot
import pandas as pd
import numpy as np

import os, pytz, datetime

from bs4 import BeautifulSoup as Soup

if in_notebook():
    from tqdm import tqdm_notebook as tqdm
else:
    from tqdm import tqdm


import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
# Machine-specific absolute paths. Every path below ends in '/' (directories) or is a full
# file name, because the rest of the notebook builds paths by plain string concatenation.
omni_dir = '/home/andrew/All_Trading/Studies/Omni_Project/'
rl_dir = omni_dir + 'RL/'
# config_dir = omni_dir + 'Primary_Assets/US_Dollar_Index/Config1/'
tdm_dir = '/media/andrew/FreeAgent Drive/Market_Data/Tick_Data_Manager/'
# Same directory as tdm_dir, spelled out in full.
regression_results_fn = '/media/andrew/FreeAgent Drive/Market_Data/Tick_Data_Manager/regression_results.csv'
data_summary_fn = tdm_dir+'data_summary.csv'
# Loaded eagerly at cell execution time; requires the external drive to be mounted.
data_summaryDF = pd.read_csv(data_summary_fn)

# File Structure

# Determine which combinations of data to try


In [5]:
# Build the combinations table from the regression results and the data summary files.
# getCombDF is a project helper (Initial_Combination_Data); it receives the two file names, not DataFrames.
combDF = getCombDF(regression_results_fn, data_summary_fn)

Getting combDF using  /media/andrew/FreeAgent Drive/Market_Data/Tick_Data_Manager/regression_results.csv and /media/andrew/FreeAgent Drive/Market_Data/Tick_Data_Manager/data_summary.csv


  if (await self.run_code(code, result,  async_=asy)):


1 rows have R2 not found:

            Sec1         Sec2
66174  AETNA_INC  Natural_Gas
found 539 unique securities in combDF
[]
num_days < min_num_days_total! Skipping row for Natural_Gas
num_days < min_num_days_total! Skipping row for Coloplast_AS


<h1> Model Config </h1>

In [6]:
class ModelConfig:
    """Description of a feed-forward network plus the Keras model built from it.

    A network is described by ``model_array``: a list of dicts, one per layer, each
    with a ``'layer'`` key (``'Dense'`` or ``'Dropout'``). Dense layers carry
    ``'units'`` and ``'activation'``; Dropout layers carry ``'rate'``.
    ``model_str`` is a compact text form of the same description, e.g.
    ``[Dense_64_rel][Drop_0.2][Dense_3_lin]``.

    Exactly one of ``model_array`` / ``model_str`` must be supplied. Building from a
    string is not implemented yet and raises ``NotImplementedError``.
    """

    def __init__(self, input_size, model_array=None, model_str=None, actions_size=3):
        """
        Args:
            input_size: number of input features (used as the first layer's input shape).
            model_array: list of layer dicts (see class docstring).
            model_str: compact string form of the layers.
            actions_size: number of units the final Dense layer must have.

        Raises:
            ValueError: if both or neither of model_array / model_str are given.
            NotImplementedError: if only model_str is given.
            AssertionError: if the first/last layers violate checkInputAndOutput().
        """
        if model_array is not None and model_str is not None:
            raise ValueError('Please supply only one: model_str or model_array')
        if model_array is None and model_str is None:
            raise ValueError('You have to supply either a model string or a model array')

        self.input_size = input_size # aka shape_size
        self.actions_size = actions_size
        self.model_array = model_array
        self.model_str = model_str

        # Derive whichever representation was not supplied.
        if model_array is not None:
            self.model_str = self.getModelStr()
        else:
            self.model_array = self.getModelArray()

        self.checkInputAndOutput()

        self.model = self.makeKerasModel()
        self.html_id_str = self.getHTMLIDStr()

    def getModelStr(self):
        """Return the compact string form, building it from model_array if needed.

        Activations are abbreviated to their first three characters.

        Raises:
            ValueError: if a layer dict has an unknown 'layer' value.
        """
        if self.model_str is not None:
            return self.model_str
        pieces = []
        for layer_dict in self.model_array:
            if layer_dict['layer'] == 'Dense':
                layer_str = 'Dense_'+str(layer_dict['units'])+'_'+layer_dict['activation'][:3]
            elif layer_dict['layer'] == 'Dropout':
                layer_str = 'Drop_'+str(layer_dict['rate'])
            else:
                raise ValueError(layer_dict['layer']+' not recognized.')
            pieces.append('['+layer_str+']')
        return ''.join(pieces)

    def getModelArray(self):
        """Return the layer-dict list.

        Raises:
            NotImplementedError: if only a model string is available; parsing a
                string back into layer dicts has not been written.
        """
        if self.model_array is not None:
            return self.model_array
        # `raise NotImplemented` (the comparison sentinel) would itself fail with a
        # TypeError; NotImplementedError is the exception that was intended.
        raise NotImplementedError('Building a model_array from a model_str is not implemented.')

    def makeKerasModel(self):
        """Build (but do not compile) a Keras Sequential model from model_array.

        The first layer is always created as Dense with the configured input shape;
        checkInputAndOutput() guarantees that is what the description asks for.

        NOTE(review): relies on Sequential, Dense and Dropout being in scope when
        this is called; Dropout is not imported in the visible cells - confirm.

        Raises:
            ValueError: if a later layer dict has an unknown 'layer' value.
        """
        model = Sequential()

        first_layer = self.model_array[0]
        model.add(Dense(units=first_layer['units'], activation=first_layer['activation'], input_shape=(self.input_size,)))

        for layer_dict in self.model_array[1:]:
            if layer_dict['layer'] == 'Dense':
                model.add(Dense(units=layer_dict['units'], activation=layer_dict['activation']))
            elif layer_dict['layer'] == 'Dropout':
                model.add(Dropout(rate=layer_dict['rate']))
            else:
                raise ValueError(layer_dict['layer']+' not recognized.')
        return model

    def checkInputAndOutput(self):
        """Assert that the network starts and ends with Dense layers and that the
        last layer is a linear/softmax layer with `actions_size` units."""
        first_layer, last_layer = self.model_array[0], self.model_array[-1]
        assert first_layer['layer'] == 'Dense', 'first layer must be Dense'
        assert last_layer['layer'] == 'Dense', 'last layer must be Dense'
        assert last_layer['activation'] in ['linear', 'softmax'], 'last activation must be linear or softmax'
        assert last_layer['units'] == self.actions_size, 'last layer must have actions_size units'

    def getHTMLIDStr(self):
        """Return model_str rewritten as an identifier-friendly string: brackets are
        removed, '.' becomes 'dp', and every layer is followed by '___'."""
        layer_reps = [i.split('[')[1] for i in self.model_str.split(']') if i!='']
        return ''.join(rep.replace('.', 'dp')+'___' for rep in layer_reps)


<h1> Data </h1>

In [7]:
class DataObj:
    """Holds the train/validation feature frames, their close-price frames and the
    matching 'Minute' columns (split off into separate Series)."""

    def __init__(self, config_dir, 
                 train_close_pricesDF=None, trainDF=None, val_close_pricesDF=None, valDF=None,
                 load_val=True, nrows=None):
        """Data class. You have the option of loading all four dataframes to avoid reloading them.

        Args:
            config_dir: directory (with trailing '/') containing 'Postprocessed_Data/'.
            train_close_pricesDF, trainDF, val_close_pricesDF, valDF: optional
                pre-loaded frames. They are only used when ALL FOUR are supplied;
                otherwise everything is read from CSV. The supplied frames are
                copied, never modified.
            load_val: when reading from CSV, whether to load the validation files.
            nrows: keep only the first `nrows` rows of every frame (None = all rows).

        Raises:
            AssertionError: if a close-price frame's Minute column does not match
                the Minute column of its feature frame.
        """
        self.train_close_pricesDF = None
        self.trainDF = None
        self.val_close_pricesDF = None
        self.valDF = None
        self.train_minutes = None
        self.val_minutes = None
        self.config_dir = config_dir
        self.load_val = load_val

        presupplied = all(df is not None for df in
                          (train_close_pricesDF, trainDF, val_close_pricesDF, valDF))
        if presupplied:
            if nrows is not None:
                print('presupplied data with nrows =',nrows,'... subsetting the DFs.')
            # df[:None] is the whole frame. Slice first, then copy, so each attribute
            # owns its data (no SettingWithCopy issues, caller's frames untouched).
            self.train_close_pricesDF = train_close_pricesDF[:nrows].copy()
            self.trainDF = trainDF[:nrows].copy()
            self.val_close_pricesDF = val_close_pricesDF[:nrows].copy()
            self.valDF = valDF[:nrows].copy()
        else:
            data_dir = config_dir+'Postprocessed_Data/'
            print('loading train_close_pricesDF')
            self.train_close_pricesDF = pd.read_csv(data_dir+'train_close_prices.csv', nrows=nrows)
            print('loading trainDF')
            self.trainDF = pd.read_csv(data_dir+'train_minutesDF.csv', nrows=nrows)
            print('load train complete')

            if load_val:
                print('loading val_close_pricesDF')
                self.val_close_pricesDF = pd.read_csv(data_dir+'val_close_prices.csv', nrows=nrows)
                print('loading valDF')
                self.valDF = pd.read_csv(data_dir+'val_minutesDF.csv', nrows=nrows)
                print('load val complete')

        self.splitMinutes()

    def getStateSize(self):
        """Number of model inputs: one per feature column plus one for the position."""
        return len(self.trainDF.columns)+1 #+1 for position

    def splitMinutes(self):
        """Move the 'Minute' column out of every loaded frame into train_minutes /
        val_minutes, after checking that features and close prices are aligned."""
        print('splitting minutes')
        self.trainDF, self.train_close_pricesDF, self.train_minutes = \
            self._popMinutes(self.trainDF, self.train_close_pricesDF, 'train')

        if self.valDF is not None:
            self.valDF, self.val_close_pricesDF, self.val_minutes = \
                self._popMinutes(self.valDF, self.val_close_pricesDF, 'val')

    @staticmethod
    def _popMinutes(dataDF, close_pricesDF, label):
        """Return (dataDF without Minute, close_pricesDF without Minute, Minute Series)."""
        minutes = dataDF.Minute
        # array_equal also copes with frames of different length (returns False),
        # where an element-wise `==` would not yield a usable boolean array.
        assert np.array_equal(close_pricesDF.Minute.values, minutes.values), \
            label+' close prices and features are not aligned on Minute'
        return dataDF.drop(columns=['Minute']), close_pricesDF.drop(columns=['Minute']), minutes
            
        
        

<h1> Agent </h1>

In [39]:
#  agent.py
import random

from collections import deque

import numpy as np
import tensorflow as tf
import keras.backend as K

from keras.models import Sequential
from keras.models import clone_model
from keras.layers import Dense
from keras.optimizers import Adam


def huber_loss(y_true, y_pred, clip_delta=1.0):
    """Huber loss, used as the custom loss for Q-learning.

    Squared error (times 0.5) where |error| <= clip_delta, a straight line with
    slope clip_delta beyond that; the mean over all elements is returned.

    Links: 	https://en.wikipedia.org/wiki/Huber_loss
            https://jaromiru.com/2017/05/27/on-using-huber-loss-in-deep-q-learning/
    """
    residual = y_true - y_pred
    magnitude = K.abs(residual)
    # branch used inside the clip region
    inner_branch = 0.5 * K.square(residual)
    # branch used outside the clip region (linear in |error|)
    outer_branch = 0.5 * K.square(clip_delta) + clip_delta * (magnitude - clip_delta)
    per_element = tf.where(magnitude <= clip_delta, inner_branch, outer_branch)
    return K.mean(per_element)


class Agent:
    """ Stock Trading Bot

    Wraps a Keras Q-network (taken from a ModelConfig), an experience-replay memory,
    an epsilon-greedy policy and a per-episode trade log. Supported strategies:
    "dqn", "t-dqn" (fixed target network) and "double-dqn".
    """
    def __init__(self, model_cfg, config_dir, strategy="t-dqn", reset_every=200, searchForPretrained=True, action_size=3):
        """
        Args:
            model_cfg: object exposing `model_str`, `input_size` and `model` (see ModelConfig).
            config_dir: base directory (trailing '/'); models live under 'Deep_Models/<model_str>/'.
            strategy: "dqn", "t-dqn" or "double-dqn".
            reset_every: target-network sync period, in timesteps seen.
            searchForPretrained: load the highest-numbered saved episode if there is one.
            action_size: number of discrete actions used for random exploration.
        """
        self.model_cfg = model_cfg
        self.config_dir = config_dir
        self.model_dir = self.config_dir + 'Deep_Models/' + self.model_cfg.model_str + '/'
        if not os.path.exists(self.model_dir):
            print(self.model_dir, 'not found. Creating it.')
        # makedirs also creates a missing 'Deep_Models/' parent and repairs a
        # half-created model directory (e.g. Saved_Models/ missing).
        for sub_dir in ('Saved_Models/', 'Trade_Logs/'):
            os.makedirs(self.model_dir + sub_dir, exist_ok=True)

        self.strategy = strategy
        self.state_size = self.model_cfg.input_size
        self.action_size = action_size
        # Stored for every strategy because progress reporting reads it.
        self.reset_every = reset_every
        self.trade_log = None
        self.reset()
        self.memory = deque(maxlen=10000)
        self.first_iter = True

        self.position = 0

        self.gamma = 0.95 # affinity for long term reward
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.loss = huber_loss
#         self.custom_objects = {"huber_loss": huber_loss}  # important for loading the model from memory
        self.optimizer = Adam(lr=self.learning_rate)

        self.num_loss_updates = 0
        self.num_target_updates = 0

        self.episode = 0
        self.ts_seen = 1
        if searchForPretrained:
            print('searching for pretrained model')
            self.model = self.loadModelWeights()
        else:
            self.model = self.model_cfg.model

        self.model.compile(loss=self.loss, optimizer=self.optimizer)

        # strategy config
        if self.strategy in ["t-dqn", "double-dqn"]:
            # target network
            self.target_model = clone_model(self.model)
            self.target_model.set_weights(self.model.get_weights())

    def remember(self, state, action, reward, next_state, done):
        """Adds relevant data to memory
        """
        self.memory.append((state, action, reward, next_state, done))
        self.ts_seen += 1

    def act(self, state, is_eval=False):
        """Take action from given possible set of actions

        With probability epsilon (training only) a random action is returned. The
        first non-random call always returns 1 (buy). Otherwise the action with the
        highest predicted Q-value is returned.
        """
        # take random action in order to diversify experience at the beginning
        if not is_eval and random.random() <= self.epsilon:
            return random.randrange(self.action_size)

        if self.first_iter:
            self.first_iter = False
            return 1 # make a definite buy on the first iter

        action_probs = self.model.predict(state)
        return np.argmax(action_probs[0])

    def getQValues(self, state):
        """Return the online network's raw prediction for `state`."""
        return self.model.predict(state)

    def _nextStateValues(self, next_states):
        """Bootstrap value of each next state, according to the configured strategy."""
        if self.strategy == "dqn":
            # approximate deep q-learning equation
            return np.amax(self.model.predict(next_states), axis=1)
        target_q = self.target_model.predict(next_states)
        if self.strategy == "t-dqn":
            # approximate deep q-learning equation with fixed targets
            return np.amax(target_q, axis=1)
        # double-dqn: the online network picks the action, the target network scores it
        best_actions = np.argmax(self.model.predict(next_states), axis=1)
        return target_q[np.arange(len(best_actions)), best_actions]

    def train_experience_replay(self, batch_size):
        """Train on previous experiences in memory

        Samples `batch_size` transitions (with replacement) with probability
        proportional to their position in memory, so recent transitions are
        favoured and the oldest one is never drawn. Predictions are made once per
        network for the whole mini-batch instead of once per sample.

        Returns:
            The loss reported by the single fit epoch.

        Raises:
            NotImplementedError: for an unknown strategy.
        """
        if self.strategy not in ("dqn", "t-dqn", "double-dqn"):
            raise NotImplementedError()

        memory_len = len(self.memory)
        weights = np.arange(memory_len, dtype=float)
        weight_total = weights.sum()
        # With fewer than two memories every weight is zero; fall back to uniform.
        sample_dist = weights / weight_total if weight_total > 0 else None

        mini_batch_idx = np.random.choice(memory_len, batch_size, p=sample_dist)
        mini_batch = [self.memory[i] for i in mini_batch_idx]

        if self.strategy in ("t-dqn", "double-dqn"):
#             if self.ts_seen % self.reset_every == 0:
            if self.ts_seen % self.reset_every < batch_size: # this is valid because now we're only training every batch_size timesteps
                # reset target model weights
                self.target_model.set_weights(self.model.get_weights())
                self.num_target_updates += 1

        # each stored state is wrapped in an outer length-1 axis; strip it to stack
        states = np.array([transition[0][0] for transition in mini_batch])
        next_states = np.array([transition[3][0] for transition in mini_batch])

        # estimate q-values based on current state (copied so the rows can be edited)
        q_values = np.array(self.model.predict(states))
        next_values = self._nextStateValues(next_states)

        for row, (_, action, reward, _, done) in enumerate(mini_batch):
            # update the target for current action based on discounted reward
            q_values[row][action] = reward if done else reward + self.gamma * next_values[row]

        # update q-function parameters based on huber loss gradient
        loss = self.model.fit(
            states, q_values,
            epochs=1, verbose=0
        ).history["loss"][0]

        # as the training goes on we want the agent to
        # make less random and more optimal decisions
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

        self.num_loss_updates += 1

        return loss

    def _archiveExisting(self, fn):
        """If `fn` exists, move it to a sibling 'Previous/' directory, appending
        today's date to the file's own stem so different session types never collide."""
        if not os.path.exists(fn):
            return
        print(fn, 'already exists! Moving to Previous/')
        out_dir, base_name = os.path.split(fn)
        stem, ext = os.path.splitext(base_name)
        prev_dir = out_dir + '/Previous/'
        os.makedirs(prev_dir, exist_ok=True)
        os.replace(fn, prev_dir + stem + str(getChiTimeNow())[:10] + ext)

    def saveTradeLog(self, sess_type='train'):
        """Write the trade log to Trade_Logs/<episode>.csv (prefixed with sess_type
        for non-train sessions); an existing file is archived first."""
        fn = self.model_dir+'Trade_Logs/' + str(self.episode) + '.csv'
        if sess_type != 'train':
            fn = self.model_dir+'Trade_Logs/' + sess_type + str(self.episode) + '.csv'
        # if there's already a file there, move it
        self._archiveExisting(fn)
        self.trade_log.to_csv(fn, index=False)

    def saveModelWeights(self):
        """Write the online network's weights to Saved_Models/<episode>.h5; an
        existing file is archived first."""
        fn = self.model_dir+'Saved_Models/'+str(self.episode)+'.h5'
        # if there's already a file there, move it
        self._archiveExisting(fn)
        self.model.save_weights(fn)

    def loadModelWeights(self):
        """Return model_cfg.model, loaded with the highest-numbered '<episode>.h5'
        from Saved_Models/ when one exists (self.episode then continues after it)."""
        saved_models_dir = self.model_dir + 'Saved_Models/'
        saved_models = os.listdir(saved_models_dir)
        model = self.model_cfg.model
        # Ignore anything that is not '<digits>.<ext>', e.g. the 'Previous' directory.
        episodes_saved = [int(i.split('.')[0]) for i in saved_models if i.split('.')[0].isdigit()]
        if len(episodes_saved) == 0:
            print('No models saved. Starting from episode 0 with a new model.')
        else:
            latest_episode = max(episodes_saved)
            print('Found models. Loading episode', latest_episode)
            model.load_weights(saved_models_dir+str(latest_episode)+'.h5')
            self.episode = latest_episode +1
        return model

    def reset(self):
        """Start a fresh episode: empty trade log, flat position, zeroed counters.
        Epsilon, memory and the first-iteration flag are left as they are."""
        self.trade_log = pd.DataFrame(columns = ['Minute', 'DesiredAction', 'ActualAction', 'NewPosition', 'C_B', 'C_A'])
        self.position = 0
        self.num_loss_updates = 0
        self.num_target_updates = 0
        self.ts_seen = 1

    def updateTradeLog(self, minute, action, actual_action, new_position, close_prices):
        """Append one row to the trade log. `close_prices` must expose C_B1 and C_A1."""
        new_row = pd.DataFrame([{'Minute': minute, 
                                 'DesiredAction': action, 
                                 'ActualAction': actual_action,
                                 'NewPosition': new_position, 
                                 'C_B': close_prices.C_B1, 
                                 'C_A': close_prices.C_A1}], columns=self.trade_log.columns)
        # DataFrame.append no longer exists in pandas 2.x; concat is its replacement.
        # NOTE(review): growing a frame row by row is quadratic over an episode.
        if self.trade_log.empty:
            self.trade_log = new_row
        else:
            self.trade_log = pd.concat([self.trade_log, new_row], ignore_index=True)

    def getNumTrades(self):
        """Number of logged rows whose ActualAction is not 0 (hold)."""
        return(int((self.trade_log.ActualAction != 0).sum()))

    def viewResults(self):
        """Plot mid price with buy/sell markers and cumulative PNL on a twin axis."""
        print('viewing results')
        # construct pnl
        full_logDF = self.trade_log.copy()
        full_logDF['Midpt'] = (full_logDF.C_B + full_logDF.C_A)/2
        full_logDF['DayPNL'] = (full_logDF.ActualAction!=0)*(full_logDF.C_B - full_logDF.Midpt) #it's always negative half the bid-ask spread
        full_logDF['OpenPNL'] = full_logDF.NewPosition.shift() * (full_logDF.Midpt - full_logDF.Midpt.shift())
        # first row has no previous position; write through .loc on the frame itself
        # (chained `full_logDF.OpenPNL.loc[0] = 0` may only modify a temporary)
        full_logDF.loc[full_logDF.index[:1], 'OpenPNL'] = 0
        full_logDF['TotalPNL'] = full_logDF.DayPNL + full_logDF.OpenPNL
        full_logDF['CumPNL'] = full_logDF.TotalPNL.cumsum()
        # [x] graph (from http://kitchingroup.cheme.cmu.edu/blog/2013/09/13/Plotting-two-datasets-with-very-different-scales/)
        buys = full_logDF.loc[full_logDF.ActualAction == 1, ['Minute', 'C_A']]
        sells = full_logDF.loc[full_logDF.ActualAction == 2, ['Minute', 'C_B']]
        print('viewing figure')

        fig = plt.figure()
        ax1 = fig.add_subplot(111)
        ax1.plot(full_logDF.Minute, full_logDF.Midpt, 'k-', linewidth=0.5)
        ax1.plot(buys.Minute, buys.C_A, 'g+')
        ax1.plot(sells.Minute, sells.C_B, 'r+')
        ax1.set_ylabel('Price')

        ax2 = ax1.twinx()
        ax2.plot(full_logDF.Minute, full_logDF.CumPNL, 'b-')
        ax2.set_ylabel('PNL')

        plt.show()

        # [] check that the total PNL sum is close
        

In [40]:
# agent = Agent(model_cfg, config_dir)

In [41]:
# ops.py
# import os
import math
import logging

import numpy as np

def sigmoid(x):
    """Performs sigmoid operation

    Uses the algebraically equivalent form 1 - 1/(1 + e^x) for negative x so that
    math.exp is only ever called with a non-positive argument (no overflow).

    Returns:
        The sigmoid of x, or None (after printing the error) if x cannot be
        processed, e.g. because it is not a number.
    """
    try:
        if x < 0:
            return 1 - 1 / (1 + math.exp(x))
        return 1 / (1 + math.exp(-x))
    except Exception as err:
        # str() is required: concatenating the exception object itself raises TypeError
        print("Error in sigmoid: " + str(err))
        return None

def get_state(agent, dataObj, t):
    """Build the model input for timestep t: a (1, n_features + 1) array whose
    first entry is the agent's position, followed by row t of dataObj.trainDF."""
    feature_row = dataObj.trainDF.iloc[t].values
    state_vector = [agent.position]
    state_vector.extend(feature_row)
    # wrap in an outer list so the result carries a leading batch axis of 1
    return np.array([state_vector])


In [42]:
# utils.py

# import os
# import math
# import logging

# import pandas as pd
# import numpy as np

# import keras.backend as K


def format_position(price):
    """Formats Position: signed dollar amount with two decimals, e.g. '-$1.50' or '+$2.00'."""
    if price < 0:
        sign = '-$'
    else:
        sign = '+$'
    return sign + '{0:.2f}'.format(abs(price))


def format_currency(price):
    """Formats Currency: unsigned dollar amount with two decimals (the sign is dropped)."""
    magnitude = abs(price)
    return '${0:.2f}'.format(magnitude)


def show_train_result(result, val_position, initial_offset):
    """ Displays training results

    Args:
        result: sequence of (episode, total episodes, train position, train loss).
        val_position: validation position; reported as USELESS when it equals
            `initial_offset` or 0.0.
        initial_offset: reference value that marks an unchanged position.
    """
    episode, episode_count, train_position, train_loss = result[0], result[1], result[2], result[3]
    if val_position == initial_offset or val_position == 0.0:
        logging.info('Episode {}/{} - Train Position: {}  Val Position: USELESS  Train Loss: {:.4f}'
                     .format(episode, episode_count, format_position(train_position), train_loss))
    else:
        # the original template ended in a stray ')' after the loss value
        logging.info('Episode {}/{} - Train Position: {}  Val Position: {}  Train Loss: {:.4f}'
                     .format(episode, episode_count, format_position(train_position), format_position(val_position), train_loss))


def show_eval_result(model_name, profit, initial_offset):
    """ Displays eval results

    Logs '<model_name>: USELESS' when profit equals `initial_offset` or 0.0,
    otherwise the formatted profit.
    """
    nothing_gained = (profit == initial_offset or profit == 0.0)
    if not nothing_gained:
        logging.info('{}: {}\n'.format(model_name, format_position(profit)))
        return
    logging.info('{}: USELESS\n'.format(model_name))


# def get_stock_data(stock_file):
#     """Reads stock data from csv file
#     """
#     df = pd.read_csv(stock_file)
#     return list(df['Adj Close'])


def switch_k_backend_device():
    """ Switches `keras` backend from GPU to CPU if required.
    Faster computation on CPU (if using tensorflow-gpu).

    Does nothing unless the Keras backend is TensorFlow; in that case it sets
    CUDA_VISIBLE_DEVICES to "-1" in the process environment.
    """
    if K.backend() != "tensorflow":
        return
    logging.debug("switching to TensorFlow for CPU")
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

In [43]:
def calcOpenPnl(agent, dataObj, t):
    """PNL of the currently held position over the move in mid price from row
    t-1 to row t of the training close prices (mid = (C_B1 + C_A1) / 2)."""
    quotes = dataObj.train_close_pricesDF[['C_B1', 'C_A1']]
    previous_mid = sum(quotes.iloc[t-1].values)/2
    current_mid = sum(quotes.iloc[t].values)/2
    mid_change = current_mid - previous_mid
    return (agent.position * mid_change)

def evaluateAction(action, agent, dataObj, t):
    """returns day_pnl and updates agent's position and trade_log

    Actions: 0 = hold, 1 = buy, 2 = sell. A buy is only executed when the
    position is not already 1, a sell only when it is not already -1; a blocked
    order is logged with ActualAction 0. An executed order costs the distance
    from the mid price to the ask (buy) or bid (sell).

    Raises:
        ValueError: if `action` is not 0, 1 or 2.
    """
    quote = dataObj.train_close_pricesDF[['C_B1', 'C_A1']].iloc[t]
    midpoint = sum(quote.values)/2
    minute = dataObj.train_minutes.iloc[t]

    held = agent.position
    # defaults describe "nothing executed"
    executed_action, resulting_position, day_pnl = 0, held, 0

    if action == 1:
        # BUY
        if held != 1: #actually take the action
            executed_action = 1
            day_pnl = midpoint - quote['C_A1']
            resulting_position = held + 1
    elif action == 2:
        # SELL
        if held != -1: #actually take the action
            executed_action = 2
            day_pnl = quote['C_B1'] - midpoint
            resulting_position = held - 1
    elif action != 0:
        # anything other than HOLD at this point is invalid
        raise ValueError('Action '+str(action)+' not recognized.')

    agent.position = resulting_position
    agent.updateTradeLog(minute, action, executed_action, resulting_position, quote)
    return day_pnl

In [60]:
def getMaxDraw(pnl_series, direction):
    """Largest run-down ('down') or run-up ('up') of the cumulative PNL.

    For 'down' the result is the most negative distance of the cumulative sum
    below its running peak; for 'up' the most positive distance above its
    running trough. Peak and trough both start at 0, and the result is 0 when
    there is no such move. `direction` is case-insensitive.
    """
    direction = direction.lower()
    assert direction in ['down', 'up'], 'invalid direction: '+str(direction)

    # 'down' tracks the worst drop from the highest peak, 'up' mirrors it
    if direction == 'down':
        pick_draw, pick_anchor = min, max
    else:
        pick_draw, pick_anchor = max, min

    draw = 0
    anchor = 0
    for running_total in pnl_series.cumsum().values:
        draw = pick_draw(draw, running_total-anchor)
        anchor = pick_anchor(anchor, running_total)
    return draw

def calcSharpe(trade_log):
    """Annualised Sharpe ratio of daily PNL: TotalPNL is summed per day (first 10
    characters of the Minute string) and mean/std is scaled by sqrt(252)."""
    day_key = trade_log.Minute.str[:10]
    pnl_by_day = trade_log.groupby(day_key)['TotalPNL'].sum()
    annualisation = np.sqrt(252)
    return annualisation*(pnl_by_day.mean()/pnl_by_day.std())

def getTradeLogStats(agent):
    """Summarise agent.trade_log (which is not modified).

    Returns:
        Tuple of (pnl, pnl_per_day, sharpe, num_trades, drawdown, drawup, ts_seen,
        pct_flat, pct_long, pct_short, data_start, data_end). Days are taken from
        the first 10 characters of the Minute strings.
    """
    trade_log = agent.trade_log.copy()
    trade_log['Midpt'] = (trade_log.C_B + trade_log.C_A)/2
    trade_log['DayPNL'] = (trade_log.ActualAction!=0)*(trade_log.C_B - trade_log.Midpt) #it's always negative half the bid-ask spread
    trade_log['OpenPNL'] = trade_log.NewPosition.shift() * (trade_log.Midpt - trade_log.Midpt.shift())
    # The first row has no previous position. Assign through .loc on the frame:
    # the chained form `trade_log.OpenPNL.loc[0] = 0` may only modify a temporary.
    trade_log.loc[trade_log.index[:1], 'OpenPNL'] = 0
    trade_log['TotalPNL'] = trade_log.DayPNL + trade_log.OpenPNL

    pnl = trade_log.TotalPNL.sum()
    num_days = len(trade_log.Minute.str[:10].unique())
    pnl_per_day = pnl/num_days
    sharpe = calcSharpe(trade_log)
    num_trades = (trade_log.ActualAction!=0).sum()
    drawdown = getMaxDraw(trade_log.TotalPNL, 'down')
    drawup = getMaxDraw(trade_log.TotalPNL, 'up')
    ts_seen = len(trade_log)
    pct_flat = (trade_log.NewPosition == 0).sum()/ts_seen
    pct_long = (trade_log.NewPosition == 1).sum()/ts_seen
    pct_short = (trade_log.NewPosition == -1).sum()/ts_seen
    data_start = trade_log.Minute.iloc[0][:10]
    data_end = trade_log.Minute.iloc[-1][:10]

    return pnl, pnl_per_day, sharpe, num_trades, drawdown, drawup, ts_seen, pct_flat, pct_long, pct_short, data_start, data_end

def getSprCrossInfo(agent, dataObj):   
        
    trade_log = agent.trade_log.copy()
    assert trade_log.Minute.iloc[-1] in dataObj.train_minutes.values or trade_log.Minute.iloc[0] in dataObj.val_minutes.values, trade_log.Minute
    train_or_val = 'train'
    
    dataDF = dataObj.trainDF
    minutes = dataObj.train_minutes
    close_pricesDF = dataObj.train_close_pricesDF
    
    if trade_log.Minute.iloc[0] in dataObj.val_minutes.values:
        train_or_val = 'val'
        dataDF = dataObj.valDF
        minutesDF = dataObj.val_minutes
        close_pricesDF = dataObj.val_close_pricesDF
        
    # get large spread threshold
    avg_spr = (close_pricesDF.C_A1 - close_pricesDF.C_B1).mean()
    spr_quantile = (close_pricesDF.C_A1 - close_pricesDF.C_B1).quantile(q=.8)
    lrg_spr_threshold = max(avg_spr*5, spr_quantile)
    
    # get stats
    lrg_spr_trade_log = trade_log.loc[(trade_log.C_A - trade_log.C_B) > lrg_spr_threshold]
    if len(lrg_spr_trade_log) == 0:
        return np.nan, np.nan, np.nan
    
    lrg_spr_minutes = lrg_spr_trade_log.Minute
    lrg_spr_cross_pct = (lrg_spr_trade_log.ActualAction != 0).sum()/len(lrg_spr_trade_log)
    
    t_idx = minutes.loc[minutes.isin(lrg_spr_minutes)].index
    q_value_array = [agent.getQValues(get_state(agent, dataObj, t)) for t in t_idx]
    lrg_spr_cross_likelihood = np.mean([(q[0][0] != max(q[0]))  for q in q_value_array]) #needs q[0] because output is wrapped in unnecessary list
    lrg_spr_hold_preference = np.mean([q[0][0]-max(q[0][1:]) for q in q_value_array])
    
    return lrg_spr_cross_pct, lrg_spr_cross_likelihood, lrg_spr_hold_preference

def _appendSummaryRow(summary_fn, row_dict, col_order):
    """Append one row to the CSV at summary_fn (created if missing), writing columns in col_order."""
    summaryDF = pd.DataFrame(row_dict, index=[0])
    if os.path.exists(summary_fn):
        # DataFrame.append no longer exists in pandas 2.x; concat is its replacement
        summaryDF = pd.concat([pd.read_csv(summary_fn), summaryDF], ignore_index=True)
    summaryDF[col_order].to_csv(summary_fn, index=False)


def saveProgressSummary(agent, dataObj, batch_size, epsilon_start, epsilon_end, avg_loss_array, session_type, start_time, end_time):
    """Record one finished episode.

    Appends a row of statistics and hyper-parameters to
    '<model_dir>/progress_summary.csv' and, with an extra 'Model' column, to
    '<config_dir>/Deep_Models/all_progress_summary.csv', then prints the headline
    numbers tab-separated.

    Args:
        agent: the Agent whose trade_log / settings are summarised.
        dataObj: data holder passed on to getSprCrossInfo.
        batch_size: accepted for interface compatibility; not written to the summary.
        epsilon_start, epsilon_end: exploration rate at the start/end of the episode.
        avg_loss_array: losses collected during the episode (their mean is stored).
        session_type: label stored in the 'SessType' column.
        start_time, end_time: datetimes bracketing the episode.
    """
    pnl, pnl_per_day, sharpe, num_trades, drawdown, drawup, \
        ts_seen, pct_flat, pct_long, pct_short, data_start, data_end = getTradeLogStats(agent)

    lrg_spr_cross_pct, lrg_spr_cross_likelihood, lrg_spr_hold_preference = getSprCrossInfo(agent, dataObj)

    new_row_dict = {'Epi': agent.episode,
                    'SessType': session_type,
                    'Strat': agent.strategy, 
                    'PNL': round(pnl, 2),
                    'PNLpDay': round(pnl_per_day, 2),
                    'Sharpe': round(sharpe, 3),
                    'AvgLoss': round(np.mean(avg_loss_array), 5),
                    'NumTrades': num_trades,
                    'DrawDown': round(drawdown, 2),
                    'DrawUp': round(drawup, 2),
                    'LrgSprPct': round(lrg_spr_cross_pct, 3),
                    'LrgSprLikely': round(lrg_spr_cross_likelihood, 4),
                    'LrgSprPref': round(lrg_spr_hold_preference, 5),
                    # total_seconds() keeps whole days; .seconds silently drops them
                    'TimeTakenMin': round((end_time-start_time).total_seconds()/60),
                    'TsSeen': ts_seen,
                    'PctFlat': round(pct_flat, 2),
                    'PctLong': round(pct_long, 2),
                    'PctShort': round(pct_short, 2),
                    'DataStart': data_start,
                    'DataEnd': data_end,
                    'LossUpd': agent.num_loss_updates,
                    'TargetUpd': agent.num_target_updates,
                    'Gamma': agent.gamma,
                    'EpsilonStart': round(epsilon_start, 2),
                    'EpsilonEnd': round(epsilon_end, 2),
                    'EpsilonDecay': round(agent.epsilon_decay, 3),
                    # not every agent necessarily defines a target-reset period
                    'ResetEvery': getattr(agent, 'reset_every', np.nan),
                    # e.g. the function huber_loss is reported as 'huber'
                    'LossType': getattr(agent.loss, '__name__', str(agent.loss)).split('_loss')[0],
                    'LossParam': np.nan, #maybe should actually do this later
                    'OptType': type(agent.model.optimizer).__name__, #https://stackoverflow.com/questions/49785536/get-learning-rate-of-keras-model
                    'OptLR': K.eval(agent.model.optimizer.lr),
                    'StartTime': start_time,
                    'EndTime': end_time
                   }

    col_order = ['Epi', 'SessType', 'Strat', 
                'PNL', 'PNLpDay', 'Sharpe', 'AvgLoss', 'NumTrades',
                'DrawDown', 'DrawUp', 'LrgSprPct', 'LrgSprLikely', 'LrgSprPref',
                'TimeTakenMin', 'TsSeen', 'PctFlat', 'PctLong', 'PctShort',
                'DataStart', 'DataEnd', 'LossUpd', 'TargetUpd',
                'Gamma', 'EpsilonStart', 'EpsilonEnd', 'EpsilonDecay', 'ResetEvery', 
                'LossType', 'LossParam', 'OptType', 'OptLR', 'StartTime', 'EndTime']
    assert(set(col_order) == set(new_row_dict.keys()))

    # per-model summary
    _appendSummaryRow(agent.model_dir+'progress_summary.csv', new_row_dict, col_order)

    # summary shared by every model under this config, identified by 'Model'
    new_row_dict['Model'] = agent.model_cfg.model_str
    _appendSummaryRow(agent.config_dir+'Deep_Models/all_progress_summary.csv', new_row_dict, ['Model'] + col_order)

    print_keys = ['PNL', 'PNLpDay', 'Sharpe', 'AvgLoss', 'NumTrades', 'DrawDown', 'DrawUp', 'LrgSprPct', 'LrgSprLikely', 'LrgSprPref', 'TimeTakenMin', 'TsSeen', 'PctFlat', 'PctLong', 'PctShort', 'LossUpd', 'TargetUpd', 'EpsilonStart', 'EpsilonEnd']
    print('\t'.join([key+': '+str(new_row_dict[key]) for key in print_keys]))
    
    

In [45]:
# methods.py
# import os
# import logging

# import numpy as np
# from tqdm import tqdm


def endEpisode(agent, dataObj, batch_size, epsilon_start, avg_loss_array, sess_type, start_time):
    """Wrap up an episode: save the trade log, save the weights (train sessions
    only), write the progress summary and advance agent.episode by one."""
    is_training = (sess_type == 'train')

    agent.saveTradeLog(sess_type)

    # checkpoint frequency is currently every episode (modulus 1); it used to be 10
    if is_training and agent.episode % 1 == 0:
        agent.saveModelWeights()

    # epsilon is only meaningful for training sessions
    if is_training:
        epsilon_end = agent.epsilon
    else:
        epsilon_end = np.nan
    finished_at = getChiTimeNow()

    saveProgressSummary(agent, dataObj, batch_size, epsilon_start, epsilon_end,
                        avg_loss_array, sess_type, start_time, finished_at)
    agent.episode += 1

def train_model(agent, dataObj, ep_count=20, batch_size=32):
    """Train `agent` for `ep_count` episodes over `dataObj.trainDF`.

    Each episode resets the agent, then walks the rows of `dataObj.trainDF`.
    At step t it asks the agent for an action, computes the reward as
    calcOpenPnl(...) + evaluateAction(...) at t+1, stores the transition with
    agent.remember, and runs an experience-replay update once the memory holds
    more than `batch_size` entries and t is a multiple of `batch_size`.

    endEpisode is called exactly once per episode: after a clean finish, or
    after an interruption/error in the step loop (which is then re-raised).

    Args:
        agent: Provides episode, epsilon, memory, reset, act, remember and
            train_experience_replay.
        dataObj: Must expose `trainDF`; also handed to the state/PNL helpers.
        ep_count: Number of episodes to run.
        batch_size: Replay batch size and replay cadence (in timesteps).

    Raises:
        Whatever the step loop or endEpisode raises; nothing is swallowed.
    """
    # Episode number used as the "x/y" target in the progress-bar label.
    end_ep = agent.episode + ep_count
    for _ in range(ep_count):
        start_time = getChiTimeNow()
        epsilon_start = agent.epsilon
        data_length = len(dataObj.trainDF) - 1
        avg_loss_array = []
        agent.reset()

        state = get_state(agent, dataObj, 0)
        try:
            for t in tqdm(range(data_length), total=data_length, leave=True,
                          desc='Episode {}/{}'.format(agent.episode, end_ep)):
                # select an action
                action = agent.act(state)

                # Keep this call order: open PNL is computed before the action is evaluated.
                open_pnl = calcOpenPnl(agent, dataObj, t+1)
                day_pnl = evaluateAction(action, agent, dataObj, t+1)
                reward = open_pnl + day_pnl

                next_state = get_state(agent, dataObj, t+1)
                done = (t == data_length - 1)
                agent.remember(state, action, reward, next_state, done)

                # train every batch_size steps once enough experience is stored
                if len(agent.memory) > batch_size and t % batch_size == 0:
                    loss = agent.train_experience_replay(batch_size)
                    avg_loss_array.append(loss)

                state = next_state
        except (KeyboardInterrupt, SystemExit):
            print('KeyboardInterrupt or SystemExit. Ending current episode.')
            endEpisode(agent, dataObj, batch_size, epsilon_start, avg_loss_array,
                       'train', start_time)
            raise
        except Exception:
            print('Unknown error...Ending current episode.')
            endEpisode(agent, dataObj, batch_size, epsilon_start, avg_loss_array,
                       'train', start_time)
            raise
        else:
            # The success path lives outside the `try` body so that a failure
            # inside endEpisode is not "handled" by calling endEpisode again
            # (which would repeat the log/weight/summary writes).
            endEpisode(agent, dataObj, batch_size, epsilon_start, avg_loss_array,
                       'train', start_time)

def evaluate_model(agent, dataObj, debug=False, batch_size=32):
    """Run one evaluation episode and record it with sess_type 'eval'.

    The agent is reset and stepped through the rows of `dataObj.trainDF`,
    choosing actions with ``agent.act(state, is_eval=True)``. Rewards are
    computed as in training (calcOpenPnl + evaluateAction at t+1) and each
    transition is stored with agent.remember, but no replay update is run.

    endEpisode is called exactly once: after a clean finish, or after an
    interruption/error in the step loop (which is then re-raised).

    NOTE(review): this iterates `dataObj.trainDF`, the same frame used for
    training, even though validation frames are passed to DataObj elsewhere
    in the notebook. Confirm whether evaluating on validation data was intended.

    Args:
        agent: Provides reset, act and remember.
        dataObj: Must expose `trainDF`; also handed to the state/PNL helpers.
        debug: Accepted for compatibility; not referenced in the body.
        batch_size: Only forwarded to endEpisode for the summary row.

    Raises:
        Whatever the step loop or endEpisode raises; nothing is swallowed.
    """
    print('Evaluating Model')
    start_time = getChiTimeNow()
    epsilon_start = np.nan  # epsilon is not reported for evaluation sessions
    data_length = len(dataObj.trainDF) - 1
    avg_loss_array = []  # stays empty: no training updates happen here
    agent.reset()

    state = get_state(agent, dataObj, 0)

    try:
        for t in tqdm(range(data_length)):
            # select an action
            action = agent.act(state, is_eval=True)

            # Keep this call order: open PNL is computed before the action is evaluated.
            open_pnl = calcOpenPnl(agent, dataObj, t+1)
            day_pnl = evaluateAction(action, agent, dataObj, t+1)
            reward = open_pnl + day_pnl

            next_state = get_state(agent, dataObj, t+1)
            done = (t == data_length - 1)

            agent.remember(state, action, reward, next_state, done)

            state = next_state
    except (KeyboardInterrupt, SystemExit):
        print('KeyboardInterrupt or SystemExit. Ending current episode.')
        endEpisode(agent, dataObj, batch_size, epsilon_start, avg_loss_array,
                   'eval', start_time)
        raise
    except Exception:
        print('Unknown error...Ending current episode.')
        endEpisode(agent, dataObj, batch_size, epsilon_start, avg_loss_array,
                   'eval', start_time)
        raise
    else:
        # The success path lives outside the `try` body so that a failure
        # inside endEpisode is not "handled" by calling endEpisode again.
        endEpisode(agent, dataObj, batch_size, epsilon_start, avg_loss_array,
                   'eval', start_time)

In [46]:
# agent.episode = 0
# dataObj = DataObj(config_dir, nrows=20000)

In [47]:
# train_model(agent, dataObj)

In [48]:
# [] save results to summary_sheet.html
# [x] determine when to start and when to stop training each model
# [] convert chg columns to EMA
# [] move it all to Ext hard drive
# [x] why don't we have any target updates?
# [NA] why is LrgSprLikely likely the converse? It is (possibly) sadly likely correct
# [x] print some statistics in endEpisode

# Train

In [49]:
action_size = 3 # HOLD, BUY, SELL

# Candidate network architectures, ordered from smallest to largest.
# Each architecture is a list of layer-spec dicts: a stack of ReLU Dense layers
# followed by a linear Dense output layer with one unit per action.
# The architectures are generated from their hidden-layer widths; a width of
# None stands for a Dropout layer with rate 0.2 at that position.
keras_models = [
    [
        {'layer': 'Dropout', 'rate': 0.2} if width is None
        else {'layer': 'Dense', 'activation': 'relu', 'units': width}
        for width in hidden_widths
    ] + [{'layer': 'Dense', 'activation': 'linear', 'units': action_size}]
    for hidden_widths in (
        (64,),
        (256,),
        (128, 64),
        (256, 128),
        (256, 128, 64),
        (256, 128, 64, 64, 64),
        (256, 128, None, 64, 64, 64),
    )
]

In [50]:
# sec = 'TYSON_FOODS_INC-CL_A'
# comb_subDF = combDF.loc[combDF.Sec1 == sec].reset_index(drop=True)
# comb_row = comb_subDF.iloc[0]
# config_dir = locateConfigDir(comb_row, omni_dir, day_chg_incs, minute_incs)

In [51]:
# train_close_pricesDF, train_minutesDF, val_close_pricesDF, val_minutesDF = \
#             processData(comb_row, config_dir, day_chg_incs, minute_incs)

In [52]:
# dataObj = DataObj(config_dir, train_close_pricesDF, train_minutesDF, val_close_pricesDF, val_minutesDF)

In [53]:
# model_array = keras_models[0]
# model_cfg = ModelConfig(dataObj.getStateSize(), model_array=model_array)
# model_cfg.model_str

In [54]:
def _relativeImprovement(gain, baseline):
    """Express `gain` as a fraction of the magnitude of `baseline`.

    Dividing by abs(baseline) keeps the sign of the result equal to the sign
    of `gain`, so a negative baseline (e.g. a losing PNL) does not flip an
    improvement into a regression. A zero baseline yields +/-inf (or 0.0 when
    nothing changed) instead of a NaN.
    """
    if baseline == 0:
        if gain > 0:
            return float('inf')
        if gain < 0:
            return float('-inf')
        return 0.0
    return gain / abs(baseline)


def modelHasPromise(model_str, config_dir, exhaustiveness_level):
    """Decide whether a model deserves more training, based on its logged history.

    Reads `<config_dir>Deep_Models/all_progress_summary.csv` and considers only
    this model's 'train' rows with TsSeen >= 10000. The model "has promise"
    (returns True) when any of these hold:
      * the summary file does not exist yet,
      * fewer than 10 qualifying episodes are logged,
      * total loss updates are below 10000 * exhaustiveness_level,
      * total target updates are below 70% of that number,
      * the best of the last 3 episodes beats the mean of episodes t-10..t-6
        by at least 2% * exhaustiveness_level on BOTH per-timestep PNL and
        per-timestep loss.
    Otherwise returns False.

    Args:
        model_str: Value matched against the 'Model' column.
        config_dir: Directory prefix; concatenated as-is, so it must end with
            a path separator.
        exhaustiveness_level: Multiplier for the update minimums and tolerances.

    Returns:
        bool: True to keep training, False to move on.
    """
    print('Determining if model has promise:',model_str)
    all_progress_summary_fn = config_dir + 'Deep_Models/all_progress_summary.csv'
    if not os.path.exists(all_progress_summary_fn):
        print('all_progress_summary doesnt exist. Still has promise.')
        return True

    all_progress_summaryDF = pd.read_csv(all_progress_summary_fn)
    qualifies = (
        (all_progress_summaryDF.Model == model_str)
        & (all_progress_summaryDF.SessType == 'train')
        & (all_progress_summaryDF.TsSeen >= 10000)
    )
    progress_subDF = all_progress_summaryDF.loc[qualifies].reset_index(drop=True)
    if len(progress_subDF) < 10:
        print("We've only had", len(progress_subDF), 'sufficient episodes. Still has promise.')
        return True

    min_loss_updates = 10000*exhaustiveness_level
    if progress_subDF.LossUpd.sum() < min_loss_updates:
        print('We need', min_loss_updates-progress_subDF.LossUpd.sum(),'/', min_loss_updates, 'more loss updates for this exhaustiveness level.')
        return True

    # NOTE(review): the episodes logged in this notebook show far fewer target
    # updates than loss updates per episode (e.g. 75 vs 469), so this gate is
    # satisfied much later than the one above -- confirm the 0.7 ratio is intended.
    min_target_updates = round(min_loss_updates*.7)
    if progress_subDF.TargetUpd.sum() < min_target_updates:
        print('We need', min_target_updates-progress_subDF.TargetUpd.sum(),'/', min_target_updates, 'more target updates for this exhaustiveness level.')
        return True

    # check if the trajectory is improving. Loss and pnl are divided by ts_seen
    # so episodes of different length are comparable.
    loss_tolerance_pct = .02*exhaustiveness_level
    pnl_tolerance_pct = .02*exhaustiveness_level

    # check if the best of the past 3 trials is at least <tolerance> better than the avg of (t-5, t-10)
    recent = progress_subDF.iloc[-3:]
    previous = progress_subDF.iloc[-10:-5]
    best_recent_loss = (recent.AvgLoss/recent.TsSeen).min()
    best_recent_pnl = (recent.PNL/recent.TsSeen).max()
    avg_prev_loss = (previous.AvgLoss/previous.TsSeen).mean()
    avg_prev_pnl = (previous.PNL/previous.TsSeen).mean()

    # Lower loss is better, higher PNL is better. A NaN here never compares
    # below the tolerance, so missing values do not reject a model.
    loss_improvement = _relativeImprovement(avg_prev_loss - best_recent_loss, avg_prev_loss)
    pnl_improvement = _relativeImprovement(best_recent_pnl - avg_prev_pnl, avg_prev_pnl)

    if pnl_improvement < pnl_tolerance_pct:
        print('PNL improvement is only ',round(pnl_improvement*100, 3),'%. Does not qualify as promising.')
        return False

    if loss_improvement < loss_tolerance_pct:
        print('Loss improvement is only ',round(loss_improvement*100, 3),'%. Does not qualify as promising.')
        return False
    print('PNL and Loss Improvement checks passed with values', round(pnl_improvement*100, 3), 
          '% and', round(loss_improvement*100, 3),'%. Still has promise.')
    return True
    

In [55]:
def autoTrain(sec, combDF=combDF, nrows=None, exhaustiveness_level=2, keras_models=keras_models, omni_dir=omni_dir, 
              day_chg_incs=day_chg_incs, minute_incs=minute_incs, email_progress=False):
    """
    Train every candidate architecture for one security until it stops looking promising.

    1. Picks the first row of combDF whose Sec1 equals `sec` and locates (or
       prepares) its config_dir via locateConfigDir.
    2. Loads (or prepares) the processed train/validation frames via processData
       and wraps them in a DataObj (optionally limited to `nrows`).
    3. For each entry of `keras_models`: builds a ModelConfig and an Agent, then,
       for as long as modelHasPromise(...) is True, runs 5 rounds of
       (5 training episodes + 1 evaluation) before checking again.

    `email_progress` is accepted but not referenced in the body.
    Defaults for combDF, keras_models, omni_dir, day_chg_incs and minute_incs are
    the notebook globals as they were when this cell was executed.
    """
    assert exhaustiveness_level in (1, 2, 3, 4, 5)

    sec_rows = combDF.loc[combDF.Sec1 == sec]
    comb_row = sec_rows.iloc[0]
    config_dir = locateConfigDir(comb_row, omni_dir, day_chg_incs, minute_incs)

    processed_frames = processData(comb_row, config_dir, day_chg_incs, minute_incs)
    train_close_pricesDF, train_minutesDF, val_close_pricesDF, val_minutesDF = processed_frames
    dataObj = DataObj(config_dir, train_close_pricesDF, train_minutesDF,
                      val_close_pricesDF, val_minutesDF, nrows=nrows)

    for model_array in keras_models:
        model_cfg = ModelConfig(dataObj.getStateSize(), model_array=model_array)
        agent = Agent(model_cfg, config_dir)

        # Re-check progress before the first round and after every block of 5 rounds.
        while modelHasPromise(model_cfg.model_str, config_dir, exhaustiveness_level):
            for _ in range(5):
                train_model(agent, dataObj, ep_count=5)
                evaluate_model(agent, dataObj)

        print('Model does not have promise. Moving on.')
        

In [56]:
# Security to train on; autoTrain uses the first combDF row whose Sec1 equals this value.
# sec = 'US_Dollar_Index'
sec = 'TYSON_FOODS_INC-CL_A'

In [None]:
# Run the auto-training loop with nrows=15000 (autoTrain forwards nrows to DataObj).
autoTrain(sec, nrows=15000)

Existing matching config dir found: /home/andrew/All_Trading/Studies/Omni_Project/Primary_Assets/TYSON_FOODS_INC-CL_A/Config1
All Postprocessed_Data files exist. Checking columns
Columns passed the check. Loading postprocessed data.
loading train_close_pricesDF
loading train_minutesDF
loading val_close_pricesDF
loading val_minutesDF
loading complete.
presupplied data with nrows = 15000 ... subsetting the DFs.
splitting minutes
searching for pretrained model
Found models. Loading episode 61
Determining if model has promise: [Dense_64_rel][Dense_3_lin]
We need 3541 more loss updates for this exhaustiveness level.


HBox(children=(IntProgress(value=0, description='Episode 62/67', max=14999, style=ProgressStyle(description_wi…


PNL: -84.71	PNLpDay: -2.17	Sharpe: -21.969	AvgLoss: 0.32373	NumTrades: 3983	DrawDown: -87.69	DrawUp: 3.2	LrgSprPct: 0.406	LrgSprLikely: 0.971	LrgSprPref: -0.55951	TimeTakenMin: 2	TsSeen: 14999	PctFlat: 0.25	PctLong: 0.42	PctShort: 0.33	LossUpd: 468	TargetUpd: 74	EpsilonStart: 1.0	EpsilonEnd: 0.1


HBox(children=(IntProgress(value=0, description='Episode 63/67', max=14999, style=ProgressStyle(description_wi…


PNL: -27.31	PNLpDay: -0.7	Sharpe: -8.527	AvgLoss: 0.15948	NumTrades: 1321	DrawDown: -35.15	DrawUp: 4.69	LrgSprPct: 0.319	LrgSprLikely: 0.3768	LrgSprPref: 0.15974	TimeTakenMin: 3	TsSeen: 14999	PctFlat: 0.1	PctLong: 0.52	PctShort: 0.37	LossUpd: 469	TargetUpd: 75	EpsilonStart: 0.1	EpsilonEnd: 0.01


HBox(children=(IntProgress(value=0, description='Episode 64/67', max=14999, style=ProgressStyle(description_wi…


PNL: -18.56	PNLpDay: -0.48	Sharpe: -5.892	AvgLoss: 0.1829	NumTrades: 979	DrawDown: -27.26	DrawUp: 7.51	LrgSprPct: 0.261	LrgSprLikely: 1.0	LrgSprPref: -0.50929	TimeTakenMin: 2	TsSeen: 14999	PctFlat: 0.07	PctLong: 0.55	PctShort: 0.39	LossUpd: 469	TargetUpd: 75	EpsilonStart: 0.01	EpsilonEnd: 0.01


HBox(children=(IntProgress(value=0, description='Episode 65/67', max=14999, style=ProgressStyle(description_wi…


PNL: -17.88	PNLpDay: -0.46	Sharpe: -7.856	AvgLoss: 0.15196	NumTrades: 1053	DrawDown: -20.53	DrawUp: 3.11	LrgSprPct: 0.29	LrgSprLikely: 0.3188	LrgSprPref: 0.07689	TimeTakenMin: 2	TsSeen: 14999	PctFlat: 0.08	PctLong: 0.45	PctShort: 0.46	LossUpd: 469	TargetUpd: 75	EpsilonStart: 0.01	EpsilonEnd: 0.01


HBox(children=(IntProgress(value=0, description='Episode 66/67', max=14999, style=ProgressStyle(description_wi…


PNL: -9.11	PNLpDay: -0.23	Sharpe: -3.516	AvgLoss: 0.14946	NumTrades: 767	DrawDown: -15.66	DrawUp: 6.9	LrgSprPct: 0.232	LrgSprLikely: 0.5507	LrgSprPref: -0.07032	TimeTakenMin: 3	TsSeen: 14999	PctFlat: 0.06	PctLong: 0.53	PctShort: 0.41	LossUpd: 469	TargetUpd: 75	EpsilonStart: 0.01	EpsilonEnd: 0.01
Evaluating Model


HBox(children=(IntProgress(value=0, max=14999), HTML(value='')))

In [None]:
# Run the auto-training loop with nrows=45000 (autoTrain forwards nrows to DataObj).
autoTrain(sec, nrows=15000*3)