In [None]:
# This allows importing Jupyter notebooks as modules
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
import JupyterNotebookImporter

In [None]:
# GENERAL IMPORTS

import pandas as pd
import numpy as np
import scipy as sp
import regex as re

import plotly.offline as py
import plotly.graph_objs as go
import plotly
plotly.offline.init_notebook_mode(connected=True)

import ipywidgets

import pprint
import random
import datetime
import math
import bisect
import time
from os import listdir
from os.path import isfile, join

from io import StringIO
import dis
import hashlib

from IPython.display import clear_output
from AlgoPlotting import XYChart
from AlgoPlotting import CandlestickChart

In [None]:
def get_minute_data_dir():
    """Return the relative, backslash-separated directory of the full minute-bar files."""
    path_parts = ('Data', 'KibotData', 'SP500_1')
    return '\\'.join(path_parts)

def get_single_day_data_dir():
    """Return the relative, backslash-separated directory of the cached single-day minute files."""
    path_parts = ('Data', 'SingleDayData', 'SP500')
    return '\\'.join(path_parts)

def get_day_data_dir():
    """Return the relative, backslash-separated directory of the daily-bar files."""
    path_parts = ('Data', 'KibotData', 'SP500_day')
    return '\\'.join(path_parts)

def get_setup_data_dir():
    """Return the relative, backslash-separated directory where setup-data cache files live."""
    path_parts = ('Data', 'SetupData')
    return '\\'.join(path_parts)

In [None]:
# Maybe move this inside data library to keep everything like this in one place
def load_minute_data(instrument, date):
    """Return one day of minute bars for ``instrument`` as a DataFrame.

    The cached file ``<instrument>_<YYYYMMDD>.txt`` in the single-day data
    directory is tried first. When it does not exist, the work is delegated to
    ``load_raw_minute_data``, which reads the full minute file and writes the
    single-day file for next time.

    Parameters
    ----------
    instrument : str
        File stem of the instrument (used to build the file name).
    date : datetime.date
        Day to load; only ``strftime`` is called on it here.

    Returns
    -------
    pandas.DataFrame
        Columns Date, Time, Open, High, Low, Close, Volume, DateTime.
    """
    cache_name = instrument + date.strftime('_%Y%m%d') + '.txt'
    cache_path = get_single_day_data_dir() + '\\' + cache_name

    def _parse_stamp(stamp):
        # stamp is Date + Time concatenated, e.g. 'MM/DD/YYYYHH:MM'
        return datetime.datetime(
            int(stamp[6:10]), int(stamp[0:2]), int(stamp[3:5]),
            int(stamp[10:12]), int(stamp[13:15]),
        )

    # Fast path: the small single-day file written by an earlier call.
    try:
        bar_columns = ['Date', 'Time', 'Open', 'High', 'Low', 'Close', 'Volume', 'DateTime']
        day_bars = pd.read_csv(cache_path, names=bar_columns)
        stamps = (day_bars['Date'] + day_bars['Time']).map(_parse_stamp)
        day_bars['DateTime'] = list(stamps)
    # Slow path: slice the day out of the full minute file and cache it.
    except FileNotFoundError:
        day_bars = load_raw_minute_data(instrument, date, cache_path)
    return day_bars

# Lazy name
def load_raw_minute_data(instrument, date, single_day_file_path):
    """Slice one trading day out of the full minute file and cache it.

    Reads ``<minute data dir>/<instrument>.txt``, keeps the rows whose
    timestamp lies between 09:30 and 16:00 (inclusive) on ``date``, writes
    them to ``single_day_file_path`` without header or index (same layout as
    the full minute files, plus the DateTime column) and returns them.

    Parameters
    ----------
    instrument : str
        File stem of the instrument in the minute data directory.
    date : datetime.date
        Day to extract.
    single_day_file_path : str
        Destination of the single-day cache file. Its parent directory is
        created when missing.

    Returns
    -------
    pandas.DataFrame
        Columns Date, Time, Open, High, Low, Close, Volume, DateTime.
    """
    minute_data_dir = get_minute_data_dir()
    col_names = ['Date', 'Time', 'Open', 'High', 'Low', 'Close', 'Volume']
    minute_data = pd.read_csv(join(minute_data_dir, instrument+'.txt'), names=col_names)
    date_times = (minute_data['Date']+minute_data['Time']).map(lambda x: datetime.datetime(int(x[6:10]), int(x[0:2]), int(x[3:5]), int(x[10:12]), int(x[13:15])))
    minute_data['DateTime'] = list(date_times)

    # Session bounds are loop-invariant, so build them once.
    session_open = datetime.datetime(date.year, date.month, date.day, 9, 30, 0)
    session_close = datetime.datetime(date.year, date.month, date.day, 16, 0, 0)
    # An explicit boolean array keeps this a row filter even when it is empty
    # (an empty plain list would be read as "select zero columns").
    in_session = np.array(
        [session_open <= dt <= session_close for dt in minute_data['DateTime']],
        dtype=bool,
    )
    minute_data = minute_data[in_session].copy()

    # Make sure the cache directory exists before writing into it.
    cache_dir = os.path.dirname(single_day_file_path)
    if cache_dir:
        os.makedirs(cache_dir, exist_ok=True)

    # No headers or index in the file, want it in the same format as the full minute data files
    minute_data.to_csv(single_day_file_path, header=False, index=False)
    return minute_data

In [None]:
# class DataLibrary:
#     def __init__(self, filename):
#         self.filename = filename
#         #self.df = pd.DataFrame()
#         self.df = pd.read_csv('DataLibrary/' + self.filename, index_col = 0)
        
#     def get_data(self, date, instrument, data_name):
#         key = self.keygen(date, instrument)
#         try:
#             data = self.df[data_name].loc[key]
#         except:
#             self.regen(date, instrument)
#             data = self.df[data_name].loc[key]
        
#         if not data or np.isnan(data):
#             self.regen(date, instrument)
#             data = self.df[data_name].loc[key]
        
#         return data
    
#     def regen(self, date, instrument):
#         key = self.keygen(date, instrument)
#         data_names = ['Open', 'High', 'Low', 'Close', 'Volume', 'FirstMinClose']
        
#         minutedatadir = get_minute_data_dir()
#         minutecolnames = ['Date', 'Time', 'Open', 'High', 'Low', 'Close', 'Volume']
#         pricedata = pd.read_csv(join(minutedatadir, instrument+'.txt'), names=minutecolnames)
#         pricedata['DateTime'] = (pricedata['Date']+pricedata['Time']).map(lambda x: datetime.datetime(int(x[6:10]), int(x[0:2]), int(x[3:5]), int(x[10:12]), int(x[13:15])))
#         pricedata = pricedata[[(dt >= datetime.datetime(date.year, date.month, date.day, 9, 30, 0)) and (dt <= datetime.datetime(date.year, date.month, date.day, 16, 0, 0)) for dt in pricedata['DateTime']]].copy()
# o
#         for data_name in data_names:
#             try:
#                 temp = self.df[data_name]
#             except:
#                 self.df[data_name] = [None] * len(self.df)

#         try:
#             temp = self.df.loc[key]
#         except:
#             new_row = {}
#             for data_name in data_names:
#                 new_row[data_name] = None

#             to_append = pd.DataFrame(new_row, index=[key])
#             self.df = self.df.append(to_append)

#         self.df['Open'][key] = pricedata['Open'].iloc[0]
#         self.df['Close'][key] = pricedata['Close'].iloc[-1]
#         self.df['High'][key] = np.max(pricedata['High'])
#         self.df['Low'][key] = np.min(pricedata['Low'])
#         self.df['Volume'][key] = np.sum(pricedata['Volume'])
#         self.df['FirstMinClose'][key] = pricedata['Close'].iloc[0]
        
#     def remove_col(self, data_name):
#         self.df = self.df.drop(columns=[data_name])
        
#     def keygen(self, date, instrument):
#         return str(date) + str(instrument)
        
#     def close(self):
#         timestr = time.strftime("_%Y%m%d-%H%M%S")
#         self.df.to_csv('DataLibrary/' + self.filename)
#         self.df.to_csv('DataLibrary/Backups/' + self.filename[:-4] + timestr + '.txt')

In [None]:
def get_gap_instruments():
    """Find, for each calendar day, the instruments with the largest opening gaps.

    For every ``.txt`` file in the daily data directory the overnight gap
    (today's Open minus the previous row's Close) is computed. For each day in
    ``[start_date, end_date)`` the gap is divided by that day's Open, and
    instruments are kept when their dollar volume (Volume * Open) and absolute
    relative gap both exceed the thresholds in ``setup_param_dict``. The
    ``num_instruments`` largest absolute gaps of the day are reported.

    NOTE: ``get_setup_data`` refuses to cache the result if this function
    loads any global that is not a module, builtin or type. That is why the
    directory path is a literal here and no module-level helper is called.

    Returns
    -------
    list
        ``[setup_param_dict, dates, max_gap_instruments, gaps]`` where the last
        three are parallel lists: a date, the instruments selected on that
        date, and their relative gaps. Days with no selection are omitted.
    """
    setup_param_dict = {
        'start_date': datetime.date(2017,1,1),
        'end_date': datetime.date(2018,1,1),
        'dollar_volume_threshold': 25e6,
        'min_gap_threshold': 0.05,
        'num_instruments': 3
    }

    start_date = setup_param_dict['start_date']
    end_date = setup_param_dict['end_date']
    dollar_volume_threshold = setup_param_dict['dollar_volume_threshold']
    min_gap_threshold = setup_param_dict['min_gap_threshold']
    num_instruments = setup_param_dict['num_instruments']

    daydatadir = r'''Data\KibotData\SP500_day'''
    files = [f for f in listdir(daydatadir) if os.path.isfile(os.path.join(daydatadir, f))]
    # Strip the 4-character extension ('.txt') to get the instrument name.
    instruments = [f[0:-4] for f in files]

    colnames = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume']
    daypricedata = {}
    for f in instruments:
        df = pd.read_csv(os.path.join(daydatadir, f+'.txt'), names=colnames)
        # Dates are stored as 'MM/DD/YYYY'.
        df['Date'] = df['Date'].map(lambda x: datetime.date(int(x[6:10]), int(x[0:2]), int(x[3:5])))
        # Gap = Open[t] - Close[t-1]; the first row has no predecessor, so its gap is 0.
        df['Gap'] = np.insert(np.array(df['Open'][1:])-np.array(df['Close'][:-1]),0,0)
        df = df.set_index('Date')
        daypricedata[f] = df

    dates = []
    max_gap_instruments = []
    gaps = []

    for dd in range((end_date - start_date).days):
        date = start_date + datetime.timedelta(days=dd)
        # 0 marks "no qualifying gap" for the instrument at the same position.
        day_gaps = [0] * len(instruments)
        for ii, instrument in enumerate(instruments):
            if date in daypricedata[instrument].index:
                day_row = daypricedata[instrument].loc[date]
                gap = day_row['Gap'] / day_row['Open']
                dollar_volume = day_row['Volume'] * day_row['Open']
                above_dollar_volume_threshold = dollar_volume > dollar_volume_threshold
                above_min_gap_threshold = abs(gap) > min_gap_threshold
                if above_dollar_volume_threshold and above_min_gap_threshold:
                    day_gaps[ii] = gap

        # Indices of the largest absolute gaps first.
        max_gap_indices = (-np.abs(day_gaps)).argsort()[:num_instruments]
        # Compare by value: `is not 0` relied on CPython's small-int caching.
        selected = [idx for idx in max_gap_indices if day_gaps[idx] != 0]
        day_max_gap_instruments = [instruments[idx] for idx in selected]
        day_max_gaps = [day_gaps[idx] for idx in selected]

        if day_max_gap_instruments:
            dates.append(date)
            max_gap_instruments.append(day_max_gap_instruments)
            gaps.append(day_max_gaps)
            print(date, day_max_gap_instruments, day_max_gaps)

    return [setup_param_dict, dates, max_gap_instruments, gaps]

In [None]:
def _collect_global_names(code):
    """Return the names loaded via LOAD_GLOBAL in ``code`` and its nested code objects."""
    names = [instr.argval for instr in dis.get_instructions(code)
             if instr.opname == 'LOAD_GLOBAL']
    for const in code.co_consts:
        # Lambdas and comprehensions are stored as code objects in co_consts.
        if hasattr(const, 'co_code'):
            names.extend(_collect_global_names(const))
    return names


def get_setup_data():
    """Return the setup data, reusing an on-disk cache keyed by the generator's bytecode.

    The disassembly of the generating function (with memory addresses and
    file locations stripped) is hashed with SHA-256. If a file named after
    that hash exists in the setup data directory its first line is evaluated
    and returned; otherwise the generator is run and its ``str()`` is written
    to that file.

    Caching is skipped, with a printed warning, when the generator loads any
    global that is not a module, builtin function or type, because its result
    could then depend on state that the bytecode hash does not capture.

    Returns
    -------
    list
        Whatever the generating function returns (currently
        ``get_gap_instruments``).
    """
    import builtins

    # Choose a function to generate setup data
    generate_setup_data = get_gap_instruments

    # Disassemble straight into a buffer instead of swapping sys.stdout, so an
    # exception cannot leave stdout redirected.
    strio = StringIO()
    dis.dis(generate_setup_data, file=strio)
    # Drop ' at 0x..., file "...", line' from code-object reprs so the hash
    # does not change between sessions.
    bytecode_str = re.sub("(?<=>) at.*?line", "", strio.getvalue())

    # Check if generate_setup_data() accesses any global variables (other than external modules).
    # If it does, we can't guarantee that it will produce the same results every time, so we can't cache the results.
    # Names come from the instruction stream rather than from the printed text,
    # whose layout differs between Python versions.
    global_vars = list(dict.fromkeys(_collect_global_names(generate_setup_data.__code__)))
    namespace = generate_setup_data.__globals__
    acceptable_types = ['module', 'builtin_function_or_method', 'type']
    cacheable = True
    for var in global_vars:
        if var in namespace:
            global_type = str(type(namespace[var]))[8:-2]
        elif hasattr(builtins, var):
            global_type = str(type(getattr(builtins, var)))[8:-2]
        else:
            global_type = 'undefined'
        if global_type not in acceptable_types:
            cacheable = False
            print("Warning: setup_data not cacheable because generating function has global variable '" + var
                 + "' of type '" + global_type + "'")

    if not cacheable:
        return generate_setup_data()

    setup_data_hash = hashlib.sha256(bytecode_str.encode()).hexdigest()
    setupdatadir = get_setup_data_dir()
    cachefile = join(setupdatadir, str(setup_data_hash)+'.txt')
    if isfile(cachefile):
        with open(cachefile, 'r') as f:
            # SECURITY NOTE(review): eval() executes whatever is in the cache
            # file. It is kept because the cache holds reprs such as
            # datetime.date(...), which a literal parser cannot read. Only use
            # cache files this notebook wrote itself.
            setup_data = eval(f.readline())
    else:
        setup_data = generate_setup_data()
        # Create the cache directory on first use.
        if setupdatadir:
            os.makedirs(setupdatadir, exist_ok=True)
        with open(cachefile, 'w') as f:
            f.write(str(setup_data))

    return setup_data

In [None]:
# Layout of setup_data: [setup_param_dict, dates, max_gap_instruments, gaps]
setup_data = get_setup_data()
setup_param_dict = setup_data[0]

# The inner lists are shallow-copied, so data_dict does not share them with setup_data.
data_dict = dict(
    dates=setup_data[1],
    max_gap_instruments=[day_instruments[:] for day_instruments in setup_data[2]],
    gaps=[day_gaps[:] for day_gaps in setup_data[3]],
)

In [None]:
# idx = [ii for ii in range(len(max_gap_instruments))]
# random.shuffle(idx)

# rand_dates = list(max_gap_instruments.keys())
# rand_instruments = list(max_gap_instruments.values())

# for ii in range(len(rand_dates)):
#     max_gap_instruments[rand_dates[ii]] = rand_instruments[idx[ii]]

In [None]:
def day_gap_trading_strategy(portfolio, data_dict, param_dict):
    """Trade each selected instrument once per day, in the direction of its first minute.

    For every (date, instrument) pair in ``data_dict`` the day's minute bars
    are loaded. If the first bar closes above the day's open, roughly $10,000
    of the instrument is bought at the first bar's close and sold at the last
    bar's close. If it closes below, the trade is reversed (sell first, buy
    back at the close). Nothing is traded when the two prices are equal.

    The earlier version relied on a ``DataLibrary`` class and on
    ``start_date`` / ``end_date`` globals that are not defined in this
    notebook, and indexed the per-day lists by date. Prices are now taken from
    ``load_minute_data`` using the same definitions the commented-out
    DataLibrary used: Open = first bar's Open, Close = last bar's Close,
    FirstMinClose = first bar's Close.

    Parameters
    ----------
    portfolio : Portfolio
        Receives the buy/sell calls. Its ``portfolio_history`` and
        ``transaction_history`` are converted to DataFrames at the end.
    data_dict : dict
        Needs ``'dates'`` and ``'max_gap_instruments'`` as parallel lists.
    param_dict : dict
        Unused; kept so all strategies share one signature.
    """
    dates = data_dict['dates']
    max_gap_instruments = data_dict['max_gap_instruments']

    for ii, date in enumerate(dates):
        for instrument in max_gap_instruments[ii]:
            try:
                minute_data = load_minute_data(instrument, date)
                open_price = minute_data['Open'].iloc[0]
                close_price = minute_data['Close'].iloc[-1]
                first_min_close_price = minute_data['Close'].iloc[0]
            except Exception:
                # Missing or empty data for this instrument/day: skip it.
                continue

            transaction_price = first_min_close_price
            open_transaction_dt = datetime.datetime(date.year, date.month, date.day, 9, 31, 0)
            close_transaction_dt = datetime.datetime(date.year, date.month, date.day, 15, 59, 0)

            if first_min_close_price > open_price:
                quantity = np.floor(10000/transaction_price)
                portfolio.buy(instrument, quantity, transaction_price, open_transaction_dt)
                portfolio.sell(instrument, quantity, close_price, close_transaction_dt)
            elif first_min_close_price < open_price:
                quantity = np.floor(10000/transaction_price)
                portfolio.sell(instrument, quantity, transaction_price, open_transaction_dt)
                portfolio.buy(instrument, quantity, close_price, close_transaction_dt)

    portfolio.portfolio_history = pd.DataFrame(portfolio.portfolio_history)
    portfolio.transaction_history = pd.DataFrame(portfolio.transaction_history)

In [None]:
def gap_trading_strategy(portfolio, data_dict, param_dict):
    """Intraday strategy that flips long/short around the day's opening price.

    For every (date, instrument) pair the minute bars are walked in order:

    * first bar: if it closes above the open, the instrument is skipped for
      the day; if it closes below, about $10,000 worth is sold short;
    * later bars: when the close crosses to the other side of the open, the
      position is reversed (or only closed, if that trade would be the last
      one allowed by ``param_dict['Max Trades']``);
    * last bar: any open position is closed at the close.

    The loop stops as soon as ``portfolio.daily_trades`` reaches the limit.

    Parameters
    ----------
    portfolio : Portfolio
        Receives the buy/sell calls. Its ``positions[instrument]`` is reset to
        0 before trading, and both histories are converted to DataFrames at
        the end.
    data_dict : dict
        Needs ``'dates'`` and ``'max_gap_instruments'`` as parallel lists.
    param_dict : dict
        Needs ``'Max Trades'``.
    """
    dates = data_dict['dates']
    max_gap_instruments = data_dict['max_gap_instruments']

    max_trades = param_dict['Max Trades']

    for ii, date in enumerate(dates):
        for instrument in max_gap_instruments[ii]:
            try:
                minute_data = load_minute_data(instrument, date)
                portfolio.positions[instrument] = 0
                open_price = minute_data['Open'].iloc[0]
            except Exception:
                # Missing or empty data: skip this instrument. `Exception`
                # (not a bare except) so Ctrl-C still interrupts a long run.
                continue

            if len(minute_data) <= 1:
                continue

            portfolio.daily_trades = 0
            last_index = len(minute_data) - 1
            for index in range(len(minute_data)):
                row = minute_data.iloc[index]
                # Position before this bar's trade; at most one trade per bar.
                position = portfolio.positions[instrument]

                if index == last_index:
                    # End of day: flatten whatever is open.
                    if position < 0:
                        portfolio.buy(instrument, -position, row['Close'], row['DateTime'])
                    elif position > 0:
                        portfolio.sell(instrument, position, row['Close'], row['DateTime'])

                elif index == 0:
                    if row['Close'] > open_price:
                        # Instruments whose first minute closes up are skipped.
                        # The long entry that used to follow this `break` was
                        # unreachable and has been removed.
                        break
                    elif row['Close'] < open_price:
                        if position >= 0:
                            quantity = np.floor(10000/row['Close'])
                            portfolio.sell(instrument, position + quantity, row['Close'], row['DateTime'])

                elif row['Close'] > open_price:
                    if position <= 0:
                        quantity = np.floor(10000/row['Close'])
                        # Last allowed trade: only close, do not reverse.
                        if portfolio.daily_trades == max_trades - 1:
                            quantity = 0
                        portfolio.buy(instrument, -position + quantity, row['Close'], row['DateTime'])

                elif row['Close'] < open_price:
                    if position >= 0:
                        quantity = np.floor(10000/row['Close'])
                        # Last allowed trade: only close, do not reverse.
                        if portfolio.daily_trades == max_trades - 1:
                            quantity = 0
                        portfolio.sell(instrument, position + quantity, row['Close'], row['DateTime'])

                if portfolio.daily_trades == max_trades:
                    break

    portfolio.portfolio_history = pd.DataFrame(portfolio.portfolio_history)
    portfolio.transaction_history = pd.DataFrame(portfolio.transaction_history)

In [None]:
def bear_mode_gap_trading_strategy(portfolio, data_dict, param_dict):
    """Short instruments whose first minute closes below the open, with a trailing stop.

    For every (date, instrument) pair:

    * first bar: if it closes at or above the day's open the instrument is
      skipped; otherwise about $10,000 worth is sold short at that close;
    * later bars: the short is covered at the bar's close when the bar's High
      exceeds the stop level, or on the last bar of the day;
    * after every bar the stop level is lowered to ``min(stop, 1.03 * Close)``
      (it starts at the day's open).

    Parameters
    ----------
    portfolio : Portfolio
        Receives the buy/sell calls. Its ``positions[instrument]`` is reset to
        0 before trading, and both histories are converted to DataFrames at
        the end.
    data_dict : dict
        Needs ``'dates'`` and ``'max_gap_instruments'`` as parallel lists.
    param_dict : dict
        Unused; kept so all strategies share one signature.
    """
    # When True, a stopped-out short would be covered at the stop level
    # instead of the bar's close. It was hard-wired off (`and False`) before.
    fill_at_stop_price = False

    dates = data_dict['dates']
    max_gap_instruments = data_dict['max_gap_instruments']

    for ii, date in enumerate(dates):
        for instrument in max_gap_instruments[ii]:
            try:
                minute_data = load_minute_data(instrument, date)
                portfolio.positions[instrument] = 0
                open_price = minute_data['Open'].iloc[0]
            except Exception:
                # Missing or empty data: skip this instrument. `Exception`
                # (not a bare except) so Ctrl-C still interrupts a long run.
                continue

            if len(minute_data) <= 1:
                continue

            portfolio.daily_trades = 0
            stop_loss = open_price
            last_index = len(minute_data) - 1
            for index in range(len(minute_data)):
                row = minute_data.iloc[index]

                if index == 0:
                    if row['Close'] >= open_price:
                        break
                    elif row['Close'] < open_price:
                        print('Trading ', instrument, ' on ', date)
                        if portfolio.positions[instrument] >= 0:
                            quantity = np.floor(10000/row['Close'])
                            portfolio.sell(instrument, portfolio.positions[instrument] + quantity, row['Close'], row['DateTime'])

                elif index == last_index or row['High'] > stop_loss:
                    if portfolio.positions[instrument] < 0:
                        if fill_at_stop_price and row['High'] > stop_loss:
                            cover_price = stop_loss
                        else:
                            cover_price = row['Close']
                        portfolio.buy(instrument, -portfolio.positions[instrument], cover_price, row['DateTime'])

                # This guard prevented a crash "can't multiply sequence with non-int float".
                # The row's Close was presumably not numeric; the bar is reported and the
                # stop level is left unchanged.
                try:
                    stop_loss = min(stop_loss, 1.03*row['Close'])
                except Exception:
                    print("Ghost at: ", instrument, " ", date, " row['Close']: ", row['Close'])

    portfolio.portfolio_history = pd.DataFrame(portfolio.portfolio_history)
    portfolio.transaction_history = pd.DataFrame(portfolio.transaction_history)

In [None]:
def bull_mode_gap_trading_strategy(portfolio, data_dict, param_dict):
    """Buy instruments whose first minute closes above the open, with a trailing stop.

    For every (date, instrument) pair:

    * first bar: if it closes at or below the day's open the instrument is
      skipped; otherwise about $10,000 worth is bought at that close;
    * later bars: the long is sold at the bar's close when the bar's Low falls
      below the stop level, or on the last bar of the day;
    * after every bar the stop level is raised to ``max(stop, 0.97 * Close)``
      (it starts at the day's open).

    Parameters
    ----------
    portfolio : Portfolio
        Receives the buy/sell calls. Its ``positions[instrument]`` is reset to
        0 before trading, and both histories are converted to DataFrames at
        the end.
    data_dict : dict
        Needs ``'dates'`` and ``'max_gap_instruments'`` as parallel lists.
    param_dict : dict
        Unused; kept so all strategies share one signature.
    """
    # When True, a stopped-out long would be sold at the stop level instead
    # of the bar's close. It was hard-wired off (`and False`) before.
    fill_at_stop_price = False

    dates = data_dict['dates']
    max_gap_instruments = data_dict['max_gap_instruments']

    for ii, date in enumerate(dates):
        for instrument in max_gap_instruments[ii]:
            try:
                minute_data = load_minute_data(instrument, date)
                portfolio.positions[instrument] = 0
                open_price = minute_data['Open'].iloc[0]
            except Exception:
                # Missing or empty data: skip this instrument. `Exception`
                # (not a bare except) so Ctrl-C still interrupts a long run.
                continue

            if len(minute_data) <= 1:
                continue

            portfolio.daily_trades = 0
            stop_loss = open_price
            last_index = len(minute_data) - 1
            for index in range(len(minute_data)):
                row = minute_data.iloc[index]

                if index == 0:
                    if row['Close'] <= open_price:
                        break
                    elif row['Close'] > open_price:
                        print('Trading ', instrument, ' on ', date)
                        if portfolio.positions[instrument] <= 0:
                            quantity = np.floor(10000/row['Close'])
                            portfolio.buy(instrument, -portfolio.positions[instrument] + quantity, row['Close'], row['DateTime'])

                elif index == last_index or row['Low'] < stop_loss:
                    if portfolio.positions[instrument] > 0:
                        if fill_at_stop_price and row['Low'] < stop_loss:
                            sell_price = stop_loss
                        else:
                            sell_price = row['Close']
                        portfolio.sell(instrument, portfolio.positions[instrument], sell_price, row['DateTime'])

                # This guard prevented a crash "can't multiply sequence with non-int float".
                # The row's Close was presumably not numeric; the bar is reported and the
                # stop level is left unchanged.
                try:
                    stop_loss = max(stop_loss, 0.97*row['Close'])
                except Exception:
                    print("Ghost at: ", instrument, " ", date, " row['Close']: ", row['Close'])

    portfolio.portfolio_history = pd.DataFrame(portfolio.portfolio_history)
    portfolio.transaction_history = pd.DataFrame(portfolio.transaction_history)

In [None]:
class Portfolio:
    """Cash, positions and trade log of a simulated trading account.

    ``param_dict`` must provide ``'Starting Cash'`` and ``'Commission'`` (a
    flat fee charged on every buy and every sell).

    ``portfolio_history`` and ``transaction_history`` start as lists of dicts.
    The strategy functions replace them with DataFrames once a run is
    finished; ``buy``/``sell`` must not be called after that.
    """

    def __init__(self, param_dict):
        self.param_dict = param_dict
        self.starting_cash = param_dict['Starting Cash']
        self.cash = self.starting_cash
        self.positions = {}            # instrument -> signed quantity held
        self.commission = param_dict['Commission']

        # Before any trade the account is worth exactly its cash, so a run
        # without trades reports a profit of 0 instead of -starting_cash.
        self.net_value = self.starting_cash
        self.portfolio_history = []    # one snapshot per trade
        self.transaction_history = []  # one record per trade
        self.daily_history = []
        self.total_commission_losses = 0

        self.daily_trades = 0          # reset by the strategies per instrument/day

    def update(self, instrument, price, date_time):
        """Revalue the account and append a snapshot to ``portfolio_history``.

        Only the position in ``instrument`` is valued (at ``price``);
        positions in other instruments do not contribute to ``net_value``.
        """
        self.net_value = self.cash + self.positions[instrument] * price

        self.portfolio_history.append(
            {
                "DateTime": date_time,
                "Cash": self.cash,
                # Copy, otherwise every snapshot would alias the same live
                # dict and show the final positions.
                "Position": dict(self.positions),
                "Net Value": self.net_value
            }
        )

    def _execute(self, instrument, signed_quantity, price, date_time):
        """Book a trade; ``signed_quantity`` is positive for buys, negative for sells."""
        self.cash -= signed_quantity*price + self.commission
        self.total_commission_losses += self.commission

        self.positions[instrument] = self.positions.get(instrument, 0) + signed_quantity

        # Keep cash at cent precision.
        self.cash = round(self.cash*100)/100

        self.transaction_history.append(
            {
                "DateTime": date_time,
                "Date": date_time.date(),
                "Instrument" : instrument,
                "Price": price,
                "Quantity": signed_quantity
            }
        )
        self.daily_trades += 1
        self.update(instrument, price, date_time)

    def buy(self, instrument, quantity, price, date_time):
        """Buy ``quantity`` of ``instrument`` at ``price``, paying the commission."""
        self._execute(instrument, quantity, price, date_time)

    def sell(self, instrument, quantity, price, date_time):
        """Sell ``quantity`` of ``instrument`` at ``price``, paying the commission.

        The trade is recorded with a negative ``Quantity``.
        """
        self._execute(instrument, -quantity, price, date_time)

    def logging_dict(self):
        """Summarise the run as a dict suitable for the log file.

        Drawdown is measured against the running peak of ``Net Value``:
        ``'Max Drawdown'`` is the largest absolute drop and
        ``'Max Drawdown %'`` the largest relative drop, in percent. Both are
        0 when there is no history. Works whether ``portfolio_history`` is
        still a list or already a DataFrame.
        """
        history = pd.DataFrame(self.portfolio_history)
        net_values = list(history["Net Value"]) if "Net Value" in history.columns else []

        max_net_value = net_values[0] if net_values else self.net_value
        max_drawdown = 0
        max_drawdown_percentage = 0
        for value in net_values:
            max_net_value = max(max_net_value, value)
            max_drawdown = max(max_drawdown, max_net_value - value)
            # A relative drawdown is only meaningful against a positive peak.
            if max_net_value > 0:
                max_drawdown_percentage = max(max_drawdown_percentage, 1 - value / max_net_value)

        d = {
            'param_dict' : self.param_dict,
            'Starting Cash' : self.starting_cash,
            'Commission' : self.commission,
            'Profit' : self.net_value-self.starting_cash,
            'Number of Trades' : len(self.transaction_history),
            'Max Drawdown' : max_drawdown,
            # Use the relative drawdown tracked in the loop; dividing the
            # largest absolute drop by the final peak understates it.
            'Max Drawdown %' : max_drawdown_percentage * 100
        }

        return d

    def get_history_breakdown(self, data_dict):
        """Group transactions by instrument and day.

        Parameters
        ----------
        data_dict : dict
            Needs ``'dates'``, ``'max_gap_instruments'`` and ``'gaps'`` as
            parallel lists; used to attach the gap to each group.

        Returns
        -------
        dict
            Keyed by ``instrument + str(date)``. Each value holds ``'Date'``,
            ``'Instrument'``, ``'Profit'`` (minus the sum of
            price * signed quantity; commissions are not included),
            ``'Transactions'`` and ``'Gap'`` (``None`` when the pair is not in
            ``data_dict``).
        """
        history_breakdown = {}

        gap_dict = {}
        for ii in range(len(data_dict['dates'])):
            for jj, instrument in enumerate(data_dict['max_gap_instruments'][ii]):
                key = instrument + str(data_dict['dates'][ii])
                gap_dict[key] = data_dict['gaps'][ii][jj]

        # Accept both the list form and the DataFrame form of the history.
        records = pd.DataFrame(self.transaction_history).to_dict('records')
        for transaction in records:
            instrument = transaction['Instrument']
            date = transaction['DateTime'].date()

            key = instrument + str(date)
            if key not in history_breakdown:
                history_breakdown[key] = {
                    'Date': date,
                    'Instrument': instrument,
                    'Profit': 0,
                    'Transactions': [],
                    'Gap': gap_dict.get(key)
                }

            history_breakdown[key]['Profit'] -= transaction['Price'] * transaction['Quantity']
            history_breakdown[key]['Transactions'].append(transaction)

        return history_breakdown

In [None]:
def log(log_file_path, setup_param_dict, portfolio, notes):
    """Write a pretty-printed summary of a run to ``log_file_path``.

    Three dicts are written in order: the setup parameters, the result of
    ``portfolio.logging_dict()`` and the free-text notes. An existing file is
    overwritten, and the parent directory is created when missing.

    Parameters
    ----------
    log_file_path : str
        Destination file.
    setup_param_dict : dict
        Parameters used to generate the setup data.
    portfolio : object
        Anything with a ``logging_dict()`` method.
    notes : str
        Free-text notes for this run.
    """
    log_dir = os.path.dirname(log_file_path)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    # `with` closes the file even if logging_dict() or pprint raises.
    with open(log_file_path, 'w') as log_file:
        pp = pprint.PrettyPrinter(indent=4, stream=log_file)
        pp.pprint({'setup_param_dict': setup_param_dict})
        pp.pprint(portfolio.logging_dict())
        pp.pprint({'Notes': notes})

In [None]:
# Log file for this run: logs/AlgoTrading1/<YYYYMMDD-HHMMSS>.txt
timestr = time.strftime("%Y%m%d-%H%M%S")
log_path = 'logs/AlgoTrading1/'
log_file_name = timestr + ".txt"
log_file_path = log_path + log_file_name
#notes = input("Enter notes for this run: ")
notes = ''

# Each parameter set has its own name. Previously both were assigned to
# `param_dict`, so the first one was silently overwritten.

# For gap_trading_strategy2
GAP_STRATEGY2_PARAMS = {
    'Starting Cash' : 50000,
    'Commission' : 5,
    'Start Date': datetime.date(2018,1,1),
    'End Date': datetime.date(2018,3,1),
    'Dollar Volume Threshold': 25e6,
    'Minimum Gap Threshold': 0.05
}

# For gap_trading_strategy3
GAP_STRATEGY3_PARAMS = {
    'Starting Cash' : 50000,
    'Commission' : 5,
#    'Start Date': datetime.date(2017,1,1),
#    'End Date': datetime.date(2017,2,1),
#    'Dollar Volume Threshold': 25e6,
#    'Minimum Gap Threshold': 0.05,
    'Max Trades': 2,
    'Stop Loss': 0.1,
    'Sell Target': 0.1
}

# Parameter set used by the cells below.
param_dict = GAP_STRATEGY3_PARAMS

In [None]:
# Run one strategy on a fresh portfolio. The strategy mutates `portfolio`
# in place and converts its history lists to DataFrames when it finishes.
portfolio = Portfolio(param_dict)
bear_mode_gap_trading_strategy(portfolio, data_dict, param_dict)
#bull_mode_gap_trading_strategy(portfolio, data_dict, param_dict)
#gap_trading_strategy(portfolio, data_dict, param_dict)

# Write the setup parameters, run summary and notes to the log file.
log(log_file_path, setup_param_dict, portfolio, notes)

In [None]:
# Transactions grouped by instrument + date, each with its profit and gap.
bd = portfolio.get_history_breakdown(data_dict)

In [None]:
class ProfitCharts():
    """Two stacked Plotly charts built from a portfolio's history.

    ``chart1`` plots cash, position value (net value minus cash) and net
    value; ``chart2`` plots profit relative to the first recorded net value.
    Both use a category x-axis of formatted timestamps, with ticks only at
    snapshots taken at 09:30. ``display`` is a VBox holding both charts.

    ``portfolio.portfolio_history`` is indexed by column name and by integer
    label, so it must already be a DataFrame with the columns 'Cash',
    'Net Value' and 'DateTime'.
    """

    def __init__(self, portfolio):
        self.portfolio = portfolio
        history = portfolio.portfolio_history
        cash = history['Cash']
        net_value = history['Net Value']
        stamps = history['DateTime']

        # X values are strings because the axis is categorical.
        x_vals = [stamp.strftime("%y/%m/%d, %H:%M:%S") for stamp in stamps]

        # Ticks are placed only where a snapshot was taken at 09:30.
        market_open = datetime.time(9, 30)
        tick_vals = [pos for pos in range(len(x_vals)) if stamps[pos].time() == market_open]
        tick_text = []
        for pos in tick_vals:
            stamp = stamps[pos]
            if stamp.time() == market_open:
                tick_text.append(stamp.strftime(' %y/%m/%d '))
            else:
                tick_text.append(stamp.strftime(' %H:%M '))

        def scatter_trace(y_vals, label):
            # One scatter trace over the shared x values.
            return dict(type='scatter', x=x_vals, y=y_vals, name=label)

        def chart_layout(y_title):
            # Both charts share everything except the y-axis title.
            return dict(
                title='Profit Chart',
                xaxis=dict(
                    title=dict(text='DateTime'),
                    type='category',
                    categoryorder='category ascending',
                    tickvals=tick_vals,
                    ticktext=tick_text,
                ),
                yaxis=dict(title=dict(text=y_title)),
            )

        value_traces = [
            scatter_trace(cash, 'Cash'),
            scatter_trace(net_value - cash, 'Position'),
            scatter_trace(net_value, 'Net Value'),
        ]
        self.chart1 = go.FigureWidget(data=value_traces, layout=chart_layout('Value'))

        baseline_traces = [
            scatter_trace([nv - net_value[0] for nv in net_value], 'Profit'),
        ]
        self.chart2 = go.FigureWidget(data=baseline_traces, layout=chart_layout('Profit'))

        # DEFINE DISPLAY CONFIGURATION
        self.display = ipywidgets.VBox([self.chart1, self.chart2])

In [None]:
# Build the charts from the finished run; the bare last expression renders the widget.
profit_charts = ProfitCharts(portfolio)
profit_charts.display

In [None]:
class TradingStrategyViewer:
    """Step through the per-day trade history of a portfolio, one chart per day.

    The viewer asks ``portfolio.get_history_breakdown(data_dict)`` for the
    history breakdown, keeps the entries accepted by ``filter_function`` and
    shows one of them at a time as a candlestick chart of that day's minute
    data, with a marker for every transaction. "Previous"/"Next" buttons move
    through the kept entries and wrap around at either end.

    Each history entry is read as a mapping with the keys 'Instrument', 'Date'
    and 'Transactions'; each transaction is read with the keys 'DateTime',
    'Quantity' and 'Price'.

    Args:
        portfolio: object providing ``get_history_breakdown(data_dict)``, whose
            result must offer ``.values()``.
        filter_function: callable taking one history entry and returning a
            truthy value for the entries to keep.
        data_dict: passed through unchanged to ``get_history_breakdown``.
        load_all: when True, ``load_minute_data`` is called for every kept
            entry before the first chart is drawn.
    """

    def __init__(self, portfolio, filter_function, data_dict, load_all=False):
        self.portfolio = portfolio
        self.history_breakdown = self.portfolio.get_history_breakdown(data_dict)
        self.filter_function = filter_function
        self.load_all = load_all
        self.index = 0
        self.last_entry = None

        self.xy_chart = CandlestickChart()

        self.filtered_history = [d for d in self.history_breakdown.values() if self.filter_function(d)]

        self.button_prev = ipywidgets.Button(
            description='Previous',
            disabled=False
        )
        self.button_next = ipywidgets.Button(
            description='Next',
            disabled=False
        )
        self.buttons = ipywidgets.HBox([self.button_prev, self.button_next])

        # Draw the first chart (or report that nothing matched) before wiring
        # the buttons, same order as before.
        self.start()
        self.button_prev.on_click(self.prev_plot)
        self.button_next.on_click(self.next_plot)

    def update(self):
        """Redraw the chart and transaction markers for the entry at ``self.index``.

        Does nothing when no entry passed the filter. Raises ValueError (from
        ``list.index``) when a transaction's 'DateTime' is not present in the
        loaded minute data.
        """
        if len(self.filtered_history) == 0:
            return

        clear_output(wait=False)
        history = self.filtered_history[self.index]
        minute_data = load_minute_data(history['Instrument'], history['Date'])

        ohlc = [list(minute_data[column]) for column in ('Open', 'High', 'Low', 'Close')]

        title = history['Instrument'] + ', ' + history['Date'].strftime('%d %m %Y') + ', Index = ' + str(self.index)
        transactions = history['Transactions']

        self.xy_chart.update(ohlc=ohlc, title=title)

        n_transactions = len(transactions)
        print(n_transactions)
        # Build the timestamp list once instead of once per transaction; it is
        # only needed when there is at least one marker to place.
        date_times = list(minute_data['DateTime']) if n_transactions else []
        # Positional access is kept on purpose: the container type of
        # 'Transactions' is not visible from this class.
        for idx in range(n_transactions):
            transaction = transactions[idx]
            print(str(transaction['DateTime']))
            plot_idx = date_times.index(transaction['DateTime'])

            # Positive quantity is drawn as a buy, everything else as a sell.
            if transaction['Quantity'] > 0:
                self.xy_chart.add_buy_marker(plot_idx, transaction['Price'])
            else:
                self.xy_chart.add_sell_marker(plot_idx, transaction['Price'])

        display(self.buttons)
        self.xy_chart.display()

    def _step(self, offset):
        """Move ``self.index`` by ``offset`` (wrapping around) and redraw."""
        count = len(self.filtered_history)
        if count == 0:
            # Nothing to navigate; avoids a modulo-by-zero.
            return
        self.index = (self.index + offset) % count
        self.update()

    def next_plot(self, placeholder):
        """Button callback: show the next entry. ``placeholder`` is unused."""
        self._step(1)

    def prev_plot(self, placeholder):
        """Button callback: show the previous entry. ``placeholder`` is unused."""
        self._step(-1)

    def start(self):
        """Show the first entry, or print a notice when the filter kept nothing."""
        if len(self.filtered_history) != 0:
            # Process/write to file all the data beforehand if specified
            if self.load_all:
                self.load_all_data()
            self.update()
        else:
            clear_output(wait=False)
            print("No days matching the given filter!")

    def load_all_data(self):
        """Call ``load_minute_data`` for every kept entry, printing progress.

        The loaded data is discarded here; presumably the point is the loader's
        own caching of single-day files (see its comments) - confirm there.
        """
        total = len(self.filtered_history)
        for idx, history in enumerate(self.filtered_history):
            percent = (idx/total)*100
            percent_str = f'{percent:.2f}% done loading single day data'
            # '\r' keeps the progress on a single, overwritten line.
            print(percent_str, end='\r')
            load_minute_data(history['Instrument'], history['Date'])
            

In [None]:
def good_day_filter(daily_history):
    """Return True when the entry's 'Profit' is strictly greater than 100."""
    return bool(daily_history['Profit'] > 100)
    
def bad_day_filter(daily_history):
    """Return True when the entry's 'Profit' is strictly less than -101."""
    # NOTE(review): the threshold is -101 while good_day_filter uses 100 -
    # confirm the asymmetry is intended.
    return bool(daily_history['Profit'] < -101)

In [None]:
# Browse the days accepted by bad_day_filter. load_all=True makes the viewer
# call load_minute_data for every matching day before the first chart is drawn.
tr = TradingStrategyViewer(portfolio, bad_day_filter, data_dict, load_all=True)

In [None]:
# Browse the days accepted by good_day_filter (this rebinds `tr`).
tr = TradingStrategyViewer(portfolio, good_day_filter, data_dict, load_all=True)

In [None]:
# Compute the breakdown once and derive both series from the same entries, so
# profits[i] and gaps[i] always describe the same entry and the (possibly
# costly) breakdown is not built twice.
daily_breakdowns = list(portfolio.get_history_breakdown(data_dict).values())
profits = [day['Profit'] for day in daily_breakdowns]
gaps = [day['Gap'] for day in daily_breakdowns]

In [None]:
# Total of the 'Profit' values over all entries of the history breakdown.
sum(profits)

In [None]:
class Histo:
    """Histogram figure widget, exposed as the ``display`` attribute.

    Args:
        values: sequence of numbers to histogram. When None, the notebook-level
            ``profits`` list is used, which is what ``Histo()`` always did.
        title: figure title.
    """

    def __init__(self, values=None, title='Hist'):
        if values is None:
            # Backward-compatible default: the notebook-level profits list.
            values = profits

        data = [dict(type='histogram', x=values)]
        layout = dict(title=title)

        self.display = go.FigureWidget(data=data, layout=layout)
        
# Show the histogram widget built from the notebook-level `profits` list.
Histo().display

In [None]:
# Histogram over the gap values (x) in which each bar shows the sum of the
# profits (y) that fall into that bar, via histfunc="sum".
x, y = gaps, profits

data = [go.Histogram(x=x, y=y, name="sum", histfunc="sum")]

py.iplot(data)

In [None]:
# Sort the entries by gap, cut them into n_bins consecutive groups of (nearly)
# equal size, and plot each group's mean gap against its total profit.
sort_idx = np.argsort(gaps)
sort_gaps = np.array(gaps)[sort_idx]
sort_profits = np.array(profits)[sort_idx]
n_bins = 25
N = len(sort_gaps)

# Bin boundaries are computed once; bin i covers sorted positions
# [bin_edges[i], bin_edges[i+1]).
bin_edges = [int(np.floor(i/n_bins*N)) for i in range(n_bins + 1)]
# With fewer entries than bins some bins are empty. They are skipped so that
# np.mean is never taken over an empty slice (nan plus a RuntimeWarning).
bin_slices = [slice(lo, hi) for lo, hi in zip(bin_edges[:-1], bin_edges[1:]) if hi > lo]

# NOTE: despite its name, summed_sort_gaps holds the MEAN gap of each bin.
summed_sort_gaps = [np.mean(sort_gaps[s]) for s in bin_slices]
summed_sort_profits = [np.sum(sort_profits[s]) for s in bin_slices]

# Create a trace
trace = go.Scatter(
    x = summed_sort_gaps,
    y = summed_sort_profits,
    mode = 'markers'
)

data = [trace]

# Plot and embed in ipython notebook!
py.iplot(data)

In [None]:
# Total profit of the entries with a negative gap and of those with a positive
# gap; entries whose gap is exactly 0 are counted in neither total.
negative_gap_profits = np.sum([profit for i, profit in enumerate(profits) if gaps[i] < 0])
positive_gap_profits = np.sum([profit for i, profit in enumerate(profits) if gaps[i] > 0])
print('Positive gap profits: ', positive_gap_profits, ' Negative gap profits: ', negative_gap_profits)

In [None]:
# Total profit over all entries (same expression as the earlier sum(profits) cell).
sum(profits)

In [None]:
# Direction/size of gap
# Profit
# Number of trades
# Number of instruments selected on that day
# Direction/size of overall market (SPY) gap
# Dollar volume
# Short or long
# Average 'volatility' of this instrument
# --- some measure of how close the trade was to tripping a stop loss, and/or how the stock moved throughout the day
# High/low of day (as percentage of open price?)
# Distance from open price to initial entry (first minute)


# TODO: plot profit by day / remove the first profit chart
# Jupyter plugin to fold code
# Check for multiple gaps in day (market gap) vs. single gap
# Remove portfolio_history