In [31]:
#
# Hedge Fund ML (sandbox/playground/harness)
#
# (c)2022 Guy Resh
#
# - start fund on 1/1/2020 with $1B seed capital
# - +$1B additional seed injections on 1/1/2021 & 1/1/2022
# - tradable securities selected from Dow 30, Nasdaq 100 & S&P 500 (528 unique; less 8 partial)
# - maintain portfolio with 50-100 "best" stocks
# - maintain diversification with 5-10 different industry sectors
# - generate Buy-and-Hold P/L % statistics for EOY 2020, 2021 & YTD 2022 for all 520 securities (long-only)
# - generate "opportunity" (measured move statistics) P/L using fractal-based reversal pivot points for all 520 securities (long-only)
# - maintain 1%-5% minimum monthly profit (stop at second consecutive losing month or if drawdown exceeds 10%)
# - features/strategies based on CCI, DC, KR, LRBO, RSI, VWAP, Half/SuperTrend, Volume, Velocity/Momentum, etc.
# - 0% commissions assumed (though can/should be accounted for at some point)
# - whole share purchases-only (no fractional; round quantities down to nearest 100?)
# - generate portfolio scenarios that rebalance daily, weekly, monthly and quarterly
# - split-handling?
# - dividend income inclusion?
#

In [32]:
import datetime
import json
import math
import os
import sys
import pandas as pd
import numpy as np
import requests
import matplotlib.pyplot as plt
from finvizfinance.quote import finvizfinance
import pyiqfeed as iq
from pyiqfeed.field_readers import read_posix_ts, date_us_to_datetime, datetime_to_yyyymmdd_hhmmss, us_since_midnight_to_time

from math import floor
from tqdm.notebook import tqdm
from termcolor import colored as cl

plt.style.use('fivethirtyeight')
plt.rcParams['figure.figsize'] = (20,10)

pd.set_option( 'display.max_rows', None )
pd.set_option( 'display.max_columns', None )
pd.set_option( 'display.width', None )
pd.set_option( 'display.max_colwidth', None )

In [33]:
# Root folder of the local IQFeed database: a POSIX-style mount path when running
# on Linux, the F: drive path on any other platform.
home = '/mnt/f/db/IQFeed/' if sys.platform == 'linux' else 'F:/db/IQFeed/'

In [34]:
Dow30Syms = [
"AAPL", "AMGN", "AXP", "BA", "CAT", "CRM", "CSCO", "CVX", "DIS", "DOW",
"GS", "HD", "HON", "IBM", "INTC", "JNJ", "JPM", "KO", "MCD", "MMM",
"MRK", "MSFT", "NKE", "PG", "TRV", "UNH", "V", "VZ", "WBA", "WMT"
]

Nasdaq100Syms = [ # 102
"AAPL", "ABNB", "ADBE", "ADI", "ADP", "ADSK", "AEP", "ALGN", "AMAT", "AMD",
"AMGN", "AMZN", "ANSS", "ASML", "ATVI", "AVGO", "AZN", "BIDU", "BIIB", "BKNG",
"CDNS", "CEG", "CHTR", "CMCSA", "COST", "CPRT", "CRWD", "CSCO", "CSX", "CTAS",
"CTSH", "DDOG", "DLTR", "DOCU", "DXCM", "EA", "EBAY", "EXC", "FAST", "FB",
"FISV", "FTNT", "GILD", "GOOG", "GOOGL", "HON", "IDXX", "ILMN", "INTC", "INTU",
"ISRG", "JD", "KDP", "KHC", "KLAC", "LCID", "LRCX", "LULU", "MAR", "MCHP",
"MDLZ", "MELI", "MNST", "MRNA", "MRVL", "MSFT", "MTCH", "MU", "NFLX", "NTES",
"NVDA", "NXPI", "ODFL", "OKTA", "ORLY", "PANW", "PAYX", "PCAR", "PDD", "PEP",
"PYPL", "QCOM", "REGN", "ROST", "SBUX", "SGEN", "SIRI", "SNPS", "SPLK", "SWKS",
"TEAM", "TMUS", "TSLA", "TXN", "VRSK", "VRSN", "VRTX", "WBA", "WDAY", "XEL",
"ZM", "ZS"
]

SP500Syms = [ # 504
"A", "AAL", "AAP", "AAPL", "ABBV", "ABC", "ABMD", "ABT", "ACN", "ADBE",
"ADI", "ADM", "ADP", "ADSK", "AEE", "AEP", "AES", "AFL", "AIG", "AIZ",
"AJG", "AKAM", "ALB", "ALGN", "ALK", "ALL", "ALLE", "AMAT", "AMCR", "AMD",
"AME", "AMGN", "AMP", "AMT", "AMZN", "ANET", "ANSS", "ANTM", "AON", "AOS",
"APA", "APD", "APH", "APTV", "ARE", "ATO", "ATVI", "AVB", "AVGO", "AVY",
"AWK", "AXP", "AZO", "BA", "BAC", "BAX", "BBWI", "BBY", "BDX", "BEN",
"BF.B", "BIIB", "BIO", "BK", "BKNG", "BKR", "BLK", "BLL", "BMY", "BR",
"BRK.B", "BRO", "BSX", "BWA", "BXP", "C", "CAG", "CAH", "CARR", "CAT",
"CB", "CBOE", "CBRE", "CCI", "CCL", "CDAY", "CDNS", "CDW", "CE", "CEG",
"CERN", "CF", "CFG", "CHD", "CHRW", "CHTR", "CI", "CINF", "CL", "CLX",
"CMA", "CMCSA", "CME", "CMG", "CMI", "CMS", "CNC", "CNP", "COF", "COO",
"COP", "COST", "CPB", "CPRT", "CPT", "CRL", "CRM", "CSCO", "CSX", "CTAS",
"CTLT", "CTRA", "CTSH", "CTVA", "CTXS", "CVS", "CVX", "CZR", "D", "DAL",
"DD", "DE", "DFS", "DG", "DGX", "DHI", "DHR", "DIS", "DISH", "DLR",
"DLTR", "DOV", "DOW", "DPZ", "DRE", "DRI", "DTE", "DUK", "DVA", "DVN",
"DXC", "DXCM", "EA", "EBAY", "ECL", "ED", "EFX", "EIX", "EL", "EMN",
"EMR", "ENPH", "EOG", "EPAM", "EQIX", "EQR", "ES", "ESS", "ETN", "ETR",
"ETSY", "EVRG", "EW", "EXC", "EXPD", "EXPE", "EXR", "F", "FANG", "FAST",
"FB", "FBHS", "FCX", "FDS", "FDX", "FE", "FFIV", "FIS", "FISV", "FITB",
"FLT", "FMC", "FOX", "FOXA", "FRC", "FRT", "FTNT", "FTV", "GD", "GE",
"GILD", "GIS", "GL", "GLW", "GM", "GNRC", "GOOG", "GOOGL", "GPC", "GPN",
"GRMN", "GS", "GWW", "HAL", "HAS", "HBAN", "HCA", "HD", "HES", "HIG",
"HII", "HLT", "HOLX", "HON", "HPE", "HPQ", "HRL", "HSIC", "HST", "HSY",
"HUM", "HWM", "IBM", "ICE", "IDXX", "IEX", "IFF", "ILMN", "INCY", "INTC",
"INTU", "IP", "IPG", "IPGP", "IQV", "IR", "IRM", "ISRG", "IT", "ITW",
"IVZ", "J", "JBHT", "JCI", "JKHY", "JNJ", "JNPR", "JPM", "K", "KEY",
"KEYS", "KHC", "KIM", "KLAC", "KMB", "KMI", "KMX", "KO", "KR", "L",
"LDOS", "LEN", "LH", "LHX", "LIN", "LKQ", "LLY", "LMT", "LNC", "LNT",
"LOW", "LRCX", "LUMN", "LUV", "LVS", "LW", "LYB", "LYV", "MA", "MAA",
"MAR", "MAS", "MCD", "MCHP", "MCK", "MCO", "MDLZ", "MDT", "MET", "MGM",
"MHK", "MKC", "MKTX", "MLM", "MMC", "MMM", "MNST", "MO", "MOH", "MOS",
"MPC", "MPWR", "MRK", "MRNA", "MRO", "MS", "MSCI", "MSFT", "MSI", "MTB",
"MTCH", "MTD", "MU", "NCLH", "NDAQ", "NDSN", "NEE", "NEM", "NFLX", "NI",
"NKE", "NLOK", "NLSN", "NOC", "NOW", "NRG", "NSC", "NTAP", "NTRS", "NUE",
"NVDA", "NVR", "NWL", "NWS", "NWSA", "NXPI", "O", "ODFL", "OGN", "OKE",
"OMC", "ORCL", "ORLY", "OTIS", "OXY", "PARA", "PAYC", "PAYX", "PCAR", "PEAK",
"PEG", "PENN", "PEP", "PFE", "PFG", "PG", "PGR", "PH", "PHM", "PKG",
"PKI", "PLD", "PM", "PNC", "PNR", "PNW", "POOL", "PPG", "PPL", "PRU",
"PSA", "PSX", "PTC", "PVH", "PWR", "PXD", "PYPL", "QCOM", "QRVO", "RCL",
"RE", "REG", "REGN", "RF", "RHI", "RJF", "RL", "RMD", "ROK", "ROL",
"ROP", "ROST", "RSG", "RTX", "SBAC", "SBNY", "SBUX", "SCHW", "SEDG", "SEE",
"SHW", "SIVB", "SJM", "SLB", "SNA", "SNPS", "SO", "SPG", "SPGI", "SRE",
"STE", "STT", "STX", "STZ", "SWK", "SWKS", "SYF", "SYK", "SYY", "T",
"TAP", "TDG", "TDY", "TECH", "TEL", "TER", "TFC", "TFX", "TGT", "TJX",
"TMO", "TMUS", "TPR", "TRMB", "TROW", "TRV", "TSCO", "TSLA", "TSN", "TT",
"TTWO", "TWTR", "TXN", "TXT", "TYL", "UA", "UAA", "UAL", "UDR", "UHS",
"ULTA", "UNH", "UNP", "UPS", "URI", "USB", "V", "VFC", "VLO", "VMC",
"VNO", "VRSK", "VRSN", "VRTX", "VTR", "VTRS", "VZ", "WAB", "WAT", "WBA",
"WBD", "WDC", "WEC", "WELL", "WFC", "WHR", "WM", "WMB", "WMT", "WRB",
"WRK", "WST", "WTW", "WY", "WYNN", "XEL", "XOM", "XRAY", "XYL", "YUM",
"ZBH", "ZBRA", "ZION", "ZTS"
]

# Merge the three index membership lists into one de-duplicated universe.
# np.unique() already returns its result sorted, so no separate sort pass is needed.
uniqueSymbols = np.unique( np.array( Dow30Syms + Nasdaq100Syms + SP500Syms )).tolist()

#
# Exclude symbols that don't have a full complement of data (IPO'd after 1/1/2020?).
# Timestamp noted for each symbol's pickle (presumably its first available bar):
#
#   ABNB.pkl: 2020-12-10 13:40:00
#   CARR.pkl: 2020-03-19 15:45:00
#   CEG.pkl:  2022-01-19 10:25:00
#   LCID.pkl: 2020-09-18 09:40:00
#   OGN.pkl:  2021-05-14 11:35:00
#   OTIS.pkl: 2020-03-19 11:40:00
#   VTRS.pkl: 2020-11-12 09:35:00
#   WBD.pkl:  2022-04-04 09:35:00
#
partialSymbols = ["ABNB","CARR","CEG","LCID","OGN","OTIS","VTRS","WBD"]
for symbol in partialSymbols:
  # list.remove() raises ValueError if the symbol is not part of the universe
  uniqueSymbols.remove( symbol )

print( len( uniqueSymbols ), 'unique symbols' ) # 528-8
#print( list( uniqueSymbols ))

520 unique symbols


In [35]:
#
# Download/persist (some) fundamental data from finviz
#
# The finviz scrape only runs when the JSON cache file is missing; on every run
# dfFundamentals is (re)loaded from that cache file.
fundamentals = {}
fn_json = 'data/fundamentals.json'
if not os.path.isfile( fn_json ):
  failedSymbols = []
  for symbol in uniqueSymbols:
    # the finviz lookup uses '-' where the symbol lists use '.' (e.g. BRK.B -> BRK-B)
    finvizSymbol = symbol.replace( '.', '-' )
    try:
      finvizFundamentals = finvizfinance( finvizSymbol ).ticker_fundament()
      fundamentals[symbol] = {
        'Company':   finvizFundamentals['Company'],
        'Sector':    finvizFundamentals['Sector'],
        'Industry':  finvizFundamentals['Industry'],
        'MarketCap': finvizFundamentals['Market Cap']
      }
    except Exception as err:
      # Best-effort scrape: skip this symbol instead of falling through and
      # storing the previous symbol's fundamentals under the wrong key.
      failedSymbols.append( symbol )
      print( 'WARNING: no finviz fundamentals for', symbol, '-', repr( err ))

  dfFundamentals = pd.DataFrame.from_dict( fundamentals, orient="index" )

  # Round-trip through pandas so the file is written in the same layout that
  # pd.read_json( orient="index" ) reads back below.
  jsonObj = json.loads( dfFundamentals.to_json( orient="index" ))
  jsonFundamentals = json.dumps( jsonObj, indent=2 )
  os.makedirs( os.path.dirname( fn_json ), exist_ok=True ) # open( ..., "w" ) does not create folders
  with open( fn_json, "w" ) as f:
    f.write( jsonFundamentals )
  print( "[", len( fundamentals ), "] fundamentals downloaded; [", len( failedSymbols ), "] failed:", failedSymbols )

dfFundamentals = pd.read_json( fn_json, orient="index" )
#print( dfFundamentals[0:10] )
print( "[", len( dfFundamentals ), "] fundamental data loaded..." )

[ 520 ] fundamental data loaded...


In [36]:
#
# Download 5 minute bars from IQFeed from bgn_prd to end_prd and return as a Pandas DataFrame
#
def get_historical_data( symbol, bgn_prd: datetime.datetime, end_prd: datetime.datetime ):
  """
  Download 5-minute bars for `symbol` from IQFeed and return them as a DataFrame.

  Parameters:
    symbol  - ticker passed to IQFeed as-is
    bgn_prd - start of the requested period
    end_prd - end of the requested period

  Returns:
    DataFrame indexed by 'datetime' (bar date + bar time) with the columns
    open, high, low, close and volume. Only bars inside the 09:30:00-16:00:00
    time filter are requested.

  Any exception raised by the IQFeed request propagates to the caller; the
  listener is removed and the connection closed before it does.
  """
  print( 'get_historical_data(', symbol, ',', datetime_to_yyyymmdd_hhmmss( bgn_prd ), ',', datetime_to_yyyymmdd_hhmmss( end_prd ), ')' )

  histConn = iq.HistoryConn( name="pyiqfeed" )
  histListener = iq.VerboseIQFeedListener( 'History Tick Listener' )
  histConn.connect()
  histConn.add_listener( histListener )

  try:
    # dtype([('date', '<M8[D]'), ('time', '<m8[us]'), ('open_p', '<f8'), ('high_p', '<f8'), ('low_p', '<f8'), ('close_p', '<f8'), ('tot_vlm', '<u8'), ('prd_vlm', '<u8'), ('num_trds', '<u8')])
    ndarray = histConn.request_bars_in_period(
      ticker = symbol,
      interval_len = 300, # 5 min bars
      interval_type = 's',
      bgn_prd = bgn_prd,
      end_prd = end_prd,
      bgn_flt = datetime.time.fromisoformat( '09:30:00' ), # None,
      end_flt = datetime.time.fromisoformat( '16:00:00' ), # None,
      ascend = True,
      max_bars = None,
      label_at_beginning = False,
      timeout = 30
    )
  finally:
    # Always release the listener and the connection, even when the request
    # fails, so a bad symbol does not leak an open IQFeed connection.
    histConn.remove_listener( histListener )
    histConn.disconnect()

  df = pd.DataFrame( ndarray )
  df['datetime'] = df['date'] + df['time'] # day + offset within the day -> full timestamp
  df = df.drop( columns=['date', 'time', 'tot_vlm', 'num_trds'] )
  df = df.rename( columns={'open_p': 'open', 'high_p': 'high', 'low_p': 'low', 'close_p': 'close', 'prd_vlm': 'volume'} )
  df = df.set_index( 'datetime' )
  #print( df.info() )

  return df

In [37]:
#
# Generate/persist and load 5-minute OHLCV data from IQFeed for all 520 symbols
#
# ohlcv maps symbol -> DataFrame of 5-minute bars. Each symbol is downloaded once
# and cached as a pickle; an existing pickle is never refreshed, so end_prd only
# matters the first time a symbol is fetched.
ohlcv = {}
bgn_prd = datetime.datetime( year=2020, month=1, day=1, hour=0, minute=0, second=0 )
end_prd = datetime.datetime.now()

os.makedirs( 'data/5min', exist_ok=True ) # to_pickle() does not create missing folders

for symbol in tqdm( uniqueSymbols, leave=False ):

  fn_pkl = 'data/5min/' + symbol + '.pkl'

  if not os.path.isfile( fn_pkl ): # only download if we don't already have the data locally
    get_historical_data( symbol, bgn_prd, end_prd ).to_pickle( fn_pkl )

  # Always load from the pickle so a fresh download and a cached run yield the same frame.
  ohlcv[symbol] = pd.read_pickle( fn_pkl )

print( "[", len( ohlcv ), "] 5-minute data loaded..." )

  0%|          | 0/520 [00:00<?, ?it/s]

[ 520 ] 5-minute data loaded...


In [38]:
#
# Generate and load hourly, daily and weekly data from 5 minute OHLCV data for all 520 symbols
#
ohlcvH = {}
ohlcvD = {}
ohlcvW = {}

# How each 5-minute column is folded into one bar of the coarser timeframe.
how = {
  'open': 'first',
  'high': 'max',
  'low': 'min',
  'close': 'last',
  'volume': 'sum'
}

def _load_or_build_bars( symbol, rule, subdir ):
  """
  Return `symbol`'s bars resampled to `rule`, using data/<subdir>/<symbol>.pkl as a cache.

  When the pickle is missing, the 5-minute frame in ohlcv[symbol] is resampled
  with the `how` aggregation, empty periods are dropped and the result is
  pickled. The frame is always read back from the pickle.
  """
  fn_pkl = 'data/' + subdir + '/' + symbol + '.pkl'
  if not os.path.isfile( fn_pkl ): # only resample if we don't already have the data locally
    ohlcv[symbol].resample( rule, offset=0 ).apply( how ).dropna().to_pickle( fn_pkl )
  return pd.read_pickle( fn_pkl )

for subdir in ( 'hourly', 'daily', 'weekly' ):
  os.makedirs( 'data/' + subdir, exist_ok=True ) # to_pickle() does not create missing folders

for symbol in tqdm( uniqueSymbols, leave=False ):
  ohlcvH[symbol] = _load_or_build_bars( symbol, '1h', 'hourly' )
  ohlcvD[symbol] = _load_or_build_bars( symbol, '1d', 'daily' )
  ohlcvW[symbol] = _load_or_build_bars( symbol, '1w', 'weekly' )

print( "[", len( ohlcvH ), "] hourly data loaded..." )
print( "[", len( ohlcvD ), "] daily data loaded..." )
print( "[", len( ohlcvW ), "] weekly data loaded..." )

  0%|          | 0/520 [00:00<?, ?it/s]

[ 520 ] hourly data loaded...
[ 520 ] daily data loaded...
[ 520 ] weekly data loaded...


In [39]:
#
# Generate fractal/pivot reversal points
#
# Columns added to every ohlcv[symbol] frame (1 = flagged, 0 = not flagged):
#   fh42 / fl42   - fractal high / low: the bar's high (low) is strictly beyond the
#                   4 bars before it and the 2 bars after it
#   fh42v / fl42v - "verified" fractal: same pivot, and measured from the open of
#                   the bar right after the pivot, price moved at least TP in the
#                   reversal direction within bars +2..+6
#
# NOTE: all four columns look ahead (negative shifts): a fractal at bar t is only
# knowable 2 bars later, and its "verified" flag only 6 bars later.
#
TP = 50 # minimum move, in units of 0.01 of price; TODO: ATR-based?
VERIFY_BARS = range( 2, 7 ) # bar offsets after the pivot scanned for the TP move
startTime = datetime.datetime.now()

for symbol in tqdm( uniqueSymbols, leave=False ):

  bars = ohlcv[symbol]
  high = bars['high']
  low = bars['low']
  entry = bars['open'].shift(-1) # open of the bar immediately after the pivot

  # Comparisons against the NaNs that shift() introduces at either end of the
  # series are False, so bars too close to the edges are never flagged.
  isFractalHigh = (
    (high > high.shift(1)) &
    (high > high.shift(2)) &
    (high > high.shift(3)) &
    (high > high.shift(4)) &
    (high > high.shift(-1)) &
    (high > high.shift(-2))
  )
  isFractalLow = (
    (low < low.shift(1)) &
    (low < low.shift(2)) &
    (low < low.shift(3)) &
    (low < low.shift(4)) &
    (low < low.shift(-1)) &
    (low < low.shift(-2))
  )

  #
  # Determine if fractal/pivot reversal points were a "verified" win
  # (TP reached on any of the bars listed in VERIFY_BARS)
  #
  droppedByTP = pd.Series( False, index=bars.index ) # after a fractal high: a later low sits TP below entry
  roseByTP = pd.Series( False, index=bars.index )    # after a fractal low: a later high sits TP above entry
  for k in VERIFY_BARS:
    droppedByTP |= ((entry - low.shift(-k)) / .01) >= TP
    roseByTP |= ((high.shift(-k) - entry) / .01) >= TP

  bars['fh42'] = np.where( isFractalHigh, 1, 0 )
  bars['fl42'] = np.where( isFractalLow, 1, 0 )
  bars['fh42v'] = np.where( isFractalHigh & droppedByTP, 1, 0 )
  bars['fl42v'] = np.where( isFractalLow & roseByTP, 1, 0 )

deltaTime = datetime.datetime.now() - startTime

print( "[", len( ohlcv ), "] Fractal/Pivot generation elapsed {:.3f}s".format( deltaTime.total_seconds() )) # seconds

  0%|          | 0/520 [00:00<?, ?it/s]

[ 520 ] Fractal/Pivot generation elapsed 19.813s


In [40]:
# Spot-check: first ten 5-minute AAPL bars, including the fractal columns.
ohlcv['AAPL'].head( 10 )

Unnamed: 0_level_0,open,high,low,close,volume,fh42,fl42,fh42v,fl42v
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-01-02 09:35:00,296.24,296.926,295.19,296.87,1649209,0,0,0,0
2020-01-02 09:40:00,296.93,297.95,296.9,297.27,1071400,0,0,0,0
2020-01-02 09:45:00,297.27,297.71,296.87,297.12,674630,0,0,0,0
2020-01-02 09:50:00,297.13,297.42,296.8,296.83,536942,0,0,0,0
2020-01-02 09:55:00,296.8,297.3,296.7,297.28,434840,0,0,0,0
2020-01-02 10:00:00,297.3,298.15,297.25,297.97,720271,0,0,0,0
2020-01-02 10:05:00,297.99,298.08,297.51,297.8,524797,0,0,0,0
2020-01-02 10:10:00,297.8,298.41,297.8,298.26,580352,1,0,1,0
2020-01-02 10:15:00,298.2693,298.31,297.3101,297.39,556495,0,0,0,0
2020-01-02 10:20:00,297.3701,297.61,296.93,297.22,527967,0,0,0,0


In [41]:
seedCapital = 1000000000
investmentCapitalPerSymbol = seedCapital / len( uniqueSymbols )
print( 'seedCapital=[ ${:,.2f}'.format( seedCapital ), '] investmentCapitalPerSymbol=[ ${:,.2f}'.format( investmentCapitalPerSymbol ), ']' )

# One buy-and-hold window per row: (column suffix, bar whose open is the entry
# price, bar whose close is the exit price). The same per-symbol capital is
# deployed in every window.
BNH_PERIODS = [
  ( '2020', '2020-01-02 09:35:00', '2020-12-31 16:00:00' ),
  ( '2021', '2021-01-04 09:35:00', '2021-12-31 16:00:00' ),
  # YTD 2022. The exit bar previously pointed at 2021-04-14, which paired a 2022
  # open with a 2021 close. Assumes the cached bars reach 2022-04-14 16:00 -
  # TODO confirm (re-download the pickles otherwise).
  ( '2022', '2022-01-03 09:35:00', '2022-04-14 16:00:00' ),
]
BNH_STATS = [ 'open', 'close', 'pctChg', 'invest', 'PnL' ]

def _buy_and_hold_row( bars, entryTs, exitTs, capital ):
  """
  Long-only buy-and-hold figures for one symbol over one window.

  Returns [open, close, pctChg (in percent), invest, PnL], where invest is the
  cost of the whole shares affordable with `capital` at the entry open and PnL
  is invest multiplied by the fractional price change.
  Raises KeyError when the frame has no bar at entryTs or exitTs.
  """
  openPx = bars['open'][entryTs]
  closePx = bars['close'][exitTs]
  pctChg = (closePx - openPx) / openPx
  invested = math.floor( capital / openPx ) * openPx # whole shares only
  return [ openPx, closePx, pctChg * 100.0, invested, invested * pctChg ]

dictPortfolio = {}
for symbol in tqdm( uniqueSymbols, leave=False ):
  row = []
  for _, entryTs, exitTs in BNH_PERIODS:
    row += _buy_and_hold_row( ohlcv[symbol], entryTs, exitTs, investmentCapitalPerSymbol )
  dictPortfolio[symbol] = row
print( end="\r" )
#print( dictPortfolio )

# Columns: open2020, close2020, pctChg2020, invest2020, PnL2020, open2021, ...
dfPortfolio = pd.DataFrame.from_dict(
  dictPortfolio, orient='index',
  columns=[ stat + label for label, _, _ in BNH_PERIODS for stat in BNH_STATS ]
)

seedCapital=[ $1,000,000,000.00 ] investmentCapitalPerSymbol=[ $1,923,076.92 ]


  0%|          | 0/520 [00:00<?, ?it/s]



In [46]:
def _top_by_pct_change( portfolio, year, n=50 ):
  """
  Return the `n` rows of `portfolio` with the largest pctChg<year>.

  The result has the columns symbol, open<year>, close<year>, pctChg<year> and a
  0-based rank as its index. `portfolio` itself is left untouched (no in-place
  sort), so re-running the cell cannot change any other cell's view of it.
  """
  pctCol = 'pctChg' + year
  return (
    portfolio[[ 'open' + year, 'close' + year, pctCol ]]
    .sort_values( by=pctCol, ascending=False )
    .head( n )
    .rename_axis( 'symbol' )
    .reset_index()
  )

df2020Top50 = _top_by_pct_change( dfPortfolio, '2020' )
df2021Top50 = _top_by_pct_change( dfPortfolio, '2021' )
df2022Top50 = _top_by_pct_change( dfPortfolio, '2022' )

with pd.option_context( 'display.width', 1000, 'display.precision', 2 ):
  print( df2020Top50, df2021Top50, df2022Top50, sep='\n\n' )


   symbol  open2020  close2020  pctChg2020
0    ENPH     26.37     175.47      565.42
1    MRNA     19.57     104.47      433.83
2      ZM     68.80     337.16      390.06
3     PDD     38.50     177.67      361.48
4      ZS     46.87     199.71      326.09
5    CRWD     50.03     211.82      323.39
6    ETSY     44.71     177.84      297.76
7    PENN     25.97      86.38      232.61
8    SEDG     97.00     319.12      228.99
9    DOCU     74.31     222.39      199.27
10   MELI    576.94    1674.80      190.29
11   DDOG     38.22      98.42      157.51
12     JD     35.96      87.85      144.30
13   GNRC    101.38     227.42      124.32
14   NVDA    238.75     522.12      118.69
15   OKTA    116.75     254.27      117.79
16   PYPL    109.47     234.31      114.04
17   BBWI     18.25      37.18      103.73
18   MPWR    180.12     366.23      103.33
19    ALB     73.50     147.51      100.69
20    AMD     46.86      91.72       95.73
21    FCX     13.35      26.01       94.83
22   CDNS  

In [None]:
#with pd.option_context( 'display.width', 1000, 'display.precision', 2 ):
#  print( dfPortfolio )

# Buy-and-hold P/L summed over every symbol, per window.
BnHPnL2020All, BnHPnL2021All, BnHPnL2022All = (
  dfPortfolio[pnlCol].sum() for pnlCol in ( 'PnL2020', 'PnL2021', 'PnL2022' )
)

# Percentages are expressed relative to the full seed capital.
for fmt, pnl in (
  ( "\n2020 B&H PnL (All)=[ ${:,.2f}", BnHPnL2020All ),
  ( "2021 B&H PnL (All)=[ ${:,.2f}", BnHPnL2021All ),
  ( "YTD 2022 B&H PnL (All)=[ ${:,.2f}", BnHPnL2022All ),
):
  print( fmt.format( pnl ), "({:.1f}%) ] ".format( (pnl / seedCapital) * 100.0 ))

# Same totals restricted to the symbols that finished the window with a gain.
winners2020 = dfPortfolio.loc[ dfPortfolio['pctChg2020'] > 0.0, 'PnL2020' ]
winners2021 = dfPortfolio.loc[ dfPortfolio['pctChg2021'] > 0.0, 'PnL2021' ]
winners2022 = dfPortfolio.loc[ dfPortfolio['pctChg2022'] > 0.0, 'PnL2022' ]

BnHPnL2020WinnersOnly = winners2020.sum()
BnHPnL2021WinnersOnly = winners2021.sum()
BnHPnL2022WinnersOnly = winners2022.sum()

numBnHPnL2020WinnersOnly = winners2020.count()
numBnHPnL2021WinnersOnly = winners2021.count()
numBnHPnL2022WinnersOnly = winners2022.count()

for fmt, pnl, numWinners in (
  ( "\n2020 B&H PnL (Winners-only)=[ ${:,.2f}", BnHPnL2020WinnersOnly, numBnHPnL2020WinnersOnly ),
  ( "2021 B&H PnL (Winners-only)=[ ${:,.2f}", BnHPnL2021WinnersOnly, numBnHPnL2021WinnersOnly ),
  ( "YTD 2022 B&H PnL (Winners-only)=[ ${:,.2f}", BnHPnL2022WinnersOnly, numBnHPnL2022WinnersOnly ),
):
  print( fmt.format( pnl ), "({:.1f}%;".format( (pnl / seedCapital) * 100.0 ), numWinners, 'of', len( uniqueSymbols ), ')]' )

# Stop the cell here: the code after this line uses `tsla`, which is not defined
# anywhere in the visible notebook.
sys.exit( 0 )

# ROC CALCULATION

def get_roc(close, n):
  """
  Rate of change of `close` over `n` rows, in percent.

  Each value is the change since the value `n` rows earlier, divided by that
  earlier value and multiplied by 100. The first `n` results are NaN.
  """
  return (close.diff(n) / close.shift(n)) * 100

# KST CALCULATION

def get_kst(close, sma1, sma2, sma3, sma4, roc1, roc2, roc3, roc4, signal):
  """
  Build the KST line and its signal line from a close series.

  Four rate-of-change series (look-backs roc1..roc4) are each smoothed with a
  rolling mean (windows sma1..sma4) and added together with weights 1, 2, 3
  and 4. The signal line is the rolling mean of that sum over `signal` rows.

  Returns the tuple (kst, signal_line).
  """
  legs = ((roc1, sma1, 1), (roc2, sma2, 2), (roc3, sma3, 3), (roc4, sma4, 4))
  weighted = [
    get_roc(close, lookback).rolling(window).mean() * weight
    for lookback, window, weight in legs
  ]
  kst = weighted[0] + weighted[1] + weighted[2] + weighted[3]
  signal_line = kst.rolling(signal).mean()
  return kst, signal_line

# NOTE(review): `tsla` is not defined anywhere in the visible notebook, and this
# code sits after a sys.exit( 0 ) call in the same cell. Presumably it expects a
# price DataFrame with a 'close' column and a datetime index - confirm before
# re-enabling.
# get_kst arguments: smoothing windows 10/10/10/15, ROC look-backs 10/15/20/30,
# signal-line window 9.
tsla['kst'], tsla['signal_line'] = get_kst(tsla['close'], 10, 10, 10, 15, 10, 15, 20, 30, 9)
# Keep rows from 2022-01-01 onward. This rebinds `tsla`, so the earlier rows are
# not available to later statements.
tsla = tsla[tsla.index >= '2022-01-01']
print(tsla.tail())

# KST INDICATOR PLOT

# Two stacked panels on an 11-row grid: closing price in rows 0-4, KST with its
# signal line in rows 6-10 (row 5 is left empty as a gap).
ax1 = plt.subplot2grid((11,1), (0,0), rowspan = 5, colspan = 1)
ax2 = plt.subplot2grid((11,1), (6,0), rowspan = 5, colspan = 1)
ax1.plot(tsla['close'], linewidth = 2.5)
ax1.set_title('TSLA CLOSING PRICES')
ax2.plot(tsla['kst'], linewidth = 2, label = 'KST', color = 'orange')
ax2.plot(tsla['signal_line'], linewidth = 2, label = 'SIGNAL', color = 'mediumorchid')
ax2.legend()
ax2.set_title('TSLA KST')
plt.show()

# KST CROSSOVER TRADING STRATEGY

def implement_kst_strategy(prices, kst_line, signal_line):
  """
  Turn KST / signal-line crossovers into alternating buy and sell signals.

  A buy is emitted on the bar where KST crosses from below to above the signal
  line, a sell where it crosses from above to below. Consecutive signals in the
  same direction are suppressed, so buys and sells alternate.

  Parameters:
    prices      - sequence of prices (list, array or Series), one per bar
    kst_line    - KST values, same length as prices
    signal_line - signal-line values, same length as prices

  Returns three lists, each with one entry per bar:
    buy_price  - the bar's price on a buy signal, otherwise NaN
    sell_price - the bar's price on a sell signal, otherwise NaN
    kst_signal - 1 on a buy, -1 on a sell, otherwise 0

  The first bar never produces a signal because it has no previous bar to cross
  from (indexing i-1 there used to wrap around to the last bar). Bars where
  either line is NaN never produce a signal, as comparisons with NaN are False.
  """
  # Work on plain arrays so access is strictly positional, whatever index the
  # inputs carry.
  price_vals = np.asarray(prices)
  kst_vals = np.asarray(kst_line, dtype=float)
  sig_vals = np.asarray(signal_line, dtype=float)

  buy_price = []
  sell_price = []
  kst_signal = []
  signal = 0 # last emitted signal: 0 = none yet, 1 = buy, -1 = sell

  for i in range(len(kst_vals)):
    has_prev = i > 0
    crossed_up = has_prev and kst_vals[i-1] < sig_vals[i-1] and kst_vals[i] > sig_vals[i]
    crossed_down = has_prev and kst_vals[i-1] > sig_vals[i-1] and kst_vals[i] < sig_vals[i]

    if crossed_up and signal != 1:
      buy_price.append(price_vals[i])
      sell_price.append(np.nan)
      signal = 1
      kst_signal.append(signal)
    elif crossed_down and signal != -1:
      buy_price.append(np.nan)
      sell_price.append(price_vals[i])
      signal = -1
      kst_signal.append(signal)
    else:
      # no crossover, or a repeat of the signal already in force
      buy_price.append(np.nan)
      sell_price.append(np.nan)
      kst_signal.append(0)

  return buy_price, sell_price, kst_signal

# NOTE(review): relies on `tsla` already carrying the 'kst' and 'signal_line'
# columns; `tsla` itself is not defined in the visible notebook.
buy_price, sell_price, kst_signal = implement_kst_strategy(tsla['close'], tsla['kst'], tsla['signal_line'])

# TRADING SIGNALS PLOT

# Top panel: closing price with buy (green ^) and sell (red v) markers. The
# marker lists hold NaN on bars without a signal, and linewidth = 0 draws
# markers only. Bottom panel: KST with its signal line.
ax1 = plt.subplot2grid((11,1), (0,0), rowspan = 5, colspan = 1)
ax2 = plt.subplot2grid((11,1), (6,0), rowspan = 5, colspan = 1)
ax1.plot(tsla['close'], linewidth = 2, label = 'TSLA')
ax1.plot(tsla.index, buy_price, marker = '^', markersize = 12, linewidth = 0, color = 'green', label = 'BUY SIGNAL')
ax1.plot(tsla.index, sell_price, marker = 'v', markersize = 12, linewidth = 0, color = 'r', label = 'SELL SIGNAL')
ax1.legend()
ax1.set_title('TSLA KST TRADING SIGNALS')
ax2.plot(tsla['kst'], linewidth = 2, label = 'KST', color = 'orange')
ax2.plot(tsla['signal_line'], linewidth = 2, label = 'SIGNAL', color = 'mediumorchid')
ax2.legend()
ax2.set_title('TSLA KST')
plt.show()

# STOCK POSITION

# Carry the holding state forward bar by bar: a buy (1) opens the position, a
# sell (-1) closes it, and a 0 keeps whatever was held on the previous bar.
# The state starts at 1 (long) before the first signal. That is what the
# previous code amounted to for signals in {-1, 0, 1}: its `> 1` test could
# never be true, so every slot was pre-filled with 1, and bar 0 read that
# pre-filled value through position[-1].
held = 1
position = []
for flag in kst_signal:
  if flag == 1:
    held = 1
  elif flag == -1:
    held = 0
  position.append(held)

close_price = tsla['close']
kst = tsla['kst']
signal_line = tsla['signal_line']
# From here on kst_signal and position are one-column DataFrames aligned to
# tsla's index (they were plain lists above).
kst_signal = pd.DataFrame({'kst_signal': kst_signal}, index = tsla.index)
position = pd.DataFrame({'kst_position': position}, index = tsla.index)

frames = [close_price, kst, signal_line, kst_signal, position]
strategy = pd.concat(frames, join = 'inner', axis = 1)

print( strategy )
print( strategy[35:40] )

# BACKTESTING

# Bar-to-bar price change: row i is close[i+1] - close[i], so there is one row
# fewer than there are bars.
tsla_ret = pd.DataFrame({'returns': np.diff(tsla['close'])})

# The position held on bar i earns the move from bar i to bar i+1. Positions
# are taken as a plain array so the pairing is strictly positional (the
# strategy frame is indexed by timestamp, not by 0..n-1).
held_positions = strategy['kst_position'].to_numpy()[:len(tsla_ret)]
kst_strategy_ret_df = pd.DataFrame({'kst_returns': tsla_ret['returns'].to_numpy() * held_positions})

investment_value = 100000
number_of_stocks = floor(investment_value/tsla['close'].iloc[0]) # whole shares at the first close

kst_investment_ret_df = pd.DataFrame({'investment_returns': number_of_stocks * kst_strategy_ret_df['kst_returns']})
total_investment_ret = round(sum(kst_investment_ret_df['investment_returns']), 2)
profit_percentage = floor((total_investment_ret/investment_value)*100)

print( 'Profit gained from the KST strategy by investing $100k in TSLA : {}'.format( total_investment_ret ))
print( 'Profit percentage of the KST strategy : {}%'.format( profit_percentage ))

# SPY ETF COMPARISON

def get_benchmark(start_date, investment_value):
  """
  Per-bar dollar returns of buying and holding SPY from `start_date`.

  Parameters:
    start_date       - first date of the benchmark window (anything
                       pd.Timestamp accepts, e.g. '2022-01-01')
    investment_value - cash available; only whole shares are bought

  Returns a DataFrame with a single 'investment_returns' column: the change in
  value of the position from each bar to the next. Its sum is the total
  benchmark profit.

  Raises ValueError when no SPY bars come back for the window.

  The window ends at the notebook-level `end_prd`.
  """
  # Request only the window being benchmarked. Previously start_date was
  # ignored and the notebook-level bgn_prd was used, so the benchmark covered a
  # different period than the strategy it is compared against.
  first_day = pd.Timestamp(start_date).to_pydatetime()
  spy = get_historical_data( 'SPY', first_day, end_prd )['close']
  if spy.empty:
    raise ValueError( 'no SPY bars returned on or after {}'.format( start_date ))

  # Size the position at the entry price (first bar), matching how the
  # strategy's share count is derived from its first close.
  number_of_stocks = floor(investment_value/spy.iloc[0])

  benchmark_investment_ret_df = pd.DataFrame({'investment_returns': number_of_stocks * np.diff(spy)})
  return benchmark_investment_ret_df

# Benchmark the same $100k stake against SPY over the same start date.
investment_value = 100000
benchmark = get_benchmark('2022-01-01', investment_value)

total_benchmark_investment_ret = round(sum(benchmark['investment_returns']), 2)
benchmark_return_ratio = total_benchmark_investment_ret/investment_value
benchmark_profit_percentage = floor(benchmark_return_ratio*100)

# Both percentages were floored to whole numbers, so the gap is a whole number too.
profit_gap = profit_percentage - benchmark_profit_percentage

print( 'Benchmark profit by investing $100k : {}'.format( total_benchmark_investment_ret ))
print( 'Benchmark Profit percentage : {}%'.format( benchmark_profit_percentage ))
print( 'KST Strategy profit is {}% higher than the Benchmark Profit'.format( profit_gap ))