In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import os
from dotenv import load_dotenv
import requests
from datetime import timedelta
import json
import numpy as np
import time


# Read environment variables from the repo-level .env file (presumably where
# CONSUMER_KEY lives); override=True lets the file win over existing values.
load_dotenv(dotenv_path="../../.env", override=True)
# Allow wide frames to render up to 500 columns before pandas elides them.
pd.options.display.max_columns = 500

In [2]:
# API key for the market-data service; None when CONSUMER_KEY is not set.
consumerKey = os.getenv("CONSUMER_KEY")

def refresh_pricing(ticker: str="$SPX.X",
                periodType: str='day',
                period: str='250',
                frequencyType: str='daily',
                frequency: str='1'
                 ) -> pd.DataFrame:
    """Download closing prices for one symbol from the TD Ameritrade price-history endpoint.

    The API key is read from the ``CONSUMER_KEY`` environment variable on every call.

    Args:
        ticker (str, optional): Symbol placed in the request path. Defaults to "$SPX.X".
        periodType (str, optional): Value of the ``periodType`` query argument. Defaults to 'day'.
        period (str, optional): Value of the ``period`` query argument. Defaults to '250'.
        frequencyType (str, optional): Value of the ``frequencyType`` query argument. Defaults to 'daily'.
        frequency (str, optional): Value of the ``frequency`` query argument. Defaults to '1'.

    Returns:
        pd.DataFrame: One row per returned candle with the columns ``symbol``,
        ``date`` (a ``datetime.date`` derived from the candle's millisecond
        timestamp) and ``close``. The frame is indexed by the same dates.

    Raises:
        KeyError: If the response carries no ``candles`` entry or an empty one.
        requests.HTTPError: If the service answers with an HTTP error status.
    """
    consumerKey = os.environ.get("CONSUMER_KEY")

    url = f"https://api.tdameritrade.com/v1/marketdata/{ticker}/pricehistory"
    # Let requests build and encode the query string rather than formatting it by hand.
    response = requests.get(url,
            params={'apikey' : consumerKey,
                    'periodType' : periodType,
                    'period' : period,
                    'frequencyType' : frequencyType,
                    'frequency' : frequency},
            # Without a timeout a single stalled connection blocks the whole download loop.
            timeout=30)
    # Surface HTTP failures (bad key, throttling, ...) instead of a confusing KeyError below.
    response.raise_for_status()

    candles = json.loads(response.content).get('candles')
    if not candles:
        # Same exception type callers saw before, but with a message that names the cause.
        raise KeyError(f"no price candles returned for ticker {ticker!r}")

    dat = pd.DataFrame(candles)
    dat['date'] = pd.to_datetime(dat['datetime'],unit='ms').dt.date
    dat.index = dat['date'].values

    dat['symbol'] = ticker
    return dat[['symbol','date','close']].copy()

In [16]:
# Two years of weekly AAPL candles; the dates in this frame are reused further
# down as the week-start calendar.
PRC = refresh_pricing(
    ticker="AAPL",
    periodType='year',
    period='2',
    frequencyType='weekly',
    frequency='1',
)
PRC

Unnamed: 0,symbol,date,close
2020-12-07,AAPL,2020-12-07,122.410
2020-12-14,AAPL,2020-12-14,126.655
2020-12-21,AAPL,2020-12-21,131.970
2020-12-28,AAPL,2020-12-28,132.690
2021-01-04,AAPL,2021-01-04,132.050
...,...,...,...
2022-10-31,AAPL,2022-10-31,138.380
2022-11-07,AAPL,2022-11-07,149.700
2022-11-14,AAPL,2022-11-14,151.290
2022-11-21,AAPL,2022-11-21,148.110


In [21]:
# One row per weekly candle date, tagged with wkstrt=1 so a later left-merge
# can tell which daily dates begin a week.
weekDates = PRC[['date']].assign(wkstrt=1)

In [4]:
# Symbol universe workbook; header=1 means the column names sit on the second row.
sourceFile = '../../data/universe.xlsx'
symbolsDF = (
    pd.read_excel(sourceFile, header=1)
    .loc[:, ['Symbol', 'Sector', 'Industry']]
    .copy()
)


  warn("Workbook contains no default style, apply openpyxl's default")


In [5]:
# Preview the loaded universe (Symbol / Sector / Industry columns).
symbolsDF

Unnamed: 0,Symbol,Sector,Industry
0,AAN,Consumer Discretionary,Specialty Retail
1,AAP,Consumer Discretionary,Specialty Retail
2,AAPL,Information Technology,"Technology Hardware, Storage & Peripherals"
3,AAWW,Industrials,Air Freight & Logistics
4,ABB,Industrials,Electrical Equipment
...,...,...,...
1697,ZEUS,Materials,Metals & Mining
1698,ZIM,Industrials,Marine
1699,ZION,Financials,Banks
1700,ZTO,Industrials,Air Freight & Logistics


In [15]:
# Download two years of daily closes for every symbol in the universe.
# Frames are collected in a list and concatenated once at the end; growing a
# DataFrame with pd.concat inside the loop re-copies all earlier rows each time.
symbolFrames = []
for s in symbolsDF['Symbol'].unique():
    try:
        # Uses its own name so the weekly `PRC` frame built earlier is not overwritten.
        symbolPrices = refresh_pricing(ticker=s,
                periodType='year',
                period='2',
                frequencyType='daily',
                frequency='1'
                 )
    except Exception as err:
        # Best effort: a symbol that cannot be priced is skipped, but the reason
        # is reported instead of vanishing. `Exception` (unlike a bare except)
        # still lets Ctrl-C / KeyboardInterrupt stop the download.
        print(f"{s}: skipped ({err!r})")
    else:
        symbolFrames.append(symbolPrices)
        print(s)
    # Throttle after every attempt, successful or not, to stay under the API rate limit.
    time.sleep(.4)

# pd.concat raises on an empty list, so fall back to an empty frame when nothing downloaded.
allSymbols = pd.concat(symbolFrames) if symbolFrames else pd.DataFrame()



AAN
AAP
AAPL
AAWW
ABB
ABBV
ABC
ABCB
ABCL
ABEV
ABG
ABM
ABR
ABT
ACA
ACCO
ACEL
ACHC
ACI
ACIW
ACLS
ACMR
ACN
ACNT
ACR
ACRE
ACTG
ADM
ADV
AEE
AEIS
AEO
AEP
AFCG
AFG
AFYA
AGCO
AGM
AGR
AGRO
AHH
AIG
AIR
AIT
AIV
AIZ
AKAM
AKO.A
AKO.B
ALB
ALE
ALG
ALLE
ALLY
ALRS
ALSN
ALTO
AM
AMAL
AMAT
AMBP
AMCR
AME
AMED
AMEH
AMG
AMGN
AMK
AMKR
AMN
AMOV
AMP
AMPH
AMR
AMRK
AMTB
AMTD
AMX
AN
ANDE
AON
AOS
AOSL
APH
APLE
APOG
AR
ARC
ARCB
ARCC
ARCH
ARCO
ARI
ARKO
ARKR
ARLP
AROW
ARTNA
ARTW
ARW
ASAI
ASB
ASC
ASGN
ASIX
ASO
ASR
ASRT
ASRV
ASTL
ASX
ASYS
ATAX
ATCO
ATKR
ATLC
ATO
ATR
ATSG
AUBN
AVA
AVAL
AVB
AVD
AVGO
AVNS
AVNT
AVNW
AVT
AVTR
AVY
AWI
AX
AXP
AXR
AXTA
AXTI
AYI
AZEK
AZZ
BAC
BAH
BAK
BALL
BAM
BANC
BANF
BANR
BANX
BAP
BBAR
BBCP
BBSI
BBVA
BBW
BC
BCBP
BCC
BCH
BCML
BCO
BCSF
BCX
BDC
BECN
BELFA
BEPC
BERY
BFC
BFH
BFIN
BFS
BFST
BG
BGSF
BHE
BHF
BHP
BJ
BK
BKCC
BKE
BKH
BKI
BKU
BLD
BLDR
BLMN
BLX
BMA
BMRC
BMY
BNL
BNTX
BOH
BOKF
BON
BOOT
BOSC
BOTJ
BPOP
BPRN
BPT
BPYPP
BRC
BRID
BRKL
BRO
BRSP
BRT
BRX
BRY
BSAC
BSBK
BSBR
BSET
BSM
BSMX
BSVN
BTI
BTU
BU

In [17]:
# Swap the repeated date index for a unique positional one, keep only rows
# with a positive close, then renumber the survivors from zero.
allSymbols = allSymbols.reset_index(drop=True)
allSymbols = allSymbols.loc[allSymbols['close'] > 0].reset_index(drop=True)


In [364]:
# Tag every daily row with the start date of the week it belongs to.
analysis = allSymbols.merge(weekDates, how='left', on='date')
# Rows whose date is a weekly candle date carry that date; all others start as NaN ...
analysis['wkstrt'] = np.where(analysis['wkstrt'].eq(1), analysis['date'], np.nan)
# ... and inherit the most recent week start seen for the same symbol.
analysis['wkstrt'] = analysis.groupby('symbol')['wkstrt'].ffill()
# Rows that still have a missing value (e.g. before a symbol's first week start) are dropped.
analysis = analysis.dropna().reset_index(drop=True)


In [365]:
# Weekly view: keep the last daily row of each (symbol, week start) group.
# reset_index() preserves the daily row number in an 'index' column.
analysisWeekly = (
    analysis
    .groupby(['symbol', 'wkstrt'])
    .tail(1)
    .reset_index()
)

In [366]:
# Inspect the one-row-per-symbol-per-week frame before features are added.
analysisWeekly

Unnamed: 0,index,symbol,date,close,wkstrt
0,4,AAN,2020-12-11,17.72,2020-12-07
1,9,AAN,2020-12-18,18.16,2020-12-14
2,13,AAN,2020-12-24,16.78,2020-12-21
3,17,AAN,2020-12-31,18.96,2020-12-28
4,22,AAN,2021-01-08,20.19,2021-01-04
...,...,...,...,...,...
154690,834468,ZYXI,2022-11-04,12.77,2022-10-31
154691,834473,ZYXI,2022-11-11,13.00,2022-11-07
154692,834478,ZYXI,2022-11-18,13.85,2022-11-14
154693,834482,ZYXI,2022-11-25,13.53,2022-11-21


In [367]:

def make_short_bollinger(dat,window=25):
    """Add median-centred Bollinger-style bands computed separately per symbol.

    For every row the statistics cover the `window` closes that precede it
    within the same symbol; the row's own close is excluded by the one-step
    shift. Rows with fewer than `window` earlier closes get NaN.

    Args:
        dat: Frame with at least the columns 'symbol' and 'close'. Any index
            and any row order are accepted.
        window: Number of preceding closes in each rolling window.

    Returns:
        A copy of `dat` with the added columns 'bolMedian', 'bolStd',
        'bolUpper' (median + 2 std) and 'bolLower' (median - 2 std).
    """
    d = dat.copy()
    closeBySymbol = d.groupby('symbol')['close']
    # transform() runs shift + rolling inside each symbol and hands the result back
    # aligned to the original rows. The earlier version rolled over the flattened
    # shifted series and re-aligned by position, which is only right for a default
    # RangeIndex with each symbol's rows contiguous.
    d['bolMedian'] = closeBySymbol.transform(
        lambda closes: closes.shift(1).rolling(window=window).median())
    d['bolStd'] = closeBySymbol.transform(
        lambda closes: closes.shift(1).rolling(window=window).std())
    d['bolUpper']=d['bolMedian']+2*d['bolStd']
    d['bolLower']=d['bolMedian']-2*d['bolStd']

    return d

def make_mad(dat):
    """Return a copy of `dat` with a 'MAD' column: close / bolMedian - 1.

    The value is the close's relative distance from the rolling median, so
    `dat` must already contain 'close' and 'bolMedian'. The input frame is
    left untouched.
    """
    return dat.assign(MAD=dat['close'] / dat['bolMedian'] - 1)

def make_low_thresh_flag(dat,thresh):
    """Return a copy of `dat` with a 0/1 'lowRange' flag.

    The flag is 1 where 'close' is strictly below a cutoff that blends the
    lower band and the median: bolLower*thresh + bolMedian*(1-thresh).
    thresh=1 puts the cutoff on the lower band, thresh=0 on the median.
    """
    flagged = dat.copy()
    cutoff = flagged['bolLower']*thresh + flagged['bolMedian']*(1-thresh)
    belowCutoff = flagged['close'] < cutoff
    flagged['lowRange'] = np.where(belowCutoff, 1, 0)
    return flagged


def make_high_thresh_flag(dat,thresh):
    """Return a copy of `dat` with a 0/1 'highRange' flag.

    The flag is 1 where 'close' is strictly above a cutoff that blends the
    upper band and the median: bolUpper*thresh + bolMedian*(1-thresh).
    thresh=1 puts the cutoff on the upper band, thresh=0 on the median.
    """
    flagged = dat.copy()
    cutoff = flagged['bolUpper']*thresh + flagged['bolMedian']*(1-thresh)
    aboveCutoff = flagged['close'] > cutoff
    flagged['highRange'] = np.where(aboveCutoff, 1, 0)
    return flagged


In [368]:
# Weekly frame: bands -> deviation from median -> drop warm-up rows ->
# flag closes above the weekly median (thresh=0 puts the cutoff on the median).
analysisWeekly = (
    analysisWeekly
    .pipe(make_short_bollinger)
    .pipe(make_mad)
    .dropna()
    .pipe(make_high_thresh_flag, thresh=0)
)

# Daily frame: same features, but flag closes that sit most of the way
# (thresh=.8) from the median down towards the lower band.
analysis = (
    analysis
    .pipe(make_short_bollinger)
    .pipe(make_mad)
    .dropna()
    .pipe(make_low_thresh_flag, thresh=.8)
)


In [369]:
# Position of the close inside the daily band: 0 = lower band, 1 = upper band
# (values outside [0, 1] mean the close is outside the band).
# A long-window counterpart is kept below for reference only; no bolLong*
# columns are built in this notebook.
#analysis['closeLongIndex']=(analysis['close'] - analysis['bolLongLower'])/(analysis['bolLongUpper'] - analysis['bolLongLower'])
analysis['closeShortIndex'] = (
    analysis['close'].sub(analysis['bolLower'])
    .div(analysis['bolUpper'].sub(analysis['bolLower']))
)

In [370]:
# Spot-check the ten most recent daily rows with all engineered columns.
analysis.tail(10)

Unnamed: 0,symbol,date,close,wkstrt,bolMedian,bolStd,bolUpper,bolLower,MAD,lowRange,closeShortIndex
834478,ZYXI,2022-11-18,13.85,2022-11-14,12.08,1.989431,16.058862,8.101138,0.146523,0,0.722425
834479,ZYXI,2022-11-21,13.55,2022-11-21,12.27,1.983675,16.23735,8.30265,0.104319,0,0.661317
834480,ZYXI,2022-11-22,13.73,2022-11-21,12.56,1.965014,16.490028,8.629972,0.093153,0,0.648854
834481,ZYXI,2022-11-23,13.84,2022-11-21,12.77,1.930515,16.63103,8.90897,0.08379,0,0.638564
834482,ZYXI,2022-11-25,13.53,2022-11-21,12.86,1.870622,16.601243,9.118757,0.0521,0,0.589542
834483,ZYXI,2022-11-28,13.51,2022-11-28,13.0,1.768058,16.536116,9.463884,0.039231,0,0.572113
834484,ZYXI,2022-11-29,13.49,2022-11-28,13.05,1.654423,16.358847,9.741153,0.033716,0,0.566488
834485,ZYXI,2022-11-30,13.72,2022-11-28,13.18,1.510563,16.201125,10.158875,0.040971,0,0.589371
834486,ZYXI,2022-12-01,13.65,2022-11-28,13.24,1.336191,15.912383,10.567617,0.030967,0,0.576711
834487,ZYXI,2022-12-02,13.8,2022-11-28,13.4,1.116583,15.633166,11.166834,0.029851,0,0.589559


In [371]:
# Inspect the weekly frame after the band, MAD and highRange columns were added.
analysisWeekly

Unnamed: 0,index,symbol,date,close,wkstrt,bolMedian,bolStd,bolUpper,bolLower,MAD,highRange
25,138,AAN,2021-06-25,32.98,2021-06-21,25.39,6.313375,38.016750,12.763250,0.298937,1
26,147,AAN,2021-07-09,30.80,2021-06-28,25.54,6.284708,38.109415,12.970585,0.205951,1
27,152,AAN,2021-07-16,28.17,2021-07-12,25.78,6.139564,38.059127,13.500873,0.092708,1
28,157,AAN,2021-07-23,27.29,2021-07-19,25.86,5.806033,37.472066,14.247934,0.055298,1
29,162,AAN,2021-07-30,28.87,2021-07-26,25.99,5.565394,37.120789,14.859211,0.110812,1
...,...,...,...,...,...,...,...,...,...,...,...
154690,834468,ZYXI,2022-11-04,12.77,2022-10-31,8.39,1.125678,10.641356,6.138644,0.522050,1
154691,834473,ZYXI,2022-11-11,13.00,2022-11-07,8.58,1.413074,11.406149,5.753851,0.515152,1
154692,834478,ZYXI,2022-11-18,13.85,2022-11-14,8.73,1.638876,12.007753,5.452247,0.586483,1
154693,834482,ZYXI,2022-11-25,13.53,2022-11-21,8.80,1.854966,12.509931,5.090069,0.537500,1


In [372]:

# Snapshot of every symbol on the most recent date present in each frame.
latestWeekly = analysisWeekly.loc[
    analysisWeekly['date'] == analysisWeekly['date'].max()
].copy()
latest = analysis.loc[analysis['date'] == analysis['date'].max()].copy()



In [373]:
# Symbols that are low in their daily band while high in their weekly band.
# merge() joins on the only shared column, 'symbol', with the default inner join.
weeklyAndDailyFinds = (
    latest
    .loc[latest['lowRange'] == 1, ['symbol', 'close', 'bolMedian', 'bolLower', 'MAD']]
    .merge(latestWeekly.loc[latestWeekly['highRange'] == 1, ['symbol']])
)
weeklyAndDailyFinds

Unnamed: 0,symbol,close,bolMedian,bolLower,MAD
0,ADM,91.71,95.6,92.43261,-0.04069
1,BCBP,19.05,19.73,19.012233,-0.034465
2,BLMN,21.24,23.2,21.812394,-0.084483
3,CVI,34.54,39.19,36.716338,-0.118653
4,DEN,87.93,92.3,87.324198,-0.047346
5,DINO,56.42,62.82,59.828311,-0.101878
6,DK,28.93,32.95,29.26684,-0.122003
7,FANG,145.46,157.11,143.902676,-0.074152
8,GHC,607.7,642.84,622.662893,-0.054664
9,MARPS,8.19,9.9,8.20586,-0.172727


In [374]:
# Disabled plotting loop, kept as a bare string literal: nothing in it runs and
# the cell only echoes the text. If re-enabled it would plot, for each symbol
# in weeklyAndDailyFinds, the last 180 daily rows of close, bolMedian and the
# two bands.
"""for s in weeklyAndDailyFinds['symbol']:

    tmp = analysis[analysis['symbol']==s].tail(180).copy()

    plt.plot(tmp['close'])
    plt.plot(tmp['bolMedian'])
    plt.plot(tmp['bolUpper'],color='r')
    plt.plot(tmp['bolLower'],color='r')
    #plt.ylim(0)
    plt.title(s)
    plt.show()
"""

"for s in weeklyAndDailyFinds['symbol']:\n\n    tmp = analysis[analysis['symbol']==s].tail(180).copy()\n\n    plt.plot(tmp['close'])\n    plt.plot(tmp['bolMedian'])\n    plt.plot(tmp['bolUpper'],color='r')\n    plt.plot(tmp['bolLower'],color='r')\n    #plt.ylim(0)\n    plt.title(s)\n    plt.show()\n"

In [375]:
# Forward-looking targets per symbol.
changeAnalysis = analysis.copy()
# Return from today's close to the close five rows later for the same symbol.
changeAnalysis['change'] = changeAnalysis.groupby(['symbol'])['close'].shift(-5)/changeAnalysis['close']-1
changeAnalysis = changeAnalysis.reset_index(drop=True)
# Trailing five-row mean of the close within each symbol. groupby().rolling()
# returns a (symbol, row) MultiIndex in group-sorted order; dropping only the
# symbol level keeps the original row labels, so the assignment aligns by label
# and not by position. Positional assignment is only right if the frame is
# already sorted by symbol in pandas' own sort order.
changeAnalysis['5DayCloseMean'] = (
    changeAnalysis.groupby('symbol')['close']
    .rolling(window=5)
    .mean()
    .reset_index(level=0, drop=True)
)
# Mean close over the next five rows, relative to today's close.
changeAnalysis['5DayChangeMean'] = changeAnalysis.groupby(['symbol'])['5DayCloseMean'].shift(-5)/changeAnalysis['close']-1

# The last five rows of each symbol have no forward window and drop out here.
changeAnalysis = changeAnalysis.dropna()

In [376]:
# Weekly features keyed by (symbol, wkstrt), renamed so they do not collide
# with the daily MAD / flag columns when merged.
changeAnalysisWeekly = (
    analysisWeekly[['symbol', 'wkstrt', 'MAD', 'highRange']]
    .rename(columns={'MAD': 'MADWkly', 'highRange': 'highRangeWkly'})
)



In [377]:
# Re-display the weekly feature frame (same content as the earlier preview).
analysisWeekly

Unnamed: 0,index,symbol,date,close,wkstrt,bolMedian,bolStd,bolUpper,bolLower,MAD,highRange
25,138,AAN,2021-06-25,32.98,2021-06-21,25.39,6.313375,38.016750,12.763250,0.298937,1
26,147,AAN,2021-07-09,30.80,2021-06-28,25.54,6.284708,38.109415,12.970585,0.205951,1
27,152,AAN,2021-07-16,28.17,2021-07-12,25.78,6.139564,38.059127,13.500873,0.092708,1
28,157,AAN,2021-07-23,27.29,2021-07-19,25.86,5.806033,37.472066,14.247934,0.055298,1
29,162,AAN,2021-07-30,28.87,2021-07-26,25.99,5.565394,37.120789,14.859211,0.110812,1
...,...,...,...,...,...,...,...,...,...,...,...
154690,834468,ZYXI,2022-11-04,12.77,2022-10-31,8.39,1.125678,10.641356,6.138644,0.522050,1
154691,834473,ZYXI,2022-11-11,13.00,2022-11-07,8.58,1.413074,11.406149,5.753851,0.515152,1
154692,834478,ZYXI,2022-11-18,13.85,2022-11-14,8.73,1.638876,12.007753,5.452247,0.586483,1
154693,834482,ZYXI,2022-11-25,13.53,2022-11-21,8.80,1.854966,12.509931,5.090069,0.537500,1


In [378]:
# Attach each day's weekly features via its (symbol, week start) key and drop
# days without a weekly match.
# NOTE(review): the weekly row is built from the LAST daily row of that week,
# so earlier days of the same week appear to receive information from later
# in the week - confirm this look-ahead is intended before trusting the
# correlations / model below.
changeAnalysis = (
    changeAnalysis
    .merge(changeAnalysisWeekly, how='left', on=['symbol', 'wkstrt'])
    .dropna()
)

In [379]:
# Benchmark: two years of daily $SPX.X closes and the same five-row-forward
# return that is computed for the individual symbols.
sp500 = refresh_pricing(
    ticker="$SPX.X",
    periodType='year',
    period='2',
    frequencyType='daily',
    frequency='1',
)
sp500['marketChange'] = (
    sp500.groupby('symbol')['close'].shift(-5)
    .div(sp500['close'])
    .sub(1)
)
# The final five rows have no forward close and are removed.
sp500 = sp500.dropna()

In [380]:
# Bring the benchmark's forward return onto every symbol row of the same date.
changeAnalysis = changeAnalysis.merge(
    sp500.loc[:, ['date', 'marketChange']],
    how='left',
    on='date',
)

In [381]:
# Excess return: the symbol's forward change minus the benchmark's over the same rows.
changeAnalysis['perfVsMarket'] = changeAnalysis['change'].sub(changeAnalysis['marketChange'])


In [382]:
# Pairwise correlations between the engineered features and the forward-return targets.
changeAnalysis.loc[:, [
    'MAD',
    'MADWkly',
    'lowRange',
    'highRangeWkly',
    'closeShortIndex',
    'perfVsMarket',
    'change',
    '5DayChangeMean',
]].corr()

Unnamed: 0,MAD,MADWkly,lowRange,highRangeWkly,closeShortIndex,perfVsMarket,change,5DayChangeMean
MAD,1.0,0.583298,-0.284157,0.225868,0.523107,-0.00194,-0.009844,-0.011608
MADWkly,0.583298,1.0,-0.158382,0.455347,0.301397,0.202034,0.200063,0.241698
lowRange,-0.284157,-0.158382,1.0,-0.2445,-0.589064,0.011493,0.028811,0.031276
highRangeWkly,0.225868,0.455347,-0.2445,1.0,0.388284,0.05256,0.063316,0.077913
closeShortIndex,0.523107,0.301397,-0.589064,0.388284,1.0,-0.007955,-0.021298,-0.022526
perfVsMarket,-0.00194,0.202034,0.011493,0.05256,-0.007955,1.0,0.94366,0.82979
change,-0.009844,0.200063,0.028811,0.063316,-0.021298,0.94366,1.0,0.881526
5DayChangeMean,-0.011608,0.241698,0.031276,0.077913,-0.022526,0.82979,0.881526,1.0


In [383]:
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.linear_model import LinearRegression

In [384]:
# Model inputs and target.
feats = ['MAD','MADWkly','lowRange','highRangeWkly','closeShortIndex']
kpi = 'perfVsMarket'

X = changeAnalysis.loc[:, feats].copy()
y = changeAnalysis.loc[:, kpi].copy()

# shuffle=False keeps row order: the last 10% of rows become the test set.
# NOTE(review): rows appear to be ordered by symbol rather than by date, so
# this holds out the last symbols, not the latest dates - confirm intent.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.1,
    shuffle=False,
    random_state=42,
)

# Small, shallow boosted-tree regressor; fit() returns the fitted estimator itself.
mod = XGBRegressor(n_estimators=50, max_depth=3, random_state=42)
mod = mod.fit(X_train, y_train)


In [385]:
# In-sample fit and held-out predictions, in that order.
yfit, ypred = (mod.predict(split) for split in (X_train, X_test))


In [386]:
# Held-out R^2: one minus residual sum of squares over total sum of squares
# around the test-set mean.
1 - (
    sum(np.square(ypred - y_test))
    / sum(np.square(np.mean(y_test) - y_test))
)

0.0257880384982343

In [387]:
# Latest daily row per symbol joined to that week's weekly features; columns
# present in both frames get the _x (daily) / _y (weekly) suffixes.
changeAnalysisLatest = (
    analysis.loc[analysis['date'] == analysis['date'].max()]
    .merge(analysisWeekly, how='left', on=['symbol', 'wkstrt'])
)
changeAnalysisLatest

Unnamed: 0,symbol,date_x,close_x,wkstrt,bolMedian_x,bolStd_x,bolUpper_x,bolLower_x,MAD_x,lowRange,closeShortIndex,index,date_y,close_y,bolMedian_y,bolStd_y,bolUpper_y,bolLower_y,MAD_y,highRange
0,AAN,2022-12-02,12.57,2022-11-28,10.74,0.793367,12.326734,9.153266,0.170391,0,1.076656,501.0,2022-12-02,12.57,12.80,3.232169,19.264339,6.335661,-0.017969,0.0
1,AAP,2022-12-02,153.03,2022-11-28,181.07,17.952211,216.974422,145.165578,-0.154857,0,0.109519,1003.0,2022-12-02,153.03,181.89,16.756612,215.403223,148.376777,-0.158667,0.0
2,AAPL,2022-12-02,147.81,2022-11-28,148.11,5.285655,158.681310,137.538690,-0.002026,0,0.485811,1505.0,2022-12-02,147.81,149.70,10.362191,170.424381,128.975619,-0.012625,0.0
3,AAWW,2022-12-02,100.20,2022-11-28,100.54,0.342686,101.225373,99.854627,-0.003382,0,0.251960,2007.0,2022-12-02,100.20,99.41,16.234371,131.878742,66.941258,0.007947,1.0
4,ABB,2022-12-02,31.63,2022-11-28,30.68,1.528115,33.736229,27.623771,0.030965,0,0.655420,2509.0,2022-12-02,31.63,28.54,2.006620,32.553241,24.526759,0.108269,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1675,ZEUS,2022-12-02,35.16,2022-11-28,30.47,3.410163,37.290326,23.649674,0.153922,0,0.843825,832516.0,2022-12-02,35.16,28.51,3.667684,35.845369,21.174631,0.233251,1.0
1676,ZIM,2022-12-02,19.40,2022-11-28,24.59,1.890742,28.371484,20.808516,-0.211061,1,-0.186239,832981.0,2022-12-02,19.40,41.05,14.714981,70.479962,11.620038,-0.527406,0.0
1677,ZION,2022-12-02,52.25,2022-11-28,51.26,1.529332,54.318665,48.201335,0.019313,0,0.661835,833483.0,2022-12-02,52.25,52.79,2.981569,58.753139,46.826861,-0.010229,0.0
1678,ZTO,2022-12-02,24.98,2022-11-28,21.52,2.480257,26.480514,16.559486,0.160781,0,0.848754,833985.0,2022-12-02,24.98,25.59,2.556071,30.702142,20.477858,-0.023837,0.0


In [388]:
# Most recent week-start date present in the weekly feature frame.
changeAnalysisWeekly['wkstrt'].max()

datetime.date(2022, 11, 28)