In [1]:
import os

In [2]:
# Build the CSV path relative to the current working directory, so the
# notebook must be started from the directory that contains `data/`.
baseDir = os.getcwd()
dataLocation = 'data/short_clean_closepx.csv'
dataFilePath = os.path.join(baseDir, dataLocation)

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [4]:
# Load the close-price table; the first CSV column becomes the row index.
masterData = pd.read_csv(dataFilePath, index_col=0)
masterData.head()

Unnamed: 0_level_0,MSFT,AAPL,AMZN,JPM,GOOG,GOOGL,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2005-01-03,26.74,4.520714,44.52,39.150002,100.700043,101.456459,50.09
2005-01-04,26.84,4.567143,42.139999,38.41,96.621567,97.347351,49.75
2005-01-05,26.780001,4.607143,41.77,38.490002,96.129768,96.851852,49.490002
2005-01-06,26.75,4.610714,41.049999,38.709999,93.665794,94.36937,50.119999
2005-01-07,26.67,4.946429,42.32,38.400002,96.298668,97.022018,49.790001


<h2>Let's look at the rolling correlation</h2>

<ol><li><h4>First, reshape the data into a dictionary. Key: stock ticker; value: DataFrame with one column per calendar year</h4></li></ol>

In [5]:
import my_helpers

In [6]:
# Reshape the price table into a dictionary with one entry per ticker.
byStockAndYear = my_helpers.ByStockAndYear(masterData)

# Remove the 2019 column from every ticker's data, in place.
for yearlyPrices in byStockAndYear.values():
    del yearlyPrices[2019]

# Sanity check: tickers present, and the year columns left for one of them.
print(byStockAndYear.keys())
print(byStockAndYear['MSFT'].columns)

dict_keys(['MSFT', 'AAPL', 'AMZN', 'JPM', 'GOOG', 'GOOGL', 'XOM'])
Int64Index([2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015,
            2016, 2017, 2018],
           dtype='int64')


<ol start="2"><li><h4>Create Rolling Correlation Data</h4></li></ol>

In [7]:
# Confirm the container type returned by my_helpers.ByStockAndYear.
type(byStockAndYear)

dict

In [8]:
# Rolling-correlation helper.
def RollCorr(data, period):
    '''
    Compute the rolling correlation of every entry in a dictionary.

    input: data = dictionary whose values support
               .rolling(window=...).corr(), e.g. pd.DataFrame.
           period = window length handed to .rolling().
    output: dictionary with the same keys as data. Each value is the
        rolling correlation of the matching input, with every row
        that contains a NaN dropped.
    '''
    return {
        ticker: data[ticker].rolling(window=period).corr().dropna()
        for ticker in data
    }
    

In [9]:
# Rolling correlations for the 20-, 40- and 60-period windows.
rollCorr_20, rollCorr_40, rollCorr_60 = (
    my_helpers.RollCorr(byStockAndYear, period=window)
    for window in (20, 40, 60)
)

<h4><ol start="3"><li>Let's filter the data and look at days and years where:</li><br>
    <ol><li>|Corr| > 0.75, with at least 8 events occurring</li>
        <li>|Corr| > 0.65, with at least 9 events occurring</li>
    </ol></ol>
</h4>

In [10]:
# Apply the same filter to each rolling window.
# NOTE(review): n=0.75 presumably is the correlation threshold; the meaning
# of dropNum is defined in my_helpers.SeasonCorrTest -- confirm there.
seasonCorrResultA_20, seasonCorrResultA_40, seasonCorrResultA_60 = (
    my_helpers.SeasonCorrTest(rolled, dropNum=10, n=0.75)
    for rolled in (rollCorr_20, rollCorr_40, rollCorr_60)
)

In [11]:
# Spot-check the 60-period filter result for a single ticker.
seasonCorrResultA_60['AAPL']

2005  173  2006    0.812490
           2009    0.927869
           2011    0.762949
           2012    0.810348
           2013    0.896194
           2014    0.791790
           2015   -0.751911
           2016    0.873541
           2017    0.913998
           2018    0.869016
2009  172  2005    0.931417
           2006    0.849248
           2011    0.785422
           2012    0.752693
           2013    0.855479
           2014    0.785153
           2015   -0.795852
           2016    0.905492
           2017    0.917769
           2018    0.837323
      173  2005    0.927869
           2006    0.853042
           2011    0.780289
           2012    0.752202
           2013    0.865379
           2014    0.776452
           2015   -0.798902
           2016    0.904944
           2017    0.916796
           2018    0.843626
      174  2005    0.922144
           2006    0.862209
           2011    0.768763
           2012    0.751047
           2013    0.871872
           2014    0

In [12]:
# Reduce each filtered result to its high-correlation days.
resultSeasonCorrA_20, resultSeasonCorrA_40, resultSeasonCorrA_60 = (
    my_helpers.HighCorrDays(filtered)
    for filtered in (seasonCorrResultA_20,
                     seasonCorrResultA_40,
                     seasonCorrResultA_60)
)

In [15]:
# One output line per window (20, 40, 60): number of high-correlation days
# found for each ticker. Lines are separated by a newline; the last line is
# left without a trailing newline.
windowResults = (resultSeasonCorrA_20, resultSeasonCorrA_40, resultSeasonCorrA_60)
for position, highCorrDays in enumerate(windowResults):
    if position:
        print()
    for ticker, days in highCorrDays.items():
        print(ticker, f'Num of Days: {len(days)}', end='|')

MSFT Num of Days: 0|AAPL Num of Days: 2|AMZN Num of Days: 0|JPM Num of Days: 0|GOOG Num of Days: 0|GOOGL Num of Days: 0|XOM Num of Days: 1|
MSFT Num of Days: 2|AAPL Num of Days: 0|AMZN Num of Days: 0|JPM Num of Days: 0|GOOG Num of Days: 18|GOOGL Num of Days: 16|XOM Num of Days: 0|
MSFT Num of Days: 7|AAPL Num of Days: 3|AMZN Num of Days: 0|JPM Num of Days: 0|GOOG Num of Days: 35|GOOGL Num of Days: 35|XOM Num of Days: 0|

In [None]:
'''
request = {}
for letter in ['a','b','c','d','e']:
    requestvalue={}
    for item in list(range(1,6)):
        value={}
        #value['Day']= item
        value['AvgReturn']=item+10
        value['Details']=item+100
        requestvalue[f'Day {item}'] = value
    request[letter] = requestvalue
for k, v in request.items():
    print(k)
    print(v.keys())
    print(v.values())
'''

In [13]:
def PctReturnForDays(data, pxData, periods):
    '''
    Purpose: for every high-correlation day, measure the percent price
             change over the `periods` rows leading up to that day.
    input: data = dictionary, key=ticker, value=iterable of day labels
                  (e.g. the output of func HighCorrDays).
           pxData = dictionary, key=ticker, value=price table indexed by
                  day label (e.g. the output of func ByStockAndYear).
           periods = look-back distance; the start price is read at
                  label `day - periods`.
    return: 3 level dictionary.
            level 1 key = ticker
            level 2 key = 'DayN' where N is the day label analyzed
            level 3 key = 'AvgReturn': mean percent change, 2 decimals
                          'ReturnDetails': percent change per column of
                                           the price table, 2 decimals
    '''
    def _returnsEndingOn(prices, day):
        # price on the flagged day vs. price `periods` labels earlier
        windowEnd = prices.loc[day]
        windowStart = prices.loc[day - periods]
        change = (windowEnd - windowStart) / windowStart
        return {
            'AvgReturn': round(change.mean() * 100, 2),
            'ReturnDetails': round(change * 100, 2),
        }

    return {
        ticker: {
            f'Day{day}': _returnsEndingOn(pxData[ticker], day)
            for day in days
        }
        for ticker, days in data.items()
    }

In [16]:
# `periods` should equal the rolling window behind the input (60 here, since
# resultSeasonCorrA_60 derives from rollCorr_60): the start price is looked
# up at `day - periods`.
pctReturnOfCorr = PctReturnForDays(resultSeasonCorrA_60, byStockAndYear, periods=60)

In [None]:
# Per-ticker count of the high-correlation days that were analyzed
for ticker, dayResults in pctReturnOfCorr.items():
    print(ticker, len(dayResults))


In [None]:
# Walk the AAPL results to inspect their structure: each day entry is shown
# with its type and keys, then every (label, value) pair of 'ReturnDetails'.
for dayLabel, dayResult in pctReturnOfCorr['AAPL'].items():
    print(dayLabel)
    print('details:', type(dayResult))
    print(dayResult.keys())
    for label, pctReturn in dayResult['ReturnDetails'].items():
        print('a:', label, type(label))
        print('b:', type(pctReturn))
        print(pctReturn)


In [None]:
'''
Now let's try to create a summary for each time frame
'''

In [None]:
# Prototype of the per-day summary (un-rounded); fills the module-level
# `request` dict as {ticker: {day: summary}}.
_no_value = object()  # sentinel; not referenced by any visible cell
update = False        # set to True to print each ticker as it is processed
request = {}
for stock, days in pctReturnOfCorr.items():
    if update:
        print(stock, end='|')
    requestValue = {}
    for day, details in days.items():
        value = {}
        data = details['ReturnDetails']
        posTest = data > 0
        negTest = data < 0
        daysPos = data[posTest].count()
        # Count strictly negative returns only, so NumNeg covers the same
        # observations that AvgReturnOnNeg averages. Previously this was
        # `count - daysPos`, which also counted zero returns as negative.
        daysNeg = data[negTest].count()
        value['TotalTrades'] = data.count()
        value['NumPos'] = daysPos
        value['NumNeg'] = daysNeg
        value['AvgReturnOnPos'] = data[posTest].mean()
        value['AvgReturnOnNeg'] = data[negTest].mean()
        requestValue[day] = value
    request[stock] = requestValue

In [18]:
def ExecSummaryCorr(data, printupdate=False):
    '''
    Summarize the per-period returns of every high-correlation day.

    input: data = returned item from func PctReturnForDays
    output: 3 level dictionary
        level 1 keys = ticker
        level 1 value = dict
        level 2 keys = 'DayN' where the day with results
        level 2 value = dict
        level 3 keys = 'TotalTrades', 'NumPos', 'NumNeg',
                       'AvgReturnOnPos', 'AvgReturnOnNeg'
        level 3 value = results
            TotalTrades    = number of non-NaN returns
            NumPos         = number of returns > 0
            NumNeg         = number of returns < 0 (a return of exactly
                             0 is counted in neither NumPos nor NumNeg)
            AvgReturnOnPos = mean of the returns > 0, 2 decimals
            AvgReturnOnNeg = mean of the returns < 0, 2 decimals
    kwargs: printupdate = will print 'load' status
    '''
    request = {}
    outOf = len(data)
    for status, (stock, days) in enumerate(data.items()):
        if printupdate:
            print(f'{status}/{outOf}', end=' | ')
        requestValue = {}
        for day, details in days.items():
            # Local name kept distinct from the `data` parameter, which the
            # earlier version rebound while iterating over it.
            returns = details['ReturnDetails']
            posTest = returns > 0
            negTest = returns < 0
            requestValue[day] = {
                'TotalTrades': returns.count(),
                'NumPos': returns[posTest].count(),
                # Strictly negative only, matching AvgReturnOnNeg below.
                'NumNeg': returns[negTest].count(),
                'AvgReturnOnPos': round(returns[posTest].mean(), 2),
                'AvgReturnOnNeg': round(returns[negTest].mean(), 2),
            }
        request[stock] = requestValue
    if printupdate:
        print()
    return request
        
        

In [19]:
# Build the summary with progress output enabled; the bare print() adds a
# blank line between the progress line and the displayed dictionary.
a = ExecSummaryCorr(pctReturnOfCorr, printupdate =True)
print()
a

0/7 | 1/7 | 2/7 | 3/7 | 4/7 | 5/7 | 6/7 | 



{'MSFT': {'Day112': {'TotalTrades': 14,
   'NumPos': 8,
   'NumNeg': 6,
   'AvgReturnOnPos': 12.65,
   'AvgReturnOnNeg': -10.21},
  'Day113': {'TotalTrades': 14,
   'NumPos': 8,
   'NumNeg': 6,
   'AvgReturnOnPos': 12.89,
   'AvgReturnOnNeg': -9.07},
  'Day115': {'TotalTrades': 14,
   'NumPos': 8,
   'NumNeg': 6,
   'AvgReturnOnPos': 11.62,
   'AvgReturnOnNeg': -8.18},
  'Day116': {'TotalTrades': 14,
   'NumPos': 8,
   'NumNeg': 6,
   'AvgReturnOnPos': 12.56,
   'AvgReturnOnNeg': -7.83},
  'Day117': {'TotalTrades': 14,
   'NumPos': 8,
   'NumNeg': 6,
   'AvgReturnOnPos': 13.07,
   'AvgReturnOnNeg': -8.21},
  'Day244': {'TotalTrades': 14,
   'NumPos': 11,
   'NumNeg': 3,
   'AvgReturnOnPos': 11.92,
   'AvgReturnOnNeg': -14.12},
  'Day245': {'TotalTrades': 14,
   'NumPos': 11,
   'NumNeg': 3,
   'AvgReturnOnPos': 12.8,
   'AvgReturnOnNeg': -15.31}},
 'AAPL': {'Day172': {'TotalTrades': 14,
   'NumPos': 12,
   'NumNeg': 2,
   'AvgReturnOnPos': 16.36,
   'AvgReturnOnNeg': -11.55},
  'Day173

In [None]:
# NOTE: `request` is the module-level dict filled by the un-rounded prototype
# loop earlier in the notebook, not the value returned by ExecSummaryCorr
# (that one is bound to `a`).
request['AAPL']