# Set Up

## Import

In [1]:
import pandas as pd  
from datetime import timedelta  

### Read in PE/PS Data

In [2]:
# Load the forward P/S and P/E table; the `date` column is parsed to datetimes on read.
pe_ps = pd.read_csv(
    'forward_ps_pe.csv',
    parse_dates=['date'],
)

### Read in SPX Data

In [3]:
# Load the SPX history, keep the price and the two ratio columns, and rename them so that
# the join key (`date`) matches the column name used in pe_ps.
spx = (
    pd.read_csv('SPX Data.csv', parse_dates=['Date'])
    .loc[:, ['Date', 'PX_LAST', 'BEST_PX_SALES_RATIO', 'BEST_PE_RATIO']]
    .rename(columns={
        'Date': 'date',
        'PX_LAST': 'spx_price',
        'BEST_PX_SALES_RATIO': 'spx_ps',
        'BEST_PE_RATIO': 'spx_pe',
    })
)
spx.head()

Unnamed: 0,date,spx_price,spx_ps,spx_pe
0,2019-08-02,2932.05,2.1136,17.677
1,2019-08-01,2953.56,2.1285,17.8006
2,2019-07-31,2980.38,2.147,17.9832
3,2019-07-30,3013.18,2.171,18.1859
4,2019-07-29,3020.97,2.1742,18.2308


In [17]:
# Check the SPX calendar for gaps: for every row, the number of calendar days until the
# next available row.
# sort_values already returns a new frame, so spx itself is left untouched.
spx_test = spx.sort_values('date').reset_index(drop=True)
spx_test['next_date'] = spx_test['date'].shift(-1)
# Vectorised timedelta -> whole days (replaces a row-wise apply). The last row has no
# successor, so its next_date is NaT and its date_gap is NaN.
spx_test['date_gap'] = (spx_test['next_date'] - spx_test['date']).dt.days

In [19]:
# Ten largest gaps of more than three calendar days.
(
    spx_test
    .loc[spx_test['date_gap'].notnull() & spx_test['date_gap'].gt(3)]
    .sort_values('date_gap', ascending=False)
    .head(10)
)

Unnamed: 0,date,spx_price,spx_ps,spx_pe,next_date,date_gap
2140,2012-10-26,1411.94,1.2978,13.5479,2012-10-31,5.0
672,2006-12-29,1418.3,1.5539,16.3008,2007-01-03,5.0
20,2004-05-28,1120.68,1.5715,17.1858,2004-06-01,4.0
2465,2014-02-14,1838.63,1.5987,15.6149,2014-02-18,4.0
2812,2015-07-02,2076.78,1.8378,17.5411,2015-07-06,4.0
2784,2015-05-22,2126.06,1.8847,17.9873,2015-05-26,4.0
2749,2015-04-02,2066.96,1.8016,17.4884,2015-04-06,4.0
2716,2015-02-13,2096.99,1.8323,17.6526,2015-02-17,4.0
2697,2015-01-16,2019.42,1.7344,16.6241,2015-01-20,4.0
2601,2014-08-29,2003.37,1.7197,16.7694,2014-09-02,4.0


## Merge the spx data with pe/ps data

In [4]:
# First attempt: left-join the SPX columns onto every P/E-P/S row by date.
# Rows whose date has no SPX entry keep NaN in the spx_* columns.
with_spx = pd.merge(pe_ps, spx, how='left', on=['date'])

In [5]:
# Missing-value count per column after the first merge.
with_spx.isna().sum()

date              0
ticker            0
forward_ps        0
forward_pe        0
spx_price     23420
spx_ps        23420
spx_pe        23420
dtype: int64

In [6]:
# Distinct dates that found no SPX match in the first merge.
holiday_list = with_spx.loc[with_spx['spx_price'].isnull(), 'date'].unique()

In [7]:
# Work on an independent copy so the raw pe_ps frame stays unmodified.
pe_ps_corr = pe_ps.copy(deep=True)

In [9]:
"""
Based on the data, the forward P/E and P/S values are dated on Fridays, but some of these Fridays are holidays
and do not have corresponding SPX and price data.
So we move the date of these holidays back by one day, which should have corresponding SPX and price data.
""" 

# Move every row dated on an unmatched date (holiday_list) back by one calendar day so it
# can be re-merged with the SPX data.
# Done as a single masked assignment instead of a per-row .loc loop, which is far slower
# on tens of thousands of rows.
is_unmatched_date = pe_ps_corr['date'].isin(holiday_list)
pe_ps_corr.loc[is_unmatched_date, 'date'] = (
    pe_ps_corr.loc[is_unmatched_date, 'date'] - timedelta(days=1)
)

In [10]:
# Second attempt: left-join the SPX columns onto the date-adjusted P/E-P/S rows.
with_spx_new = pd.merge(pe_ps_corr, spx, how='left', on=['date'])

In [11]:
# Dates that still have no SPX match after the adjustment.
with_spx_new.loc[with_spx_new['spx_ps'].isnull(), 'date'].unique()

array(['2019-08-08T00:00:00.000000000'], dtype='datetime64[ns]')

In [25]:
# Per-ticker spacing check: calendar days between each observation and the same ticker's
# next observation.
# sort_values returns a new frame, so with_spx_new is not modified.
with_spx_new_test = with_spx_new.sort_values(['ticker', 'date']).reset_index(drop=True)
with_spx_new_test['next_date'] = with_spx_new_test.groupby('ticker')['date'].shift(-1)
# Vectorised timedelta -> whole days (replaces a row-wise apply). The last row of each
# ticker has no successor, so its date_gap is NaN.
with_spx_new_test['date_gap'] = (
    with_spx_new_test['next_date'] - with_spx_new_test['date']
).dt.days

In [26]:
# Distinct gap lengths (in days) observed across all tickers.
with_spx_new_test['date_gap'].unique()

array([ 7.,  6.,  8., nan])

In [27]:
# Number of distinct dates that are followed by a 6-day gap.
with_spx_new_test.loc[with_spx_new_test['date_gap'] == 6, 'date'].nunique()

24

In [28]:
# Number of distinct dates that are followed by an 8-day gap.
with_spx_new_test.loc[with_spx_new_test['date_gap'] == 8, 'date'].nunique()

23

In [12]:
# 2019-08-08 is the only date that still has no matching SPX row after the adjustment,
# so its records are dropped.
with_spx_final = with_spx_new.loc[with_spx_new['date'] != '2019-08-08']

In [13]:
# Confirm that no missing values remain (every count should be 0).
with_spx_final.isna().sum()

date          0
ticker        0
forward_ps    0
forward_pe    0
spx_price     0
spx_ps        0
spx_pe        0
dtype: int64

In [20]:
# Per-ticker spacing check on the final frame: calendar days between each observation and
# the same ticker's next observation.
# sort_values returns a new frame, so with_spx_final is not modified.
with_spx_final_test = with_spx_final.sort_values(['ticker', 'date']).reset_index(drop=True)
with_spx_final_test['next_date'] = with_spx_final_test.groupby('ticker')['date'].shift(-1)
# Vectorised timedelta -> whole days (replaces a row-wise apply). The last row of each
# ticker has no successor, so its date_gap is NaN.
with_spx_final_test['date_gap'] = (
    with_spx_final_test['next_date'] - with_spx_final_test['date']
).dt.days

In [21]:
# Distinct gap lengths (in days) in the final frame.
with_spx_final_test['date_gap'].unique()

array([ 7.,  6.,  8., nan])

In [62]:
# Show holiday_list in reverse order (the [::-1] slice flips the array end to end), for
# side-by-side comparison with the dates that precede an 8-day gap.
holiday_list[::-1]

array(['2004-12-24T00:00:00.000000000', '2005-03-25T00:00:00.000000000',
       '2006-04-14T00:00:00.000000000', '2007-04-06T00:00:00.000000000',
       '2008-03-21T00:00:00.000000000', '2008-07-04T00:00:00.000000000',
       '2009-04-10T00:00:00.000000000', '2009-07-03T00:00:00.000000000',
       '2009-12-25T00:00:00.000000000', '2010-01-01T00:00:00.000000000',
       '2010-04-02T00:00:00.000000000', '2010-12-24T00:00:00.000000000',
       '2011-04-22T00:00:00.000000000', '2012-04-06T00:00:00.000000000',
       '2013-03-29T00:00:00.000000000', '2014-04-18T00:00:00.000000000',
       '2014-07-04T00:00:00.000000000', '2015-04-03T00:00:00.000000000',
       '2015-07-03T00:00:00.000000000', '2015-12-25T00:00:00.000000000',
       '2016-01-01T00:00:00.000000000', '2016-03-25T00:00:00.000000000',
       '2017-04-14T00:00:00.000000000', '2018-03-30T00:00:00.000000000',
       '2019-04-19T00:00:00.000000000', '2019-08-09T00:00:00.000000000'],
      dtype='datetime64[ns]')

In [63]:
# Dates that are followed by an 8-day gap.
with_spx_final_test.loc[with_spx_final_test['date_gap'] == 8, 'date'].unique()

array(['2004-12-23T00:00:00.000000000', '2005-03-24T00:00:00.000000000',
       '2006-04-13T00:00:00.000000000', '2007-04-05T00:00:00.000000000',
       '2008-03-20T00:00:00.000000000', '2008-07-03T00:00:00.000000000',
       '2009-04-09T00:00:00.000000000', '2009-07-02T00:00:00.000000000',
       '2009-12-31T00:00:00.000000000', '2010-04-01T00:00:00.000000000',
       '2010-12-23T00:00:00.000000000', '2011-04-21T00:00:00.000000000',
       '2012-04-05T00:00:00.000000000', '2013-03-28T00:00:00.000000000',
       '2014-04-17T00:00:00.000000000', '2014-07-03T00:00:00.000000000',
       '2015-04-02T00:00:00.000000000', '2015-07-02T00:00:00.000000000',
       '2015-12-31T00:00:00.000000000', '2016-03-24T00:00:00.000000000',
       '2017-04-13T00:00:00.000000000', '2018-03-29T00:00:00.000000000',
       '2019-04-18T00:00:00.000000000'], dtype='datetime64[ns]')

In [23]:
# Number of distinct dates that are followed by an 8-day gap.
# (The original line was missing the closing parenthesis on nunique and did not parse.)
with_spx_final_test[with_spx_final_test.date_gap == 8].date.nunique()

23

In [None]:
# Ten largest per-ticker gaps that are not exactly one week.
# (The original referenced the misspelled name `with_spx_finaltest`, which raises NameError.)
with_spx_final_test[
    with_spx_final_test['date_gap'].notnull() & (with_spx_final_test['date_gap'] != 7)
].sort_values('date_gap', ascending=False).head(10)

In [15]:
# Check whether any row is still dated on one of the dates in holiday_list (expected: none).
# The membership test has to run on the `date` column; the original tested `ticker`, which
# can never match a date and therefore always returned an empty frame.
with_spx_final[with_spx_final.date.isin(holiday_list)]

Unnamed: 0,date,ticker,forward_ps,forward_pe,spx_price,spx_ps,spx_pe


# Calculation

In [166]:
# Order rows by ticker and then by date, and renumber the index so that each ticker
# occupies a contiguous run of row labels.
with_spx_final = (
    with_spx_final
    .sort_values(['ticker', 'date'])
    .reset_index(drop=True)
)

In [167]:
# SPX price scaled down by a factor of 10.
with_spx_final['adjusted_spx_price'] = with_spx_final['spx_price'] / 10
# with_spx_final.loc[:,'relative_price'] = with_spx_final.loc[:,'spx_price']/with_spx_final.loc[:,'adjusted_spx_price']

In [172]:
# Inspect the row labelled 26.
with_spx_final.loc[26]

date                  2005-02-04 00:00:00
ticker                                  A
forward_ps                           1.48
forward_pe                          17.69
spx_price                         1203.03
spx_ps                             1.6043
spx_pe                            16.2632
adjusted_spx_price                120.303
Name: 26, dtype: object

In [184]:
#ticker_list = with_spx_final.ticker.unique()
# calculation_price = pd.Dataframe()
#for ticker in ticker_list:
    #sub_data = with_spx_final.query("ticker == @ticker").copy().reset_index(drop=True)
    #sub_data.loc[25,'26W EMA Price'] = 0
    #for n in range(26, len(sub_data)):
        #sub_data.loc[n,'26W EMA Price'] = (sub_data.loc[n,'26W EMA Price']- sub_data.loc[n-1,'26W EMA Price'])*2/27 + sub_data.loc[n-1,'26W EMA Price']
        #calculation_price = calculation_price.append(sub_data, index=False)

In [202]:
# Single-ticker sample ('A') for prototyping the calculations.
# .copy() makes it an independent frame, so columns added to `test` later do not write
# into a slice of with_spx_final (which triggers SettingWithCopyWarning).
test = with_spx_final[with_spx_final.ticker == 'A'].copy()

In [251]:
# Preview the first five rows of the single-ticker sample.
test.head(n=5)

Unnamed: 0,date,ticker,forward_ps,forward_pe,spx_price,spx_ps,spx_pe,adjusted_spx_price,26W Ann Rate of Chage
0,2004-08-06,A,1.35,15.26,1063.97,1.4408,15.9835,106.397,
1,2004-08-13,A,1.27,14.23,1064.8,1.443,16.0194,106.48,
2,2004-08-20,A,1.37,15.32,1098.35,1.4881,16.4891,109.835,
3,2004-08-27,A,1.33,14.84,1107.77,1.4989,16.6189,110.777,
4,2004-09-03,A,1.25,13.89,1113.63,1.505,16.7029,111.363,


In [247]:
# Inspect the row labelled 1 (written as 2-1 in the original draft).
test.loc[1]

date                      2004-08-13 00:00:00
ticker                                      A
forward_ps                               1.27
forward_pe                              14.23
spx_price                              1064.8
spx_ps                                  1.443
spx_pe                                16.0194
adjusted_spx_price                     106.48
26W Ann Rate of Chage                     NaN
Name: 1, dtype: object

In [238]:
# 26-week rate of change of spx_price, populated from row 38 onwards (earlier rows stay NaN).
# Changes from the original loop:
#   * vectorised with shift() instead of row-by-row .loc writes;
#   * the dead `n = 38` / `n = n+1` statements are gone (the for loop rebinds n itself);
#   * the column name no longer carries a trailing space, so it can be selected as
#     '26W Ann Rate of Chage';
#   * assign() returns a new frame, so nothing is written into a slice of with_spx_final.
# shift() is positional; this matches the original label arithmetic (n - 26) because
# `test` carries consecutive 0-based row labels.
# NOTE(review): the change is divided by 2, whereas the commented-out P/S and price
# variants in this notebook multiply by 2 — confirm which annualisation is intended.
spx_price = test['spx_price']
roc_26w = (spx_price / spx_price.shift(26) - 1) / 2
roc_26w.iloc[:38] = float('nan')
test = test.assign(**{'26W Ann Rate of Chage': roc_26w})

In [200]:
# test.loc[:,'12W SMA P/E'] = test.spx_price.rolling(window=13).mean()

In [177]:
# calculation_price2 = pd.Dataframe()
#for ticker in ticker_list:
    #sub_data = calculation_price.query("ticker == @ticker").copy().reset_index(drop=True)
    #sub_data.loc[:,'12W SMA P/E'] = sub_data.forward_pe.rolling(window=13).mean()
    #calculation_price2 = calculation_price2.append(sub_data, index=False)

array(['A', 'AA', 'AAN', ..., 'ZTS', 'ZUMZ', 'ZUO'], dtype=object)

In [None]:
# calculation_price3 = pd.Dataframe()
#for ticker in ticker_list:
    #sub_data = calculation_price2.query("ticker == @ticker").copy().reset_index(drop=True)
    #for n in range(38, len(sub_data)):
        #sub_data.loc[n,'26W Ann Rate of Change'] = (sub_data.loc[n,'spx_price']/sub_data.loc[(n-26),'spx_price'] -1 )/ 2
        #calculation_price3 = calculation_price3.append(sub_data, index=False)

In [None]:
# calculation_price4 = pd.Dataframe()
#for ticker in ticker_list:
    #sub_data = calculation_price3.query("ticker == @ticker").copy().reset_index(drop=True)
    #sub_data.loc[51, '13W EMA Ann Rate of Change'] = np.mean(sub_data.loc[38:51,'26W Ann Rate of Chage']
    #for n in range(52, len(sub_data)):
        # sub_data.loc[n,'13W EMA Ann Rate of Change'] = (sub_data.loc[n,'26W Ann Rate of Chage']-
                                                        #sub_data.loc[n-1,'13W EMA Ann Rate of Change'])*2/14 + sub_data.loc[(n-1),'13W EMA Ann Rate of Change']
        #calculation_price4 = calculation_price4.append(sub_data, index=False)

In [None]:
# calculation_price5 = pd.Dataframe()
#for ticker in ticker_list:
    #sub_data = calculation_price4.query("ticker == @ticker").copy().reset_index(drop=True)
    #sub_data.loc[:,'8W SMA p/s'] = sub_data.forward_ps.rolling(window=9).mean()
    #calculation_price5 = calculation_price5.append(sub_data, index=False)

In [None]:
# calculation_price6 = pd.Dataframe()
#for ticker in ticker_list:
    #sub_data = calculation_price5.query("ticker == @ticker").copy().reset_index(drop=True)
    #for n in range(34, len(sub_data)):
        #sub_data.loc[n, '26W Ann Rate Change P/S'] = ((sub_data.loc[n,'8W SMA p/s']/sub_data.loc[(n-26),'8W SMA p/s'])-1)*2
        #calculation_price6 = calculation_price6.append(sub_data, index=False)

In [None]:
# calculation_price7 = pd.Dataframe()
#for ticker in ticker_list:
    #sub_data = calculation_price6.query("ticker == @ticker").copy().reset_index(drop=True)
    #sub_data.loc[47, '13W EMA P/S ROC'] = np.mean(sub_data.loc[34:47,'26W Ann Rate Change P/S']
    #for n in range(48, len(sub_data)):
        #sub_data.loc[n,'13W EMA P/S ROC'] = (sub_data.loc[n,'26W Ann Rate Change P/S']-
                                                        #sub_data.loc[n-1,'13W EMA P/S ROC'])*2/14 + sub_data.loc[(n-1),'13W EMA P/S ROC']
        #calculation_price7 = calculation_price7.append(sub_data, index=False)

In [None]:
# calculation_price8 = pd.Dataframe()
#for ticker in ticker_list:
    #sub_data = calculation_price7.query("ticker == @ticker").copy().reset_index(drop=True)
    #sub_data.loc[:,'8W SMA Price'] = sub_data.relative_price.rolling(window=9).mean()
    #calculation_price8 = calculation_price8.append(sub_data, index=False)

In [None]:
# # calculation_price9 = pd.Dataframe()
#for ticker in ticker_list:
    #sub_data = calculation_price8.query("ticker == @ticker").copy().reset_index(drop=True)
    #for n in range(34, len(sub_data)):
        #sub_data.loc[n, '26W Ann Rate Change Price'] = ((sub_data.loc[n,'8W SMA Price']/sub_data.loc[(n-26),'8W SMA Price'])-1)*2
        #calculation_price9 = calculation_price9.append(sub_data, index=False)

In [None]:
# calculation_price10 = pd.Dataframe()
#for ticker in ticker_list:
    #sub_data = calculation_price9.query("ticker == @ticker").copy().reset_index(drop=True)
    #sub_data.loc[47, '13W EMA Price ROC'] = np.mean(sub_data.loc[34:47,'26W Ann Rate Change Price']
    #for n in range(48, len(sub_data)):
        #sub_data.loc[n,'13W EMA Price ROC'] = (sub_data.loc[n,'26W Ann Rate Change Price']-
                                                        #sub_data.loc[n-1,'13W EMA Price ROC'])*2/14 + sub_data.loc[(n-1),'13W EMA Price ROC']
        #calculation_price7 = calculation_price10.append(sub_data, index=False)

In [None]:
# calculation_price11 = pd.Dataframe()
#for ticker in ticker_list:
    #sub_data = calculation_price10.query("ticker == @ticker").copy().reset_index(drop=True)
    # sub_data.loc[12,'12W EMA Relative Price'] = 0
    #for n in range(13, len(sub_data)):
        #sub_data.loc[n,'12W EMA Relative Price'] = (sub_data.loc[n,'relative_price'] - 
                                                    #sub_data.loc[n-1,'12W EMA Relative Price'])*2/13 + sub_data.loc[n-1,'12W EMA Relative Price']
        #calculation_price8 = calculation_price11.append(sub_data, index=False)

In [None]:
# calculation_price12 = pd.Dataframe()
#for ticker in ticker_list:
    #sub_data = calculation_price11.query("ticker == @ticker").copy().reset_index(drop=True)
    #for n in range(26, len(sub_data)):
        #sub_data.loc[n,'MACD'] = sub_data.loc[n,'12W EMA Relative Price'] - sub_data.loc[n,'26W EMA Price']
        #calculation_price9 = calculation_price12.append(sub_data, index=False)

In [248]:
# calculation_price13 = pd.Dataframe()
#for ticker in ticker_list:
    #sub_data = calculation_price12.query("ticker == @ticker").copy().reset_index(drop=True)
    # sub_data.loc[35,'9W SMA MACD'] = np.mean(sub_data.loc[26:35,'MACD'])
    #for n in range(36, len(sub_data)):
        #sub_data.loc[n,'9W SMA MACD'] = (sub_data.loc[n,'MACD'] - 
                                                    #sub_data.loc[n-1,'9W SMA MACD'])*2/10 + sub_data.loc[n-1,'9W SMA MACD']
        #calculation_price13 = calculation_price13.append(sub_data, index=False)

In [249]:
# Some signals Calculation
#calculation_unfinish = calculation_price13.copy()

In [None]:
# TODO(review): draft signal cell, kept commented out for two reasons:
#   * `calculation_unfinish` is only assigned in a commented-out cell, and
#   * the original line left the source column blank and was not valid Python
#     (typographic quotes, an empty `[]` selector, an `if` statement inside a lambda).
# Intended shape once the source column is chosen — flag is 1 when the value is below
# -0.4999, otherwise 0:
# SIGNAL_SOURCE_COLUMN = ...  # presumably one of the P/S rate-of-change columns — confirm
# calculation_unfinish['P/S -.5'] = (calculation_unfinish[SIGNAL_SOURCE_COLUMN] < -0.4999).astype(int)

## Read in Price Data

In [3]:
# Load daily prices, order them by date then ticker, renumber the index, and keep only
# the three columns used downstream.
price = (
    pd.read_csv('daily_price.csv', parse_dates=['date'])
    .sort_values(["date", "ticker"])
    .reset_index(drop=True)
    .loc[:, ['ticker', 'date', 'adj_close']]
)
price.head()

Unnamed: 0,ticker,date,adj_close
0,A,2004-05-07,17.670214
1,AABA,2004-05-07,26.4
2,AAME,2004-05-07,2.642746
3,AAN,2004-05-07,10.597434
4,AAON,2004-05-07,3.215521


In [4]:
# Preview the first five price rows.
price.head(n=5)

Unnamed: 0,ticker,date,adj_close
0,A,2004-05-07,17.670214
1,AABA,2004-05-07,26.4
2,AAME,2004-05-07,2.642746
3,AAN,2004-05-07,10.597434
4,AAON,2004-05-07,3.215521


In [5]:
# Left-join the adjusted close onto every P/E-P/S row, matching on ticker and date.
# Rows without a price on that date keep NaN in adj_close.
with_price = pd.merge(pe_ps, price, how='left', on=['ticker', 'date'])

In [6]:
# Number of distinct tickers in the merged frame.
with_price['ticker'].nunique()

1550

In [9]:
# Missing-value count per column after the price merge.
with_price.isna().sum()

date              0
ticker            0
forward_ps        0
forward_pe        0
adj_close     33162
dtype: int64

In [14]:
# Total number of rows in the merged frame.
with_price.shape[0]

699618

In [21]:
# Number of distinct dates on which at least one row has no adjusted close.
with_price.loc[with_price['adj_close'].isnull(), 'date'].nunique(dropna=False)

784

In [29]:
# Distinct dates with a missing adjusted close, plus the month-day part ("MM-DD") of each,
# taken from characters 5..9 of the date's string form.
dates = with_price.loc[with_price['adj_close'].isnull(), 'date'].unique()
mon_day = list(map(lambda stamp: str(stamp)[5:10], dates))

In [31]:
# Number of month-day strings collected (one per distinct date with a missing adj_close).
len(mon_day)

784

In [15]:
# Rows for ticker AAN that have no adjusted close.
with_price.loc[with_price['adj_close'].isnull() & with_price['ticker'].eq('AAN')]

Unnamed: 0,date,ticker,forward_ps,forward_pe,adj_close
0,2019-08-09,AAN,1.04,15.16,
16,2019-04-19,AAN,0.88,13.53,
71,2018-03-30,AAN,0.85,13.13,
121,2017-04-14,AAN,0.67,12.79,
176,2016-03-25,AAN,0.51,9.93,
188,2016-01-01,AAN,0.47,9.61,
189,2015-12-25,AAN,0.48,9.86,
214,2015-07-03,AAN,0.77,15.45,
227,2015-04-03,AAN,0.62,13.17,
266,2014-07-04,AAN,0.85,15.42,


In [13]:
# Number of distinct tickers that have at least one row without an adjusted close.
with_price.loc[with_price['adj_close'].isnull(), 'ticker'].nunique()

1547

In [7]:
# Per-ticker spacing check: calendar days between each observation and the same ticker's
# next observation.
# sort_values returns a new frame, so with_price is not modified.
with_price_test = with_price.sort_values(['ticker', 'date']).reset_index(drop=True)
with_price_test['next_date'] = with_price_test.groupby('ticker')['date'].shift(-1)
# Vectorised timedelta -> whole days. The row-wise apply it replaces calls a Python lambda
# once per row, which is very slow on a frame of this size. The last row of each ticker
# has no successor, so its date_gap is NaN.
with_price_test['date_gap'] = (
    with_price_test['next_date'] - with_price_test['date']
).dt.days

In [19]:
# Largest per-ticker gaps that are not exactly one week.
(
    with_price_test
    .loc[with_price_test['date_gap'].notnull() & with_price_test['date_gap'].ne(7)]
    .sort_values('date_gap', ascending=False)
    .head()
)

Unnamed: 0,date,ticker,forward_ps,forward_pe,adj_close,next_date,date_gap
