# Set Up

## Import

In [1]:
import pandas as pd 
from datetime import timedelta  

### Read in PE/PS Data

In [2]:
# Forward P/S and P/E per ticker and date; `date` is parsed to datetime on load.
pe_ps = pd.read_csv(
    'forward_ps_pe.csv',
    parse_dates=['date'],
)

### Read in SPX Data

In [3]:
# Load the SPX file, keep the price and the two ratio columns, give them
# short lowercase names, and order the rows by date.
spx = (
    pd.read_csv('SPX Data.csv', parse_dates=['Date'])
    .loc[:, ['Date', 'PX_LAST', 'BEST_PX_SALES_RATIO', 'BEST_PE_RATIO']]
    .rename(columns={
        'Date': 'date',
        'PX_LAST': 'spx_price',
        'BEST_PX_SALES_RATIO': 'spx_ps',
        'BEST_PE_RATIO': 'spx_pe',
    })
    .sort_values('date')
    .reset_index(drop=True)
)
spx.head()

Unnamed: 0,date,spx_price,spx_ps,spx_pe
0,2004-04-30,1107.31,1.5678,17.185
1,2004-05-03,1117.49,1.5808,17.3119
2,2004-05-04,1119.55,1.5848,17.3311
3,2004-05-05,1121.53,1.5831,17.358
4,2004-05-06,1113.99,1.5712,17.2442


## Merge the spx data with pe/ps data

In [4]:
# Left join keeps every P/E-P/S row; dates absent from the SPX file show up
# as nulls in the three spx_* columns.
with_spx = pd.merge(pe_ps, spx, on='date', how='left')
with_spx.isna().sum()

date              0
ticker            0
forward_ps        0
forward_pe        0
spx_price     23420
spx_ps        23420
spx_pe        23420
dtype: int64

In [5]:
# Distinct dates for which the merge found no SPX price.
holiday_list = with_spx.loc[with_spx['spx_price'].isna(), 'date'].unique()

In [6]:
# Work on an independent copy so the date corrections leave `pe_ps` untouched.
pe_ps_corr = pe_ps.copy(deep=True)

In [7]:
"""
Based on the data, the forward P/E and P/S values are forecasts dated
on each Friday, but some of these Fridays are market holidays
and have no corresponding SPX price data.
So we move the date of these holiday rows back by one day,
to a date that does have corresponding SPX price data.
""" 
# Move every row whose date is in `holiday_list` back by one calendar day so
# that it can be matched against the SPX data in the next merge.
# The shift is applied to all affected rows in one vectorized assignment
# instead of one `.loc` write per row.
holiday_index = pe_ps_corr.index[pe_ps_corr['date'].isin(holiday_list)]
pe_ps_corr.loc[holiday_index, 'date'] = (
    pe_ps_corr.loc[holiday_index, 'date'] - timedelta(days=1)
)

In [8]:
# Repeat the left join, this time using the date-adjusted P/E-P/S rows.
with_spx_new = pd.merge(pe_ps_corr, spx, on='date', how='left')

In [9]:
# Dates that still have no SPX match after the one-day shift.
with_spx_new.loc[with_spx_new['spx_ps'].isna(), 'date'].unique()

array(['2019-08-08T00:00:00.000000000'], dtype='datetime64[ns]')

In [10]:
# 2019-08-08 is the only date still without SPX data after the shift,
# so all records on that date are dropped.
with_spx_final = with_spx_new.loc[with_spx_new['date'] != '2019-08-08']

In [11]:
# Confirm that no null values remain in any column.
with_spx_final.isna().sum()

date          0
ticker        0
forward_ps    0
forward_pe    0
spx_price     0
spx_ps        0
spx_pe        0
dtype: int64

# Calculation

In [None]:
# Group each ticker's rows together in chronological order and renumber the index.
with_spx_final = (
    with_spx_final
    .sort_values(by=['ticker', 'date'])
    .reset_index(drop=True)
)

In [None]:
# Store the SPX price scaled down by a factor of 10 in its own column.
with_spx_final['adjusted_spx_price'] = with_spx_final['spx_price'].div(10)
# NOTE(review): the disabled line below divides spx_price by spx_price / 10,
# which is a constant 10 — confirm the intended numerator before re-enabling.
# with_spx_final.loc[:,'relative_price'] = with_spx_final.loc[:,'spx_price']/with_spx_final.loc[:,'adjusted_spx_price']

In [None]:
# Inspect the row labelled 26.
with_spx_final.loc[26]

In [None]:
#ticker_list = with_spx_final.ticker.unique()
# calculation_price = pd.Dataframe()
#for ticker in ticker_list:
    #sub_data = with_spx_final.query("ticker == @ticker").copy().reset_index(drop=True)
    #sub_data.loc[25,'26W EMA Price'] = 0
    #for n in range(26, len(sub_data)):
        #sub_data.loc[n,'26W EMA Price'] = (sub_data.loc[n,'26W EMA Price']- sub_data.loc[n-1,'26W EMA Price'])*2/27 + sub_data.loc[n-1,'26W EMA Price']
        #calculation_price = calculation_price.append(sub_data, index=False)

In [None]:
# Single-ticker sample ('A') for trying out the calculations.
# `.copy()` makes it an independent frame so later column writes do not raise
# SettingWithCopyWarning, and the index is renumbered from 0 because the
# cells below address rows by integer label (e.g. n and n - 26).
test = with_spx_final[with_spx_final.ticker == 'A'].copy().reset_index(drop=True)

In [None]:
# Preview the first five rows of the sample.
test.head(n=5)

In [None]:
# Inspect the row labelled 1 (written as 2 - 1 in the original).
test.loc[1]

In [None]:
# 26-row rate of change of spx_price, halved, filled in from row 38 onward.
# It is computed for the whole column at once with `shift` instead of a
# Python loop. The original `n = 38` and `n = n + 1` statements had no effect,
# because the `for` statement rebinds `n` on every iteration.
# NOTE(review): the column key contains a typo and a trailing space
# ('Chage '); it is kept byte-for-byte so any existing lookups still match.
# NOTE(review): this divides by 2, whereas the disabled P/S and price
# rate-of-change cells in this notebook multiply by 2 — confirm which is intended.
roc_26w = (test['spx_price'] / test['spx_price'].shift(26) - 1) / 2
roc_26w.iloc[:38] = float('nan')  # the original loop never wrote rows 0-37
test['26W Ann Rate of Chage '] = roc_26w

In [None]:
# test.loc[:,'12W SMA P/E'] = test.spx_price.rolling(window=13).mean()

In [None]:
# calculation_price2 = pd.Dataframe()
#for ticker in ticker_list:
    #sub_data = calculation_price.query("ticker == @ticker").copy().reset_index(drop=True)
    #sub_data.loc[:,'12W SMA P/E'] = sub_data.forward_pe.rolling(window=13).mean()
    #calculation_price2 = calculation_price2.append(sub_data, index=False)

In [None]:
# calculation_price3 = pd.Dataframe()
#for ticker in ticker_list:
    #sub_data = calculation_price2.query("ticker == @ticker").copy().reset_index(drop=True)
    #for n in range(38, len(sub_data)):
        #sub_data.loc[n,'26W Ann Rate of Change'] = (sub_data.loc[n,'spx_price']/sub_data.loc[(n-26),'spx_price'] -1 )/ 2
        #calculation_price3 = calculation_price3.append(sub_data, index=False)

In [None]:
# calculation_price4 = pd.Dataframe()
#for ticker in ticker_list:
    #sub_data = calculation_price3.query("ticker == @ticker").copy().reset_index(drop=True)
    #sub_data.loc[51, '13W EMA Ann Rate of Change'] = np.mean(sub_data.loc[38:51,'26W Ann Rate of Chage']
    #for n in range(52, len(sub_data)):
        # sub_data.loc[n,'13W EMA Ann Rate of Change'] = (sub_data.loc[n,'26W Ann Rate of Chage']-
                                                        #sub_data.loc[n-1,'13W EMA Ann Rate of Change'])*2/14 + sub_data.loc[(n-1),'13W EMA Ann Rate of Change']
        #calculation_price4 = calculation_price4.append(sub_data, index=False)

In [None]:
# calculation_price5 = pd.Dataframe()
#for ticker in ticker_list:
    #sub_data = calculation_price4.query("ticker == @ticker").copy().reset_index(drop=True)
    #sub_data.loc[:,'8W SMA p/s'] = sub_data.forward_ps.rolling(window=9).mean()
    #calculation_price5 = calculation_price5.append(sub_data, index=False)

In [None]:
# calculation_price6 = pd.Dataframe()
#for ticker in ticker_list:
    #sub_data = calculation_price5.query("ticker == @ticker").copy().reset_index(drop=True)
    #for n in range(34, len(sub_data)):
        #sub_data.loc[n, '26W Ann Rate Change P/S'] = ((sub_data.loc[n,'8W SMA p/s']/sub_data.loc[(n-26),'8W SMA p/s'])-1)*2
        #calculation_price6 = calculation_price6.append(sub_data, index=False)

In [None]:
# calculation_price7 = pd.Dataframe()
#for ticker in ticker_list:
    #sub_data = calculation_price6.query("ticker == @ticker").copy().reset_index(drop=True)
    #sub_data.loc[47, '13W EMA P/S ROC'] = np.mean(sub_data.loc[34:47,'26W Ann Rate Change P/S']
    #for n in range(48, len(sub_data)):
        #sub_data.loc[n,'13W EMA P/S ROC'] = (sub_data.loc[n,'26W Ann Rate Change P/S']-
                                                        #sub_data.loc[n-1,'13W EMA P/S ROC'])*2/14 + sub_data.loc[(n-1),'13W EMA P/S ROC']
        #calculation_price7 = calculation_price7.append(sub_data, index=False)

In [None]:
# calculation_price8 = pd.Dataframe()
#for ticker in ticker_list:
    #sub_data = calculation_price7.query("ticker == @ticker").copy().reset_index(drop=True)
    #sub_data.loc[:,'8W SMA Price'] = sub_data.relative_price.rolling(window=9).mean()
    #calculation_price8 = calculation_price8.append(sub_data, index=False)

In [None]:
# # calculation_price9 = pd.Dataframe()
#for ticker in ticker_list:
    #sub_data = calculation_price8.query("ticker == @ticker").copy().reset_index(drop=True)
    #for n in range(34, len(sub_data)):
        #sub_data.loc[n, '26W Ann Rate Change Price'] = ((sub_data.loc[n,'8W SMA Price']/sub_data.loc[(n-26),'8W SMA Price'])-1)*2
        #calculation_price9 = calculation_price9.append(sub_data, index=False)

In [None]:
# calculation_price10 = pd.Dataframe()
#for ticker in ticker_list:
    #sub_data = calculation_price9.query("ticker == @ticker").copy().reset_index(drop=True)
    #sub_data.loc[47, '13W EMA Price ROC'] = np.mean(sub_data.loc[34:47,'26W Ann Rate Change Price']
    #for n in range(48, len(sub_data)):
        #sub_data.loc[n,'13W EMA Price ROC'] = (sub_data.loc[n,'26W Ann Rate Change Price']-
                                                        #sub_data.loc[n-1,'13W EMA Price ROC'])*2/14 + sub_data.loc[(n-1),'13W EMA Price ROC']
        #calculation_price7 = calculation_price10.append(sub_data, index=False)

In [None]:
# calculation_price11 = pd.Dataframe()
#for ticker in ticker_list:
    #sub_data = calculation_price10.query("ticker == @ticker").copy().reset_index(drop=True)
    # sub_data.loc[12,'12W EMA Relative Price'] = 0
    #for n in range(13, len(sub_data)):
        #sub_data.loc[n,'12W EMA Relative Price'] = (sub_data.loc[n,'relative_price'] - 
                                                    #sub_data.loc[n-1,'12W EMA Relative Price'])*2/13 + sub_data.loc[n-1,'12W EMA Relative Price']
        #calculation_price8 = calculation_price11.append(sub_data, index=False)

In [None]:
# calculation_price12 = pd.Dataframe()
#for ticker in ticker_list:
    #sub_data = calculation_price11.query("ticker == @ticker").copy().reset_index(drop=True)
    #for n in range(26, len(sub_data)):
        #sub_data.loc[n,'MACD'] = sub_data.loc[n,'12W EMA Relative Price'] - sub_data.loc[n,'26W EMA Price']
        #calculation_price9 = calculation_price12.append(sub_data, index=False)

In [None]:
# calculation_price13 = pd.Dataframe()
#for ticker in ticker_list:
    #sub_data = calculation_price12.query("ticker == @ticker").copy().reset_index(drop=True)
    # sub_data.loc[35,'9W SMA MACD'] = np.mean(sub_data.loc[26:35,'MACD'])
    #for n in range(36, len(sub_data)):
        #sub_data.loc[n,'9W SMA MACD'] = (sub_data.loc[n,'MACD'] - 
                                                    #sub_data.loc[n-1,'9W SMA MACD'])*2/10 + sub_data.loc[n-1,'9W SMA MACD']
        #calculation_price13 = calculation_price13.append(sub_data, index=False)

In [None]:
# Some signals Calculation
#calculation_unfinish = calculation_price13.copy()

In [None]:
# Binary signal column: 1 where the source column is below -0.4999, else 0.
# The original line did not parse (curly quotes, an empty column selector and
# an invalid lambda); this is the same threshold test as a vectorized comparison.
# NOTE(review): the original left the source column blank. '13W EMA P/S ROC'
# is an assumption based on the signal's name — confirm before relying on it.
# NOTE(review): `calculation_unfinish` is only assigned in a commented-out
# line of this notebook, so that pipeline must be enabled for this cell to run.
ps_roc_column = '13W EMA P/S ROC'
calculation_unfinish['P/S -.5'] = (calculation_unfinish[ps_roc_column] < -0.4999).astype(int)

## Read in Price Data

In [None]:
# Load daily prices, order by date then ticker, and keep only the three
# columns used below.
price = (
    pd.read_csv('daily_price.csv', parse_dates=['date'])
    .sort_values(['date', 'ticker'])
    .reset_index(drop=True)
    .loc[:, ['ticker', 'date', 'adj_close']]
)
price.head()

In [None]:
# Preview the first five price rows.
price.head(n=5)

In [None]:
# Attach each ticker's adjusted close for the same date; unmatched rows keep a null adj_close.
with_price = pd.merge(pe_ps, price, on=['ticker', 'date'], how='left')

In [None]:
# Number of distinct tickers in the merged frame.
with_price['ticker'].nunique()

In [None]:
# Null count per column after the price merge.
with_price.isna().sum()

In [None]:
# Total number of rows in the merged frame.
with_price.shape[0]

In [None]:
# How many distinct dates have at least one row without an adjusted close.
len(with_price.loc[with_price['adj_close'].isna(), 'date'].unique())

In [None]:
# Distinct dates with a missing adjusted close, plus characters 5-9 of each
# date's string form (the "MM-DD" part of an ISO-formatted timestamp).
dates = with_price.loc[with_price['adj_close'].isna(), 'date'].unique()
mon_day = list(map(lambda stamp: str(stamp)[5:10], dates))

In [None]:
# Number of entries in `mon_day` (one per date in `dates`).
len(mon_day)

In [None]:
# Rows for ticker 'AAN' that have no adjusted close.
with_price.loc[with_price['adj_close'].isna() & with_price['ticker'].eq('AAN')]

In [None]:
# Number of distinct tickers that have at least one row without an adjusted close.
with_price.loc[with_price['adj_close'].isna(), 'ticker'].nunique()

In [None]:
# Check for gaps in each ticker's date sequence.
# Rows are ordered by ticker and date, `next_date` is the following date
# within the same ticker (NaT on each ticker's last row), and `date_gap` is
# the number of days between the two.
# `sort_values` already returns a new frame, so no explicit copy is needed.
with_price_test = with_price.sort_values(['ticker', 'date']).reset_index(drop=True)
with_price_test['next_date'] = with_price_test.groupby('ticker')['date'].shift(-1)
# Vectorized datetime subtraction replaces a row-by-row `apply(axis=1)`;
# rows with no next date get NaN, as before.
with_price_test['date_gap'] = (
    with_price_test['next_date'] - with_price_test['date']
).dt.days

In [None]:
# Show the largest gaps that are not exactly 7 days, ignoring rows with no next date.
(
    with_price_test
    .loc[with_price_test['date_gap'].notna() & with_price_test['date_gap'].ne(7)]
    .sort_values('date_gap', ascending=False)
    .head()
)