# Set Up

## Import

In [1]:
import pandas as pd 
from datetime import timedelta  
import numpy as np

### Read in PE/PS Data

In [2]:
# Forward P/S and P/E estimates per ticker; `date` is parsed as a datetime
# column so it can be merged on later.
pe_ps = pd.read_csv(
    'forward_ps_pe.csv',
    parse_dates=['date'],
)

### Read in SPX Data

In [3]:
# Load the SPX history, keep the price and valuation columns, give them short
# snake_case names, and order the rows chronologically.
spx = (
    pd.read_csv('SPX Data.csv', parse_dates=['Date'])
    [['Date', 'PX_LAST', 'BEST_PX_SALES_RATIO', 'BEST_PE_RATIO']]
    .rename(columns={
        'Date': 'date',
        'PX_LAST': 'spx_price',
        'BEST_PX_SALES_RATIO': 'spx_ps',
        'BEST_PE_RATIO': 'spx_pe',
    })
    .sort_values("date")
    .reset_index(drop=True)
)
spx.head()

Unnamed: 0,date,spx_price,spx_ps,spx_pe
0,2004-04-30,1107.31,1.5678,17.185
1,2004-05-03,1117.49,1.5808,17.3119
2,2004-05-04,1119.55,1.5848,17.3311
3,2004-05-05,1121.53,1.5831,17.358
4,2004-05-06,1113.99,1.5712,17.2442


## Merge the spx data with pe/ps data

In [4]:
# A left join keeps every estimate row; dates with no SPX row get NaN in the
# spx_* columns, which the per-column null count below exposes.
with_spx = pd.merge(pe_ps, spx, how='left', on='date')
with_spx.isna().sum()

date              0
ticker            0
forward_ps        0
forward_pe        0
spx_price     23420
spx_ps        23420
spx_pe        23420
dtype: int64

In [5]:
# Distinct estimate dates that found no SPX row in the merge.
holiday_list = with_spx.loc[with_spx['spx_price'].isna(), 'date'].unique()

In [6]:
# Independent copy, so the date corrections applied to it never touch pe_ps.
pe_ps_corr = pe_ps.copy(deep=True)

In [7]:
# Based on the data, the P/E and P/S estimates are dated on Fridays, but some
# of those Fridays are holidays with no corresponding SPX row. Move each such
# date back one calendar day, where SPX data is expected to exist.
# NOTE(review): a shifted date that still has no SPX row is not handled here
# and must be dealt with after the next merge.

# Rows of pe_ps_corr whose date found no SPX match.
holiday_index = pe_ps_corr[pe_ps_corr.date.isin(holiday_list)].index

# One vectorized assignment instead of one .loc write per row.
pe_ps_corr.loc[holiday_index, 'date'] = pe_ps_corr.loc[holiday_index, 'date'] - timedelta(days=1)

In [8]:
# Repeat the left join, this time starting from the holiday-adjusted dates.
with_spx_new = pd.merge(pe_ps_corr, spx, how='left', on='date')

In [9]:
# Dates that still have no SPX match after the one-day shift.
with_spx_new.loc[with_spx_new['spx_ps'].isna(), 'date'].unique()

array(['2019-08-08T00:00:00.000000000'], dtype='datetime64[ns]')

In [10]:
# 2019-08-08 is the only date still unmatched after the shift (no SPX data is
# available for it), so its records are excluded.
with_spx_final = with_spx_new.loc[with_spx_new['date'] != '2019-08-08']

In [11]:
# Sanity check: count the missing values remaining in each column.
with_spx_final.isna().sum()

date          0
ticker        0
forward_ps    0
forward_pe    0
spx_price     0
spx_ps        0
spx_pe        0
dtype: int64

# Calculation

In [13]:
# Order rows by ticker, then chronologically, and renumber the index from 0.
with_spx_final = (
    with_spx_final
    .sort_values(by=['ticker', 'date'])
    .reset_index(drop=True)
)

In [14]:
"""
The 'adjusted_spx_price' Column and the 'relative_price' Column
"""

# SPX level divided by ten; it is the denominator of the (currently disabled)
# relative-price formula below.
with_spx_final['adjusted_spx_price'] = with_spx_final['spx_price'].div(10)
# with_spx_final['relative_price'] = with_spx_final['Equity'] / with_spx_final['adjusted_spx_price']

In [15]:
# Every distinct ticker in the merged data, in order of first appearance.
ticker_list = with_spx_final['ticker'].unique()

In [16]:
# Display the distinct tickers.
with_spx_final['ticker'].unique()

array(['A', 'AA', 'AAN', ..., 'ZTS', 'ZUMZ', 'ZUO'], dtype=object)

In [17]:
"""
Create a new test dataframe to check the accuracy of the code
"""

# Two-ticker subset used to test the calculations.
# .copy() makes `test` an independent frame: columns are assigned onto it
# later, and writing into a filtered slice of with_spx_final raises
# SettingWithCopyWarning and may not persist.
test = with_spx_final[with_spx_final.ticker.isin(['A', 'AA'])].copy()

In [18]:
# First five rows of the test subset.
test.head(n=5)

Unnamed: 0,date,ticker,forward_ps,forward_pe,spx_price,spx_ps,spx_pe,adjusted_spx_price
0,2004-08-06,A,1.35,15.26,1063.97,1.4408,15.9835,106.397
1,2004-08-13,A,1.27,14.23,1064.8,1.443,16.0194,106.48
2,2004-08-20,A,1.37,15.32,1098.35,1.4881,16.4891,109.835
3,2004-08-27,A,1.33,14.84,1107.77,1.4989,16.6189,110.777
4,2004-09-03,A,1.25,13.89,1113.63,1.505,16.7029,111.363


In [20]:
# Per-ticker 26-week annualised rate of change of spx_price.
#
# For each ticker the rows are renumbered from 0. Row n (n >= 38) gets
#     (price[n] / price[n - 26] - 1) * 2
# and earlier rows stay NaN. A 26-week change is annualised by doubling it,
# which matches the other 26W formulas in this notebook.
# NOTE(review): the previous version divided by 2 here - confirm that doubling
# is the intended annualisation.
# The column name keeps its original spelling ('Chage') because other cells
# refer to it by that name.
test_list = test.ticker.unique()
per_ticker_frames = []
for ticker in test_list:
    sub_data = test.query("ticker == @ticker").copy().reset_index(drop=True)
    # shift(26) lines each row up with the row 26 positions earlier.
    rate_of_change = (sub_data['spx_price'] / sub_data['spx_price'].shift(26) - 1) * 2
    # Keep the original start row: positions below 38 stay NaN.
    sub_data['26W Ann Rate of Chage'] = rate_of_change.where(sub_data.index >= 38)
    per_ticker_frames.append(sub_data)

# Concatenate once at the end. DataFrame.append was removed in pandas 2.0 and
# re-copied the accumulated frame on every iteration.
calculation_price = (
    pd.concat(per_ticker_frames, ignore_index=True) if per_ticker_frames else pd.DataFrame()
)

In [23]:
# Inspect the per-ticker frame left over from the loop (the last ticker processed).
sub_data

Unnamed: 0,date,ticker,forward_ps,forward_pe,spx_price,spx_ps,spx_pe,adjusted_spx_price,26W Ann Rate of Chage
0,2016-11-04,AA,0.46,88.36,2085.18,1.7989,17.5368,208.518,
1,2016-11-11,AA,0.54,99.90,2164.45,1.8668,18.2019,216.445,
2,2016-11-18,AA,0.57,99.90,2181.90,1.8845,18.3330,218.190,
3,2016-11-25,AA,0.60,99.90,2213.35,1.9114,18.6008,221.335,
4,2016-12-02,AA,0.57,99.90,2191.95,1.8944,18.4122,219.195,
5,2016-12-09,AA,0.58,91.87,2259.53,1.9540,18.9763,225.953,
6,2016-12-16,AA,0.51,66.80,2258.07,1.9517,19.0324,225.807,
7,2016-12-23,AA,0.53,39.02,2263.79,1.9595,19.0984,226.379,
8,2016-12-30,AA,0.50,35.75,2238.83,1.9379,18.8895,223.883,
9,2017-01-06,AA,0.55,39.36,2276.98,1.8812,17.5122,227.698,


In [22]:
# Tickers present in the test subset.
test_list

array(['A', 'AA'], dtype=object)

In [21]:
# First five rows of the last per-ticker frame.
sub_data.head(n=5)

Unnamed: 0,date,ticker,forward_ps,forward_pe,spx_price,spx_ps,spx_pe,adjusted_spx_price,26W Ann Rate of Chage
0,2016-11-04,AA,0.46,88.36,2085.18,1.7989,17.5368,208.518,
1,2016-11-11,AA,0.54,99.9,2164.45,1.8668,18.2019,216.445,
2,2016-11-18,AA,0.57,99.9,2181.9,1.8845,18.333,218.19,
3,2016-11-25,AA,0.6,99.9,2213.35,1.9114,18.6008,221.335,
4,2016-12-02,AA,0.57,99.9,2191.95,1.8944,18.4122,219.195,


In [46]:
# First 39 'AA' rows: the first row computed by the rate-of-change loop is
# row 38 within each ticker.
calculation_price.loc[calculation_price['ticker'] == 'AA'].head(39)

Unnamed: 0,date,ticker,forward_ps,forward_pe,spx_price,spx_ps,spx_pe,adjusted_spx_price,26W Ann Rate of Chage
783,2016-11-04,AA,0.46,88.36,2085.18,1.7989,17.5368,208.518,
784,2016-11-11,AA,0.54,99.9,2164.45,1.8668,18.2019,216.445,
785,2016-11-18,AA,0.57,99.9,2181.9,1.8845,18.333,218.19,
786,2016-11-25,AA,0.6,99.9,2213.35,1.9114,18.6008,221.335,
787,2016-12-02,AA,0.57,99.9,2191.95,1.8944,18.4122,219.195,
788,2016-12-09,AA,0.58,91.87,2259.53,1.954,18.9763,225.953,
789,2016-12-16,AA,0.51,66.8,2258.07,1.9517,19.0324,225.807,
790,2016-12-23,AA,0.53,39.02,2263.79,1.9595,19.0984,226.379,
791,2016-12-30,AA,0.5,35.75,2238.83,1.9379,18.8895,223.883,
792,2017-01-06,AA,0.55,39.36,2276.98,1.8812,17.5122,227.698,


In [18]:
# Rolling mean computed per ticker, so a window never mixes the last row of
# one ticker with the first row of the next.
# NOTE(review): this test column is a 2-row mean of spx_price even though it
# is named '12W SMA P/E' - confirm which window and input are intended.
test.loc[:, '12W SMA P/E'] = (
    test.groupby('ticker')['spx_price']
    .transform(lambda prices: prices.rolling(window=2).mean())
)

In [19]:
"""
The '26W EMA Price' Column(a little question about the start index, can refer to the word document)
"""

#calculation_price = pd.DataFrame()
#for ticker in ticker_list:
    #sub_data = with_spx_final.query("ticker == @ticker").copy().reset_index(drop=True)
    #sub_data.loc[25,'26W EMA Price'] = 0
    #for n in range(26, len(sub_data)):
        #sub_data.loc[n,'26W EMA Price'] = (sub_data.loc[n,'relative_price'] - sub_data.loc[n-1,'26W EMA Price'])* 2 /27 + sub_data.loc[n-1,'26W EMA Price']
        #calculation_price = calculation_price.append(sub_data, ignore_index=True)

"\nThe '26W EMA Price' Column\n"

In [20]:
"""
The '12W SMA P/E' Column
"""

#calculation_price2 = pd.DataFrame()
#for ticker in ticker_list:
    #sub_data = calculation_price.query("ticker == @ticker").copy().reset_index(drop=True)
    #sub_data.loc[:,'12W SMA P/E'] = sub_data.forward_pe.rolling(window=13).mean()
    #calculation_price2 = calculation_price2.append(sub_data, ignore_index=True)

"\nThe '12W SMA P/E' Column\n"

In [21]:
"""
The '26W Ann Rate of Change' Column
"""

# calculation_price3 = pd.DataFrame()
#for ticker in ticker_list:
    #sub_data = calculation_price2.query("ticker == @ticker").copy().reset_index(drop=True)
    #for n in range(38, len(sub_data)):
        #sub_data.loc[n,'26W Ann Rate of Change'] = (sub_data.loc[n,'12W SMA P/E']/sub_data.loc[(n-26),'12W SMA P/E'] - 1) * 2
        #calculation_price3 = calculation_price3.append(sub_data, ignore_index=True)

"\nThe '26W Ann Rate of Change' Column\n"

In [22]:
"""
The '13W EMA Ann Rate of Change' Column
"""

#calculation_price4 = pd.DataFrame()
#for ticker in ticker_list:
    #sub_data = calculation_price3.query("ticker == @ticker").copy().reset_index(drop=True)
    #sub_data.loc[51, '13W EMA Ann Rate of Change'] = np.mean(sub_data.loc[38:51, '26W Ann Rate of Change'])
    #for n in range(52, len(sub_data)):
        # sub_data.loc[n,'13W EMA Ann Rate of Change'] = (sub_data.loc[n,'26W Ann Rate of Change']-
                                                        #sub_data.loc[n-1,'13W EMA Ann Rate of Change'])* 2 /14 + sub_data.loc[(n-1),'13W EMA Ann Rate of Change']
        #calculation_price4 = calculation_price4.append(sub_data, ignore_index=True)

"\nThe '13W EMA Ann Rate of Change' Column\n"

In [23]:
"""
The '8W SMA p/s' Column
"""

#calculation_price5 = pd.DataFrame()
#for ticker in ticker_list:
    #sub_data = calculation_price4.query("ticker == @ticker").copy().reset_index(drop=True)
    #sub_data.loc[:,'8W SMA p/s'] = sub_data.forward_ps.rolling(window=9).mean()
    #calculation_price5 = calculation_price5.append(sub_data, ignore_index=True)

"\nThe '8W SMA p/s' Column\n"

In [24]:
"""
The '26W Ann Rate Change P/S' Column
"""

#calculation_price6 = pd.DataFrame()
#for ticker in ticker_list:
    #sub_data = calculation_price5.query("ticker == @ticker").copy().reset_index(drop=True)
    #for n in range(34, len(sub_data)):
        #sub_data.loc[n, '26W Ann Rate Change P/S'] = ((sub_data.loc[n,'8W SMA p/s']/sub_data.loc[(n-26),'8W SMA p/s'])- 1) * 2
        #calculation_price6 = calculation_price6.append(sub_data, ignore_index=True)

"\nThe '26W Ann Rate Change P/S' Column\n"

In [25]:
"""
The '13W EMA P/S ROC' Column
"""

#calculation_price7 = pd.DataFrame()
#for ticker in ticker_list:
    #sub_data = calculation_price6.query("ticker == @ticker").copy().reset_index(drop=True)
    #sub_data.loc[47, '13W EMA P/S ROC'] = np.mean(sub_data.loc[34:47,'26W Ann Rate Change P/S'])
    #for n in range(48, len(sub_data)):
        #sub_data.loc[n,'13W EMA P/S ROC'] = (sub_data.loc[n,'26W Ann Rate Change P/S']-
                                                        #sub_data.loc[n-1,'13W EMA P/S ROC'])*2/14 + sub_data.loc[(n-1),'13W EMA P/S ROC']
        #calculation_price7 = calculation_price7.append(sub_data, ignore_index=True)

"\nThe '13W EMA P/S ROC' Column\n"

In [26]:
"""
The '8W SMA Price' Column  
""" 
 
#calculation_price8 = pd.DataFrame()
#for ticker in ticker_list:
    #sub_data = calculation_price7.query("ticker == @ticker").copy().reset_index(drop=True)
    #sub_data.loc[:,'8W SMA Price'] = sub_data.relative_price.rolling(window=9).mean()
    #calculation_price8 = calculation_price8.append(sub_data, ignore_index=True) 

"\nThe '8W SMA Price' Column\n"

In [27]:
"""  
The '26W Ann Rate Change Price' Column
"""

#calculation_price9 = pd.DataFrame()
#for ticker in ticker_list:
    #sub_data = calculation_price8.query("ticker == @ticker").copy().reset_index(drop=True)
    #for n in range(34, len(sub_data)):
        #sub_data.loc[n, '26W Ann Rate Change Price'] = ((sub_data.loc[n,'8W SMA Price']/sub_data.loc[(n-26),'8W SMA Price'])-1) * 2
        #calculation_price9 = calculation_price9.append(sub_data, ignore_index=True)

"\nThe '26W Ann Rate Change Price' Column\n"

In [28]:
"""
The '13W EMA Price ROC' Column
"""

#calculation_price10 = pd.DataFrame()
#for ticker in ticker_list:
    #sub_data = calculation_price9.query("ticker == @ticker").copy().reset_index(drop=True)
    #sub_data.loc[47, '13W EMA Price ROC'] = np.mean(sub_data.loc[34:47,'26W Ann Rate Change Price'])
    #for n in range(48, len(sub_data)):
        #sub_data.loc[n,'13W EMA Price ROC'] = (sub_data.loc[n,'26W Ann Rate Change Price']-
                                                        #sub_data.loc[n-1,'13W EMA Price ROC'])* 2/14 + sub_data.loc[(n-1),'13W EMA Price ROC']
        #calculation_price10 = calculation_price10.append(sub_data, ignore_index=True)

"\nThe '13W EMA Price ROC' Column\n"

In [29]:
"""
The '12W EMA Relative Price' Column
"""

#calculation_price11 = pd.DataFrame()
#for ticker in ticker_list:
    #sub_data = calculation_price10.query("ticker == @ticker").copy().reset_index(drop=True)
    # sub_data.loc[12,'12W EMA Relative Price'] = 0
    #for n in range(13, len(sub_data)):
        #sub_data.loc[n,'12W EMA Relative Price'] = (sub_data.loc[n,'relative_price'] - 
                                                    #sub_data.loc[n-1,'12W EMA Relative Price'])*2/13 + sub_data.loc[n-1,'12W EMA Relative Price']
        #calculation_price11 = calculation_price11.append(sub_data, ignore_index=True)

"\nThe '12W EMA Relative Price' Column\n"

In [30]:
"""
The 'MACD' Column
"""

#calculation_price12 = pd.DataFrame()
#for ticker in ticker_list:
    #sub_data = calculation_price11.query("ticker == @ticker").copy().reset_index(drop=True)
    #for n in range(26, len(sub_data)):
        #sub_data.loc[n,'MACD'] = sub_data.loc[n,'12W EMA Relative Price'] - sub_data.loc[n,'26W EMA Price']
        #calculation_price12 = calculation_price12.append(sub_data, ignore_index=True)

"\nThe 'MACD' Column(a little question about the start index)\n"

In [31]:
"""
The '9W SMA MACD' Column
"""

#calculation_price13 = pd.DataFrame()
#for ticker in ticker_list:
    #sub_data = calculation_price12.query("ticker == @ticker").copy().reset_index(drop=True)
    # sub_data.loc[35,'9W SMA MACD'] = np.mean(sub_data.loc[26:35,'MACD'])
    #for n in range(36, len(sub_data)):
        #sub_data.loc[n,'9W SMA MACD'] = (sub_data.loc[n,'MACD'] - 
                                                    #sub_data.loc[n-1,'9W SMA MACD'])* 2/10 + sub_data.loc[n-1,'9W SMA MACD']
        #calculation_price13 = calculation_price13.append(sub_data, ignore_index=True)

"\nThe '9W SMA MACD' Column\n"

In [33]:
# 'Slope +' is 1 when spx_ps rose versus the previous row of the same ticker,
# otherwise 0. The first 47 rows of each ticker are left as NaN.
# The difference and the start offset are taken per ticker, so the comparison
# never spans two tickers and does not rely on `test` having a 0..n-1 index.
# NOTE(review): the previous row loop ran over the whole frame without
# grouping - confirm that per-ticker is the intended behaviour.
ps_change = test.groupby('ticker')['spx_ps'].diff()
row_in_ticker = test.groupby('ticker').cumcount()
test['Slope +'] = (ps_change > 0).astype(float).where(row_in_ticker >= 47)

In [34]:
# First five rows of the test frame, including the new columns.
test.head(n=5)

Unnamed: 0,date,ticker,forward_ps,forward_pe,spx_price,spx_ps,spx_pe,adjusted_spx_price,26W Ann Rate of Chage,12W SMA P/E,Slope +
0,2004-08-06,A,1.35,15.26,1063.97,1.4408,15.9835,106.397,,,
1,2004-08-13,A,1.27,14.23,1064.8,1.443,16.0194,106.48,,1064.385,
2,2004-08-20,A,1.37,15.32,1098.35,1.4881,16.4891,109.835,,1081.575,
3,2004-08-27,A,1.33,14.84,1107.77,1.4989,16.6189,110.777,,1103.06,
4,2004-09-03,A,1.25,13.89,1113.63,1.505,16.7029,111.363,,1110.7,


In [35]:
# 1 where spx_ps exceeds 1.5, otherwise 0. A missing value compares False and
# therefore maps to 0.
test.loc[:, 'P/S -.5'] = (test['spx_ps'] > 1.5).astype(int)

In [36]:
# 'difference' is the moving-average column minus spx_pe.
# 'P/S Above MA' is 1 where that difference is positive and 0 where it is not.
# It is NaN where the difference is undefined, for example on the leading rows
# of the rolling mean.
test.loc[:, 'difference'] = test['12W SMA P/E'] - test['spx_pe']
# The flag and its NaN mask are built in one vectorized step, replacing the
# element-wise apply and the per-row NaN loop.
test.loc[:, 'P/S Above MA'] = (
    (test['difference'] > 0).astype(float).where(test['difference'].notna())
)
# test = test.drop(['difference'], axis =1)

In [48]:
# First five rows of the test frame.
test.head(n=5)

Unnamed: 0,date,ticker,forward_ps,forward_pe,spx_price,spx_ps,spx_pe,adjusted_spx_price
0,2004-08-06,A,1.35,15.26,1063.97,1.4408,15.9835,106.397
1,2004-08-13,A,1.27,14.23,1064.8,1.443,16.0194,106.48
2,2004-08-20,A,1.37,15.32,1098.35,1.4881,16.4891,109.835
3,2004-08-27,A,1.33,14.84,1107.77,1.4989,16.6189,110.777
4,2004-09-03,A,1.25,13.89,1113.63,1.505,16.7029,111.363


In [32]:
# Some signals Calculation

In [38]:
"""
The 'P/S -.5' Column
"""

#calculation_price13['P/S -.5'] = calculation_price13['26W Ann Rate Change P/S'].apply(lambda x:1 if x<-0.4999 else 0)
#idx = calculation_price13[calculation_price13['26W Ann Rate Change P/S'].isnull()].index
#for i in idx:
    # calculation_price14.loc[i,'P/S -.5'] = np.nan

"\nThe 'P/S -.5' Column\n"

In [39]:
"""
The 'Slope +' Column(a little question about the start index, can refer to the word document)
"""

#calculation_price14 = pd.DataFrame()
#for ticker in ticker_list:
    #sub_data = calculation_price13.query("ticker == @ticker").copy().reset_index(drop=True)
    #for n in range(47, len(sub_data)):
        #if sub_data.loc[n,'26W Ann Rate Change P/S'] - sub_data.loc[n-1,'26W Ann Rate Change P/S'] > 0:
            #sub_data.loc[n, 'Slope +' ] = 1
        #else:
            #sub_data.loc[n, 'Slope +' ] = 0
    #calculation_price14 = calculation_price14.append(sub_data, ignore_index=True)

"\nThe 'Slope +'' Column(a little question about the start index)\n"

In [40]:
"""
The 'P/S Above MA?'' Column
"""

#calculation_price14['difference'] = calculation_price14['26W Ann Rate Change P/S'] - calculation_price14['13W EMA P/S ROC']
#calculation_price14['P/S Above MA?'] = calculation_price14['difference'].apply(lambda x: 1 if x > 0 else 0)
#idx = calculation_price14[calculation_price14['13W EMA P/S ROC'].isnull()].index
#for i in idx:
    #calculation_price14.loc[i,'P/S Above MA?'] = np.nan
#calculation_price14 = calculation_price14.drop(['difference'], axis =1)

"\nThe 'P/S Above MA?'' Column\n"

In [41]:
"""
The 'MACD Above MA?'  Column(a little question about the start index, can refer to the word document)
"""

#calculation_price14['difference'] = calculation_price14['MACD'] - calculation_price14['9W SMA MACD']
#calculation_price14['MACD Above MA?'] = calculation_price14['difference'].apply(lambda x: 1 if x > 0 else 0)
#idx = calculation_price14[calculation_price14['9W SMA MACD'].isnull()].index
#for i in idx:
    #calculation_price14.loc[i,'MACD Above MA?'] = np.nan
#calculation_price14 = calculation_price14.drop(['difference'], axis =1)

"\nThe 'MACD Above MA?''  Column(a little question about the start index)\n"

In [42]:
"""
The 'P/E -.5'  Column(a little question about the start index, can refer to the word document)
"""

#calculation_price14['P/E -.5'] = calculation_price14['26W Ann Rate of Change'].apply(lambda x:1 if x<-0.4999 else 0)
#idx = calculation_price14[calculation_price14['26W Ann Rate of Change'].isnull()].index
#for i in idx:
    # calculation_price14.loc[i,'P/E -.5'] = np.nan

"\nThe 'P/E -.5'  Column\n"

In [43]:
"""
The 'Slope.2 +' Column(a little question about the start index, can refer to the word document)
"""

#calculation_price15 = pd.DataFrame()
#for ticker in ticker_list:
    #sub_data = calculation_price14.query("ticker == @ticker").copy().reset_index(drop=True)
    #for n in range(38, len(sub_data)):
        #if sub_data.loc[n,'26W Ann Rate of Change'] - sub_data.loc[n-1,'26W Ann Rate of Change'] > 0:
            #sub_data.loc[n, 'Slope.2 +' ] = 1
        #else:
            #sub_data.loc[n, 'Slope.2 +' ] = 0
    #calculation_price15 = calculation_price15.append(sub_data, ignore_index=True)

"\nThe 'Slope.2 +'' Column(a little question about the start index)\n"

In [44]:
"""
The 'P/E Above MA?' Column(a little question about the start index, can refer to the word document)
"""

#calculation_price15['difference'] = calculation_price15['26W Ann Rate of Change'] - calculation_price15['13W EMA Ann Rate of Change']
#calculation_price15['P/E Above MA?'] = calculation_price15['difference'].apply(lambda x: 1 if x > 0 else 0)
#idx = calculation_price15[calculation_price15['13W EMA Ann Rate of Change'].isnull()].index
#for i in idx:
    #calculation_price15.loc[i,'P/E Above MA?'] = np.nan
#calculation_price15 = calculation_price15.drop(['difference'], axis =1)

"\nThe 'P/E Above MA?' Column\n"

In [45]:
"""
The 'Price Momentum Above MA?' Column
"""

#calculation_price15['difference'] = calculation_price15['26W Ann Rate Change Price'] - calculation_price15['13W EMA Price ROC']
#calculation_price15['The Price Momentum Above MA?'] = calculation_price15['difference'].apply(lambda x: 1 if x > 0 else 0)
#idx = calculation_price15[calculation_price15['13W EMA Price ROC'].isnull()].index
#for i in idx:
    #calculation_price15.loc[i,'The Price Momentum Above MA?'] = np.nan
#calculation_price15 = calculation_price15.drop(['difference'], axis =1)

"\nThe 'Price Momentum Above MA?'' Column\n"

## Read in Price Data

In [None]:
# Daily prices: sort by date then ticker, renumber the index, and keep only
# the three columns used for the merge.
price = (
    pd.read_csv('daily_price.csv', parse_dates=['date'])
    .sort_values(["date", "ticker"])
    .reset_index(drop=True)
    [['ticker', 'date', 'adj_close']]
)
price.head()

In [None]:
# First five rows of the price table.
price.head(n=5)

In [None]:
# Attach the adjusted close to each estimate row; a (ticker, date) pair with
# no price row gets NaN.
with_price = pd.merge(pe_ps, price, how='left', on=['ticker', 'date'])

In [None]:
# Number of distinct tickers after the merge.
with_price['ticker'].nunique()

In [None]:
# Missing values per column after the price merge.
with_price.isna().sum()

In [None]:
# Total number of rows in the merged frame.
with_price.shape[0]

In [None]:
# How many distinct dates have at least one row without a price.
# dropna=False mirrors len(unique()), which would count a missing date too.
with_price.loc[with_price['adj_close'].isna(), 'date'].nunique(dropna=False)

In [None]:
# Distinct dates with a missing price, reduced to their 'MM-DD' part:
# characters 5-9 of the ISO timestamp string.
dates = with_price.loc[with_price['adj_close'].isna(), 'date'].unique()
mon_day = []
for stamp in dates:
    mon_day.append(str(stamp)[5:10])

In [None]:
# Number of month-day strings, one per distinct date with a missing price.
len(mon_day)

In [None]:
# Rows for ticker 'AAN' that have no adjusted close.
with_price.loc[with_price['adj_close'].isna() & with_price['ticker'].eq('AAN')]

In [None]:
# Number of distinct tickers with at least one missing price.
with_price.loc[with_price['adj_close'].isna(), 'ticker'].nunique()

In [None]:
# Gap check: for each ticker, the number of days from each row's date to that
# ticker's next date. A ticker's last row has no next date, so its gap is NaN.
with_price_test = (
    with_price
    .copy()
    .sort_values(by=['ticker', 'date'])
    .reset_index(drop=True)
)
with_price_test['next_date'] = with_price_test.groupby('ticker')['date'].shift(-1)
with_price_test['date_gap'] = (with_price_test['next_date'] - with_price_test['date']).dt.days

In [None]:
# Rows whose gap to the next observation is defined but not exactly 7 days,
# largest gaps first.
(
    with_price_test
    .loc[with_price_test['date_gap'].notna() & with_price_test['date_gap'].ne(7)]
    .sort_values(by='date_gap', ascending=False)
    .head()
)