In [3]:
#!pip install pandas-datareader

In [7]:
import numpy as np
import pandas as pd
import pandas_datareader
import datetime as dt
import os

PATH = os.getcwd()

In [8]:
#Grab sentiments
#Big-cap daily sentiments
d_big_sent_all = pd.read_csv(os.path.join(PATH, 'dataSent12_21Good', 'USbig_Sent12_21.csv'))
d_big_sent_all['date'] = pd.to_datetime(d_big_sent_all['date']).dt.date
#Mid-cap
d_mid_sent_all = pd.read_csv(os.path.join(PATH, 'dataSent12_21Good', 'USmed_Sent12_21.csv'))
d_mid_sent_all['date'] = pd.to_datetime(d_mid_sent_all['date']).dt.date
#Small-caps
d_small_sent_all = pd.read_csv(os.path.join(PATH, 'dataSent12_21Good', 'USsmall_Sent12_21.csv'))
d_small_sent_all['date'] = pd.to_datetime(d_small_sent_all['date']).dt.date

## Sentiment Indicators

In [9]:
## The sentiment dataset in the 'dataSent12_21Good' folder is in long format, with each column being a different sentiment indicator.
# We create a table for each sentiment indicator, and pivot them to into familiar wide format:
# Each table will align all stocks by date and those which has no data for given date will be fill with NaN

#Make a table for each sentiment indicator
d_big_sent_tables = {}
for i in d_big_sent_all.columns[2:]:   
    d_big_sent_pivot = d_big_sent_all.pivot(index="date", columns="stock", values= i)
    d_big_sent_pivot.index = pd.to_datetime(d_big_sent_pivot.index)
    d_big_sent_tables['d_big_'+i] = d_big_sent_pivot
    d_big_sent_pivot.to_csv(os.path.join(PATH,'Tables','d_big_{}.csv'.format(i))) #Store in csv format in the 'Tables' folder

print('List of tables created: ',d_big_sent_tables.keys())
print('RCV Table:')
d_big_sent_tables['d_big_RCV'].head()

List of tables created:  dict_keys(['d_big_RCV', 'd_big_RVT', 'd_big_positivePartscr', 'd_big_negativePartscr', 'd_big_splogscr', 'd_big_linscr'])
RCV Table:


stock,AAL,AAPL,ABBV,ABC,ABT,ADP,AIG,AMD,AMZN,AXP,...,UAL,UNH,UPS,USB,V,VZ,WFC,WMT,WY,XOM
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2012-01-02,,,,,,,,,,,...,0.0,,,,,,,,,0.0
2012-01-03,,0.0,,0.0,0.0,,,,,,...,2.632,,,0.0,,0.0,0.0,0.0,,33.333
2012-01-04,0.0,41.667,,44.444,-14.286,0.0,0.0,0.0,0.0,0.0,...,47.692,0.0,0.0,30.0,0.0,22.222,0.0,37.5,0.0,43.478
2012-01-05,25.0,45.455,,0.0,33.333,38.889,13.333,-7.692,4.167,-9.091,...,38.571,0.0,25.0,57.333,38.462,49.383,51.852,48.148,-20.0,38.889
2012-01-06,46.666,45.395,,-33.333,-73.333,57.384,-60.0,-43.75,41.935,-35.714,...,26.25,33.335,-57.142,27.941,-28.571,14.706,36.765,54.412,,20.0


In [10]:
#Make a table for each sentiment indicator for medium sized companies
d_mid_sent_tables = {}
for i in d_mid_sent_all.columns[2:]:   
    d_mid_sent_pivot = d_mid_sent_all.pivot(index="date", columns="stock", values= i)
    d_mid_sent_pivot.index = pd.to_datetime(d_mid_sent_pivot.index)
    d_mid_sent_tables['d_big_'+i] = d_mid_sent_pivot
    d_mid_sent_pivot.to_csv(os.path.join(PATH,'Tables','d_mid_{}.csv'.format(i))) #Store in csv format in the 'Tables' folder

#Make a table for each sentiment indicator
d_small_sent_tables = {}
for i in d_small_sent_all.columns[2:]:   
    d_small_sent_pivot = d_small_sent_all.pivot(index="date", columns="stock", values= i)
    d_small_sent_pivot.index = pd.to_datetime(d_small_sent_pivot.index)
    d_small_sent_tables['d_big_'+i] = d_small_sent_pivot
    d_small_sent_pivot.to_csv(os.path.join(PATH,'Tables','d_small_{}.csv'.format(i))) #Store in csv format in the 'Tables' folder

In [11]:
def to_weekly_sent(dataframe, tolerance=2, operation = 'mean'):
    '''
    Function to aggregate sentiment data by week
    Can be modified to accept more or less NaN's in a week, default is 2.

    Returns a dataframe
    '''
    df = dataframe.copy()
    df['W_Date'] = pd.to_datetime(df.index)
    if operation == 'mean':
        df = df.groupby(pd.Grouper(key='W_Date', freq='W-SUN')).agg(lambda x: x.mean() if (x.isnull().sum() <= tolerance) else np.NaN).reset_index().sort_values('W_Date')
    elif operation == 'max':
        df = df.groupby(pd.Grouper(key='W_Date', freq='W-SUN')).agg(lambda x: x.max() if (x.isnull().sum() <= tolerance) else np.NaN).reset_index().sort_values('W_Date')
    elif operation == 'min':
        df = df.groupby(pd.Grouper(key='W_Date', freq='W-SUN')).agg(lambda x: x.min() if (x.isnull().sum() <= tolerance) else np.NaN).reset_index().sort_values('W_Date')
    df = pd.DataFrame(df.set_index('W_Date'))
    df.index.names = ['Date'] #Rename index
    return df

In [12]:
#Make each daily sentiment table weekly
big_sent_tables = {}
for i, d_t_name in enumerate(d_big_sent_tables): #Take the same columns as the daily table
    w_t_name = d_big_sent_all.columns[2:][i]
    w_sent = to_weekly_sent(d_big_sent_tables[d_t_name] , tolerance=2, operation = 'mean')
    big_sent_tables['big_'+ w_t_name] = w_sent
    w_sent.to_csv(os.path.join(PATH,'Tables','big_{}.csv'.format(w_t_name))) #Store in csv format in the 'Tables' folder

print('List of weekly tables created: ',big_sent_tables.keys())
print('RCV Table:')
big_sent_tables['big_RCV'].head()

List of weekly tables created:  dict_keys(['big_RCV', 'big_RVT', 'big_positivePartscr', 'big_negativePartscr', 'big_splogscr', 'big_linscr'])
RCV Table:


stock,AAL,AAPL,ABBV,ABC,ABT,ADP,AIG,AMD,AMZN,AXP,...,UAL,UNH,UPS,USB,V,VZ,WFC,WMT,WY,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2012-01-08,8.476,13.162167,,0.404,-24.6072,,,,-3.37,,...,12.493714,,-9.7618,8.849167,,8.0314,7.3165,33.6786,,1.957143
2012-01-15,,11.965,,-4.760143,-3.706,,-11.584333,16.8346,-18.815571,-10.443167,...,12.935143,11.4355,-1.28,-17.410857,-33.0858,-10.427667,21.078333,15.6566,,-18.02
2012-01-22,9.9768,-10.776667,,-17.240429,-22.233833,,-10.8984,,15.001167,0.860833,...,2.058714,17.2168,-8.6162,-24.0268,,-31.776,-0.7585,-14.170714,,-19.522714
2012-01-29,-20.94,-9.334,,11.74,-8.1658,-42.6148,-22.226,,-7.894167,-33.329167,...,9.101,,22.8662,-52.884,,8.221,-20.2015,0.0368,,6.625571
2012-02-05,,-33.438857,,-2.313,-30.0204,-39.871,-18.9598,-30.327667,-3.668714,-20.110167,...,-5.529429,-36.037833,4.4648,-42.687167,14.249667,-47.767143,-7.9482,-45.108167,-28.786,8.348429


## Stock Price Data - Log Retruns and Lag variables

In [34]:
start_date = (d_big_sent_tables['d_big_RCV'].index[0]-dt.timedelta(days=31)).strftime('%Y-%m-%d') #Set Start date to 31 lag from 1st sentiment data to compute 1-month leading return as feature 
#Sentiment goes up until Wed,2021-12-01. Since we only have data for 2 days of that week, we will drop it. Thus, we need return data up until the end of that week Sun,2021-12-05 
end_date = (d_big_sent_tables['d_big_RCV'].index[-1]+dt.timedelta(days=4)).strftime('%Y-%m-%d') 

# User pandas_reader to load the desired data in a weekly format. As simple as that.
abbv = pandas_datareader.yahoo.daily.YahooDailyReader(['ABBV'], interval='d', start=start_date, end=end_date).read()['Adj Close'] 
abbv = pd.DataFrame(abbv)
#d_big_panel_data.to_csv(os.path.join(PATH,'Tables','d_big_prices.csv')) #Store in csv format in the 'Tables' folder
abbv

Symbols,ABBV
Date,Unnamed: 1_level_1
2013-01-02,23.783297
2013-01-03,23.58691
2013-01-04,23.288944
2013-01-07,23.336355
2013-01-08,22.828442


In [35]:
abbv

Symbols,ABBV
Date,Unnamed: 1_level_1
2013-01-02,23.783297
2013-01-03,23.586910
2013-01-04,23.288944
2013-01-07,23.336355
2013-01-08,22.828442
...,...
2021-11-29,114.714546
2021-11-30,113.134506
2021-12-01,113.752792
2021-12-02,114.694923


In [41]:
# Define the instruments to download. We would like to see Apple, Microsoft and the S&P500 index.
big_tickers = d_big_sent_tables['d_big_RCV'].columns

# We would like all available data from 2012/01/02 until 2021/12/01.
start_date = (d_big_sent_tables['d_big_RCV'].index[0]-dt.timedelta(days=31)).strftime('%Y-%m-%d') #Set Start date to 31 lag from 1st sentiment data to compute 1-month leading return as feature 
#Sentiment goes up until Wed,2021-12-01. Since we only have data for 2 days of that week, we will drop it. Thus, we need return data up until the end of that week Sun,2021-12-05 
end_date = (d_big_sent_tables['d_big_RCV'].index[-1]+dt.timedelta(days=4)).strftime('%Y-%m-%d') 

# User pandas_reader to load the desired data in a weekly format. As simple as that.
d_big_panel_data = pandas_datareader.yahoo.daily.YahooDailyReader(big_tickers, interval='d', start=start_date, end=end_date).read()['Adj Close'] 
d_big_panel_data = pd.DataFrame(d_big_panel_data)
#d_big_panel_data.to_csv(os.path.join(PATH,'Tables','d_big_prices.csv')) #Store in csv format in the 'Tables' folder
d_big_panel_data.head()

Symbols,AAL,AAPL,ABBV,ABC,ABT,ADP,AIG,AMD,AMZN,AXP,...,UAL,UNH,UPS,USB,V,VZ,WFC,WMT,WY,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2011-12-02,4.562868,11.900019,,31.348175,21.009624,35.238888,19.103489,5.65,196.029999,41.508991,...,19.26,40.957939,52.573318,19.492741,22.572746,23.860762,19.417192,45.592926,11.392083,52.3913
2011-12-05,4.826837,12.001092,,31.009373,21.071701,35.78154,19.424904,5.76,196.240005,41.982338,...,19.84,41.130913,53.224277,19.659475,22.308006,23.986839,19.91621,45.789154,11.589082,52.824661
2011-12-06,4.949393,11.938188,,30.865381,21.160942,35.815876,19.82873,5.66,191.990005,41.793007,...,20.129999,41.147957,53.113342,19.864101,22.147774,24.157053,19.84918,46.134487,11.602669,53.067593
2011-12-07,5.269926,11.88139,,30.992435,21.223017,36.110046,20.158382,5.72,195.320007,42.309395,...,20.690001,42.042667,54.230328,20.015684,22.542559,24.150751,20.147104,46.209515,11.724944,53.238323
2011-12-08,5.03424,11.929333,,30.729855,21.021267,35.722443,19.045801,5.47,190.479996,41.147511,...,19.709999,41.037189,53.276043,19.439693,22.247633,23.835552,19.506567,45.790936,11.419252,52.443813


In [43]:
# Weekly panel data
big_panel_data = d_big_panel_data.copy()
big_panel_data['W_Date'] = pd.to_datetime(big_panel_data.index)
big_panel_data = big_panel_data.groupby(pd.Grouper(key='W_Date', freq='W-SUN', label= 'left', sort=True)).agg(lambda x: x[-1]).reset_index().sort_values('W_Date')
#Notice that label='left' showing the label of previous Sunday so that the returns have the same index as the Sentiment that will predict them
big_panel_data = pd.DataFrame(big_panel_data.set_index('W_Date'))
big_panel_data.index.names = ['Date'] #Rename index

#Log return:
big_log_ret = np.log(big_panel_data) - np.log(big_panel_data.shift(1))
big_log_ret.drop(index= big_log_ret.index[0], axis = 0, inplace=True) #Drop the 1st row as it's undefined
big_log_ret.to_csv(os.path.join(PATH,'Tables','big_log_ret.csv')) #Store in csv format in the 'Tables' folder

#Lagged log return:
#Lag 1:
big_log_ret.shift(1).to_csv(os.path.join(PATH,'Tables','big_lag1_log_ret.csv')) #Store in csv format in the 'Tables' folder
#Lag 4:
big_log_ret.shift(4).to_csv(os.path.join(PATH,'Tables','big_lag4_log_ret.csv')) #Store in csv format in the 'Tables' folder

#Lagged MONTHLY log return:
#Lag 1:
big_lag1_month_log_ret = big_log_ret.shift(1)+big_log_ret.shift(2)+big_log_ret.shift(3)+big_log_ret.shift(4)
big_lag1_month_log_ret.to_csv(os.path.join(PATH,'Tables','big_lag1_month_log_ret.csv')) #Store in csv format in the 'Tables' folder

big_log_ret.head(6)

Symbols,AAL,AAPL,ABBV,ABC,ABT,ADP,AIG,AMD,AMZN,AXP,...,UAL,UNH,UPS,USB,V,VZ,WFC,WMT,WY,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2011-12-04,0.149417,0.010008,,-0.017444,0.007726,0.030345,0.039337,-0.019661,-0.015422,0.011749,...,0.068231,0.017386,0.019231,0.02192,-0.000103,0.015207,0.031712,0.010181,0.02706,0.01924
2011-12-11,0.094986,-0.032534,,0.010394,0.005847,-0.014394,-0.031601,-0.076889,-0.062913,-0.040139,...,0.029625,0.001839,-0.009569,-0.011092,0.002569,0.009067,-0.035171,-0.000857,-0.031243,-0.014613
2011-12-18,-0.094986,0.056903,,0.018067,0.020377,0.049039,0.035327,0.053143,-0.022202,0.022568,...,-0.067682,0.046845,0.02355,0.055726,0.050431,0.030475,0.067349,0.02909,0.084363,0.061211
2011-12-25,-0.102991,0.004132,,-0.006165,0.003742,-0.005908,-0.0422,-0.00185,-0.023861,-0.0164,...,-0.050631,-0.013134,-0.003818,-0.011548,-0.009313,0.003495,-0.008311,-0.003842,0.027146,-0.005412
2012-01-01,0.099426,0.042066,,0.036953,-0.006602,0.01598,0.014549,0.00554,0.053483,0.026779,...,-0.035603,0.040601,0.003955,0.025549,-0.008109,-0.032718,0.048859,-0.012799,0.018573,0.004238
2012-01-08,0.06398,-0.00615,,-0.020683,0.000864,0.001275,0.058173,0.041485,-0.023212,0.030401,...,0.012009,-0.001517,0.009211,0.045094,0.000298,0.015275,0.022887,0.009111,0.061181,-0.002824


In [45]:
big_panel_data.describe()

Symbols,AAL,AAPL,ABBV,ABC,ABT,ADP,AIG,AMD,AMZN,AXP,...,UAL,UNH,UPS,USB,V,VZ,WFC,WMT,WY,XOM
count,523.0,523.0,466.0,523.0,523.0,523.0,523.0,523.0,523.0,523.0,...,523.0,523.0,523.0,523.0,523.0,523.0,523.0,523.0,523.0,523.0
mean,29.621758,45.819841,62.172579,75.770885,55.475852,100.001704,43.102142,23.584512,1218.356499,85.622379,...,53.682467,171.962315,97.87214,39.021908,105.412022,41.098041,39.550793,82.657379,24.275664,59.707245
std,13.323828,37.797341,23.960808,22.825159,29.545806,48.844464,9.880544,31.427245,1037.865112,30.603149,...,21.050069,109.521016,36.043033,9.703802,63.786527,9.212601,9.060521,29.459039,5.083398,8.558725
min,4.562868,11.63496,23.19615,30.69887,21.009624,35.238888,18.120354,1.67,173.100006,40.347111,...,17.780001,40.957939,52.573318,19.492741,22.570431,23.762419,19.350159,45.592926,11.344529,28.654726
25%,16.68,20.597323,43.078212,63.096085,33.733799,58.770519,37.35812,3.65,330.430008,64.766159,...,36.809999,71.654575,78.250076,32.789038,50.814415,34.413578,32.195522,61.351738,21.381495,57.45092
50%,31.548483,29.11076,56.255686,79.307777,41.128761,86.223244,45.967785,8.77,795.98999,78.830536,...,52.82,146.164902,90.655563,36.550842,79.161774,39.403206,42.303295,68.263275,24.077946,61.363258
75%,40.438002,50.908155,79.14897,88.5508,75.130054,137.869537,50.376778,29.27,1813.36499,100.464935,...,69.365002,246.822166,104.253983,46.580585,159.730553,50.700394,46.34333,101.099957,27.12883,65.341488
max,56.988728,161.396927,116.638062,126.305649,129.296783,235.679321,59.332623,155.410004,3719.340088,186.083435,...,96.699997,458.973022,212.720993,62.230099,247.840775,58.181309,58.148552,150.842651,37.384274,72.813065


## Ranking Tables

In [13]:
big_rankings = big_log_ret.rank(1, ascending=False, method='first')
#big_rankings.to_csv(os.path.join(PATH,'Tables','big_rankings.csv')) #Store in csv format in the 'Tables' folder

big_rankings.head()

Symbols,AAL,AAPL,ABC,ABT,ADP,AIG,AMD,AMZN,AXP,BAC,...,UAL,UNH,UPS,USB,V,VZ,WFC,WMT,WY,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2011-12-04,1.0,58.0,86.0,60.0,17.0,12.0,89.0,84.0,50.0,47.0,...,3.0,38.0,32.0,26.0,71.0,43.0,16.0,57.0,18.0,31.0
2011-12-11,1.0,69.0,14.0,20.0,42.0,68.0,93.0,88.0,76.0,97.0,...,6.0,25.0,37.0,40.0,24.0,16.0,72.0,28.0,67.0,43.0
2011-12-18,103.0,17.0,82.0,81.0,32.0,53.0,23.0,99.0,76.0,7.0,...,100.0,35.0,73.0,19.0,29.0,59.0,10.0,62.0,5.0,16.0
2011-12-25,104.0,17.0,52.0,18.0,50.0,99.0,36.0,91.0,80.0,55.0,...,102.0,73.0,41.0,69.0,60.0,21.0,57.0,42.0,3.0,48.0
2012-01-01,4.0,25.0,34.0,78.0,52.0,55.0,62.0,17.0,40.0,3.0,...,95.0,27.0,66.0,43.0,81.0,94.0,20.0,84.0,47.0,65.0


## Same procedure for mid and small-cap

In [14]:
#Mid companies
mid_sent_tables = {}
mid_sent_nan_tables = {}
for i in mid_sent_all.columns[2:]:   
    mid_sent_pivot = mid_sent_all.pivot(index="date", columns="stock", values= i)
    mid_sent_pivot.index = pd.to_datetime(mid_sent_pivot.index)
    mid_sent_tables['mid_'+i] = mid_sent_pivot
    mid_sent_pivot.to_csv(os.path.join(PATH,'Tables','mid_{}.csv'.format(i))) #Store in csv format in the 'Tables' folder
     
#Small companies
small_sent_tables = {}
small_sent_nan_tables = {}
for i in small_sent_all.columns[2:]:   
    small_sent_pivot = small_sent_all.pivot(index="date", columns="stock", values= i)
    small_sent_pivot.index = pd.to_datetime(small_sent_pivot.index)
    small_sent_tables['small_'+i] = small_sent_pivot
    small_sent_pivot.to_csv(os.path.join(PATH,'Tables','small_{}.csv'.format(i))) #Store in csv format in the 'Tables' folder

NameError: name 'mid_sent_all' is not defined

In [None]:
mid_tickers = mid_sent_tables['mid_RCV'].columns
small_tickers = small_sent_tables['small_RCV'].columns

In [None]:
#Same procedure for mid and small
#Mid-caps
mid_tickers = mid_sent_tables['mid_RCV'].columns
start_date = mid_sent_tables['mid_RCV'].index[0].strftime('%Y-%m-%d')
end_date = mid_sent_tables['mid_RCV'].index[-1].strftime('%Y-%m-%d')
mid_panel_data = pandas_datareader.yahoo.daily.YahooDailyReader(mid_tickers, interval='w', start=start_date, end=end_date).read()['Adj Close'] 
mid_panel_data = pd.DataFrame(mid_panel_data)
#mid_panel_data.to_csv(os.path.join(PATH,'Tables','mid_prices.csv')) #Store in csv format in the 'Tables' folder

In [None]:
#Small-caps
small_tickers = small_sent_tables['small_RCV'].columns
start_date = small_sent_tables['small_RCV'].index[0].strftime('%Y-%m-%d')
end_date = small_sent_tables['small_RCV'].index[-1].strftime('%Y-%m-%d')
small_panel_data = pandas_datareader.yahoo.daily.YahooDailyReader(small_tickers, interval='w', start=start_date, end=end_date).read()['Adj Close'] 
small_panel_data = pd.DataFrame(small_panel_data)
#small_panel_data.to_csv(os.path.join(PATH,'Tables','small_prices.csv')) #Store in csv format in the 'Tables' folder

In [None]:
#Same for mid and small-caps
#Log return:
mid_log_ret = np.log(mid_panel_data) - np.log(mid_panel_data.shift(1))
#mid_log_ret.to_csv(os.path.join(PATH,'Tables','mid_log_ret.csv')) #Store in csv format in the 'Tables' folder
small_log_ret = np.log(small_panel_data) - np.log(small_panel_data.shift(1))
#small_log_ret.to_csv(os.path.join(PATH,'Tables','small_log_ret.csv')) #Store in csv format in the 'Tables' folder

#Percentage change:
#mid_pct_change = mid_panel_data.pct_change()
#mid_pct_changet.to_csv(os.path.join(PATH,'Tables','mid_pct_change.csv'))
#small_pct_change = small_panel_data.pct_change()
#small_pct_changet.to_csv(os.path.join(PATH,'Tables','small_pct_change.csv'))

In [None]:
#Same for Mid and Small-caps
mid_rankings = mid_log_ret.rank(1, ascending=False, method='first')
#mid_rankings.to_csv(os.path.join(PATH,'Tables','mid_rankings.csv')) #Store in csv format in the 'Tables' folder
small_rankings = small_log_ret.rank(1, ascending=False, method='first')
#small_rankings.to_csv(os.path.join(PATH,'Tables','small_rankings.csv')) #Store in csv format in the 'Tables' folder