In [3]:
#!pip install pandas-datareader

In [2]:
import numpy as np
import pandas as pd
import pandas_datareader
import datetime as dt
import os

# Base directory for every data/table path built below: the process's current
# working directory at the moment this cell runs.
PATH = os.getcwd()

In [3]:
# Grab sentiments: one raw CSV per market-cap bucket from the 'dataSent12_21Good' folder.
def _read_sentiment_csv(file_name):
    """Read one sentiment CSV and reduce its 'date' column to plain ``datetime.date`` values."""
    frame = pd.read_csv(os.path.join(PATH, 'dataSent12_21Good', file_name))
    frame['date'] = pd.to_datetime(frame['date']).dt.date
    return frame


big_sent_all = _read_sentiment_csv('USbig_Sent12_21.csv')      # Big-cap
mid_sent_all = _read_sentiment_csv('USmed_Sent12_21.csv')      # Mid-cap
small_sent_all = _read_sentiment_csv('USsmall_Sent12_21.csv')  # Small-cap

## Sentiment Indicators

In [4]:
## The sentiment dataset in the 'dataSent12_21Good' folder is in long format, with each
# column holding a different sentiment indicator.
# We build one table per indicator and pivot it into the familiar wide format:
# rows are dates, columns are stocks, and a stock with no data on a given date gets NaN.

# Make sure the output folder exists first: DataFrame.to_csv does not create
# missing directories, so a fresh checkout would otherwise fail on the first write.
tables_dir = os.path.join(PATH, 'Tables')
os.makedirs(tables_dir, exist_ok=True)

#Make a table for each sentiment indicator
big_sent_tables = {}
big_sent_nan_tables = {}  # not filled in this cell; kept so the name stays defined
# The first two columns are skipped -- presumably 'date' and 'stock'; verify against the CSV header.
for i in big_sent_all.columns[2:]:
    big_sent_pivot = big_sent_all.pivot(index="date", columns="stock", values=i)
    # The 'date' column holds plain date objects; convert back to a DatetimeIndex.
    big_sent_pivot.index = pd.to_datetime(big_sent_pivot.index)
    big_sent_tables['big_'+i] = big_sent_pivot
    big_sent_pivot.to_csv(os.path.join(tables_dir, 'big_{}.csv'.format(i))) #Store in csv format in the 'Tables' folder

print('List of tables created: ',big_sent_tables.keys())
print('RCV Table:')
big_sent_tables['big_RCV'].head()

List of tables created:  dict_keys(['big_RCV', 'big_RVT', 'big_positivePartscr', 'big_negativePartscr', 'big_splogscr', 'big_linscr'])
RCV Table:


stock,AAL,AAPL,ABBV,ABC,ABT,ADP,AIG,AMD,AMZN,AXP,...,UAL,UNH,UPS,USB,V,VZ,WFC,WMT,WY,XOM
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2012-01-02,,,,,,,,,,,...,0.0,,,,,,,,,0.0
2012-01-03,,0.0,,0.0,0.0,,,,,,...,2.632,,,0.0,,0.0,0.0,0.0,,33.333
2012-01-04,0.0,41.667,,44.444,-14.286,0.0,0.0,0.0,0.0,0.0,...,47.692,0.0,0.0,30.0,0.0,22.222,0.0,37.5,0.0,43.478
2012-01-05,25.0,45.455,,0.0,33.333,38.889,13.333,-7.692,4.167,-9.091,...,38.571,0.0,25.0,57.333,38.462,49.383,51.852,48.148,-20.0,38.889
2012-01-06,46.666,45.395,,-33.333,-73.333,57.384,-60.0,-43.75,41.935,-35.714,...,26.25,33.335,-57.142,27.941,-28.571,14.706,36.765,54.412,,20.0


In [5]:
#Same procedure for mid and small-cap: one wide (dates x stocks) table per sentiment indicator

# Make sure the output folder exists first: DataFrame.to_csv does not create
# missing directories, so a fresh checkout would otherwise fail on the first write.
tables_dir = os.path.join(PATH, 'Tables')
os.makedirs(tables_dir, exist_ok=True)

#Mid companies
mid_sent_tables = {}
mid_sent_nan_tables = {}  # not filled in this cell; kept so the name stays defined
# The first two columns are skipped -- presumably 'date' and 'stock'; verify against the CSV header.
for i in mid_sent_all.columns[2:]:
    mid_sent_pivot = mid_sent_all.pivot(index="date", columns="stock", values=i)
    mid_sent_pivot.index = pd.to_datetime(mid_sent_pivot.index)  # plain dates -> DatetimeIndex
    mid_sent_tables['mid_'+i] = mid_sent_pivot
    mid_sent_pivot.to_csv(os.path.join(tables_dir, 'mid_{}.csv'.format(i))) #Store in csv format in the 'Tables' folder

#Small companies
small_sent_tables = {}
small_sent_nan_tables = {}  # not filled in this cell; kept so the name stays defined
for i in small_sent_all.columns[2:]:
    small_sent_pivot = small_sent_all.pivot(index="date", columns="stock", values=i)
    small_sent_pivot.index = pd.to_datetime(small_sent_pivot.index)  # plain dates -> DatetimeIndex
    small_sent_tables['small_'+i] = small_sent_pivot
    small_sent_pivot.to_csv(os.path.join(tables_dir, 'small_{}.csv'.format(i))) #Store in csv format in the 'Tables' folder

## Stock Price Data

In [6]:
# Ticker universe of each size bucket = the column labels of its wide RCV table.
big_tickers, mid_tickers, small_tickers = (
    big_sent_tables['big_RCV'].columns,
    mid_sent_tables['mid_RCV'].columns,
    small_sent_tables['small_RCV'].columns,
)

In [8]:
# Instruments to download: every big-cap ticker that has sentiment data, except ABBV.
# ABBV is excluded because, per the author's observation, its weekly series comes back
# starting on a Tuesday (reason unknown).
big_tickers = big_sent_tables['big_RCV'].columns.drop('ABBV')

# Download window = first and last date present in the big-cap RCV sentiment table.
big_rcv_dates = big_sent_tables['big_RCV'].index
start_date = big_rcv_dates[0].strftime('%Y-%m-%d')
end_date = big_rcv_dates[-1].strftime('%Y-%m-%d')

# Use pandas_datareader's Yahoo reader to fetch weekly data and keep the adjusted close.
big_reader = pandas_datareader.yahoo.daily.YahooDailyReader(
    big_tickers, interval='w', start=start_date, end=end_date
)
big_panel_data = pd.DataFrame(big_reader.read()['Adj Close'])
#big_panel_data.to_csv(os.path.join(PATH,'Tables','big_prices.csv')) #Store in csv format in the 'Tables' folder
big_panel_data.head()

Symbols,AAL,AAPL,ABC,ABT,ADP,AIG,AMD,AMZN,AXP,BAC,...,UAL,UNH,UPS,USB,V,VZ,WFC,WMT,WY,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2012-01-02,5.279352,12.972254,32.78595,21.759474,38.519989,19.50679,5.43,182.610001,41.661335,5.321596,...,18.209999,44.973885,54.356098,21.127947,23.387878,24.163364,21.554792,46.596504,12.920537,55.891048
2012-01-09,5.628166,12.892712,32.114777,21.591969,38.569134,20.675205,5.66,178.419998,43.107704,5.691868,...,18.43,44.905727,54.859089,22.102499,23.394844,24.854429,22.053812,47.022987,13.735711,55.733452
2012-01-16,6.005263,12.907758,34.060341,21.907923,39.860619,21.25527,6.42,190.929993,43.350266,6.087974,...,19.33,44.539322,55.791195,21.881701,23.357691,24.886356,22.746492,48.183941,14.027818,57.447227
2012-01-23,7.711625,13.736339,33.117283,21.617182,38.842865,20.923809,6.82,195.369995,43.185661,6.277415,...,23.09,43.47419,56.249825,21.211702,23.466835,23.762419,22.04637,47.947014,13.905539,56.357254
2012-01-30,9.088029,14.117155,33.329678,21.609324,38.962196,22.514845,7.08,187.679993,45.264812,6.75102,...,24.969999,43.721306,56.738041,22.231934,24.855572,24.164743,22.813524,48.98951,14.469367,55.759727


In [9]:
#Same procedure for mid and small
#Mid-caps: weekly adjusted close for every ticker in the mid-cap RCV table
mid_tickers = mid_sent_tables['mid_RCV'].columns

# Download window = first and last date present in the mid-cap RCV sentiment table.
mid_rcv_dates = mid_sent_tables['mid_RCV'].index
start_date = mid_rcv_dates[0].strftime('%Y-%m-%d')
end_date = mid_rcv_dates[-1].strftime('%Y-%m-%d')

mid_reader = pandas_datareader.yahoo.daily.YahooDailyReader(
    mid_tickers, interval='w', start=start_date, end=end_date
)
mid_panel_data = pd.DataFrame(mid_reader.read()['Adj Close'])
#mid_panel_data.to_csv(os.path.join(PATH,'Tables','mid_prices.csv')) #Store in csv format in the 'Tables' folder



In [10]:
#Small-caps: weekly adjusted close for every ticker in the small-cap RCV table
small_tickers = small_sent_tables['small_RCV'].columns

# Download window = first and last date present in the small-cap RCV sentiment table.
small_rcv_dates = small_sent_tables['small_RCV'].index
start_date = small_rcv_dates[0].strftime('%Y-%m-%d')
end_date = small_rcv_dates[-1].strftime('%Y-%m-%d')

small_reader = pandas_datareader.yahoo.daily.YahooDailyReader(
    small_tickers, interval='w', start=start_date, end=end_date
)
small_panel_data = pd.DataFrame(small_reader.read()['Adj Close'])
#small_panel_data.to_csv(os.path.join(PATH,'Tables','small_prices.csv')) #Store in csv format in the 'Tables' folder



In [156]:
# Per-ticker summary statistics of the weekly adjusted-close prices.
big_panel_data.describe()

Symbols,AAL,AAPL,ABC,ABT,ADP,AIG,AMD,AMZN,AXP,BAC,...,UAL,UNH,UPS,USB,V,VZ,WFC,WMT,WY,XOM
count,518.0,518.0,518.0,518.0,518.0,518.0,518.0,518.0,518.0,518.0,...,518.0,518.0,518.0,518.0,518.0,518.0,518.0,518.0,518.0,518.0
mean,29.853003,46.20852,76.403275,55.778054,100.56758,43.311936,23.769633,1228.443396,86.017446,20.245517,...,54.004903,172.67198,98.232929,38.866198,106.222788,40.722022,39.71623,82.847009,24.377418,59.714999
std,13.165709,37.902158,22.562542,29.474035,48.64867,9.6428,31.566222,1038.071789,30.405967,9.44748,...,20.889458,108.886416,35.902708,9.485844,63.517024,8.99389,8.890773,29.13635,4.951926,8.584846
min,5.279352,12.119164,30.683222,21.506247,35.50515,18.120352,1.67,178.419998,41.543427,5.321594,...,17.780001,43.536598,52.966915,20.957052,23.405643,23.475946,20.970749,46.49416,12.920536,28.654728
25%,17.079329,21.081754,64.1609,33.962798,59.090282,38.240262,3.65,332.880005,65.31468,13.154113,...,37.3525,72.228413,78.439589,32.618833,51.127288,34.041476,32.530491,61.721056,21.400329,57.597895
50%,31.701316,29.332281,79.77747,41.31975,87.409885,45.946999,9.84,809.265015,78.896275,19.514085,...,53.095001,147.995499,90.94566,36.268644,79.447773,38.961329,42.324793,68.720882,24.13257,61.392267
75%,40.459659,51.459453,88.960617,75.740543,138.991318,50.422192,29.515001,1816.377502,100.601908,26.946706,...,69.429998,246.156433,104.375635,46.20542,159.927593,50.045856,46.344869,102.208635,27.132939,65.256172
max,56.988731,164.560349,126.690041,127.927086,235.679306,59.332623,155.410004,3719.340088,186.083435,47.319759,...,96.699997,457.618835,212.720993,61.726746,247.840775,57.480377,58.148563,150.26149,37.384274,72.81308


## Log Returns

In [92]:
#Log return: ln(P_t) - ln(P_{t-1}) for every ticker, on the weekly price grid
big_log_prices = np.log(big_panel_data)
big_log_ret = big_log_prices - big_log_prices.shift(1)
# Move every date label back by 7 calendar days, so each return is labelled with
# the date of the earlier of the two prices it is computed from.
big_log_ret.index = big_log_ret.index.shift(-7, freq='D')
# The first row has no previous price (all NaN): remove it by label.
big_log_ret = big_log_ret.drop(index=big_log_ret.index[0])
#big_log_ret.to_csv(os.path.join(PATH,'Tables','big_log_ret.csv')) #Store in csv format in the 'Tables' folder

#Percentage change (alternative to log returns, currently disabled):
#big_pct_change = big_panel_data.pct_change()
#big_pct_change.to_csv(os.path.join(PATH,'Tables','big_pct_change.csv'))

big_log_ret.head()

Symbols,AAL,AAPL,ABC,ABT,ADP,AIG,AMD,AMZN,AXP,BAC,...,UAL,UNH,UPS,USB,V,VZ,WFC,WMT,WY,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2012-01-02,0.06398,-0.006151,-0.020684,-0.007728,0.001275,0.058173,0.041485,-0.023212,0.034128,0.067265,...,0.012009,-0.001517,0.009211,0.045094,0.000298,0.028198,0.022887,0.009111,0.061181,-0.002824
2012-01-09,0.064853,0.001166,0.058817,0.014527,0.032937,0.02767,0.125994,0.067767,0.005611,0.067277,...,0.047679,-0.008193,0.016848,-0.01004,-0.001589,0.001284,0.030925,0.024389,0.021043,0.030286
2012-01-16,0.250093,0.062216,-0.028078,-0.01336,-0.025864,-0.015717,0.060441,0.022988,-0.003804,0.030643,...,0.177741,-0.024205,0.008187,-0.031098,0.004662,-0.046214,-0.031263,-0.004929,-0.008755,-0.019156
2012-01-23,0.164229,0.027346,0.006393,-0.000364,0.003067,0.073287,0.037414,-0.040157,0.047021,0.072735,...,0.078275,0.005668,0.008642,0.046977,0.057494,0.016789,0.034206,0.02151,0.039747,-0.010659
2012-01-30,-0.085504,0.07083,-0.016189,0.001998,-0.027394,-0.018949,-0.004246,-0.011468,-0.008457,0.028915,...,-0.050093,0.038426,-0.00013,-0.006528,0.062212,-0.003972,-0.008037,-0.002098,-0.061975,-0.013277


In [16]:
# Inspect the date labels of the log-return table.
big_log_ret.index

DatetimeIndex(['2012-01-02', '2012-01-09', '2012-01-16', '2012-01-23',
               '2012-01-30', '2012-02-06', '2012-02-13', '2012-02-20',
               '2012-02-27', '2012-03-05',
               ...
               '2021-09-27', '2021-10-04', '2021-10-11', '2021-10-18',
               '2021-10-25', '2021-11-01', '2021-11-08', '2021-11-15',
               '2021-11-22', '2021-11-29'],
              dtype='datetime64[ns]', name='Date', length=518, freq=None)

In [121]:
def big_mean_sentiment(sentiment:str, tolerance = 2):
    '''
    Aggregate one big-cap sentiment table by week, taking the mean of each week's values.

    Weeks are defined by the labels of the global ``big_log_ret`` index: week ``k`` runs
    from ``big_log_ret.index[k]`` up to the day before ``big_log_ret.index[k+1]``; the
    last week runs from the final label through the six following days.

    Parameters
    ----------
    sentiment : str
        Key into the global ``big_sent_tables`` dict, e.g. ``"big_linscr"``.
    tolerance : int, default 2
        Maximum number of NaN entries a stock may have among the rows that fall inside
        a week. With more NaNs than this, the stock's value for that week is NaN.

    Returns
    -------
    pandas.DataFrame
        One row per label of ``big_log_ret.index`` (same index) and one column per stock
        of the chosen sentiment table, holding the NaN-skipping weekly mean.

    Raises
    ------
    KeyError
        If ``sentiment`` is not a key of ``big_sent_tables``.
    '''
    big_sentiment = big_sent_tables[sentiment]
    columns = big_sentiment.columns
    week_starts = big_log_ret.index

    rows = []
    for pos, week_start in enumerate(week_starts):
        if pos + 1 < len(week_starts):
            # Stop one day before the next week begins (label slices are inclusive).
            week_end = week_starts[pos + 1] - dt.timedelta(days=1)
        else:
            # No following label for the last week: assume a full 7-day window.
            week_end = week_start + dt.timedelta(days=6)
        window = big_sentiment.loc[week_start:week_end]

        weekly = window.mean(axis=0, skipna=True)
        too_sparse = window.isnull().sum(axis=0) > tolerance
        # np.nan, not np.NaN: the upper-case alias was removed in NumPy 2.0.
        weekly = weekly.mask(too_sparse, np.nan)
        rows.append(weekly.reindex(columns).to_numpy(dtype=float))

    df = pd.DataFrame(rows, columns=columns)
    df.index = week_starts
    return df

In [106]:
def big_max_sentiment(sentiment:str, tolerance = 2):
    '''
    Aggregate one big-cap sentiment table by week, taking the maximum of each week's values.

    Weeks are defined by the labels of the global ``big_log_ret`` index: week ``k`` runs
    from ``big_log_ret.index[k]`` up to the day before ``big_log_ret.index[k+1]``; the
    last week runs from the final label through the six following days.

    Parameters
    ----------
    sentiment : str
        Key into the global ``big_sent_tables`` dict, e.g. ``"big_linscr"``.
    tolerance : int, default 2
        Maximum number of NaN entries a stock may have among the rows that fall inside
        a week. With more NaNs than this, the stock's value for that week is NaN.

    Returns
    -------
    pandas.DataFrame
        One row per label of ``big_log_ret.index`` (same index) and one column per stock
        of the chosen sentiment table, holding the NaN-skipping weekly maximum.

    Raises
    ------
    KeyError
        If ``sentiment`` is not a key of ``big_sent_tables``.
    '''
    big_sentiment = big_sent_tables[sentiment]
    columns = big_sentiment.columns
    week_starts = big_log_ret.index

    rows = []
    for pos, week_start in enumerate(week_starts):
        if pos + 1 < len(week_starts):
            # Stop one day before the next week begins (label slices are inclusive).
            week_end = week_starts[pos + 1] - dt.timedelta(days=1)
        else:
            # No following label for the last week: assume a full 7-day window.
            week_end = week_start + dt.timedelta(days=6)
        window = big_sentiment.loc[week_start:week_end]

        weekly = window.max(axis=0)
        too_sparse = window.isnull().sum(axis=0) > tolerance
        # np.nan, not np.NaN: the upper-case alias was removed in NumPy 2.0.
        weekly = weekly.mask(too_sparse, np.nan)
        rows.append(weekly.reindex(columns).to_numpy(dtype=float))

    df = pd.DataFrame(rows, columns=columns)
    df.index = week_starts
    return df


In [120]:
def big_min_sentiment(sentiment:str, tolerance = 2):
    '''
    Aggregate one big-cap sentiment table by week, taking the minimum of each week's values.

    Weeks are defined by the labels of the global ``big_log_ret`` index: week ``k`` runs
    from ``big_log_ret.index[k]`` up to the day before ``big_log_ret.index[k+1]``; the
    last week runs from the final label through the six following days.

    Parameters
    ----------
    sentiment : str
        Key into the global ``big_sent_tables`` dict, e.g. ``"big_linscr"``.
    tolerance : int, default 2
        Maximum number of NaN entries a stock may have among the rows that fall inside
        a week. With more NaNs than this, the stock's value for that week is NaN.

    Returns
    -------
    pandas.DataFrame
        One row per label of ``big_log_ret.index`` (same index) and one column per stock
        of the chosen sentiment table, holding the NaN-skipping weekly minimum.

    Raises
    ------
    KeyError
        If ``sentiment`` is not a key of ``big_sent_tables``.
    '''
    big_sentiment = big_sent_tables[sentiment]
    columns = big_sentiment.columns
    week_starts = big_log_ret.index

    rows = []
    for pos, week_start in enumerate(week_starts):
        if pos + 1 < len(week_starts):
            # Stop one day before the next week begins (label slices are inclusive).
            week_end = week_starts[pos + 1] - dt.timedelta(days=1)
        else:
            # No following label for the last week: assume a full 7-day window.
            week_end = week_start + dt.timedelta(days=6)
        window = big_sentiment.loc[week_start:week_end]

        weekly = window.min(axis=0)
        too_sparse = window.isnull().sum(axis=0) > tolerance
        # np.nan, not np.NaN: the upper-case alias was removed in NumPy 2.0.
        weekly = weekly.mask(too_sparse, np.nan)
        rows.append(weekly.reindex(columns).to_numpy(dtype=float))

    df = pd.DataFrame(rows, columns=columns)
    # One row is produced per week label, so the full index is required here;
    # assigning index[:-1] would fail with a length mismatch.
    df.index = week_starts
    return df


In [122]:
# Weekly mean of the 'linscr' indicator for the big-cap universe, stored as csv in the 'Tables' folder.
big_linscr_mean = big_mean_sentiment("big_linscr")
big_linscr_mean_path = os.path.join(PATH, 'Tables', 'big_linscr_mean.csv')
big_linscr_mean.to_csv(big_linscr_mean_path)

In [123]:
# Show the last rows of the weekly mean 'linscr' table.
big_linscr_mean.tail()

stock,AAL,AAPL,ABBV,ABC,ABT,ADP,AIG,AMD,AMZN,AXP,...,UAL,UNH,UPS,USB,V,VZ,WFC,WMT,WY,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-10-25,46.559083,32.474271,39.217117,54.785571,61.44512,38.76512,51.095533,58.297283,39.488743,32.917867,...,54.2298,66.06406,42.34778,61.799267,41.517433,51.19665,29.271743,57.352933,40.0,51.421343
2021-11-01,29.84174,42.8235,37.000033,49.182617,50.94392,63.38036,59.38106,72.9462,53.312783,27.996833,...,42.8561,52.98864,52.196833,72.08504,47.60036,45.08374,33.516717,40.389883,40.0,56.95666
2021-11-08,33.506867,39.82345,45.19214,41.99635,49.5261,71.18925,52.57668,71.762229,51.1106,19.6561,...,53.54514,33.20624,49.384567,66.564683,33.91414,50.169233,31.468771,70.813883,23.3008,42.53195
2021-11-15,23.31025,44.2488,42.07206,53.351643,49.41964,73.70314,48.6958,80.232886,46.7999,33.4266,...,12.12906,26.24382,53.3903,47.5066,23.94118,40.72495,30.282343,45.305786,10.0,43.201717
2021-11-22,20.06715,45.430929,56.1441,48.90362,66.5182,27.71554,,86.595986,46.718471,,...,14.67994,,46.677617,68.229333,,28.958114,15.077883,50.108929,,


In [98]:
# Display the big-cap log-return table.
big_log_ret

Symbols,AAL,AAPL,ABC,ABT,ADP,AIG,AMD,AMZN,AXP,BAC,...,UAL,UNH,UPS,USB,V,VZ,WFC,WMT,WY,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2012-01-02,0.063980,-0.006151,-0.020684,-0.007728,0.001275,0.058173,0.041485,-0.023212,0.034128,0.067265,...,0.012009,-0.001517,0.009211,0.045094,0.000298,0.028198,0.022887,0.009111,0.061181,-0.002824
2012-01-09,0.064853,0.001166,0.058817,0.014527,0.032937,0.027670,0.125994,0.067767,0.005611,0.067277,...,0.047679,-0.008193,0.016848,-0.010040,-0.001589,0.001284,0.030925,0.024389,0.021043,0.030286
2012-01-16,0.250093,0.062216,-0.028078,-0.013360,-0.025864,-0.015717,0.060441,0.022988,-0.003804,0.030643,...,0.177741,-0.024205,0.008187,-0.031098,0.004662,-0.046214,-0.031263,-0.004929,-0.008755,-0.019156
2012-01-23,0.164229,0.027346,0.006393,-0.000364,0.003067,0.073287,0.037414,-0.040157,0.047021,0.072735,...,0.078275,0.005668,0.008642,0.046977,0.057494,0.016789,0.034206,0.021510,0.039747,-0.010659
2012-01-30,-0.085504,0.070830,-0.016189,0.001998,-0.027394,-0.018949,-0.004246,-0.011468,-0.008457,0.028915,...,-0.050093,0.038426,-0.000130,-0.006528,0.062212,-0.003972,-0.008037,-0.002098,-0.061975,-0.013277
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-10-25,0.127917,0.009831,0.044480,-0.035698,0.022508,-0.002202,0.125745,0.042540,0.016607,-0.017098,...,0.133125,-0.010172,-0.024901,0.005287,0.022875,-0.014255,-0.014173,0.005406,0.070522,0.008495
2021-11-01,-0.072699,-0.008564,-0.017635,0.032279,0.005776,-0.012802,0.081317,0.001749,0.029005,-0.001278,...,-0.056188,0.006952,0.023589,-0.003136,-0.021365,0.001912,0.014110,-0.016578,-0.037209,-0.018628
2021-11-08,-0.051060,0.068037,-0.035548,-0.012613,0.029778,-0.057633,0.049598,0.042057,-0.046994,-0.032719,...,-0.077587,-0.042254,-0.022389,-0.049294,-0.052644,-0.028684,-0.042901,-0.037020,0.029614,-0.037268
2021-11-15,-0.082683,-0.023571,-0.002409,-0.007597,-0.035467,0.019820,-0.003868,-0.047915,-0.101309,0.007898,...,-0.087189,-0.000205,-0.010394,-0.001564,-0.016110,0.018313,-0.008021,0.017474,0.009940,0.009515


In [118]:
# Show the last 18 rows of the wide 'big_linscr' sentiment table.
big_sent_tables["big_linscr"].tail(18)

stock,AAL,AAPL,ABBV,ABC,ABT,ADP,AIG,AMD,AMZN,AXP,...,UAL,UNH,UPS,USB,V,VZ,WFC,WMT,WY,XOM
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-11-14,19.9319,38.5744,,18.267,,100.0,,80.1111,53.1847,,...,,,31.6574,50.0,,0.0,0.0,100.0,,12.462
2021-11-15,,,,64.6904,,,,100.0,60.3152,,...,,,41.7249,0.0,,0.0,0.0,40.0,,
2021-11-16,0.0,40.72,31.5201,73.9193,80.0659,69.7516,67.5895,84.6569,55.0357,21.2008,...,0.0,25.9546,52.9359,66.1319,0.0,23.4082,37.1295,10.9854,0.0,52.9107
2021-11-17,30.6147,57.5039,55.3639,54.979,56.5075,90.2235,55.2395,77.6961,39.5957,66.7014,...,50.0,85.5356,67.5974,84.539,90.2448,85.4158,50.7388,49.8122,50.0,62.4465
2021-11-18,23.5548,46.3775,43.5464,36.6368,48.7776,58.0087,42.2468,80.9779,49.3891,35.0988,...,5.0597,0.0,26.3381,57.2677,20.2114,66.1548,44.3975,60.2423,0.0,55.6241
2021-11-19,56.4943,45.2053,54.9753,43.87,22.4398,100.0,40.6897,66.0155,59.9037,28.1208,...,0.0,0.0,35.4822,74.3104,9.2497,69.3709,26.9517,73.8821,0.0,24.4926
2021-11-20,19.017,34.9969,24.9546,51.278,39.3074,50.5319,37.7135,75.5305,31.7087,16.0112,...,5.5856,19.7289,49.6536,50.2972,0.0,0.0,52.7589,37.6841,0.0,13.7364
2021-11-21,10.1807,40.6892,,48.088,,,,76.7533,31.6512,,...,,,100.0,0.0,,,0.0,44.5344,,50.0
2021-11-22,,36.4162,,,,,,100.0,33.5544,,...,,,,57.0605,,33.3333,0.0,33.3333,,
2021-11-23,22.4649,76.4777,0.0,56.9295,62.4108,0.0,26.506,88.353,56.9654,43.9234,...,17.0159,0.0,71.3928,71.4641,27.7496,66.3948,46.763,73.7994,0.0,43.5976


In [88]:
# Show the last rows of the weekly mean 'linscr' table.
big_linscr_mean.tail()
#big_log_ret.tail()

stock,AAL,AAPL,ABBV,ABC,ABT,ADP,AIG,AMD,AMZN,AXP,...,UAL,UNH,UPS,USB,V,VZ,WFC,WMT,WY,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-10-25,46.559083,32.474271,39.217117,54.785571,61.44512,38.76512,51.095533,58.297283,39.488743,32.917867,...,54.2298,66.06406,42.34778,61.799267,41.517433,51.19665,29.271743,57.352933,40.0,51.421343
2021-11-01,29.84174,42.8235,37.000033,49.182617,50.94392,63.38036,59.38106,72.9462,53.312783,27.996833,...,42.8561,52.98864,52.196833,72.08504,47.60036,45.08374,33.516717,40.389883,40.0,56.95666
2021-11-08,33.506867,39.82345,45.19214,41.99635,49.5261,71.18925,52.57668,71.762229,51.1106,19.6561,...,53.54514,33.20624,49.384567,66.564683,33.91414,50.169233,31.468771,70.813883,23.3008,42.53195
2021-11-15,23.31025,44.2488,42.07206,53.351643,49.41964,73.70314,48.6958,80.232886,46.7999,33.4266,...,12.12906,26.24382,53.3903,47.5066,23.94118,40.72495,30.282343,45.305786,10.0,43.201717
2021-11-22,20.06715,45.430929,56.1441,48.90362,66.5182,27.71554,,86.595986,46.718471,,...,14.67994,,46.677617,68.229333,,28.958114,15.077883,50.108929,,


In [90]:
#Same for mid and small-caps
#Log return: ln(P_t) - ln(P_{t-1}) for every ticker, on the weekly price grid
# NOTE(review): unlike big_log_ret, these two frames keep their original date labels
# and their leading all-NaN row -- confirm this difference is intended.
mid_log_prices = np.log(mid_panel_data)
mid_log_ret = mid_log_prices - mid_log_prices.shift(1)
#mid_log_ret.to_csv(os.path.join(PATH,'Tables','mid_log_ret.csv')) #Store in csv format in the 'Tables' folder
small_log_prices = np.log(small_panel_data)
small_log_ret = small_log_prices - small_log_prices.shift(1)
#small_log_ret.to_csv(os.path.join(PATH,'Tables','small_log_ret.csv')) #Store in csv format in the 'Tables' folder

#Percentage change (alternative to log returns, currently disabled):
#mid_pct_change = mid_panel_data.pct_change()
#mid_pct_change.to_csv(os.path.join(PATH,'Tables','mid_pct_change.csv'))
#small_pct_change = small_panel_data.pct_change()
#small_pct_change.to_csv(os.path.join(PATH,'Tables','small_pct_change.csv'))

## Ranking Tables

In [165]:
# Cross-sectional ranking: within each row (week), rank the stocks by log return.
# Rank 1 is the largest return; ties are broken by column order (method='first').
rank_options = dict(axis=1, ascending=False, method='first')

big_rankings = big_log_ret.rank(**rank_options)
#big_rankings.to_csv(os.path.join(PATH,'Tables','big_rankings.csv')) #Store in csv format in the 'Tables' folder

#Same for Mid and Small-caps
mid_rankings = mid_log_ret.rank(**rank_options)
#mid_rankings.to_csv(os.path.join(PATH,'Tables','mid_rankings.csv')) #Store in csv format in the 'Tables' folder
small_rankings = small_log_ret.rank(**rank_options)
#small_rankings.to_csv(os.path.join(PATH,'Tables','small_rankings.csv')) #Store in csv format in the 'Tables' folder
big_rankings.head()

Symbols,AAL,AAPL,ABC,ABT,ADP,AIG,AMD,AMZN,AXP,BAC,...,UAL,UNH,UPS,USB,V,VZ,WFC,WMT,WY,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2012-01-02,,,,,,,,,,,...,,,,,,,,,,
2012-01-09,10.0,88.0,95.0,89.0,78.0,13.0,28.0,98.0,33.0,9.0,...,63.0,82.0,65.0,21.0,79.0,36.0,45.0,66.0,12.0,86.0
2012-01-16,11.0,76.0,15.0,60.0,31.0,41.0,2.0,8.0,71.0,9.0,...,21.0,90.0,55.0,93.0,82.0,75.0,34.0,46.0,53.0,38.0
2012-01-23,1.0,9.0,92.0,73.0,90.0,75.0,10.0,25.0,55.0,16.0,...,3.0,88.0,42.0,95.0,45.0,102.0,96.0,57.0,66.0,79.0
2012-01-30,1.0,53.0,82.0,90.0,86.0,12.0,38.0,104.0,28.0,13.0,...,10.0,84.0,79.0,29.0,19.0,67.0,40.0,61.0,35.0,96.0
