In [2]:
import pickle
import pandas as pd
import numpy as np
from datetime import datetime
from dateutil.relativedelta import relativedelta
import warnings
warnings.filterwarnings("ignore")
from tqdm.auto import tqdm
import pingouin as pg
from functools import reduce
from scipy.stats.mstats import winsorize

import datetime
def last_day_of_month(any_day):
    """Return the last day of the month that contains ``any_day``.

    The result is built only from ``any_day.replace(day=...)``, the ``.day``
    attribute and ``datetime.timedelta`` arithmetic, so every other field of
    the input (for example a time of day) is carried over unchanged.
    """
    # Day 28 exists in every month, so this replace can never fail.
    day_28 = any_day.replace(day=28)
    # 28 + 4 days always ends up in the first days of the following month.
    in_next_month = day_28 + datetime.timedelta(days=4)
    # Stepping back by that day-of-month number lands on the day before the
    # 1st of the following month, i.e. the last day of the original month.
    overshoot = datetime.timedelta(days=in_next_month.day)
    return in_next_month - overshoot

# define data path: the first line of data_path.txt is the prefix that is
# string-concatenated with the file names below
with open('data_path.txt') as f:
    lines = f.read().splitlines()
data_path = lines[0]

# NOTE: pickle.load can execute arbitrary code while unpickling; only point
# data_path at pickle files you created yourself.
# The files are opened with `with` so the handle is closed even when
# unpickling raises.

# load key data
with open(data_path+'clean_data.pckl', 'rb') as f:
    mcdata, keydata, ids, returns, EU_4F, US_4F = pickle.load(f)

# load betas
with open(data_path+'betas.pckl', 'rb') as f:
    betas_onlyMkt, betas_4FBMG, betas_4F, betas_onlyBMG, betas_onlyBMG2, all_sample, bmg_sample = pickle.load(f)

# load bmg data
with open(data_path+'bmg.pckl', 'rb') as f:
    bmg_factors_value, bmg_factors_equal, valid_dict = pickle.load(f)




In [3]:
### Define time period here ######
# End of the sample window (alternative: pd.Timestamp("2021-06-30")).
end_date = pd.Timestamp("2019-12-31")
# Index label of the row of `returns` that carries the end date; .item()
# raises unless exactly one row matches.
latest_index = returns.loc[returns["Date"] == end_date].index.item()

# Start of the sample window and the index label of its row.
earliest_start_date = pd.Timestamp("2009-01-31")
earliest_index = returns.loc[returns["Date"] == earliest_start_date].index.item()

# All dates of the window; .loc label slicing includes both end points.
dates_list = returns.loc[earliest_index:latest_index, "Date"]

# Container for the per-date cross-section frames (keyed by date).
cs_dict = {}

In [4]:
# Fama MacBeth Second Step Regression for Cross Section Returns
# build matrices with return and betas for every month
#
# For every date in dates_list one DataFrame (one row per stock) is stored in
# cs_dict[date]. A stock is left out of a month when its return lies within
# +/-0.001 of zero (or is NaN), or when book-to-market, CO2 score or lagged
# market cap is <= 0.

# Column layout of every monthly frame. "BMG", "BMG2" and "Mkt2" are not
# computed in this cell (the corresponding beta look-ups are disabled), so
# they are NaN placeholders. "Mkt2" is included because the cross-section
# regression cell selects that column.
CS_COLUMNS = ["RIC","Return","Mkt","BMG","BMG2","log_bm","log_size","mom","log_co2score","logmom","Mkt2"]

for date in tqdm(dates_list):
    # compute date specific variables
    return_index = returns[returns.Date == date].index.item()
    # key data is taken from Dec 31 of the previous calendar year
    key_year = date.year-1
    key_stamp = pd.Timestamp(str(key_year)+"-12-31")
    # market cap is lagged one month: last day of the previous month
    mc_date = last_day_of_month(date - relativedelta(months=1))

    # Slices that do not depend on the stock are taken once per month
    # instead of once per stock.
    keydata_year = keydata[keydata.Date == key_stamp]
    mc_row = mcdata[mcdata.Date == mc_date]
    # rows -12 ... -1 relative to the current month (label slice, inclusive);
    # assumes `returns` has a consecutive integer index - TODO confirm
    mom_window = returns.loc[return_index-12:return_index-1]

    # Rows are collected in a list and turned into a DataFrame once per month:
    # DataFrame.append in a loop is quadratic and no longer exists in pandas 2.
    records = []
    for stock in tqdm(bmg_sample,leave=False):

        # return
        tmp_return = returns.loc[return_index,stock].item()

        # skip (near-)zero returns; a NaN return fails both comparisons and
        # is therefore skipped as well
        if not ((-0.001 > tmp_return) or (tmp_return > 0.001)):
            continue

        # key data row of this stock; .item() below raises unless exactly
        # one row matches
        stock_keydata = keydata_year[keydata_year.RIC == stock]

        # value control: book to market
        temp_bm = stock_keydata["BtoM"].item()
        if temp_bm <= 0:
            continue
        log_bm = np.log(temp_bm)

        # co2 score
        co2_score = stock_keydata["AnalyticCO2"].item()
        if co2_score <= 0:
            continue
        log_co2score = np.log(co2_score)

        # size control: market cap lagged one month
        temp_mc = mc_row[stock].item()
        if temp_mc <= 0:
            continue
        log_size = np.log(temp_mc)

        ### betas
        # Only Market Beta (non-rolling, from betas_onlyMkt)
        linreg = betas_onlyMkt[stock]
        mkt_beta = linreg[linreg.names == "Mkt-RF"]["coef"].item()

        ## disabled alternatives that would fill the BMG / BMG2 / Mkt2 columns:
        #linreg = betas_onlyBMG[stock]
        #bmg_beta = linreg[linreg.names == "BMG"]["coef"].item()
        #linreg = betas_onlyBMG2[stock]
        #bmg2_beta = linreg[linreg.names == "BMG2"]["coef"].item()
        #mkt2_beta = linreg[linreg.names == "Mkt-RF"]["coef"].item()

        # momentum control (raw return from months -12 to -1);
        # returns are divided by 100, presumably because they are stored
        # in percent - TODO confirm
        growth = 1+(mom_window[stock]/100)
        mom_return = growth.cumprod().iloc[-1]-1
        log_mom = np.log(1+mom_return)

        records.append({
            "RIC":stock,
            "Return":tmp_return,
            "Mkt":mkt_beta,
            "log_bm":log_bm,
            "log_size":log_size,
            "mom":mom_return,
            "logmom":log_mom,
            "log_co2score":log_co2score,
        })

    # one frame per month with a fresh 0..n-1 index; keys missing from the
    # records (BMG, BMG2, Mkt2) become NaN columns
    cs_df = pd.DataFrame(records, columns=CS_COLUMNS)
    cs_dict[date] = cs_df
        

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=132.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1709.0), HTML(value='')))




KeyboardInterrupt: 

In [15]:
# Save csdict to not rerun everything
# NOTE: this overwrites the cached file with whatever cs_dict currently holds,
# including a partially filled dict if the build loop was interrupted.
# `with` closes the file even when pickling raises.
with open(data_path+'cs_dict.pckl', 'wb') as f:
    pickle.dump(cs_dict, f)

In [39]:
# load cs_dict (can skip big block before)
# NOTE: pickle.load can execute arbitrary code while unpickling; only load a
# cs_dict.pckl that was written by this notebook.
# `with` closes the file even when unpickling raises.
with open(data_path+'cs_dict.pckl', 'rb') as f:
    cs_dict = pickle.load(f)

In [40]:
# Cross Section Regressions

# Regressors of the CO2-score specification, in the order handed to the
# regression. Only these columns are selected from the monthly frames, so the
# cell does not depend on placeholder columns (Mkt2, BMG, BMG2) being present.
CO2_REGRESSORS = ["Mkt","log_co2score","log_size","log_bm","logmom"]

lambdas_BMG = {}    # stays empty: the BMG specification is disabled
lambdas_co2 = {}
lambdas_BMG2 = {}   # stays empty: the BMG2 specification is disabled

# regress all asset returns for each of T time periods against the previously estimated betas to determine the risk premium for each factor (lambda)
for date in dates_list:
    # NOTE(review): missing values are replaced by 0 before the regression,
    # which treats e.g. a missing log_bm as log_bm == 0 - confirm this is intended
    temp_cs = cs_dict[date].fillna(0)

    # dependent variable: the month's return of every stock
    y = temp_cs["Return"].astype(float)

    # winsorize independent variables at the 1% / 99% level, column by column;
    # astype returns a new frame, so the assignment below never writes into
    # a slice of temp_cs
    x_co2 = temp_cs[CO2_REGRESSORS].astype(float)
    for col in CO2_REGRESSORS:
        x_co2[col] = np.asarray(winsorize(x_co2[col].to_numpy(), limits=[0.01,0.01]))

    # Regression
    lmco2 = pg.linear_regression(x_co2, y)

    # Save stats
    lambdas_co2[date] = lmco2
    
    

In [41]:
# Save Data for use in other files
# The list order [cs_dict, lambdas_BMG, lambdas_BMG2, lambdas_co2] is what a
# reader of this file has to unpack; `with` closes the file even when
# pickling raises.
with open(data_path+'cross_section.pckl', 'wb') as f:
    pickle.dump([cs_dict, lambdas_BMG, lambdas_BMG2, lambdas_co2], f)