In [1]:
import pickle
import pandas as pd
import numpy as np
from datetime import datetime
from dateutil.relativedelta import relativedelta
import warnings
warnings.filterwarnings("ignore")
import pickle
from tqdm.auto import tqdm

# define data path
# The first line of data_path.txt is used as a string prefix for every data file below.
with open('data_path.txt') as f:
    lines = f.read().splitlines()
data_path = lines[0]

# load data
# NOTE(security): pickle.load can execute arbitrary code - only unpickle files you created yourself.
# The context manager closes the handle even if unpickling (or the tuple unpacking) raises.
with open(data_path+'clean_data.pckl', 'rb') as f:
    mcdata, keydata, ids, returns, EU_4F, US_4F = pickle.load(f)

In [5]:
# Make list with eligible stocks for a given year
#
# Background quoted from the Fama/French data library (six portfolios formed on size and BE/ME):
#   "The portfolios, which are constructed at the end of each June, are the intersections of
#   2 portfolios formed on size (market equity, ME) and 3 portfolios formed on the ratio of
#   book equity to market equity (BE/ME). The size breakpoint for year t is the median NYSE
#   market equity at the end of June of year t. BE/ME for June of year t is the book equity
#   for the last fiscal year end in t-1 divided by ME for December of t-1. The BE/ME
#   breakpoints are the 30th and 70th NYSE percentiles."
# http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/Data_Library/six_portfolios.html
#
# This cell uses the same timing: market cap is read at the end of June of year t
# (rebalance_date) and the keydata fields at the end of December of year t-1 (check_date).
# Result: valid_dict maps each rebalance date to a DataFrame with one row per eligible stock
# and the columns RIC, MarketCap, TotalCO2, AnalyticCO2.

id_list = ids[ids.Sector != "Financials"]
id_list = id_list.RIC.unique()

# create dict to save dfs
valid_dict = {}

# Factor calculation from 2007 to 2021 (first portfolio rebalancing June 2007, last June 2020)
num_years = 14

valid_columns = ["RIC","MarketCap","TotalCO2","AnalyticCO2"]

# Build the membership lookup once; rebuilding list(keydata.RIC) for every stock is very slow.
keydata_rics = set(keydata.RIC)

# takes very long
for year_num in tqdm(range(num_years)):
    rebalance_date = pd.Timestamp("2007-06-30") + relativedelta(years=year_num)
    check_date = pd.Timestamp("2006-12-31") + relativedelta(years=year_num)

    # These slices do not depend on the stock, so compute them once per year.
    key_at_check = keydata[keydata.Date == check_date]
    mc_at_rebalance = mcdata.loc[mcdata.Date == rebalance_date]
    mc_at_check = mcdata.loc[mcdata.Date == check_date]

    # Collect plain records and build the DataFrame once per year.
    # (DataFrame.append was removed in pandas 2.0 and copied the whole frame on every call.)
    valid_rows = []
    for i in tqdm(id_list,leave=False):
        if i in keydata_rics:
            # .item() raises ValueError unless exactly one row matches, so a missing or
            # duplicated (RIC, Date) entry still fails loudly instead of being skipped.
            key_row = key_at_check[key_at_check.RIC == i]
            temp_co2 = key_row["TotalCO2"].item()
            temp_mc = mc_at_rebalance[i].item()
            temp_bv = key_row["BookValue"].item()
            temp_co2score = key_row["AnalyticCO2"].item()
            temp_mc_check = mc_at_check[i].item()

            # check if Market Cap Data in June_t and December_t-1 is > 100 mill (filter out micro/penny stocks with bad data)
            # NaN values fail these comparisons and are therefore excluded as well.
            if (temp_mc > 100) and (temp_mc_check > 100):
                # check if book value in December_t-1 is >1 (sometimes bad 0.001002010 data ...)
                if (temp_bv > 1) and (temp_co2 > 0.1):
                    valid_rows.append({
                        "RIC": i,
                        "MarketCap": temp_mc,
                        "TotalCO2": temp_co2,
                        "AnalyticCO2": temp_co2score})

    # Passing columns= keeps the expected schema even when no stock qualifies.
    valid_df = pd.DataFrame(valid_rows, columns=valid_columns)

    # save in dict for this year
    valid_dict[rebalance_date] = valid_df
    
    

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=14.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=16464.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=16464.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=16464.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=16464.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=16464.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=16464.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=16464.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=16464.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=16464.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=16464.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=16464.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=16464.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=16464.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=16464.0), HTML(value='')))




In [18]:
# Save valid_dict to not run it again
# The context manager flushes and closes the file even if pickling raises.
with open(data_path+'valid_dict.pckl', 'wb') as f:
    pickle.dump(valid_dict, f)

In [3]:
# load valid_dict
# NOTE(security): pickle.load can execute arbitrary code - only unpickle files you created yourself.
# The context manager closes the handle even if unpickling raises.
with open(data_path+'valid_dict.pckl', 'rb') as f:
    valid_dict = pickle.load(f)

In [4]:
# Create flags
# For every yearly universe in valid_dict, add 0/1 membership columns for the size halves,
# the CO2-score terciles (30/70 breakpoints) and the CO2-score quintiles.
# The columns are written onto the DataFrames stored in valid_dict (in place).
num_years = 14
for year_num in range(num_years):
    rebalance_date = pd.Timestamp("2007-06-30") + relativedelta(years=year_num)
    valid_df = valid_dict[rebalance_date]

    market_cap = valid_df["MarketCap"]
    co2_score = valid_df["AnalyticCO2"]

    # Breakpoints: median market cap, then the CO2-score percentiles
    median_mc = np.median(market_cap)
    co2_30, co2_70 = (np.percentile(co2_score, pct) for pct in (30, 70))
    co2_20, co2_40, co2_60, co2_80 = (np.percentile(co2_score, pct) for pct in (20, 40, 60, 80))

    # Column name -> boolean membership mask (insertion order = resulting column order)
    flag_masks = {
        # Small/Big: a stock exactly at the median counts as small
        "small_flag": market_cap <= median_mc,
        "big_flag": market_cap > median_mc,
        # Terciles: brown strictly above the 70th, green strictly below the 30th percentile,
        # neutral covers the closed interval in between
        "brown_flag": co2_score > co2_70,
        "green_flag": co2_score < co2_30,
        "neutral_flag": (co2_score <= co2_70) & (co2_score >= co2_30),
        # Quintiles: upper bounds are inclusive for q20/q40/q60; a score exactly at the
        # 80th percentile belongs to q100, not q80
        "q20": co2_score <= co2_20,
        "q40": (co2_score <= co2_40) & (co2_score > co2_20),
        "q60": (co2_score <= co2_60) & (co2_score > co2_40),
        "q80": (co2_score < co2_80) & (co2_score > co2_60),
        "q100": co2_score >= co2_80,
    }
    for flag_name, flag_mask in flag_masks.items():
        valid_df[flag_name] = np.where(flag_mask, 1, 0)


In [5]:
###### Average Monthly Return ############
# For every rebalance date, form the 2x3 size/CO2 portfolios and the five CO2 quintile
# portfolios from the flags in valid_dict, then compute each portfolio's equal-weighted and
# market-cap-weighted average return for the following (up to) 12 rows of mcdata.
# Results are written to equal_portfolio_returns / value_portfolio_returns, which share
# mcdata's index and Date column.
portfolio_names = ["Small_Green","Small_Neutral","Small_Brown","Big_Green","Big_Neutral","Big_Brown","Q20","Q40","Q60","Q80","Q100"]
column_names = portfolio_names.copy()
column_names.insert(0,"Date")
equal_portfolio_returns = pd.DataFrame(columns=column_names)
equal_portfolio_returns["Date"] = mcdata["Date"]
value_portfolio_returns = equal_portfolio_returns.copy()

for rebalance_date in tqdm(valid_dict.keys()):
    valid_df = valid_dict[rebalance_date]

    # Combine the masks with & instead of chaining two boolean selections; the chained form
    # selects the same rows but makes pandas reindex the second mask (and warn about it).
    is_small = valid_df.small_flag == 1
    is_big = valid_df.big_flag == 1
    is_brown = valid_df.brown_flag == 1
    is_neutral = valid_df.neutral_flag == 1
    is_green = valid_df.green_flag == 1

    # Make stock lists for portfolios
    small_brown_list = list(valid_df[is_small & is_brown]["RIC"])
    small_neutral_list = list(valid_df[is_small & is_neutral]["RIC"])
    small_green_list = list(valid_df[is_small & is_green]["RIC"])
    big_brown_list = list(valid_df[is_big & is_brown]["RIC"])
    big_neutral_list = list(valid_df[is_big & is_neutral]["RIC"])
    big_green_list = list(valid_df[is_big & is_green]["RIC"])

    #quintile portfolios
    q20_list = list(valid_df[valid_df.q20 == 1]["RIC"])
    q40_list = list(valid_df[valid_df.q40 == 1]["RIC"])
    q60_list = list(valid_df[valid_df.q60 == 1]["RIC"])
    q80_list = list(valid_df[valid_df.q80 == 1]["RIC"])
    q100_list = list(valid_df[valid_df.q100 == 1]["RIC"])

    # Same order as portfolio_names
    portfolio_list = [small_green_list,small_neutral_list,small_brown_list,big_green_list,big_neutral_list,big_brown_list,q20_list,q40_list,q60_list,q80_list,q100_list]

    # Holding period: the rows of mcdata right after the rebalance row. .loc slices by label
    # and is end-inclusive, so this yields up to 12 dates (fewer if mcdata ends earlier).
    # Assumes mcdata has a default integer index - TODO confirm.
    start_index = mcdata[mcdata.Date == rebalance_date].index.item()+1
    for mon_date in list(mcdata.loc[start_index:start_index+11,"Date"]):
        return_date_index = returns[returns.Date == mon_date].index.item()
        mc_data_index = mcdata[mcdata.Date == mon_date].index.item()

        # zip pairs each portfolio with its name (previously a manual counter named `iter`,
        # which shadowed the builtin for the rest of the notebook)
        for port_name, portfolio in zip(portfolio_names, portfolio_list):
            return_sum = 0
            mc_sum = 0
            num_stocks = 0
            value_return_sum = 0

            for stock in portfolio:
                mon_return = returns.loc[return_date_index,stock]
                # Label slice is end-inclusive: this window holds the current month plus the
                # preceding rows back to index-6 (assumes a default integer index - TODO confirm)
                last_6_months = list(returns.loc[return_date_index-6:return_date_index,stock])
                # True only if no return in the window lies within [-0.01, 0.01]
                last_months_check = not any(-0.01 <= r <= 0.01 for r in last_6_months)
                mon_mc = mcdata.loc[mc_data_index,stock]
                # filter for data errors
                # (the 200 / -99 bounds suggest returns are in percent - TODO confirm;
                #  NaN returns or market caps fail the comparisons and are skipped)
                if ((-0.01 > mon_return) or (mon_return > 0.01)) and (mon_return < 200) and (mon_return > -99) and last_months_check and (mon_mc > 100):
                    # value weighted returns:
                    value_return_sum += mon_return * mon_mc
                    # equal weighted returns:
                    return_sum += mon_return
                    # market cap sum
                    mc_sum += mon_mc
                    # num of stocks
                    num_stocks += 1

            if num_stocks > 0:
                # every accepted stock has mon_mc > 100, so mc_sum is positive here
                value_average_return = value_return_sum/mc_sum
                equal_average_return = return_sum/num_stocks
            else:
                # No stock passed the filters (or the portfolio is empty): record a missing
                # value instead of crashing with ZeroDivisionError on 0/0.
                value_average_return = np.nan
                equal_average_return = np.nan
            value_portfolio_returns.loc[mc_data_index,port_name] = value_average_return
            equal_portfolio_returns.loc[mc_data_index,port_name] = equal_average_return



HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=14.0), HTML(value='')))




In [6]:
# Brown Minus Green (BMG) factor: average return of a portfolio that is long brown stocks
# and short green stocks.
def _half_spread(frame, long_a, long_b, short_a, short_b):
    """Return 0.5 * (long_a + long_b) - 0.5 * (short_a + short_b) using columns of `frame`."""
    long_leg = 0.5 * (frame[long_a] + frame[long_b])
    short_leg = 0.5 * (frame[short_a] + frame[short_b])
    return long_leg - short_leg


# Own factor calculation, built from the quintile portfolios:
# (1/2) * (Q100 + Q80) - (1/2) * (Q20 + Q40)
for portfolio_returns in (value_portfolio_returns, equal_portfolio_returns):
    portfolio_returns["BMG"] = _half_spread(portfolio_returns, "Q100", "Q80", "Q20", "Q40")

# Test: variant following the Fama/French HML logic on the 2x3 size/CO2 portfolios
# (1/2) * (Small_Brown + Big_Brown) - (1/2) * (Small_Green + Big_Green), value-weighted only
value_portfolio_returns["BMG2"] = _half_spread(
    value_portfolio_returns, "Small_Brown", "Big_Brown", "Small_Green", "Big_Green")

# Keep index labels 19..186 (label slice, both ends inclusive) and renumber from 0.
# NOTE(review): presumably these are the months covered by the rebalancing loop - confirm
# against mcdata's dates before changing the sample.
value_portfolio_returns = value_portfolio_returns.loc[19:186].reset_index(drop=True)
equal_portfolio_returns = equal_portfolio_returns.loc[19:186].reset_index(drop=True)

# Independent copies under the names that get saved for other files
bmg_factors_value = value_portfolio_returns.copy()
bmg_factors_equal = equal_portfolio_returns.copy()


In [7]:
# Save Data for use in other files
# Pickled as a list: [bmg_factors_value, bmg_factors_equal, valid_dict].
# The context manager flushes and closes the file even if pickling raises.
with open(data_path+'bmg.pckl', 'wb') as f:
    pickle.dump([bmg_factors_value,bmg_factors_equal,valid_dict], f)