# BEM114 Homework 3 - Value of Intangibles
**Names:** Andrew Zabelo, Daniel Wen, Kyle McCandless  
**Student IDs:** 2176083, 2159859, 2157818

## Setup
Imports, Helper Functions, and DataFrames

In [1]:
'''
Imports
'''

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

In [236]:
'''
Helper functions
'''

# Given a group of stocks, calculate equal-weighted and value-weighted weights
def calc_weights(group):
    if group['rebalance'].sum() > 0:
        # Calc equal weights
        group['weights_eq'] = 1 / float(group['PERMNO'].count())
        # Calc value weights
        group['weights_val'] = group['MV'] / group['MV'].sum()
        return group
    else:
        print('test')
        group['weights_eq'] = -1
        group['weights_val'] = -1

# Calculates returns and prints the returns mean, vol, and Sharpe ratio for a strategy
def analyze(df, strat_name, return_col_name):
    strat_mean = df[return_col_name].mean()
    strat_vol = df[return_col_name].std()
    strat_sharpe = strat_mean / strat_vol

    print(f"{strat_name} monthly returns have mean {strat_mean}%, vol {strat_vol}%, and Sharpe {strat_sharpe}")
    
# Estimates the CAPM and FF3 models on df_old using the returns found in ret_col_name
def estimate_capm_and_ff3(df_old, return_col_name, ff3):
    # Merge in ff3 data. Keep separate from ff5 because there is a larger data range available in ff3.
    # May lose a few rows since ff3 goes back to July 1926 and our data starts Jan 1926
    df = pd.merge(df_old, ff3, how='inner', on=['date'])
    
    # Estimate CAPM
    print('CAPM')
    print(sm.OLS(df[return_col_name] - df['RF'], sm.add_constant(df[['Mkt-RF']])).fit().summary())
    
    # Estimate FF3
    print('FF3')
    print(sm.OLS(df[return_col_name] - df['RF'], sm.add_constant(df[['Mkt-RF', 'SMB', 'HML']])).fit().summary())

def estimate_carhart(df_old, return_col_name, carhart):
    # Merge in carhart data
    df = pd.merge(df_old, carhart, how='inner', on=['date'])
    
    # Estimate Carhart
    print('Carhart')
    print(sm.OLS(df[return_col_name] - df['RF'], sm.add_constant(df[['Mkt-RF', 'SMB', 'HML', 'nuggets']])).fit().summary())
    
# Estimates the FF5 model on df_old using the returns found in ret_col_name
def estimate_ff5(df_old, return_col_name, ff5, add_momentum=False, mom_rets=None):
    # Merge in ff5 data. Truncates dates so create a df separate from ff3.
    df = pd.merge(df_old, ff5, how='inner', on=['date'])
    
    # Estimate FF5
    print('FF5')
    print(sm.OLS(df[return_col_name] - df['RF'], sm.add_constant(df[['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']])).fit().summary())
    
# Plots the cumulative returns for a strategy versus the CAPM-implied returns
def plot_cum_returns(df, return_col_name, capm_beta):
    df[return_col_name + '_MIR'] = df['RF'] + capm_beta * df['Mkt-RF']
    
    dates = df_total['date'] // 100 + (df_total['date'] % 100) / 12
    strategy_cumulative = (df[return_col_name] / 100 + 1.0).cumprod()
    mir_cumulative = (df[return_col_name + '_MIR'] / 100 + 1.0).cumprod()

    plt.figure()
    plt.plot(dates, strategy_cumulative, label=f'{return_col_name} Portfolio Value')
    plt.plot(dates, mir_cumulative, label=f'CAPM-Implied Portfolio Value')

    plt.title(f'{return_col_name} Model Performance')
    plt.xlabel('Date')
    plt.ylabel('Cumulative Portfolio Value')

    plt.legend()
    plt.show()

In [213]:
'''
Load CRSP data
'''

crsp = pd.read_csv('crsp_1926_2020.zip')

# Convert prices and returns to numeric and drop NaNs
crsp['PRC'] = pd.to_numeric(crsp['PRC'], errors='coerce')
crsp['RET'] = pd.to_numeric(crsp['RET'], errors='coerce')
crsp = crsp.dropna(subset=['PRC', 'RET'])

# Set types for relevant columns
crsp = crsp.astype({'date': 'string', 'SHRCD': 'int', 'EXCHCD': 'int'})

# [From HW2] Filter SHRCD and EXCHCD, remove negative prices
crsp = crsp[crsp['SHRCD'].isin([10, 11])]
crsp = crsp[crsp['EXCHCD'].isin([1, 2, 3])]
crsp = crsp[crsp['PRC'] >= 0]

# Reformat date column and add market value column
crsp['date'] = crsp['date'].str[:-3]
crsp['year'] = crsp['date'].str[:-3].astype('int')
crsp['MV'] = crsp['PRC'] * crsp['SHROUT']
crsp

Unnamed: 0,PERMNO,date,SHRCD,EXCHCD,PRC,RET,SHROUT,year,MV
28,10001,1986-09,11,3,6.37500,-0.003077,991.0,1986,6.317625e+03
29,10001,1986-10,11,3,6.62500,0.039216,991.0,1986,6.565375e+03
30,10001,1986-11,11,3,7.00000,0.056604,991.0,1986,6.937000e+03
31,10001,1986-12,11,3,7.00000,0.015000,991.0,1986,6.937000e+03
32,10001,1987-01,11,3,6.75000,-0.035714,991.0,1987,6.689250e+03
...,...,...,...,...,...,...,...,...,...
4705164,93436,2020-08,11,3,498.32001,0.741452,931809.0,2020,4.643391e+08
4705165,93436,2020-09,11,3,429.01001,-0.139087,948000.0,2020,4.067015e+08
4705166,93436,2020-10,11,3,388.04001,-0.095499,947901.0,2020,3.678235e+08
4705167,93436,2020-11,11,3,567.59998,0.462736,947901.0,2020,5.380286e+08


In [9]:
'''
Load FF3 and FF5 data
'''

ff3 = pd.read_csv('ff3_factors.csv')
ff3 = ff3.astype({'date': 'string'})
ff3['date'] = ff3['date'].apply(lambda x: x[:4] + '-' + x[4:])

ff5 = pd.read_csv('ff5_factors.csv')
ff5 = ff5.astype({'date': 'string'})
ff5['date'] = ff5['date'].apply(lambda x: x[:4] + '-' + x[4:])

In [249]:
'''
Load 100 Best Companies to Work for in America
'''

bcw = pd.read_csv('bcwlist_modified.csv')
bcw = bcw.dropna(subset=['permno'])

# Set types for relevant columns
bcw = bcw.astype({'rank': 'int', 'company': 'string', 'year': 'int'})
bcw = bcw.sort_values(by=['year', 'rank'])
bcw

Unnamed: 0,rank,company,permno,year
0,1,AT&T Bell Laboratories,66093.0,1984
1,2,Trammell Crow Company,85629.0,1984
2,3,Delta Airlines,26112.0,1984
3,4,Federal Express,60628.0,1984
4,5,Goldman Sachs,86868.0,1984
...,...,...,...,...
2486,87,AbbVie,13721.0,2020
2487,88,Encompass Home Health & Hospice,10693.0,2020
2493,94,Goldman Sachs,86868.0,2020
2498,99,Delta Airlines,91926.0,2020


## Problem 1

### Part A

We assume the same data cleaning of the CRSP dataset as homework 2: common shares (shrcd = 10 or 11) and NYSE, AMEX, or NASDAQ stocks (exchcd = 1, 2, or 3). this causes us to lose a couple hundred rows from bcw. We dropped rows with negative prices.

Assume that if no PERMNO then not publicly traded --> leave out

Use all best companies to work for firms at each release

calculate weights: equal or value weight firms based on January data

In [250]:
# Process bcw data

# Reformat for merge with CRSP
bcw.rename(columns={'permno': 'PERMNO', 'year': 'year_formed'}, inplace=True)

# Add years between 1984 - 1993, 1993 - 1998 for merge
bcw['year'] = bcw['year_formed']

bcw_extra = []
for year_formed, gap_length in zip([1984, 1993], [1993 - 1984, 1998 - 1993]):
    to_increment = bcw[bcw['year_formed'] == year_formed]
    for increment in range(gap_length):
        for _, row in to_increment.iterrows():
            row['year'] += increment
            bcw_extra.append(row.to_dict())

bcw_extra = pd.DataFrame(bcw_extra, columns=bcw.columns)

bcw = pd.concat([bcw, bcw_extra], ignore_index=True)
bcw

Unnamed: 0,rank,company,PERMNO,year_formed,year
0,1,AT&T Bell Laboratories,66093.0,1984,1984
1,2,Trammell Crow Company,85629.0,1984,1984
2,3,Delta Airlines,26112.0,1984,1984
3,4,Federal Express,60628.0,1984,1984
4,5,Goldman Sachs,86868.0,1984,1984
...,...,...,...,...,...
2482,101,Viking Freight System,80814.0,1993,1997
2483,101,Wal-Mart Stores,55976.0,1993,1997
2484,101,Weyerhaeuser Company,39917.0,1993,1997
2485,101,Worthington Industries,83601.0,1993,1997


In [251]:
# Merge bcw and crsp
df = pd.merge(bcw, crsp, how='inner', on=['year', 'PERMNO'])

# Add special date flag
min_max = df.groupby('PERMNO')['date'].agg(['min', 'max'])
special_dates = np.array(min_max['min'].tolist() + min_max['max'].tolist())
df['is_special_date'] = df['date'].isin(special_dates)

# Add rebalance column
df['rebalance'] = df['is_special_date'].shift(1)
df['rebalance'] = (df['year'] == df['year_formed']) | (df['rebalance'] == True)

df = df.drop(['rank', 'SHRCD', 'EXCHCD', 'PRC', 'SHROUT', 'is_special_date'], axis=1)
df = df.sort_values(by=['date'])
df

Unnamed: 0,company,PERMNO,year_formed,year,date,RET,MV,rebalance
1398,Security Pacific Corporation,60839.0,1984,1984,1984-01,-0.013529,1.826571e+06,True
1530,Tenneco,26542.0,1984,1984,1984-01,0.020122,5.731262e+06,True
116,Pitney Bowes,24459.0,1984,1984,1984-01,-0.011494,1.107142e+06,True
656,General Electric Company,12060.0,1984,1984,1984-01,-0.072495,2.472056e+07,True
1064,Maytag Company,13119.0,1984,1984,1984-01,-0.085366,6.512344e+05,True
...,...,...,...,...,...,...,...,...
14970,Adobe Systems,75510.0,2020,2020,2020-12,0.045248,2.395575e+08,True
14826,Workday,13628.0,2020,2020,2020-12,0.065928,4.312980e+07,True
15018,Progressive,64390.0,2020,2020,2020-12,0.135116,5.787446e+07,True
15150,Nutanix,16304.0,2020,2020,2020-12,0.163563,6.058009e+06,True


In [252]:
# Group by date and calculate weights
df_weights = df.groupby('date', group_keys=False).apply(calc_weights)

In [254]:
df_weights[df_weights['weights_eq'] < 0]

Unnamed: 0,company,PERMNO,year_formed,year,date,RET,MV,rebalance,weights_eq,weights_val


In [172]:
# Filter crsp stocks by PERMNO for computation speed
crsp = crsp[crsp['PERMNO'].isin(bcw['PERMNO'].unique())]

# Calculate annual returns for CRSP stocks. The annual_ret of a given month
# is the product of the monthly returns for the 12 months immediately AFTER that month
# This way, January will have the annual returns from Feb - next Jan
# Set min_periods = 1 so that we can deal with delisted stocks
crsp = crsp.sort_values(by=['PERMNO', 'date'])
annual_returns = crsp.groupby('PERMNO')['RET'].rolling(window=12, min_periods=1).apply(lambda x: np.prod(1+x)-1).shift(-12)

idx = annual_returns.index.get_level_values(1)
annual_returns = pd.Series(annual_returns.values, index=idx).rename('annual_ret')
annual_returns

14057      1.416664
14058      1.566929
14059      1.270491
14060      1.743361
14061      1.589286
             ...   
4700009         NaN
4700010         NaN
4700011         NaN
4700012         NaN
4700013         NaN
Name: annual_ret, Length: 105348, dtype: float64

In [173]:
# Merge in annual returns
crsp_annual = pd.merge(crsp, annual_returns, left_index=True, right_index=True)
assert(len(crsp_annual) == len(crsp))

# Manual test of annual return calculation
a = np.array(crsp[crsp['PERMNO'] == 10078]['RET'])[1:13] 
print(a) # Returns of next 12 months
value = 1
for ret in a:
    value *= (ret + 1)
print(value - 1)
assert(value - 1 == crsp_annual['annual_ret'][14057])

crsp_annual

[-0.118056 -0.03937  -0.07377  -0.00885   0.348214  0.145695  0.109827
  0.135417  0.036697  0.09292   0.165992  0.208333]
1.416664348482748


Unnamed: 0,PERMNO,date,SHRCD,EXCHCD,PRC,RET,SHROUT,year,MV,annual_ret
14057,10078,1986-05,11,3,18.000,0.111969,27108.0,1986,487944.000,1.416664
14058,10078,1986-06,11,3,15.875,-0.118056,27179.0,1986,431466.625,1.566929
14059,10078,1986-07,11,3,15.250,-0.039370,27179.0,1986,414479.750,1.270491
14060,10078,1986-08,11,3,14.125,-0.073770,27179.0,1986,383903.375,1.743361
14061,10078,1986-09,11,3,14.000,-0.008850,27239.0,1986,381346.000,1.589286
...,...,...,...,...,...,...,...,...,...,...
4700009,93374,2020-08,11,1,52.570,0.030582,111519.0,2020,5862553.830,
4700010,93374,2020-09,11,1,50.910,-0.023207,111519.0,2020,5677432.290,
4700011,93374,2020-10,11,1,44.590,-0.124141,111645.0,2020,4978250.550,
4700012,93374,2020-11,11,1,48.440,0.086342,111645.0,2020,5408083.800,


In [168]:
# Merge bcw with crsp including annual returns
df = pd.merge(bcw, crsp_annual, how='inner', on=['date', 'PERMNO'])
df = df[df['year'] < '2020'] # Drop the last year of data as the annual returns aren't known
assert(len(df[pd.isna(df['annual_ret'])]) == 0)

# Group by date and calculate weights
df = df.groupby('date', group_keys=False).apply(calc_weights)

df = df.drop(['SHRCD', 'EXCHCD', 'PRC', 'SHROUT', 'RET', 'MV'], axis=1)
df

Unnamed: 0,rank,company,PERMNO,year,date,annual_ret,weights_eq,weights_val
0,3,Delta Airlines,26112.0,1984,1984-01,0.120081,0.014706,0.005297
1,4,Federal Express,60628.0,1984,1984-01,-0.141026,0.014706,0.006099
2,7,Hewlett-Packard,27828.0,1984,1984-01,-0.063142,0.014706,0.034827
3,8,IBM,12490.0,1984,1984-01,0.237520,0.014706,0.236584
4,9,Pitney Bowes,24459.0,1984,1984-01,0.313698,0.014706,0.003758
...,...,...,...,...,...,...,...,...
1095,93,Box,15145.0,2019,2019-01,-0.281549,0.026316,0.001715
1096,94,Alliance Data Systems,89002.0,2019,2019-01,-0.410101,0.026316,0.005530
1097,95,Federal Express,60628.0,2019,2019-01,-0.172086,0.026316,0.026499
1098,96,Activision Blizzard,79678.0,2019,2019-01,0.247994,0.026316,0.020617


## Problem 2

### Part A

In [169]:
# Add returns
df['weighted_eq_ret'] = df['weights_eq'] * df['annual_ret']
df['weighted_val_ret'] = df['weights_val'] * df['annual_ret']

eq_returns = df.groupby('date')['weighted_eq_ret'].sum()
val_returns = df.groupby('date')['weighted_val_ret'].sum()

print('Equal-weighted returns:')
print(eq_returns)

print('\n\nValue-weighted returns:')
print(val_returns)

Equal-weighted returns:
date
1984-01    0.100780
1993-01    0.202081
1998-01    0.263116
1999-01    0.410408
2000-01    0.128314
2001-01   -0.132742
2002-01   -0.192051
2003-01    0.582125
2004-01    0.102118
2005-01    0.161929
2006-01    0.091604
2007-01   -0.127062
2008-01   -0.378090
2009-01    0.678200
2010-01    0.284795
2011-01    0.041811
2012-01    0.140598
2013-01    0.258238
2014-01    0.123008
2015-01   -0.080626
2016-01    0.264379
2017-01    0.347668
2018-01   -0.002792
2019-01    0.166839
Name: weighted_eq_ret, dtype: float64


Value-weighted returns:
date
1984-01    0.158176
1993-01    0.093447
1998-01    0.543405
1999-01    0.335708
2000-01   -0.161225
2001-01   -0.209143
2002-01   -0.232591
2003-01    0.357498
2004-01   -0.050737
2005-01    0.148622
2006-01    0.094934
2007-01   -0.044436
2008-01   -0.401402
2009-01    0.447709
2010-01    0.126968
2011-01    0.032112
2012-01    0.099072
2013-01    0.297857
2014-01    0.057727
2015-01    0.003315
2016-01    0.238772
20