# BEM114 Homework 3 - Value of Intangibles
**Names:** Andrew Zabelo, Daniel Wen, Kyle McCandless  
**Student IDs:** 2176083, 2159859, 2157818

## Setup
Imports, Helper Functions, and DataFrames

In [15]:
'''
Imports
'''

import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt

In [48]:
'''
Helper functions
'''

def calc_weights(group):
    """Attach equal- and value-weighted portfolio weights to a group of stocks.

    `group` is a DataFrame with 'PERMNO', 'MV' and 'rebalance' columns.

    If any row in the group is flagged for rebalancing, every row receives
    1 / (number of PERMNOs) in 'weights_eq' and its share of the group's total
    'MV' in 'weights_val'. Otherwise both columns are filled with NaN.

    The columns are written onto `group` itself, which is also returned.
    """
    if not (group['rebalance'].sum() > 0):
        # Nothing to rebalance here: leave placeholders for the caller to fill.
        group['weights_eq'] = np.nan
        group['weights_val'] = np.nan
        return group

    # Equal weights: one share of the portfolio per stock in the group.
    n_stocks = group['PERMNO'].count()
    group['weights_eq'] = 1 / float(n_stocks)
    # Each PERMNO may appear only once, or the weights would not sum to one.
    assert n_stocks == group['PERMNO'].nunique()

    # Value weights: each stock's fraction of the group's total market value.
    total_mv = group['MV'].sum()
    group['weights_val'] = group['MV'] / total_mv
    return group

def analyze(returns, strat_name):
    """Print the mean, volatility and mean/volatility ratio of a return series.

    `returns` is a series of monthly returns (anything exposing `.mean()` and
    `.std()`); `strat_name` is the label used in the printed report. The
    "Sharpe Ratio" printed here is the raw mean divided by the standard
    deviation; no risk-free rate is subtracted. Nothing is returned.
    """
    avg = returns.mean()
    vol = returns.std()
    sharpe = avg / vol
    report = (
        f"{strat_name} monthly returns:\n"
        f"Mean = {avg}%\n"
        f"Volatility = {vol}%\n"
        f"Sharpe Ratio = {sharpe}"
    )
    print(report)
    
def estimate_models(df_old, return_col_name, ff5):
    """Regress a strategy's excess returns on factor models and print each fit.

    `df_old` holds the strategy returns under `return_col_name` and is joined
    to `ff5` on 'date'. `ff5` must provide the 'RF', 'Mkt-RF', 'SMB', 'HML',
    'RMW' and 'CMA' columns. For each model the label is printed, followed by
    the statsmodels OLS summary of (return - RF) on a constant plus that
    model's factors.

    Models fitted: CAPM, FF3 and FF5. The 'Carhart' label is printed as well,
    but its regression is disabled (the original call was commented out and
    referenced a placeholder factor column), so no summary follows it.
    """
    df = pd.merge(df_old, ff5, how='inner', on=['date'])
    # Every strategy date must match exactly one row of factor data.
    assert len(df) == len(df_old)

    # (label, factor columns); None means "print the label only, fit nothing".
    specs = (
        ('CAPM', ['Mkt-RF']),
        ('FF3', ['Mkt-RF', 'SMB', 'HML']),
        ('Carhart', None),
        ('FF5', ['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']),
    )
    for label, factors in specs:
        print(label)
        if factors is None:
            continue
        excess_ret = df[return_col_name] - df['RF']
        design = sm.add_constant(df[factors])
        print(sm.OLS(excess_ret, design).fit().summary())
    
def plot_cum_returns(df, return_col_name, capm_beta):
    """Plot a strategy's cumulative value against its CAPM-implied value.

    Parameters:
        df: DataFrame with 'date', 'RF', 'Mkt-RF' and `return_col_name`
            columns, with returns expressed in percent. 'date' may be either
            a YYYYMM number or a 'YYYY-MM' string.
        return_col_name: name of the column holding the strategy's returns.
        capm_beta: the strategy's CAPM beta, used to build the implied return
            RF + beta * (Mkt-RF).

    Side effects: adds a `<return_col_name>_MIR` column (the CAPM-implied
    return) to `df` and shows a matplotlib figure. Nothing is returned.
    """
    mir_col = return_col_name + '_MIR'
    df[mir_col] = df['RF'] + capm_beta * df['Mkt-RF']

    # Take the x-axis from the frame being plotted (previously an unrelated
    # global, `df_total`, was read here), and normalise 'YYYY-MM' strings to
    # YYYYMM numbers so the year/month arithmetic works for either format.
    yyyymm = pd.to_numeric(df['date'].astype(str).str.replace('-', '', regex=False))
    dates = yyyymm // 100 + (yyyymm % 100) / 12

    # Percent returns -> growth factors -> running portfolio value.
    strategy_cumulative = (df[return_col_name] / 100 + 1.0).cumprod()
    mir_cumulative = (df[mir_col] / 100 + 1.0).cumprod()

    plt.figure()
    plt.plot(dates, strategy_cumulative, label=f'{return_col_name} Portfolio Value')
    plt.plot(dates, mir_cumulative, label='CAPM-Implied Portfolio Value')

    plt.title(f'{return_col_name} Model Performance')
    plt.xlabel('Date')
    plt.ylabel('Cumulative Portfolio Value')

    plt.legend()
    plt.show()

In [3]:
'''
Load CRSP data
'''

crsp = pd.read_csv('crsp_1926_2020.zip')

# Coerce prices and returns to numbers (non-numeric codes become NaN), then
# discard the rows where either one is missing.
crsp = crsp.assign(
    PRC=pd.to_numeric(crsp['PRC'], errors='coerce'),
    RET=pd.to_numeric(crsp['RET'], errors='coerce'),
).dropna(subset=['PRC', 'RET'])

# Fix the dtypes of the columns used for filtering and date handling
crsp = crsp.astype({'date': 'string', 'SHRCD': 'int', 'EXCHCD': 'int'})

# [From HW2] Keep share codes 10/11 on exchange codes 1/2/3 with a
# non-negative price
crsp = crsp[
    crsp['SHRCD'].isin([10, 11])
    & crsp['EXCHCD'].isin([1, 2, 3])
    & (crsp['PRC'] >= 0)
]

# Trim the last three characters of the date, take the year from what remains,
# add market value (price * shares outstanding) and scale returns by 100.
crsp = crsp.assign(
    date=lambda d: d['date'].str[:-3],
    year=lambda d: d['date'].str[:-3].astype('int'),
    MV=lambda d: d['PRC'] * d['SHROUT'],
    RET=lambda d: d['RET'] * 100,
)
crsp

Unnamed: 0,PERMNO,date,SHRCD,EXCHCD,PRC,RET,SHROUT,year,MV
28,10001,1986-09,11,3,6.37500,-0.3077,991.0,1986,6.317625e+03
29,10001,1986-10,11,3,6.62500,3.9216,991.0,1986,6.565375e+03
30,10001,1986-11,11,3,7.00000,5.6604,991.0,1986,6.937000e+03
31,10001,1986-12,11,3,7.00000,1.5000,991.0,1986,6.937000e+03
32,10001,1987-01,11,3,6.75000,-3.5714,991.0,1987,6.689250e+03
...,...,...,...,...,...,...,...,...,...
4705164,93436,2020-08,11,3,498.32001,74.1452,931809.0,2020,4.643391e+08
4705165,93436,2020-09,11,3,429.01001,-13.9087,948000.0,2020,4.067015e+08
4705166,93436,2020-10,11,3,388.04001,-9.5499,947901.0,2020,3.678235e+08
4705167,93436,2020-11,11,3,567.59998,46.2736,947901.0,2020,5.380286e+08


In [45]:
'''
Load FF5 data
'''

# Read the factor file with 'date' as text, then insert a dash after the first
# four characters of each date.
ff5 = pd.read_csv('ff5_factors.csv').astype({'date': 'string'})
ff5['date'] = ff5['date'].apply(lambda yyyymm: f'{yyyymm[:4]}-{yyyymm[4:]}')

In [5]:
'''
Load 100 Best Companies to Work for in America
'''

# Load the list, drop entries without a permno, fix column dtypes, rename the
# identifier to 'PERMNO', and order by year then rank.
bcw = (
    pd.read_csv('bcwlist_modified.csv')
    .dropna(subset=['permno'])
    .astype({'rank': 'int', 'company': 'string', 'year': 'int'})
    .rename(columns={'permno': 'PERMNO'})
    .sort_values(by=['year', 'rank'])
)
bcw

Unnamed: 0,rank,company,PERMNO,year
0,1,AT&T Bell Laboratories,66093.0,1984
1,2,Trammell Crow Company,85629.0,1984
2,3,Delta Airlines,26112.0,1984
3,4,Federal Express,60628.0,1984
4,5,Goldman Sachs,86868.0,1984
...,...,...,...,...
2486,87,AbbVie,13721.0,2020
2487,88,Encompass Home Health & Hospice,10693.0,2020
2493,94,Goldman Sachs,86868.0,2020
2498,99,Delta Airlines,91926.0,2020


## Problem 1

### Part A

We apply the same data cleaning to the CRSP dataset as in Homework 2: we keep common shares (shrcd = 10 or 11) listed on the NYSE, AMEX, or NASDAQ (exchcd = 1, 2, or 3). This causes us to lose a couple hundred rows from bcw. We also drop rows with negative prices.

We assume that a firm with no PERMNO is not publicly traded, so we leave it out.

We use all firms on the Best Companies to Work For list at each release.

We take a firm's listing and delisting dates to be its minimum and maximum dates in the merged data. This could be improved by grouping by PERMNO and applying a smarter function over each firm's list of dates.

Calculate weights: we equal-weight or value-weight firms based on January data.

In [6]:
# Prepare bcw for the merge: the 1984 and 1993 lists stay in force until the
# next list appears (1993 and 1998), so replicate their rows for each year in
# between. 'year_formed' records the list year each row originally came from.
bcw['year_formed'] = bcw['year']

bcw_extra = []
for year_formed, gap_length in zip([1984, 1993], [1993 - 1984, 1998 - 1993]):
    to_increment = bcw[bcw['year_formed'] == year_formed]
    for increment in range(1, gap_length):
        # Shift the whole list forward in one vectorized step. Going through
        # iterrows() would rebuild every row as a generic Series and lose the
        # column dtypes (e.g. the 'string' company column).
        bcw_extra.append(to_increment.assign(year=to_increment['year'] + increment))

bcw_extra = pd.concat(bcw_extra, ignore_index=True)

bcw = pd.concat([bcw, bcw_extra], ignore_index=True)
bcw

Unnamed: 0,rank,company,PERMNO,year,year_formed
0,1,AT&T Bell Laboratories,66093.0,1984,1984
1,2,Trammell Crow Company,85629.0,1984,1984
2,3,Delta Airlines,26112.0,1984,1984
3,4,Federal Express,60628.0,1984,1984
4,5,Goldman Sachs,86868.0,1984,1984
...,...,...,...,...,...
2320,101,Viking Freight System,80814.0,1997,1993
2321,101,Wal-Mart Stores,55976.0,1997,1993
2322,101,Weyerhaeuser Company,39917.0,1997,1993
2323,101,Worthington Industries,83601.0,1997,1993


In [7]:
# Merge bcw and crsp: keep only the CRSP rows whose (year, PERMNO) pair is on
# the list for that year.
df = pd.merge(bcw, crsp, how='inner', on=['year', 'PERMNO'])

# Find dates where firms were listed and de-listed: start from each PERMNO's
# first and last date in the merged data.
min_max = df.groupby('PERMNO')['date'].agg(['min', 'max'])
special_dates = set(min_max['min'].tolist() + min_max['max'].tolist())
# Then also add every date that differs from the date in the row just before it.
# NOTE(review): if consecutive rows of the merged frame are successive monthly
# observations, this adds nearly every date to special_dates, which would make
# the rebalance flag below true almost everywhere -- confirm this is intended.
prev = df['date'][0]
for _, row in df.iloc[1:].iterrows():
    if row['date'] != prev:
        special_dates.add(row['date'])
    prev = row['date']

# Add rebalance column and shift down to the month it will apply.
# `&` binds tighter than `|`, so the flag is
# (date ends in '01' AND year == year_formed) OR (date is a special date).
df['rebalance'] = (df['date'].str[-2:] == '01') & (df['year'] == df['year_formed']) | (df['date'].isin(special_dates))
# Move each flag down one row within its PERMNO; the first row of every PERMNO
# has nothing to inherit, becomes NaN, and is dropped.
df['rebalance'] = df.groupby('PERMNO')['rebalance'].shift(1)
df = df.dropna(subset='rebalance')

# Drop the columns that are no longer needed and order the rows by date.
df = df.drop(['rank', 'SHRCD', 'EXCHCD', 'PRC', 'SHROUT'], axis=1)
df = df.sort_values(by=['date'])
df

Unnamed: 0,company,PERMNO,year,year_formed,date,RET,MV,rebalance
275,Doyle Dane Bernbach,30681.0,1984,1984,1984-02,-8.5714,9.702400e+04,True
670,Ryder System,27633.0,1984,1984,1984-02,-14.0230,1.048322e+06,True
479,Liebert Corporation,49411.0,1984,1984,1984-02,-13.4503,2.696930e+05,True
467,Levi Strauss & Company,52564.0,1984,1984,1984-02,-6.7568,1.445240e+06,True
71,Time,40483.0,1984,1984,1984-02,-9.5710,2.422500e+06,True
...,...,...,...,...,...,...,...,...
13288,Stryker,73139.0,2020,2020,2020-12,5.2571,9.208382e+07,True
13408,Adobe Systems,75510.0,2020,2020,2020-12,4.5248,2.395575e+08,True
13240,Hilton,14338.0,2020,2020,2020-12,7.3627,3.086864e+07,True
13384,Hyatt Hotels Corporation,93098.0,2020,2020,2020-12,3.1680,2.905922e+06,True


In [11]:
# Group by date and calculate weights
# NOTE(review): the value weights appear to use MV from the same row (month) as
# the RET they are later multiplied with -- confirm whether MV should be lagged.
df_weights = df.groupby('date', group_keys=False).apply(calc_weights)

# Assert that calc_weights is returning weights when rebalance is needed only
assert len(df_weights[(df_weights['rebalance'] == True) & (np.isnan(df_weights['weights_eq']))]) == 0
assert len(df_weights[(df_weights['rebalance'] == True) & (np.isnan(df_weights['weights_val']))]) == 0

# Fill the NaNs returned from calc_weights when there are no rebalances
# using the weights for that PERMNO on the previous date. The fill is done
# per PERMNO so that one firm's last weight can never spill into the first
# rows of the next firm, and .ffill() replaces the deprecated
# fillna(method='ffill').
df_weights = df_weights.sort_values(['PERMNO', 'date'])
df_weights['weights_eq'] = df_weights.groupby('PERMNO')['weights_eq'].ffill()
df_weights['weights_val'] = df_weights.groupby('PERMNO')['weights_val'].ffill()

# Assert that weights add up to one for all dates
test1 = df_weights.groupby('date')['weights_eq'].sum()
test2 = df_weights.groupby('date')['weights_val'].sum()
assert test1.apply(lambda x: np.isclose(x, 1.0, atol=0.00001)).all()
assert test2.apply(lambda x: np.isclose(x, 1.0, atol=0.00001)).all()

# Keep only what the return calculation needs, ordered by date then PERMNO
df_weights = df_weights.drop(['year', 'MV'], axis=1)
df_weights = df_weights.sort_values(['date', 'PERMNO'])
df_weights

Unnamed: 0,company,PERMNO,year_formed,date,RET,rebalance,weights_eq,weights_val
167,Atlantic Richfield Company,10604.0,1984,1984-02,-0.5525,True,0.014706,0.040119
227,Dana Corporation,11607.0,1984,1984-02,-13.3739,True,0.014706,0.004965
287,Du Pont,11703.0,1984,1984-02,-3.3668,True,0.014706,0.040396
299,Eastman Kodak Company,11754.0,1984,1984-02,-5.8319,True,0.014706,0.040148
311,Exxon Corporation,11850.0,1984,1984-02,-1.1321,True,0.014706,0.116281
...,...,...,...,...,...,...,...,...
13276,Salesforce.com,90215.0,2020,2020-12,-9.4670,True,0.029412,0.084678
13636,Delta Airlines,91926.0,2020,2020-12,-0.0994,True,0.029412,0.010664
13432,T-Mobile US,91937.0,2020,2020-12,1.4367,True,0.029412,0.069641
13384,Hyatt Hotels Corporation,93098.0,2020,2020-12,3.1680,True,0.029412,0.001208


## Problem 2

### Part A

In [38]:
# Each stock's contribution to the portfolio return is its return scaled by
# its weight.
df_weights['weighted_eq_ret'] = df_weights['RET'].mul(df_weights['weights_eq'])
df_weights['weighted_val_ret'] = df_weights['RET'].mul(df_weights['weights_val'])

# Summing the contributions within each date gives the monthly portfolio return.
by_date = df_weights.groupby('date')
eq_returns = by_date['weighted_eq_ret'].sum()
val_returns = by_date['weighted_val_ret'].sum()

analyze(returns=eq_returns, strat_name="Equal-weighted Best Companies")
print()
analyze(returns=val_returns, strat_name="Value-weighted Best Companies")

Equal-weighted Best Companies monthly returns:
Mean = 1.2257387146792096%
Volatility = 5.451595203301768%
Sharpe Ratio = 0.2248403758842621

Value-weighted Best Companies monthly returns:
Mean = 1.5381304690577398%
Volatility = 5.210591060887331%
Sharpe Ratio = 0.29519308866963856


In [49]:
# Run the factor-model regressions on the equal-weighted monthly returns
estimate_models(eq_returns, 'weighted_eq_ret', ff5)

CAPM
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.894
Model:                            OLS   Adj. R-squared:                  0.894
Method:                 Least Squares   F-statistic:                     3732.
Date:                Tue, 23 Apr 2024   Prob (F-statistic):          2.52e-217
Time:                        01:13:05   Log-Likelihood:                -882.61
No. Observations:                 443   AIC:                             1769.
Df Residuals:                     441   BIC:                             1777.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.1032      0.086      1.206    

In [50]:
# Run the factor-model regressions on the value-weighted monthly returns
estimate_models(val_returns, 'weighted_val_ret', ff5)

CAPM
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.827
Model:                            OLS   Adj. R-squared:                  0.826
Method:                 Least Squares   F-statistic:                     2102.
Date:                Tue, 23 Apr 2024   Prob (F-statistic):          6.86e-170
Time:                        01:13:18   Log-Likelihood:                -971.40
No. Observations:                 443   AIC:                             1947.
Df Residuals:                     441   BIC:                             1955.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.4857      0.105      4.643    

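The equal-weighted portfolio produces alphas that are positive but insignificant under every model we estimate. In contrast, the value-weighted portfolio produces much larger, positive, and significant alphas under every model. (Note: `estimate_models` prints a Carhart header, but that regression is commented out, so only the CAPM, FF3, and FF5 fits are actually produced.)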