# BEM114 Homework 3 - Value of Intangibles
**Names:** Andrew Zabelo, Daniel Wen, Kyle McCandless  
**Student IDs:** 2176083, 2159859, 2157818

## Setup

### Imports and Helper Functions

In [1]:
import pandas as pd
import numpy as np

from datetime import datetime
from dateutil.relativedelta import relativedelta

import statsmodels.api as sm
import matplotlib.pyplot as plt

In [2]:
def get_next_month_str(d1):
    """Return the calendar month after ``d1``.

    ``d1`` is a 'YYYY-MM' string; the result is formatted the same way.
    """
    this_month = datetime.strptime(d1, "%Y-%m")
    following_month = this_month + relativedelta(months=1)
    return following_month.strftime("%Y-%m")

def is_next_month(d1, d2):
    """Return True when 'YYYY-MM' string ``d2`` is exactly one month after ``d1``."""
    month_after_d1 = get_next_month_str(d1)
    return month_after_d1 == d2

# Sanity-check is_next_month: consecutive months (including year roll-overs) must match
assert all(
    is_next_month(earlier, later)
    for earlier, later in [
        ('1986-05', '1986-06'),
        ('2012-12', '2013-01'),
        ('1999-12', '2000-01'),
    ]
)
# ...and pairs that skip a month, a year, or more must not
assert not any(
    is_next_month(earlier, later)
    for earlier, later in [
        ('1986-04', '1986-06'),
        ('1998-04', '1999-05'),
        ('1984-01', '1986-10'),
    ]
)

def add_padding_month(group):
    """Append a zero-return row after every month in which the stock's data stops.

    ``group`` holds the rows of a single stock (its first ``company`` and
    ``PERMNO`` values are reused for the new rows) with 'YYYY-MM' strings in
    ``date``. A padding row is added for the month after the latest date and for
    the month after every date that is not followed by the next calendar month.
    Padding rows carry RET = 0.0 and MV = 1.0; all other columns are left
    unset. Returns a new frame with a fresh integer index.
    """
    ordered_dates = sorted(group['date'].tolist())

    # The latest month is always padded; so is any month followed by a gap
    months_before_gap = [group['date'].max()]
    for earlier, later in zip(ordered_dates, ordered_dates[1:]):
        if not is_next_month(earlier, later):
            months_before_gap.append(earlier)

    company = group['company'].iloc[0]
    permno = group['PERMNO'].iloc[0]

    # One padding row per collected month, dated the month after it
    padding_rows = [
        pd.Series({'company': company,
                   'PERMNO': permno,
                   'date': get_next_month_str(last_month),
                   'RET': 0.0,
                   'MV': 1.0})
        for last_month in months_before_gap
    ]

    return pd.concat([group, pd.DataFrame(padding_rows)], ignore_index=True)

def calc_weights(group):
    """Compute equal- and value-weighted portfolio weights for one group of stocks.

    Parameters
    ----------
    group : pd.DataFrame
        Rows sharing one grouping key (the notebook groups by date). Must have
        the columns ``rebalance``, ``PERMNO`` and ``MV``.

    Returns
    -------
    pd.DataFrame
        A copy of ``group`` with two added columns, ``weights_eq`` and
        ``weights_val``. If any row has ``rebalance`` set, ``weights_eq`` is
        1 / (number of stocks) and ``weights_val`` is each row's share of the
        group's total ``MV``. Otherwise both columns are NaN.

    Raises
    ------
    AssertionError
        If a PERMNO appears more than once in a group that is being rebalanced.
    """
    # Work on a copy: groupby.apply does not support functions that mutate the
    # group object they are handed.
    group = group.copy()

    if group['rebalance'].sum() > 0:
        n_stocks = group['PERMNO'].count()
        # Check uniqueness before using the count, so a duplicated stock is
        # caught before it can skew the weights.
        assert n_stocks == group['PERMNO'].nunique()

        group['weights_eq'] = 1 / float(n_stocks)
        group['weights_val'] = group['MV'] / group['MV'].sum()
    else:
        # No rebalance in this group: leave the weights empty for the caller to fill
        group['weights_eq'] = np.nan
        group['weights_val'] = np.nan

    return group

def analyze(returns, strat_name):
    """Print the mean, volatility and Sharpe ratio of a return series.

    ``returns`` needs ``.mean()`` and ``.std()``; ``strat_name`` labels the
    printout. The Sharpe ratio printed is simply mean divided by standard
    deviation (no risk-free rate is subtracted here). Returns nothing.
    """
    strat_mean, strat_vol = returns.mean(), returns.std()
    strat_sharpe = strat_mean / strat_vol
    print(f"{strat_name} monthly returns:\nMean = {strat_mean}%\nVolatility = {strat_vol}%\nSharpe Ratio = {strat_sharpe}")
    
    
def estimate_models(df_old, return_col_name, ff5):
    """Regress a strategy's excess returns on the CAPM, FF3 and FF5 factor sets.

    ``df_old`` is merged with ``ff5`` on ``date`` (inner join); the merge must
    keep every row of ``df_old`` or an AssertionError is raised. The dependent
    variable is ``return_col_name`` minus ``RF``. For each model a header line
    and the statsmodels OLS summary are printed. A 'Carhart' header is printed
    as well, but no Carhart regression is run. Returns nothing.
    """
    df = pd.merge(df_old, ff5, how='inner', on=['date'])
    assert len(df) == len(df_old)

    excess_returns = df[return_col_name] - df['RF']

    def print_fit(factor_names):
        # OLS of excess returns on the given factors plus an intercept
        regressors = sm.add_constant(df[factor_names])
        print(sm.OLS(excess_returns, regressors).fit().summary())

    print('CAPM')
    print_fit(['Mkt-RF'])

    print('FF3')
    print_fit(['Mkt-RF', 'SMB', 'HML'])

    # Header only: the Carhart regression is disabled (the original call was
    # commented out and used a placeholder name for the fourth factor).
    print('Carhart')

    print('FF5')
    print_fit(['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA'])
    
    
def plot_cum_returns(df, return_col_name, capm_beta):
    """Plot a strategy's cumulative value against its CAPM-implied cumulative value.

    Parameters
    ----------
    df : pd.DataFrame
        One row per month with the columns ``return_col_name``, ``RF`` and
        ``Mkt-RF`` (returns in percent). Dates are read from the ``date``
        column, or from the index when there is no such column, and may be
        'YYYY-MM' strings or YYYYMM integers.
    return_col_name : str
        Column holding the strategy's returns.
    capm_beta : float
        Market beta used for the CAPM-implied return, RF + beta * (Mkt-RF).

    Side effects
    ------------
    Adds the column ``return_col_name + '_MIR'`` to ``df`` and shows a figure.
    """
    df[return_col_name + '_MIR'] = df['RF'] + capm_beta * df['Mkt-RF']

    # Take the x-axis from the frame being plotted (previously an unrelated
    # global frame was used), so the dates always line up with the returns.
    raw_dates = df['date'] if 'date' in df.columns else df.index.to_series()
    # Normalise 'YYYY-MM' strings and YYYYMM numbers to a YYYYMM integer
    yyyymm = raw_dates.astype(str).str.replace('-', '', regex=False).str[:6].astype(int)
    # Fractional year: year + month / 12
    dates = yyyymm // 100 + (yyyymm % 100) / 12

    strategy_cumulative = (df[return_col_name] / 100 + 1.0).cumprod()
    mir_cumulative = (df[return_col_name + '_MIR'] / 100 + 1.0).cumprod()

    plt.figure()
    plt.plot(dates, strategy_cumulative, label=f'{return_col_name} Portfolio Value')
    plt.plot(dates, mir_cumulative, label='CAPM-Implied Portfolio Value')

    plt.title(f'{return_col_name} Model Performance')
    plt.xlabel('Date')
    plt.ylabel('Cumulative Portfolio Value')

    plt.legend()
    plt.show()

### Process Dataframes

In [3]:
# Load CRSP data
crsp = pd.read_csv('crsp_1926_2020.zip')

# Coerce prices and returns to numbers, then discard rows where either failed to parse
crsp = crsp.assign(
    PRC=pd.to_numeric(crsp['PRC'], errors='coerce'),
    RET=pd.to_numeric(crsp['RET'], errors='coerce'),
).dropna(subset=['PRC', 'RET'])

# Dates as strings; share and exchange codes as integers
crsp = crsp.astype({'date': 'string', 'SHRCD': 'int', 'EXCHCD': 'int'})

# [From HW2] Keep share codes 10/11 on exchange codes 1, 2, 3
keep_share_code = crsp['SHRCD'].isin([10, 11])
keep_exchange = crsp['EXCHCD'].isin([1, 2, 3])
crsp = crsp[keep_share_code & keep_exchange]

# Drop the last three characters of the date (leaving 'YYYY-MM'), then take
# everything before the month as the year
crsp['date'] = crsp['date'].str[:-3]
crsp['year'] = crsp['date'].str[:-3].astype('int')
# Market value uses the absolute price; returns are rescaled by 100 (percent)
crsp['MV'] = crsp['PRC'].abs() * crsp['SHROUT']
crsp['RET'] = crsp['RET'] * 100
crsp

Unnamed: 0,PERMNO,date,SHRCD,EXCHCD,PRC,RET,SHROUT,year,MV
2,10000,1986-02,10,3,-3.25000,-25.7143,3680.0,1986,1.196000e+04
3,10000,1986-03,10,3,-4.43750,36.5385,3680.0,1986,1.633000e+04
4,10000,1986-04,10,3,-4.00000,-9.8592,3793.0,1986,1.517200e+04
5,10000,1986-05,10,3,-3.10938,-22.2656,3793.0,1986,1.179388e+04
6,10000,1986-06,10,3,-3.09375,-0.5025,3793.0,1986,1.173459e+04
...,...,...,...,...,...,...,...,...,...
4705164,93436,2020-08,11,3,498.32001,74.1452,931809.0,2020,4.643391e+08
4705165,93436,2020-09,11,3,429.01001,-13.9087,948000.0,2020,4.067015e+08
4705166,93436,2020-10,11,3,388.04001,-9.5499,947901.0,2020,3.678235e+08
4705167,93436,2020-11,11,3,567.59998,46.2736,947901.0,2020,5.380286e+08


In [4]:
# Load FF5 data
ff5 = pd.read_csv('ff5_factors.csv').astype({'date': 'string'})
# Insert a dash after the first four characters of each date
ff5['date'] = ff5['date'].map(lambda raw: raw[:4] + '-' + raw[4:])

In [5]:
# Load 100 Best Companies to Work for in America
# Rows without a permno are discarded straight away
bcw = pd.read_csv('bcwlist_modified.csv').dropna(subset=['permno'])

# Fix column types, align the identifier name with CRSP, and order by list year then rank
bcw = (
    bcw.astype({'rank': 'int', 'company': 'string', 'year': 'int'})
    .rename(columns={'permno': 'PERMNO'})
    .sort_values(by=['year', 'rank'])
)
bcw

Unnamed: 0,rank,company,PERMNO,year
0,1,AT&T Bell Laboratories,66093.0,1984
1,2,Trammell Crow Company,85629.0,1984
2,3,Delta Airlines,26112.0,1984
3,4,Federal Express,60628.0,1984
4,5,Goldman Sachs,86868.0,1984
...,...,...,...,...
2486,87,AbbVie,13721.0,2020
2487,88,Encompass Home Health & Hospice,10693.0,2020
2493,94,Goldman Sachs,86868.0,2020
2498,99,Delta Airlines,91926.0,2020


## Problem 1

### Part A - Process

**Data cleaning:**
* Filter CRSP to ordinary/common shares (SHRCD = 10 or 11) and NYSE, AMEX, and NASDAQ stocks (EXCHCD = 1, 2, or 3)
* Do not filter out CRSP negative prices. We only need returns, and even if one month's return bid-ask midpoint estimate is slightly off, that won't affect the portfolio's performance at the end of the next month when the stock is traded
* Assume that a BCW company with no PERMNO is not publicly traded, and remove it from the dataset
* Augment bcw dataframe with all years a portfolio will be active. For example, the portfolio formed in 1984 is active in 1985, 1986, ... 1992.

**Calculating weights:**
* Merge CRSP data with bcw dataframe on year, so that monthly price data for all stocks in an active best companies portfolio are present
* Create **rebalance column:** True if the portfolio is rebalanced based on stock prices that month, False otherwise. Rebalance is True iff it is January of the year the portfolio was formed, or a stock is listed or de-listed in that month.
* Apply calc_weights, which rebalances the entire portfolio if rebalancing == True, and if not returns NaN weights.
* After sorting by PERMNO and date, fill NaN weights with the first non-NaN weights above them, keeping the weights the same for months where we don't rebalance the portfolio. One stock's weights can never be filled with the weights of another, since the earliest date of any stock always has rebalance == True
* Pad all stocks that are de-listed at any time(s) with an extra month of 0 returns
* Shift weights down by one date for each stock. For de-listing months, the previous month's weights occupy the padded month as desired, since the portfolio will carry the stock with 0 returns during the month it is de-listed, then be updated next month. For listing months, the weights from the first month the stock is present are shifted down to form the portfolio for the next month

**Calculating returns:**
* As in HW2, multiply the returns of each stock with its lagged weights, which are the weights the portfolio started that month with

In [6]:
# Prepare bcw for merge by adding years between 1984 - 1993, 1993 - 1998.
# Each list is carried forward, unchanged, into every year before the next
# list (e.g. the 1984 list is repeated for 1985 ... 1992).
#
# Guarded so that re-running this cell is harmless: without the guard a second
# run would overwrite year_formed on the carried-forward rows and append them
# to bcw again.
if 'year_formed' not in bcw.columns:
    bcw['year_formed'] = bcw['year']

    # One copy of the list per extra year, with only `year` moved forward
    bcw_extra = pd.concat(
        [
            bcw.loc[bcw['year_formed'] == list_year].assign(
                year=lambda list_rows: list_rows['year'] + years_ahead
            )
            for list_year, next_list_year in [(1984, 1993), (1993, 1998)]
            for years_ahead in range(1, next_list_year - list_year)
        ],
        ignore_index=True,
    )
    bcw = pd.concat([bcw, bcw_extra], ignore_index=True)

bcw

Unnamed: 0,rank,company,PERMNO,year,year_formed
0,1,AT&T Bell Laboratories,66093.0,1984,1984
1,2,Trammell Crow Company,85629.0,1984,1984
2,3,Delta Airlines,26112.0,1984,1984
3,4,Federal Express,60628.0,1984,1984
4,5,Goldman Sachs,86868.0,1984,1984
...,...,...,...,...,...
2320,101,Viking Freight System,80814.0,1997,1993
2321,101,Wal-Mart Stores,55976.0,1997,1993
2322,101,Weyerhaeuser Company,39917.0,1997,1993
2323,101,Worthington Industries,83601.0,1997,1993


In [7]:
# Merge bcw and crsp
# Inner join: only CRSP rows whose (year, PERMNO) pair appears in bcw survive.
df = pd.merge(bcw, crsp, how='inner', on=['year', 'PERMNO'])
# The gap-detection loop below compares consecutive rows, so it needs this order.
df = df.sort_values(by=['PERMNO', 'date'])

# Find dates where firms were listed and de-listed
# special_dates starts with each PERMNO's first month in the merged data ...
special_dates = set(df.groupby('PERMNO')['date'].min().tolist())
# ... and gains the month after each PERMNO's last month in the merged data.
last_trade_dates = df.groupby('PERMNO')['date'].max().tolist()
for date in last_trade_dates:
    special_dates.add(get_next_month_str(date))

# Add to special dates if any firms are de-listed then listed again
# For two consecutive rows of the same PERMNO that are not one month apart,
# record the month after the earlier row and the month of the later row.
prev_row = df.iloc[0]
for _, row in df.iloc[1:].iterrows():
    if (not is_next_month(prev_row['date'], row['date'])) and (prev_row['PERMNO'] == row['PERMNO']):
        special_dates.add(get_next_month_str(prev_row['date']))
        special_dates.add(row['date'])
    prev_row = row
    
# Add rebalance column
# True for January rows in the year the list was formed, and for every row
# whose month is in special_dates. special_dates is pooled across all firms,
# so an entry or exit by one firm flags every stock's row for that month.
df['rebalance'] = ((df['date'].str[-2:] == '01') & (df['year'] == df['year_formed'])) | (df['date'].isin(special_dates))

# Drop columns that are not used after this point, then order rows by month
df = df.drop(['rank', 'SHRCD', 'EXCHCD', 'PRC', 'SHROUT'], axis=1)
df = df.sort_values(by=['date'])
df

Unnamed: 0,company,PERMNO,year,year_formed,date,RET,MV,rebalance
562,Moog,61807.0,1984,1984,1984-01,-15.0000,8.582875e+04,True
394,Inland Steel Company,12458.0,1984,1984,1984-01,-3.2258,7.469400e+05,True
490,Liebert Corporation,49411.0,1984,1984,1984-01,1.7857,3.116048e+05,True
154,Armstrong,19692.0,1984,1984,1984-01,-5.4299,6.469856e+05,True
70,Time,40483.0,1984,1984,1984-01,-8.9417,2.671005e+06,True
...,...,...,...,...,...,...,...,...
13271,Hilton,14338.0,2020,2020,2020-12,7.3627,3.086864e+07,False
13379,CarMax,89508.0,2020,2020,2020-12,1.0483,1.540019e+07,False
13331,American Express,59176.0,2020,2020,2020-12,1.9563,9.735697e+07,False
13391,Capital One Financial,81055.0,2020,2020,2020-12,15.4250,4.521369e+07,False


In [8]:
# Group by date and calculate weights
df_weights = df.groupby('date', group_keys=False).apply(calc_weights)

# Assert that calc_weights is returning weights when rebalance is needed only
assert len(df_weights[(df_weights['rebalance'] == True) & (np.isnan(df_weights['weights_eq']))]) == 0
assert len(df_weights[(df_weights['rebalance'] == True) & (np.isnan(df_weights['weights_val']))]) == 0

# Fill the NaNs returned from calc_weights when there are no rebalances
# using the weights for that PERMNO on the previous date.
# .ffill() replaces fillna(method='ffill'), which is deprecated in recent pandas.
df_weights = df_weights.sort_values(['PERMNO', 'date'])
df_weights['weights_eq'] = df_weights['weights_eq'].ffill()
df_weights['weights_val'] = df_weights['weights_val'].ffill()

# Pad all stocks with 1 extra month of 0 returns for weight shift
df_weights = df_weights.groupby('PERMNO').apply(add_padding_month).reset_index(drop=True)

# Shift weights
# Each row receives the weights from the same stock's previous row; the first
# row of each stock has no previous weights and is dropped.
df_weights = df_weights.sort_values(['PERMNO', 'date'])
df_weights['weights_eq_lag'] = df_weights.groupby('PERMNO')['weights_eq'].shift(1)
df_weights['weights_val_lag'] = df_weights.groupby('PERMNO')['weights_val'].shift(1)
df_weights = df_weights.dropna(subset=['weights_eq_lag', 'weights_val_lag'])

# Assert that weights add up to one for all dates
test1 = df_weights.groupby('date')['weights_eq_lag'].sum()
test2 = df_weights.groupby('date')['weights_val_lag'].sum()
assert test1.apply(lambda x: np.isclose(x, 1.0, atol=0.00001)).all()
assert test2.apply(lambda x: np.isclose(x, 1.0, atol=0.00001)).all()

df_weights = df_weights.drop(['year', 'MV'], axis=1)
df_weights = df_weights.sort_values(['date', 'PERMNO'])
df_weights

Unnamed: 0,company,PERMNO,year_formed,date,RET,rebalance,weights_eq,weights_val,weights_eq_lag,weights_val_lag
402,Atlantic Richfield Company,10604.0,1984.0,1984-02,-0.5525,False,0.014085,0.038332,0.014085,0.038332
710,Dana Corporation,11607.0,1984.0,1984-02,-13.3739,False,0.014085,0.005508,0.014085,0.005508
893,Du Pont,11703.0,1984.0,1984-02,-3.3668,False,0.014085,0.040308,0.014085,0.040308
1062,Eastman Kodak Company,11754.0,1984.0,1984-02,-5.8319,False,0.014085,0.040958,0.014085,0.040958
1171,Exxon Corporation,11850.0,1984.0,1984-02,-1.1321,False,0.014085,0.114076,0.014085,0.114076
...,...,...,...,...,...,...,...,...,...,...
22660,Salesforce.com,90215.0,,2021-01,0.0000,,,,0.029412,0.085811
23068,Delta Airlines,91926.0,,2021-01,0.0000,,,,0.029412,0.019130
23105,T-Mobile US,91937.0,,2021-01,0.0000,,,,0.029412,0.036010
23305,Hyatt Hotels Corporation,93098.0,,2021-01,0.0000,,,,0.029412,0.001641


## Problem 2

### Part A

In [9]:
# Each stock's contribution to the month's portfolio return:
# its return times the lagged weight
df_weights['weighted_eq_ret'] = df_weights['RET'] * df_weights['weights_eq_lag']
df_weights['weighted_val_ret'] = df_weights['RET'] * df_weights['weights_val_lag']

# Monthly portfolio return = sum of the contributions within each date
eq_returns, val_returns = (
    df_weights.groupby('date')[contribution_col].sum()
    for contribution_col in ('weighted_eq_ret', 'weighted_val_ret')
)

analyze(eq_returns, "Equal-weighted Best Companies")
print()
analyze(val_returns, "Value-weighted Best Companies")

Equal-weighted Best Companies monthly returns:
Mean = 1.220920902080068%
Volatility = 5.403471378171253%
Sharpe Ratio = 0.22595121110704866

Value-weighted Best Companies monthly returns:
Mean = 1.0753851390766656%
Volatility = 5.255787229671859%
Sharpe Ratio = 0.20460971726661895


### Part B

In [10]:
# Factor regressions for the equal-weighted portfolio's monthly returns
estimate_models(df_old=eq_returns, return_col_name='weighted_eq_ret', ff5=ff5)

CAPM
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.892
Model:                            OLS   Adj. R-squared:                  0.892
Method:                 Least Squares   F-statistic:                     3653.
Date:                Tue, 23 Apr 2024   Prob (F-statistic):          8.79e-216
Time:                        23:42:44   Log-Likelihood:                -885.39
No. Observations:                 444   AIC:                             1775.
Df Residuals:                     442   BIC:                             1783.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.1085      0.086      1.267    

In [11]:
# Factor regressions for the value-weighted portfolio's monthly returns
estimate_models(df_old=val_returns, return_col_name='weighted_val_ret', ff5=ff5)

CAPM
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.835
Model:                            OLS   Adj. R-squared:                  0.834
Method:                 Least Squares   F-statistic:                     2233.
Date:                Tue, 23 Apr 2024   Prob (F-statistic):          6.71e-175
Time:                        23:42:44   Log-Likelihood:                -966.89
No. Observations:                 444   AIC:                             1938.
Df Residuals:                     442   BIC:                             1946.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0137      0.103      0.133    

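The equal-weighted and value-weighted portfolios produce alphas that are positive but insignificant under all of the estimated models (CAPM, FF3, and FF5). The Carhart regression is commented out in `estimate_models`, so only its header is printed and no Carhart alpha is estimated.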