In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import minimize
from datetime import datetime
import os

In [3]:
hf_df = pd.read_excel("Data/hedge_funds_returns_data.xlsx")
factors_df = pd.read_excel("Data/factors_returns_data.xlsx")

In [29]:
short_mapping = {
    'Date': 'Date',
    'HFRI 400 (US) Fund Weighted Composite Index (HFRI4FWC)': 'HFRI4FWC',
    'HFRI 400 (US) EH: Long/Short Index (HFRI4ELS)': 'HFRI4ELS',
    'HFRI 400 (US) EH: Fundamental Value Index (HFRI4EHV)': 'HFRI4EHV',
    'HFRI 400 (US) Event-Driven Index (HFRI4ED)': 'HFRI4ED'
    
}

hf_df = hf_df.rename(columns=short_mapping)
hf_df.head()

Unnamed: 0,Date,HFRI4FWC,HFRI4ELS,HFRI4ED,HFRI4EHV
0,2005-01-31,0.0047,0.0067,0.0012,0.0049
1,2005-02-28,0.0198,0.0279,0.0111,0.0219
2,2005-03-31,-0.0103,-0.0172,0.0002,-0.0111
3,2005-04-29,-0.0124,-0.0165,-0.0105,-0.0137
4,2005-05-31,0.0079,0.0122,0.0064,0.0122


In [30]:
factors_df.tail()

Unnamed: 0,Date,Mkt-RF,SMB,HML,Mom,RF
216,2023-01-31,0.0665,0.0502,-0.0405,-0.1598,0.0035
217,2023-02-28,-0.0258,0.0121,-0.0078,0.0021,0.0034
218,2023-03-31,0.0251,-0.0559,-0.0901,-0.025,0.0036
219,2023-04-28,0.0061,-0.0334,-0.0003,0.0165,0.0035
220,2023-05-31,0.0035,0.0153,-0.078,-0.0063,0.0036


In [31]:
hf_df.tail()

Unnamed: 0,Date,HFRI4FWC,HFRI4ELS,HFRI4ED,HFRI4EHV
216,2023-01-31,0.0246,0.0436,0.0284,0.0499
217,2023-02-28,-0.0046,-0.0117,0.0019,-0.011
218,2023-03-31,-0.0137,-0.0015,-0.0205,-0.005
219,2023-04-28,0.0021,0.0008,0.0022,0.0019
220,2023-05-31,-0.0036,0.0007,-0.0189,-0.0067


In [32]:
def check_dataframe_info(df, name):
    print(f"\n=== {name} INFO ===")
    print(f"Shape: {df.shape}")
    print(f"Date range: {df.index.min()} to {df.index.max()}")
    print(f"Total NaN values: {df.isnull().sum().sum()}")
    print(f"Index is sorted: {df.index.is_monotonic_increasing}")
    print(f"Index type: {type(df.index)}")
    print("\nNaN values per column:")
    print(df.isnull().sum())
    print("\nFirst 2 rows:")
    print(df.head(2))
    print("\nLast 2 rows:")
    print(df.tail(2))

def check_alignment(df1, df2, name1, name2):
    print(f"\n=== ALIGNMENT CHECK ===")
    print(f"{name1} date range: {df1.index.min()} to {df1.index.max()}")
    print(f"{name2} date range: {df2.index.min()} to {df2.index.max()}")
    
    # Check common dates
    common_dates = df1.index.intersection(df2.index)
    print(f"\nCommon dates across all DataFrames: {len(common_dates)}")
    print(f"Common date range: {common_dates.min()} to {common_dates.max()}")

In [33]:
check_dataframe_info(hf_df, "HEDGE FUNDS")


=== HEDGE FUNDS INFO ===
Shape: (221, 5)
Date range: 0 to 220
Total NaN values: 0
Index is sorted: True
Index type: <class 'pandas.core.indexes.range.RangeIndex'>

NaN values per column:
Date        0
HFRI4FWC    0
HFRI4ELS    0
HFRI4ED     0
HFRI4EHV    0
dtype: int64

First 2 rows:
        Date  HFRI4FWC  HFRI4ELS  HFRI4ED  HFRI4EHV
0 2005-01-31    0.0047    0.0067   0.0012    0.0049
1 2005-02-28    0.0198    0.0279   0.0111    0.0219

Last 2 rows:
          Date  HFRI4FWC  HFRI4ELS  HFRI4ED  HFRI4EHV
219 2023-04-28    0.0021    0.0008   0.0022    0.0019
220 2023-05-31   -0.0036    0.0007  -0.0189   -0.0067


In [34]:
check_dataframe_info(factors_df, "FACTORS")


=== FACTORS INFO ===
Shape: (221, 6)
Date range: 0 to 220
Total NaN values: 0
Index is sorted: True
Index type: <class 'pandas.core.indexes.range.RangeIndex'>

NaN values per column:
Date      0
Mkt-RF    0
SMB       0
HML       0
Mom       0
RF        0
dtype: int64

First 2 rows:
        Date  Mkt-RF     SMB     HML  Mom         RF
0 2005-01-31 -0.0276 -0.0172  0.0206  0.0305  0.0016
1 2005-02-28  0.0189 -0.0057  0.0153  0.0337  0.0016

Last 2 rows:
          Date  Mkt-RF     SMB     HML  Mom         RF
219 2023-04-28  0.0061 -0.0334 -0.0003  0.0165  0.0035
220 2023-05-31  0.0035  0.0153 -0.0780 -0.0063  0.0036


In [35]:
check_alignment(hf_df,factors_df,  
                "HEDGE FUNDS", "FACTORS") 


=== ALIGNMENT CHECK ===
HEDGE FUNDS date range: 0 to 220
FACTORS date range: 0 to 220

Common dates across all DataFrames: 221
Common date range: 0 to 220


In [36]:
hf_df.describe()

Unnamed: 0,Date,HFRI4FWC,HFRI4ELS,HFRI4ED,HFRI4EHV
count,221,221.0,221.0,221.0,221.0
mean,2014-03-31 01:31:13.303167488,0.004832,0.005819,0.004905,0.005545
min,2005-01-31 00:00:00,-0.0654,-0.0917,-0.1421,-0.1142
25%,2009-08-31 00:00:00,-0.0041,-0.0093,-0.0035,-0.0097
50%,2014-03-31 00:00:00,0.006,0.008,0.008,0.009
75%,2018-10-31 00:00:00,0.0148,0.0219,0.0139,0.0209
max,2023-05-31 00:00:00,0.0554,0.0884,0.0765,0.111
std,,0.016772,0.026211,0.020738,0.027899


In [37]:
factors_df.describe()

Unnamed: 0,Date,Mkt-RF,SMB,HML,Mom,RF
count,221,221.0,221.0,221.0,221.0,221.0
mean,2014-03-31 01:31:13.303167488,0.007315,0.000215,-0.001251,0.000777,0.001027
min,2005-01-31 00:00:00,-0.1723,-0.0593,-0.1395,-0.343,0.0
25%,2009-08-31 00:00:00,-0.0176,-0.0182,-0.0185,-0.0179,0.0
50%,2014-03-31 00:00:00,0.0118,0.0008,-0.0033,0.0043,0.0001
75%,2018-10-31 00:00:00,0.0324,0.0148,0.0132,0.0256,0.0018
max,2023-05-31 00:00:00,0.1365,0.0734,0.1275,0.1275,0.0044
std,,0.045135,0.024461,0.032573,0.044931,0.001355


In [38]:
factors_df = factors_df.rename(columns={'Mom   ': 'Mom'})

In [39]:
def calculate_betas(hf_df, factors_df):
    """
    Calculate betas for each hedge fund against each factor
    """
    betas_dict = {}
    
    # Get hedge fund columns (excluding Date)
    hf_columns = [col for col in hf_df.columns if col != 'Date']
    
    for hf in hf_columns:
        # β₁ᵢ = Cov(RHFᵢ, RMkt-RF) / Var(RMkt-RF)
        beta1 = np.cov(hf_df[hf], factors_df['Mkt-RF'])[0,1] / np.var(factors_df['Mkt-RF'], ddof=1)
        
        # β₂ᵢ = Cov(RHFᵢ, RSMB) / Var(RSMB)
        beta2 = np.cov(hf_df[hf], factors_df['SMB'])[0,1] / np.var(factors_df['SMB'], ddof=1)
        
        # β₃ᵢ = Cov(RHFᵢ, RHML) / Var(RHML)
        beta3 = np.cov(hf_df[hf], factors_df['HML'])[0,1] / np.var(factors_df['HML'], ddof=1)
        
        # β₄ᵢ = Cov(RHFᵢ, RMom) / Var(RMom)
        beta4 = np.cov(hf_df[hf], factors_df['Mom'])[0,1] / np.var(factors_df['Mom'], ddof=1)
        
        # β₅ᵢ = Cov(RHFᵢ, RF) / Var(RF)
        beta5 = np.cov(hf_df[hf], factors_df['RF'])[0,1] / np.var(factors_df['RF'], ddof=1)
        
        betas_dict[hf] = {
            'beta_mkt': beta1,
            'beta_smb': beta2, 
            'beta_hml': beta3,
            'beta_mom': beta4,
            'beta_rf': beta5
        }
    
    return betas_dict

betas_dict = calculate_betas(hf_df, factors_df)

In [40]:
def calculate_quarterly_betas(hf_df, factors_df):
    """
    Calculate betas for each hedge fund against each factor on a rolling quarterly basis
    Each quarter will have exactly 3 monthly data points
    """
    # Convert Date column to datetime
    hf_df = hf_df.copy()
    factors_df = factors_df.copy()
    hf_df['Date'] = pd.to_datetime(hf_df['Date'])
    factors_df['Date'] = pd.to_datetime(factors_df['Date'])
    
    # Merge dataframes on Date
    merged_df = pd.merge(hf_df, factors_df, on='Date', how='inner')
    merged_df = merged_df.sort_values('Date')
    
    # Get hedge fund columns
    hf_columns = [col for col in hf_df.columns if col != 'Date']
    
    # Create quarterly groupings starting from 2012
    merged_df['Year'] = merged_df['Date'].dt.year
    merged_df['Month'] = merged_df['Date'].dt.month
    merged_df['Quarter'] = merged_df['Month'].apply(lambda x: f"Q{((x-1)//3)+1}")
    merged_df['YearQuarter'] = merged_df['Year'].astype(str) + ' ' + merged_df['Quarter']
    
    # Filter data from 2012 onwards
    merged_df = merged_df[merged_df['Year'] >= 2012]
    
    # Dictionary to store results
    quarterly_betas_dict = {}
    
    # Get unique quarters
    quarters = merged_df['YearQuarter'].unique()
    quarters = sorted(quarters)
    
    for hf in hf_columns:
        quarterly_results = []
        
        for quarter in quarters:
            # Get data for this specific quarter
            quarter_data = merged_df[merged_df['YearQuarter'] == quarter]
            
            # Skip if we don't have exactly 3 months or if any data is missing
            if len(quarter_data) != 3:
                continue
                
            # Check for missing values
            if quarter_data[hf].isna().any() or quarter_data[['Mkt-RF', 'SMB', 'HML', 'Mom', 'RF']].isna().any().any():
                continue
            
            try:
                # Calculate variances first and check if they're valid
                var_mkt = np.var(quarter_data['Mkt-RF'], ddof=1)
                var_smb = np.var(quarter_data['SMB'], ddof=1)
                var_hml = np.var(quarter_data['HML'], ddof=1)
                var_mom = np.var(quarter_data['Mom'], ddof=1)
                var_rf = np.var(quarter_data['RF'], ddof=1)
                
                # Calculate betas only if variance is not zero or NaN
                beta_mkt = np.cov(quarter_data[hf], quarter_data['Mkt-RF'])[0,1] / var_mkt if var_mkt != 0 and not np.isnan(var_mkt) else np.nan
                beta_smb = np.cov(quarter_data[hf], quarter_data['SMB'])[0,1] / var_smb if var_smb != 0 and not np.isnan(var_smb) else np.nan
                beta_hml = np.cov(quarter_data[hf], quarter_data['HML'])[0,1] / var_hml if var_hml != 0 and not np.isnan(var_hml) else np.nan
                beta_mom = np.cov(quarter_data[hf], quarter_data['Mom'])[0,1] / var_mom if var_mom != 0 and not np.isnan(var_mom) else np.nan
                beta_rf = np.cov(quarter_data[hf], quarter_data['RF'])[0,1] / var_rf if var_rf != 0 and not np.isnan(var_rf) else np.nan
                
                quarterly_results.append({
                    'Quarter': quarter,
                    'β_Mkt-RF': beta_mkt,
                    'β_SMB': beta_smb,
                    'β_HML': beta_hml,
                    'β_Mom': beta_mom,
                    'β_RF': beta_rf
                })
                
            except:
                continue
        
        # Convert to DataFrame
        if quarterly_results:
            quarterly_betas_dict[hf] = pd.DataFrame(quarterly_results)
    
    return quarterly_betas_dict

quarterly_betas_dict = calculate_quarterly_betas(hf_df, factors_df)

In [41]:
hf_df.columns

Index(['Date', 'HFRI4FWC', 'HFRI4ELS', 'HFRI4ED', 'HFRI4EHV'], dtype='object')

In [42]:
quarterly_betas_dict

{'HFRI4FWC':     Quarter  β_Mkt-RF     β_SMB     β_HML     β_Mom        β_RF
 0   2012 Q1  1.251557  0.214303 -0.981955 -0.186317         NaN
 1   2012 Q2  0.226077  0.446721  0.983295 -0.271952 -208.500000
 2   2012 Q3 -0.160003 -0.097370 -0.180802  0.075068  -32.000000
 3   2012 Q4  0.532527  0.662602 -0.011389 -0.400221         NaN
 4   2013 Q1  0.500617  0.658718  1.783607 -0.560506         NaN
 5   2013 Q2  0.557910 -0.293301  0.474146 -0.443491         NaN
 6   2013 Q3  0.276035  0.922707  0.603667  0.792452         NaN
 7   2013 Q4  0.388637 -0.144570  0.421262 -0.368217         NaN
 8   2014 Q1  0.278185  0.257333 -0.083052  0.216134         NaN
 9   2014 Q2  0.587732  0.174254 -0.880819  0.330776         NaN
 10  2014 Q3  0.247846  0.354003 -0.029592  1.658056         NaN
 11  2014 Q4  0.187737 -0.232255 -0.152957  0.383914         NaN
 12  2015 Q1  0.141804 -0.007250  0.095204 -0.193137         NaN
 13  2015 Q2  0.934372 -0.318610  0.262911 -0.042477         NaN
 14  2015 Q3 

In [43]:
# Remove β_RF column from all hedge funds in quarterly_betas_dict
for hf_name in quarterly_betas_dict.keys():
    if 'β_RF' in quarterly_betas_dict[hf_name].columns:
        quarterly_betas_dict[hf_name] = quarterly_betas_dict[hf_name].drop('β_RF', axis=1)

In [44]:
base_path = '/Users/jatindhurve/Desktop/PROJECTS/main_HF_project/all_output_results'  # Change this to your preferred directory
output_folder = os.path.join(base_path, 'All_Hedge_Fund_Betas')

os.makedirs(output_folder, exist_ok=True)

print(f"Directory created at: {output_folder}")

Directory created at: /Users/jatindhurve/Desktop/PROJECTS/main_HF_project/all_output_results/All_Hedge_Fund_Betas


### **HFRI 400 (US) Fund Weighted Composite Index (HFRI4FWC)**

In [45]:
# Example: If your hedge fund is named 'Fund_A'
hf_betas_HFRI4FWC = pd.DataFrame(quarterly_betas_dict['HFRI4FWC'])
hf_betas_HFRI4FWC.to_excel(os.path.join(output_folder, 'HFRI4FWC_betas.xlsx'), index=False)
hf_betas_HFRI4FWC.head()

Unnamed: 0,Quarter,β_Mkt-RF,β_SMB,β_HML,β_Mom
0,2012 Q1,1.251557,0.214303,-0.981955,-0.186317
1,2012 Q2,0.226077,0.446721,0.983295,-0.271952
2,2012 Q3,-0.160003,-0.09737,-0.180802,0.075068
3,2012 Q4,0.532527,0.662602,-0.011389,-0.400221
4,2013 Q1,0.500617,0.658718,1.783607,-0.560506


### **HFRI 400 (US) EH: Long/Short Index (HFRI4ELS)**

In [46]:
hf_betas_HFRI4ELS = pd.DataFrame(quarterly_betas_dict['HFRI4ELS'])
hf_betas_HFRI4ELS.to_excel(os.path.join(output_folder, 'HFRI4ELS_betas.xlsx'), index=False)
hf_betas_HFRI4ELS.head()

Unnamed: 0,Quarter,β_Mkt-RF,β_SMB,β_HML,β_Mom
0,2012 Q1,1.828523,0.297726,-1.422139,-0.26836
1,2012 Q2,0.524261,1.547814,2.421659,-0.646844
2,2012 Q3,0.607259,0.328087,0.794426,-0.176866
3,2012 Q4,0.532592,0.679355,0.030461,-0.453772
4,2013 Q1,0.693357,0.700659,2.684543,-0.835919


### **HFRI 400 (US) Event-Driven Index (HFRI4ED)**

In [47]:
hf_betas_HFRI4ED = pd.DataFrame(quarterly_betas_dict['HFRI4ED'])
hf_betas_HFRI4ED.to_excel(os.path.join(output_folder, 'HFRI4ED_betas.xlsx'), index=False)
hf_betas_HFRI4ED.head()

Unnamed: 0,Quarter,β_Mkt-RF,β_SMB,β_HML,β_Mom
0,2012 Q1,0.688885,0.23493,-0.635595,-0.131847
1,2012 Q2,0.262827,0.845628,1.233295,-0.326485
2,2012 Q3,0.357375,0.2023,0.443457,-0.128111
3,2012 Q4,0.349625,0.494175,0.141019,-0.452593
4,2013 Q1,0.40014,0.511337,1.440984,-0.452282


### **HFRI 400 (US) EH: Fundamental Value Index (HFRI4EHV)'**

In [48]:
hf_betas_HFRI4EHV = pd.DataFrame(quarterly_betas_dict['HFRI4EHV'])
hf_betas_HFRI4EHV.to_excel(os.path.join(output_folder, 'HFRI4EHV_betas.xlsx'), index=False)
hf_betas_HFRI4EHV.head()

Unnamed: 0,Quarter,β_Mkt-RF,β_SMB,β_HML,β_Mom
0,2012 Q1,1.655158,0.314553,-1.323935,-0.254199
1,2012 Q2,0.600359,2.445355,2.959101,-0.76203
2,2012 Q3,0.84989,0.477056,1.065159,-0.294133
3,2012 Q4,0.597488,0.772541,0.060303,-0.542467
4,2013 Q1,0.806175,1.102312,2.830211,-0.890918
