In [1]:
import pandas as pd
import numpy as np
from statsmodels.regression.rolling import RollingOLS
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression

In [2]:
# Read the workbook into a DataFrame, using its 'Date' column as the index.
all_data = pd.read_excel('Fuhrer and Hock data.xlsx', index_col='Date')

2013 Allaj

In [30]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

def step1_time_series_regression(df, market_col):
    """
    Fit a separate one-regressor linear model (with intercept) for every
    non-market column of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        One column per return series, including ``market_col``.
    market_col : str
        Name of the column used as the single regressor.

    Returns
    -------
    tuple of (dict, dict)
        ``(betas, intercepts)``; both are keyed by the non-market column names,
        in the order those columns appear in ``df``.
    """
    # scikit-learn expects a 2-D design matrix, hence the (T, 1) reshape.
    regressor = df[market_col].values.reshape(-1, 1)

    fits = {
        name: LinearRegression().fit(regressor, df[name].values)
        for name in df.columns
        if name != market_col
    }

    betas = {name: fit.coef_[0] for name, fit in fits.items()}
    intercepts = {name: fit.intercept_ for name, fit in fits.items()}
    return betas, intercepts

def step2_demeaning_and_residuals(df, betas, intercepts, market_col):
    """
    Build the regression residuals for every asset and centre both the
    residuals and the market series on their own means.

    Parameters
    ----------
    df : pandas.DataFrame
        One column per return series, including ``market_col``.
    betas, intercepts : dict
        Slope and intercept per non-market column name.
    market_col : str
        Name of the market column in ``df``.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        The mean-centred market series (length T) and the mean-centred
        residuals as a (T, number of assets) array.
    """
    market = df[market_col].values
    asset_frame = df.drop(columns=[market_col])

    # Line up the per-asset coefficients with the column order of the frame.
    offsets = np.array([intercepts[name] for name in asset_frame.columns])
    slopes = np.array([betas[name] for name in asset_frame.columns])

    # Fitted values for all assets at once: (T, 1) * (n,) broadcasts to (T, n).
    fitted = offsets + slopes * market[:, None]
    residuals = asset_frame - fitted

    centred_market = market - market.mean()
    centred_residuals = residuals - residuals.mean()
    return centred_market, centred_residuals.values

def step3_cross_sectional_regression(demeaned_market, demeaned_residuals):
    """
    Fit a one-regressor linear model across assets and return its
    ``(intercept, slope)``.

    The response is the column-wise mean of ``demeaned_residuals`` (one value
    per asset). The regressor has one entry per asset, and every entry equals
    the mean of ``demeaned_market``.

    NOTE(review): because the regressor is the same constant for every asset,
    the data cannot identify the slope. The original description of this step
    talks about regressing average excess returns on coefficient estimates,
    but no betas are passed in here — confirm against the intended method.
    """
    response = demeaned_residuals.mean(axis=0)

    # Repeat the single market mean once per asset, then make it a column.
    market_level = np.mean(demeaned_market)
    regressor = np.full_like(response, market_level).reshape(-1, 1)

    fitted = LinearRegression().fit(regressor, response)
    return fitted.intercept_, fitted.coef_[0]

def step4_quadratic_form(df, market_col, intercept, slope, demeaned_market, demeaned_residuals):
    """
    Estimate tau using the quadratic form ``(T / n) * d' S^{-1} d``.

    Here ``T`` is the number of rows of ``df``, ``n`` the number of non-market
    columns, ``d`` the column means of the non-market columns minus
    ``slope * mean(demeaned_market)``, and ``S`` the sample covariance of the
    columns of ``demeaned_residuals``.

    Parameters
    ----------
    df : pandas.DataFrame
        One column per return series, including ``market_col``.
    market_col : str
        Name of the market column in ``df``.
    intercept : float
        Accepted for interface compatibility; not used in the computation.
    slope : float
        Multiplier applied to the mean of ``demeaned_market``.
    demeaned_market : array-like
        Market series whose mean enters ``d``.
    demeaned_residuals : array-like, shape (T, n)
        Residuals whose column covariance is ``S``.

    Returns
    -------
    float
        The tau estimate.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the residual covariance matrix is singular.
    ZeroDivisionError
        If ``df`` has no column other than ``market_col``.
    """
    T = len(df)
    n = df.shape[1] - 1

    avg_excess_returns = df.drop(columns=[market_col]).mean(axis=0).to_numpy(dtype=float)

    # np.cov collapses to a 0-d array when there is a single asset; keep it a
    # matrix so the linear solve below works for n == 1 as well.
    residual_cov_matrix = np.atleast_2d(np.cov(demeaned_residuals, rowvar=False))

    diff = avg_excess_returns - slope * np.mean(demeaned_market)

    # Solve S x = d instead of forming the explicit inverse of S.
    tau = (T / n) * (diff @ np.linalg.solve(residual_cov_matrix, diff))
    return float(tau)

def allaj_2013(df, market_col):
    """
    Run the four estimation steps in order and return the resulting tau.

    Parameters
    ----------
    df : pandas.DataFrame
        One column per return series, including ``market_col``.
    market_col : str
        Name of the market column in ``df``.

    Returns
    -------
    The value produced by ``step4_quadratic_form``.
    """
    # Step 1: per-asset slopes and intercepts against the market column.
    betas, intercepts = step1_time_series_regression(df, market_col)

    # Step 2: mean-centred market series and mean-centred residuals.
    market_dev, residual_dev = step2_demeaning_and_residuals(
        df, betas, intercepts, market_col
    )

    # Step 3: cross-sectional fit on the step-2 outputs.
    cs_intercept, cs_slope = step3_cross_sectional_regression(market_dev, residual_dev)

    # Step 4: quadratic form that yields tau.
    return step4_quadratic_form(
        df, market_col, cs_intercept, cs_slope, market_dev, residual_dev
    )

# Example usage:
# Assuming `all_data` is your pandas dataframe with excess daily returns
# and "MSCI Europe" is the column name of the market reference.
# tau_estimate = allaj_2013(all_data, "MSCI Europe")


In [33]:
# Display only the first element of the returned tuple: the per-asset beta dictionary.
step1_time_series_regression(all_data, "MSCI Europe")[0]

{'MSCI Austria': 1.3602289402647754,
 'MSCI Belgium': 0.9580881449485984,
 'MSCI Denmark': 0.8468063377032629,
 'MSCI Finland': 1.0119400748984748,
 'MSCI France': 1.1209306390642257,
 'MSCI Germany': 1.1300653129699272,
 'MSCI Ireland': 0.8971847510794795,
 'MSCI Italy': 1.215845962550725,
 'MSCI Netherlands': 1.0059292958268455,
 'MSCI Norway': 1.0436601065675963,
 'MSCI Portugal': 0.9711175493846661,
 'MSCI Spain': 1.1374568916033192,
 'MSCI Sweden': 1.044260115489641,
 'MSCI Switzerland': 0.8206955028393137,
 'MSCI UK': 0.9234870995248411}

In [31]:
# Run the full Allaj (2013) pipeline with "MSCI Europe" as the market column
# and display the resulting tau.
allaj_2013_tau = allaj_2013(all_data, "MSCI Europe")
allaj_2013_tau

2372.5034048523517

In [None]:
# Fuhrer and Hock - 2023

In [None]:


# Load your data into a DataFrame, using the workbook's 'Date' column as the index
all_data = pd.read_excel('Fuhrer and Hock data.xlsx', index_col='Date')

# Step 1: Estimate the Error Covariance Matrix
def step_1(all_data, market_col='MSCI Europe'):
    """
    Estimate the error covariance matrix from per-asset regression residuals.

    Each non-market column is mean-centred and regressed on the mean-centred
    market column; the residuals of those fits are collected column by column.

    Parameters
    ----------
    all_data : pandas.DataFrame
        One column per return series, including ``market_col``.
    market_col : str, default 'MSCI Europe'
        Name of the market column used as the regressor.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(error_cov_matrix, residuals_df)``: the covariance matrix of the
        residual columns and the residuals themselves.
    """
    # The regressor is the same for every asset, so centre and reshape it once
    # instead of rebuilding it on every loop iteration.
    centred_market = all_data[market_col] - all_data[market_col].mean()
    regressor = centred_market.values.reshape(-1, 1)

    residuals = {}
    for column in all_data.columns:
        if column == market_col:
            continue
        centred_asset = all_data[column] - all_data[column].mean()
        model = LinearRegression().fit(regressor, centred_asset.values)
        residuals[column] = centred_asset - model.predict(regressor)

    residuals_df = pd.DataFrame(residuals)
    error_cov_matrix = residuals_df.cov()
    return error_cov_matrix, residuals_df

# Step 2: Stratification for Consistent Estimation
def step_2(residuals_df, num_constituents):
    """
    Build the diagonal matrix of stratified estimates from the residuals.

    For each residual column, a straight line in the time index ``0..T-1`` is
    fitted and subtracted, and the sample variance of what remains is compared
    with the column's plain sample variance. The difference is divided by
    ``T`` times the matching entry of ``num_constituents``.

    Parameters
    ----------
    residuals_df : pandas.DataFrame
        One residual series per column, ``T`` rows.
    num_constituents : sequence of numbers
        One entry per column of ``residuals_df``, in the same order.

    Returns
    -------
    tuple of (numpy.ndarray, sequence)
        A square matrix with the stratified estimates on its diagonal, and
        ``num_constituents`` passed back unchanged.
    """
    T = residuals_df.shape[0]
    # Shared regressor for every column: the row position as a (T, 1) matrix.
    time_axis = np.arange(T).reshape(-1, 1)

    def _detrended_variance(series):
        # Sample variance of the series after removing its fitted linear trend.
        trend = LinearRegression().fit(time_axis, series.values)
        return np.var(series - trend.predict(time_axis), ddof=1)

    pooled = residuals_df.var(ddof=1)
    detrended = np.array(
        [_detrended_variance(residuals_df[name]) for name in residuals_df.columns]
    )

    # Scale the variance gap by T and by the number of constituents per asset.
    scaled = (pooled - detrended) / (T * np.array(num_constituents))
    return np.diag(scaled), num_constituents

# Step 3: Obtain Beta and Other Parameters
def step_3(all_data, error_cov_matrix, market_col='MSCI Europe'):
    """
    Estimate per-asset betas, the market variance and the implied covariance.

    Betas are the no-intercept least-squares slopes of each mean-centred asset
    column on the mean-centred market column. The covariance is then
    ``beta beta' * sigma_m + error_cov_matrix``.

    Parameters
    ----------
    all_data : pandas.DataFrame
        One column per return series, including ``market_col``.
    error_cov_matrix : pandas.DataFrame or array-like, shape (n, n)
        Error covariance added to the systematic part.
    market_col : str, default 'MSCI Europe'
        Name of the market column.

    Returns
    -------
    tuple
        ``(beta_vector, sigma_m, sigma)``: betas as an ``(n, 1)`` array in the
        column order of ``all_data`` (market excluded), the sample variance
        (``ddof=1``) of the centred market series, and the covariance with the
        container type that results from adding ``error_cov_matrix``.
    """
    market_returns = all_data[market_col] - all_data[market_col].mean()
    asset_returns = all_data.drop(columns=[market_col])
    centred_assets = asset_returns - asset_returns.mean()

    # One least-squares solve with every asset as a right-hand-side column
    # replaces the per-asset loop; the result has shape (1, n).
    coefficients = np.linalg.lstsq(
        market_returns.values.reshape(-1, 1), centred_assets.values, rcond=None
    )[0]
    beta_vector = coefficients.reshape(-1, 1)

    sigma_m = market_returns.var(ddof=1)
    sigma = beta_vector.dot(beta_vector.T) * sigma_m + error_cov_matrix

    return beta_vector, sigma_m, sigma

# Step 4: Estimate Sigma_m from the market return series
def step_4(all_data, beta_vector, market_col='MSCI Europe'):
    """
    Return the sample variance (``ddof=1``) of the mean-centred market column.

    Parameters
    ----------
    all_data : pandas.DataFrame
        Must contain ``market_col``.
    beta_vector : any
        Accepted for interface compatibility; not used in the computation.
    market_col : str, default 'MSCI Europe'
        Name of the market column.

    Returns
    -------
    float
        Sample variance of the centred market series.

    NOTE(review): no cross-sectional regression is performed here. The value is
    computed the same way as ``sigma_m`` in ``step_3``, so the ratio
    ``sigma_m_hat / sigma_m`` used downstream equals 1 — confirm whether a
    regression on ``beta_vector`` was intended.
    """
    market_returns = all_data[market_col] - all_data[market_col].mean()
    return market_returns.var(ddof=1)

# Step 5: Calculate Tau as a Vector
def calculate_tau(sigma_m_hat, sigma_m, stratified_estimates, num_constituents):
    """
    Compute one tau value per asset.

    Each diagonal entry of ``stratified_estimates`` is multiplied by
    ``sigma_m_hat / sigma_m`` and divided by the matching entry of
    ``num_constituents``.

    Parameters
    ----------
    sigma_m_hat, sigma_m : float
        Numerator and denominator of the scaling ratio.
    stratified_estimates : numpy.ndarray, 2-D
        Only its main diagonal is used.
    num_constituents : sequence of numbers
        One entry per diagonal element.

    Returns
    -------
    numpy.ndarray
        The tau vector.

    NOTE(review): ``step_2`` already divides its estimates by
    ``T * num_constituents``; confirm this second division is intended.
    """
    variance_ratio = sigma_m_hat / sigma_m
    diagonal = stratified_estimates.diagonal()
    return (diagonal * variance_ratio) / np.array(num_constituents)

# Number of constituents per asset, in the same order as the asset columns
num_constituents = [5, 11, 17, 11, 68, 48, 5, 19, 15, 9, 4, 16, 34, 39, 83]

# Steps 1-5, each feeding the next
error_cov_matrix, residuals_df = step_1(all_data)
stratified_estimates, num_constituents = step_2(residuals_df, num_constituents)
beta_vector, sigma_m, sigma = step_3(all_data, error_cov_matrix)
sigma_m_hat = step_4(all_data, beta_vector)
tau_vector = calculate_tau(sigma_m_hat, sigma_m, stratified_estimates, num_constituents)

# Table 4: one row per asset (every column except the market)
asset_names = all_data.columns.drop('MSCI Europe')
num_assets = len(asset_names)

table_4_columns = {
    'Asset': asset_names,
    'Number of Constituents': num_constituents,
    'Beta': beta_vector.flatten(),
    # The market variance is a single number, repeated on every row.
    'Sigma_m': np.repeat(sigma_m, num_assets),
    # Per-asset variance: the main diagonal of sigma.
    'Sigma': np.diag(sigma.to_numpy())[:num_assets],
    'Tau': tau_vector,
    'Stratified Estimates': stratified_estimates.diagonal(),
}
table_4 = pd.DataFrame(table_4_columns)

# Display the table
print(table_4)


In [None]:
# Step 1
error_cov_matrix, residuals_df = step_1(all_data)

# Step 2
# step_2 takes the constituent counts as a required second argument; calling it
# with the residuals alone raises TypeError.
stratified_estimates, n_assets = step_2(residuals_df, num_constituents)

# Step 3
beta_vector, sigma_m, sigma = step_3(all_data, error_cov_matrix)

# Step 4
sigma_m_hat = step_4(all_data, beta_vector)

# Step 5: Calculate Tau as a Vector
# n_assets is the constituent-count sequence handed back by step_2.
tau_vector = calculate_tau(sigma_m_hat, sigma_m, stratified_estimates, n_assets)

In [None]:
# Inspect the entry of sigma at row 0, column 0 (by position).
sigma.iloc[0,0]

In [None]:
# One row per asset: every column of the data except the market
asset_names = all_data.columns.drop('MSCI Europe')
num_assets = len(asset_names)

# First num_assets entries of the main diagonal of the stratified-estimates matrix
diagonal_positions = range(num_assets)
stratified_diagonal = [stratified_estimates[k, k] for k in diagonal_positions]

# Create a DataFrame for Table 4 (the other columns are currently disabled)
table_4 = pd.DataFrame({
    'Asset': asset_names,
    #'Beta': beta_vector.flatten(),
    #'Sigma_m': [sigma_m] * num_assets,
    #'Sigma': [sigma.iloc[i, i] for i in range(num_assets)],
    #'Tau': tau_vector.flatten(),
    'Stratified Estimates': stratified_diagonal
})

# Display the table
print(table_4)