<a href="https://colab.research.google.com/github/Barak3ttt/asset_pricing_assignment/blob/main/Q2%20Asset%20pricing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# a - GRS (Time-series)


In [5]:
# THIS CODE GETS THE DATA FROM THE WEBISTE DIRECTLY. IN THE PYTHON CODE ON VISUAL APP I HAVE THE CODE THAT
# GETS IT FROM THE DATA FILE I DOWNLOADED ON THE COMPUTER
import pandas as pd
import numpy as np
import io
import requests
import zipfile

# Define the mandatory sample period
START = '1963-07'
END = '2025-06'

def load_french_web_zip(url, skip_rows, n_rows):
    # Download the zip file from the web
    response = requests.get(url)
    with zipfile.ZipFile(io.BytesIO(response.content)) as z:
        # Each .zip contains one .csv with the same base name
        csv_name = z.namelist()[0]
        with z.open(csv_name) as f:
            df = pd.read_csv(f, skiprows=skip_rows, nrows=n_rows, index_col=0)

    # Convert index to PeriodIndex for easy filtering
    # We strip whitespace because some CSVs have spaces in the date strings
    df.index = pd.to_datetime(df.index.astype(str).str.strip(), format='%Y%m').to_period('M')

    # Filter for the specific assignment sample
    df = df.loc[START:END]

    # Replace Kenneth French's missing value codes with NaN
    df = df.replace([-99.99, -99.9, -999], np.nan)
    return df

# --- LOAD TEST ASSETS (25 Portfolios) ---
portfolios = load_french_web_zip(
    'http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/25_Portfolios_5x5_CSV.zip',
    skip_rows=15, n_rows=1170
)

# --- LOAD FACTORS ---
# 1. FF 5-Factors
factors_5 = load_french_web_zip(
    'http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_5_Factors_2x3_CSV.zip',
    skip_rows=3, n_rows=730
)

# 2. FF 3-Factors
factors_3 = load_french_web_zip(
    'http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_CSV.zip',
    skip_rows=3, n_rows=1170
)

# 3. Momentum Factor
mom = load_french_web_zip(
    'http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Momentum_Factor_CSV.zip',
    skip_rows=13, n_rows=1170
)

# 4. Short-Term (ST) Reversal Factor
rev = load_french_web_zip(
    'http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_ST_Reversal_Factor_CSV.zip',
    skip_rows=13, n_rows=1170
)

# Extract the Risk-Free rate
rf = factors_3['RF']

# Calculate excess returns for all 25 portfolios
portfolios_ex = portfolios.subtract(rf, axis=0)

# --- PREPARE THE THREE MODELS ---

# Model (i): FF 5-Factor
model_1_factors = factors_5[['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']]

# Model (ii): FF 3-Factor + Momentum
model_2_factors = pd.concat([factors_3[['Mkt-RF', 'SMB', 'HML']], mom], axis=1)

# Model (iii): FF 3-Factor + ST Reversal
model_3_factors = pd.concat([factors_3[['Mkt-RF', 'SMB', 'HML']], rev], axis=1)

print("Data successfully downloaded and prepared for the period:", START, "to", END)

#GRS calculation


import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy.stats import f

def calculate_grs(portfolios_ex, factors):
    T, N = portfolios_ex.shape
    K = factors.shape[1]

    # 1. Regressions to get alphas and residuals
    X = sm.add_constant(factors)
    alphas_list = []
    residuals_list = []
    for col in portfolios_ex.columns:
        model = sm.OLS(portfolios_ex[col], X).fit()
        alphas_list.append(model.params['const'])
        residuals_list.append(model.resid)

    # Convert to matrices for linear algebra
    # alphas: (N x 1), resid_mat: (T x N)
    alphas = np.array(alphas_list).reshape(-1, 1)
    resid_mat = np.array(residuals_list).T

    # 2. Factor Covariance (Omega) with Bessel Correction (T-1)
    f_bar = np.mean(factors, axis=0).values.reshape(-1, 1)
    f_dev = factors - f_bar.T
    Omega = (f_dev.T @ f_dev) / (T - 1)

    # 3. Residual Covariance (Sigma) with Bessel Correction (T-1)
    Sigma = (resid_mat.T @ resid_mat) / (T -  1)


    # 4. Split GRS into 3 terms
    Omega_inv = np.linalg.inv(Omega)
    Sigma_inv = np.linalg.inv(Sigma)

    # Term 1: The finite-sample multiplier
    term1 = (T - N - K) / N

    # Term 2: (1 + mu' * Omega^-1 * mu)
    term2 = 1 + (f_bar.T @ Omega_inv @ f_bar).item()

    # Term 3:
    term3 = (alphas.T @ Sigma_inv @ alphas).item()

    # Final GRS Calculation
    grs_stat = term1 * (1 / term2) * term3

    # P-value calculation (Degrees of freedom: N and T-N-K)
    p_val = 1 - f.cdf(grs_stat, N, T - N - K)

    return grs_stat, p_val

def align_data(df1, df2):
    """
    Aligns two dataframes on their index, keeping only rows
    where indices match in both (inner join).
    """
    # 'inner' join keeps only the intersection of dates
    df1_aligned, df2_aligned = df1.align(df2, join='inner', axis=0)
    return df1_aligned, df2_aligned

    # --- 1. Define the models with the raw data ---
# We keep them in a dictionary, but we will align/clean them INSIDE the loop
m_core = factors_5[['Mkt-RF', 'SMB', 'HML']]

models_to_test = {
    "Model (i): FF5": factors_5[['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']],
    "Model (ii): FF3 + Mom": pd.concat([m_core, mom], axis=1),
    "Model (iii): FF3 + Rev": pd.concat([m_core, rev], axis=1)
}

# --- 2. Run the loop and collect results ---
summary_results = []

for name, factor_df in models_to_test.items():
    # CRITICAL FIX: We drop NaNs from the factor_df FIRST
    # because concat([ff3, mom]) might create NaNs if one file is longer
    f_clean_pre = factor_df.dropna()

    # Now align the portfolios with these specific factors
    p_clean, f_clean = align_data(portfolios_ex, f_clean_pre)

    # Double Check: Ensure indices are identical to satisfy statsmodels
    # This prevents the "indices are not aligned" error
    f_clean = f_clean.loc[p_clean.index]

    # Call your calculate_grs function
    grs_stat, p_val = calculate_grs(p_clean, f_clean)

    # Store results
    summary_results.append({
        "Model": name,
        "GRS Statistic": round(grs_stat, 2),
        "p-value": f"{p_val:.4e}",
        "T (Months)": len(p_clean)
    })

# --- 3. Print the Final Table ---
summary_df = pd.DataFrame(summary_results).set_index("Model")

print("\n" + "="*60)
print("TABLE 6: GRS TEST RESULTS")
print("="*60)
print(summary_df)
print("="*60)
print("Note: All models are rejected at the 5% level (p < 0.05).")

Data successfully downloaded and prepared for the period: 1963-07 to 2025-06

TABLE 6: GRS TEST RESULTS
                        GRS Statistic     p-value  T (Months)
Model                                                        
Model (i): FF5                   3.14  5.6961e-07         726
Model (ii): FF3 + Mom            3.37  8.2643e-08         726
Model (iii): FF3 + Rev           3.88  1.2239e-09         721
Note: All models are rejected at the 5% level (p < 0.05).


# c - Fama Macbeth (Cross-sectional)

In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

def run_fama_macbeth_process(port_ex, factor_df):
    # --- 0. CLEANING & ALIGNMENT ---
    # Combine into one temporary dataframe to drop ANY row with a NaN in any column
    combined = pd.concat([port_ex, factor_df], axis=1).dropna()

    # Split them back out
    p = combined[port_ex.columns].copy()
    f = combined[factor_df.columns].copy()

    T, N = p.shape

    # --- STEP 1: TIME-SERIES (Find Betas) ---
    betas = []
    X_ts = sm.add_constant(f)
    for col in p.columns:
        # We use the cleaned f and p, so no NaNs here
        ts_model = sm.OLS(p[col], X_ts).fit()
        betas.append(ts_model.params.drop('const'))

    beta_df = pd.DataFrame(betas, index=p.columns)

    # --- STEP 2: CROSS-SECTIONAL (The T Regressions) ---
    gammas = []
    X_cs = sm.add_constant(beta_df)

    # Check if betas contain NaNs (happens if a portfolio has no data)
    if X_cs.isnull().values.any():
        X_cs = X_cs.dropna()
        print("Warning: Dropped portfolios with missing betas.")

    for t in p.index:
        y_t = p.loc[t]
        y_t_aligned = y_t.reindex(X_cs.index)

        # Cross-sectional regression
        cs_model = sm.OLS(y_t_aligned, X_cs).fit()
        gammas.append(cs_model.params)

    gamma_df = pd.DataFrame(gammas, index=p.index)

    # --- STEP 3: STATS ---
    risk_premia = gamma_df.mean()
    t_stats = gamma_df.mean() / (gamma_df.std() / np.sqrt(T))

    # Pricing Errors
    avg_ret = p.mean()
    predicted_ret = X_cs @ risk_premia
    pricing_errors = avg_ret - predicted_ret
    mape = pricing_errors.abs().mean()

    return risk_premia, t_stats, mape, T

# --- RUNNING FOR ALL MODELS ---

model_configs = {
    "Model (i): FF5": model_1_factors,
    "Model (ii): FF3 + Mom": model_2_factors,
    "Model (iii): FF3 + ST Rev": model_3_factors
}

total_count = 0

for name, feat in model_configs.items():
    # Execute
    premia, tvals, mape, T_sub = run_fama_macbeth_process(portfolios_ex, feat)
    total_count += T_sub

    print(f"\n{name}")
    print(f"Valid months (T): {T_sub}")
    print(f"Mean Absolute Pricing Error (MAPE): {mape:.5f}")

    results_df = pd.DataFrame({'Premium': premia, 't-stat': tvals})
    print(results_df)
    print("-" * 50)

print(f"\nTOTAL CROSS-SECTIONAL REGRESSIONS RUN: {total_count}")


Model (i): FF5
Valid months (T): 726
Mean Absolute Pricing Error (MAPE): 0.07028
         Premium    t-stat
const   0.875092  3.220676
Mkt-RF -0.339421 -1.059880
SMB     0.269106  2.344996
HML     0.279094  2.476850
RMW     0.486938  2.928065
CMA     0.010034  0.059786
--------------------------------------------------

Model (ii): FF3 + Mom
Valid months (T): 726
Mean Absolute Pricing Error (MAPE): 0.07641
         Premium    t-stat
const   0.668063  2.365283
Mkt-RF -0.035963 -0.108653
SMB     0.116083  1.004695
HML     0.365585  3.229831
Mom     2.332936  3.599320
--------------------------------------------------

Model (iii): FF3 + ST Rev
Valid months (T): 721
Mean Absolute Pricing Error (MAPE): 0.08418
         Premium    t-stat
const   1.215255  4.547699
Mkt-RF -0.611919 -1.938794
SMB     0.135394  1.169368
HML     0.334606  2.957371
ST_Rev -1.191753 -2.411661
--------------------------------------------------

TOTAL CROSS-SECTIONAL REGRESSIONS RUN: 2173
