# **FOUNDAMENTAL APPROACH - COCHRANE (1992) VAR MODEL EXTENSION**

### **1) REQUIREMENTS SETUP**

In [122]:
# !pip install -r requirements.txt

In [123]:
import warnings
warnings.filterwarnings("ignore")
import os
import pandas as pd
import numpy as np
from statsmodels.tsa.api import VAR

### **2) MONTHLY DATA ANALYSIS**

##### **2.1) VARIABLES CONSTRUCTION**

In [124]:
# Log Risk Free Rate
rf_df = pd.read_csv("../data_extraction/raw_df/risk_free_monthly_df.csv")
rf_df = rf_df.rename(columns={"1-month Yield - US Treasury Securities":"Rf_t"})
rf_df["Date"] = pd.to_datetime(rf_df["Date"])

# Data Cleaning for NaN, -inf, 0.0 
rf_df["Rf_t"] = pd.to_numeric(rf_df["Rf_t"], errors="coerce") 
rf_df["Rf_t"] = rf_df["Rf_t"].replace([0, 0.0, -np.inf, np.inf], np.nan)

# (!!!) Either forward or backward filling
rf_df["Rf_t"] = rf_df["Rf_t"].ffill().bfill()

rf_df["rf_t"] = np.log(rf_df["Rf_t"])

rf_df.tail()

Unnamed: 0,Date,Rf_t,rf_t
282,2025-07-01,4.32,1.463255
283,2025-08-01,4.49,1.501853
284,2025-09-01,4.41,1.483875
285,2025-10-01,4.17,1.427916
286,2025-11-01,4.06,1.401183


In [125]:
# Log Synthetic Index Price
p_df = pd.read_csv("../data_extraction/raw_df/synthetic_price_monthly_df.csv")
p_df["p_t"] = np.log(p_df["Synthetic Index Close Price"])
p_df = p_df.rename(columns={"Synthetic Index Close Price":"P_t"})
p_df["Date"] = pd.to_datetime(p_df["Date"].astype(str).str[:10], format="%Y-%m-%d")
p_df.tail()

Unnamed: 0,Date,P_t,p_t
282,2025-07-01,333.452353,5.8095
283,2025-08-01,340.753141,5.831158
284,2025-09-01,348.811723,5.854532
285,2025-10-01,366.551887,5.90414
286,2025-11-01,375.118033,5.927241


In [126]:
# Log Synthetic Index Dividend
d_df = pd.read_csv("../data_extraction/raw_df/synthetic_div_monthly_df.csv")
d_df["d_t"] = np.log(d_df["Synthetic Index Dividend"])
d_df = d_df.rename(columns={"Synthetic Index Dividend": "D_t"})
d_df["Date"] = pd.to_datetime(d_df["Date"].astype(str).str[:10], format="%Y-%m-%d")
d_df.tail()

Unnamed: 0,Date,D_t,d_t
282,2025-07-01,0.055846,-2.885165
283,2025-08-01,0.253449,-1.372591
284,2025-09-01,0.216313,-1.531027
285,2025-10-01,0.07693,-2.564857
286,2025-11-01,0.061543,-2.788022


In [127]:
# Merged Variable Dataset
monthly_df = pd.merge(p_df, d_df, on="Date", how="outer")
monthly_df = pd.merge(monthly_df, rf_df, on="Date", how="outer")

monthly_df

Unnamed: 0,Date,P_t,p_t,D_t,d_t,Rf_t,rf_t
0,2002-01-01,6.741954,1.908350,0.000382,-7.869682,1.69,0.524729
1,2002-02-01,6.708014,1.903303,0.006262,-5.073240,1.69,0.524729
2,2002-03-01,6.614498,1.889264,0.002720,-5.907281,1.78,0.576613
3,2002-04-01,6.714867,1.904324,0.001222,-6.707280,1.79,0.582216
4,2002-05-01,6.022081,1.795433,0.003142,-5.762878,1.76,0.565314
...,...,...,...,...,...,...,...
282,2025-07-01,333.452353,5.809500,0.055846,-2.885165,4.32,1.463255
283,2025-08-01,340.753141,5.831158,0.253449,-1.372591,4.49,1.501853
284,2025-09-01,348.811723,5.854532,0.216313,-1.531027,4.41,1.483875
285,2025-10-01,366.551887,5.904140,0.076930,-2.564857,4.17,1.427916


In [128]:
# Log Gross Return (Approximation)
# rx_t+1 = (p_t+1 - p_t) + 
monthly_df["r_t+1"] = monthly_df["p_t"].shift(-1) - monthly_df["p_t"] + np.log(1 + monthly_df["D_t"].shift(-1) / monthly_df["P_t"])
monthly_df

Unnamed: 0,Date,P_t,p_t,D_t,d_t,Rf_t,rf_t,r_t+1
0,2002-01-01,6.741954,1.908350,0.000382,-7.869682,1.69,0.524729,-0.004118
1,2002-02-01,6.708014,1.903303,0.006262,-5.073240,1.69,0.524729,-0.013634
2,2002-03-01,6.614498,1.889264,0.002720,-5.907281,1.78,0.576613,0.015245
3,2002-04-01,6.714867,1.904324,0.001222,-6.707280,1.79,0.582216,-0.108423
4,2002-05-01,6.022081,1.795433,0.003142,-5.762878,1.76,0.565314,-0.044481
...,...,...,...,...,...,...,...,...
282,2025-07-01,333.452353,5.809500,0.055846,-2.885165,4.32,1.463255,0.022418
283,2025-08-01,340.753141,5.831158,0.253449,-1.372591,4.49,1.501853,0.024009
284,2025-09-01,348.811723,5.854532,0.216313,-1.531027,4.41,1.483875,0.049828
285,2025-10-01,366.551887,5.904140,0.076930,-2.564857,4.17,1.427916,0.023269


In [129]:
# Log Excess Return 
monthly_df["rx_t+1"] = monthly_df["r_t+1"] - monthly_df["rf_t"].shift(-1)
monthly_df

Unnamed: 0,Date,P_t,p_t,D_t,d_t,Rf_t,rf_t,r_t+1,rx_t+1
0,2002-01-01,6.741954,1.908350,0.000382,-7.869682,1.69,0.524729,-0.004118,-0.528847
1,2002-02-01,6.708014,1.903303,0.006262,-5.073240,1.69,0.524729,-0.013634,-0.590247
2,2002-03-01,6.614498,1.889264,0.002720,-5.907281,1.78,0.576613,0.015245,-0.566971
3,2002-04-01,6.714867,1.904324,0.001222,-6.707280,1.79,0.582216,-0.108423,-0.673737
4,2002-05-01,6.022081,1.795433,0.003142,-5.762878,1.76,0.565314,-0.044481,-0.586805
...,...,...,...,...,...,...,...,...,...
282,2025-07-01,333.452353,5.809500,0.055846,-2.885165,4.32,1.463255,0.022418,-1.479435
283,2025-08-01,340.753141,5.831158,0.253449,-1.372591,4.49,1.501853,0.024009,-1.459866
284,2025-09-01,348.811723,5.854532,0.216313,-1.531027,4.41,1.483875,0.049828,-1.378088
285,2025-10-01,366.551887,5.904140,0.076930,-2.564857,4.17,1.427916,0.023269,-1.377914


In [130]:
# Log Excess Return 
monthly_df["Δd_t+1"] = monthly_df["d_t"].shift(-1) - monthly_df["d_t"]
monthly_df

Unnamed: 0,Date,P_t,p_t,D_t,d_t,Rf_t,rf_t,r_t+1,rx_t+1,Δd_t+1
0,2002-01-01,6.741954,1.908350,0.000382,-7.869682,1.69,0.524729,-0.004118,-0.528847,2.796442
1,2002-02-01,6.708014,1.903303,0.006262,-5.073240,1.69,0.524729,-0.013634,-0.590247,-0.834041
2,2002-03-01,6.614498,1.889264,0.002720,-5.907281,1.78,0.576613,0.015245,-0.566971,-0.799999
3,2002-04-01,6.714867,1.904324,0.001222,-6.707280,1.79,0.582216,-0.108423,-0.673737,0.944402
4,2002-05-01,6.022081,1.795433,0.003142,-5.762878,1.76,0.565314,-0.044481,-0.586805,-0.088809
...,...,...,...,...,...,...,...,...,...,...
282,2025-07-01,333.452353,5.809500,0.055846,-2.885165,4.32,1.463255,0.022418,-1.479435,1.512574
283,2025-08-01,340.753141,5.831158,0.253449,-1.372591,4.49,1.501853,0.024009,-1.459866,-0.158436
284,2025-09-01,348.811723,5.854532,0.216313,-1.531027,4.41,1.483875,0.049828,-1.378088,-1.033831
285,2025-10-01,366.551887,5.904140,0.076930,-2.564857,4.17,1.427916,0.023269,-1.377914,-0.223165


In [131]:
# Log Dividend-Price Ratio
monthly_df["dp_t"] = monthly_df["d_t"] - monthly_df["p_t"]
monthly_df

Unnamed: 0,Date,P_t,p_t,D_t,d_t,Rf_t,rf_t,r_t+1,rx_t+1,Δd_t+1,dp_t
0,2002-01-01,6.741954,1.908350,0.000382,-7.869682,1.69,0.524729,-0.004118,-0.528847,2.796442,-9.778031
1,2002-02-01,6.708014,1.903303,0.006262,-5.073240,1.69,0.524729,-0.013634,-0.590247,-0.834041,-6.976543
2,2002-03-01,6.614498,1.889264,0.002720,-5.907281,1.78,0.576613,0.015245,-0.566971,-0.799999,-7.796545
3,2002-04-01,6.714867,1.904324,0.001222,-6.707280,1.79,0.582216,-0.108423,-0.673737,0.944402,-8.611604
4,2002-05-01,6.022081,1.795433,0.003142,-5.762878,1.76,0.565314,-0.044481,-0.586805,-0.088809,-7.558311
...,...,...,...,...,...,...,...,...,...,...,...
282,2025-07-01,333.452353,5.809500,0.055846,-2.885165,4.32,1.463255,0.022418,-1.479435,1.512574,-8.694665
283,2025-08-01,340.753141,5.831158,0.253449,-1.372591,4.49,1.501853,0.024009,-1.459866,-0.158436,-7.203749
284,2025-09-01,348.811723,5.854532,0.216313,-1.531027,4.41,1.483875,0.049828,-1.378088,-1.033831,-7.385559
285,2025-10-01,366.551887,5.904140,0.076930,-2.564857,4.17,1.427916,0.023269,-1.377914,-0.223165,-8.468998


##### **2.2) TRAIN/TEST SPLIT**

In [132]:
# Train & Test Split 
monthly_df.to_csv("FVM_data/raw_monthly_df.csv", index=False)

train_monthly_df = monthly_df[monthly_df["Date"] <= "2021-12-31"]
train_monthly_df.to_csv("FVM_data/train_monthly_df.csv", index=False)

test_monthly_df = monthly_df[monthly_df["Date"] >= "2021-12-31"]
test_monthly_df.to_csv("FVM_data/test_monthly_df.csv", index=False)

##### **2.3) VAR MODEL**

In [133]:
# Clean the df for state space vector 
# (!!!) Transfrom date in index and covert value columns to numeric
train_monthly_df = train_monthly_df[["Date", "rx_t+1", "Δd_t+1", "dp_t"]]
train_monthly_df = train_monthly_df.set_index("Date")
train_monthly_df = train_monthly_df.apply(pd.to_numeric, errors="coerce")

train_monthly_df.tail()

Unnamed: 0_level_0,rx_t+1,Δd_t+1,dp_t
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-08-01,3.278891,-0.983102,-6.969143
2021-09-01,2.471778,-0.635563,-8.011909
2021-10-01,3.05582,1.81545,-8.593345
2021-11-01,2.406457,-1.028748,-6.836845
2021-12-01,2.866583,-1.281594,-7.863721


In [None]:
# VAR model lag optimization
# (!!!) We prioritize AIC and FPE (lag=5) as we are interest in the best lag that maximize the reuslt in out-of-sample prediction
# (!!!) Parsimony-based index instead suggest a lag=2
m_model = VAR(train_monthly_df)
lag_selection = m_model.select_order(maxlags=10)
print(lag_selection.summary())

 VAR Order Selection (* highlights the minimums)  
       AIC         BIC         FPE         HQIC   
--------------------------------------------------
0        1.931       1.976       6.896       1.949
1       -6.849      -6.670    0.001060      -6.777
2       -7.333     -7.019*   0.0006538     -7.206*
3       -7.292      -6.844   0.0006808      -7.112
4       -7.304      -6.721   0.0006732      -7.069
5      -7.393*      -6.676  0.0006159*      -7.104
6       -7.363      -6.511   0.0006352      -7.019
7       -7.311      -6.324   0.0006693      -6.913
8       -7.324      -6.203   0.0006613      -6.872
9       -7.277      -6.021   0.0006937      -6.771
10      -7.267      -5.877   0.0007017      -6.706
--------------------------------------------------


In [None]:
VAR_m_model = m_model.fit(2)  
print(VAR_m_model.summary())

  Summary of Regression Results   
Model:                         VAR
Method:                        OLS
Date:           Sat, 22, Nov, 2025
Time:                     00:10:34
--------------------------------------------------------------------
No. of Equations:         3.00000    BIC:                   -7.04279
Nobs:                     238.000    HQIC:                  -7.22570
Log likelihood:          -117.571    FPE:                0.000643158
AIC:                     -7.34917    Det(Omega_mle):     0.000589590
--------------------------------------------------------------------
Results for equation rx_t+1
               coefficient       std. error           t-stat            prob
----------------------------------------------------------------------------
const             1.096921         0.691645            1.586           0.113
L1.rx_t+1         0.641838         0.062881           10.207           0.000
L1.Δd_t+1        -0.003526         0.039052           -0.090           0.92