In [2]:
import pandas as pd
import numpy as np
from datetime import datetime
import statsmodels.api as sm
import statsmodels.tsa.stattools as ts
from statsmodels.tsa.stattools import adfuller
from math import sqrt
import datetime 
import calendar
import sys
from statsmodels.tsa.api import VAR

In [3]:
# Load the workbook; the first row of the sheet holds the column names.
file_path = r'C:\Users\PHIRI003\OneDrive - Wageningen University & Research\Documents\WEcR Internship\Work\Eggs\Eggs.xlsx'
df = pd.read_excel(file_path, header=0)

# Index the frame by date. The values are parsed explicitly so the index is a
# real DatetimeIndex even if Excel delivered the dates as text. The 'Date'
# column itself is kept because later cells still read it.
df.index = pd.to_datetime(df['Date'])

In [4]:
# Define the function to create seasonal dummy variables

def SeasonalDummies(df, frequency='M'):
    """Build month-of-year dummy variables aligned with ``df``.

    A regular date range of ``len(df)`` periods is generated, starting at the
    first index value of ``df``. The calendar month of every generated date is
    one-hot encoded and January is dropped as the reference category. Note that
    the months come from the generated range, not from the individual index
    values of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Only its length and index are used. The first index value must be
        accepted by ``pd.date_range`` as a start date.
    frequency : str, default 'M'
        Frequency alias handed to ``pd.date_range``.

    Returns
    -------
    pandas.DataFrame
        Integer 0/1 columns named ``D_<month name>`` (January excluded),
        carrying the same index as ``df``.

    Raises
    ------
    IndexError
        If ``df`` is empty (there is no first index value).
    KeyError
        If none of the generated dates falls in January, because there is then
        no ``D_January`` column to drop.
    """
    nT = len(df)  # Number of observations
    startdate = df.index[0]  # Start date of the time series

    # Called `dates` (not `datetime`) so the imported datetime module is not
    # shadowed inside the function.
    dates = pd.date_range(startdate, periods=nT, freq=frequency)

    # Build the month-name column in one step instead of enlarging a DataFrame
    # one cell at a time, which re-allocates on every iteration.
    monthname = pd.DataFrame({'D': [calendar.month_name[m] for m in dates.month]})

    seasdum = pd.get_dummies(monthname)  # One column per month present
    seasdum = seasdum.drop('D_January', axis=1)  # January is the reference month
    seasdum = seasdum.set_index(df.index)  # Align with the original DataFrame
    return seasdum.astype(int)

# Month-of-year dummies for the loaded data (default frequency)
seasdum = SeasonalDummies(df)

# Place the dummy columns to the right of the original columns
df_with_dummies = pd.concat([df, seasdum], axis="columns")


In [58]:
# Structural-break indicator: 1 from the break date onwards, 0 before it.
# Other candidate break dates that were tried: '2022-07-30', '2022-02-28'.
break_date = '2018-12-30'
df_with_dummies['break_dummy'] = (df_with_dummies['Date'] >= break_date).astype(int)

# Interaction terms (break dummy x series) used in the regime-shift equations
for series_name in ('API', 'CPI'):
    df_with_dummies['Inter_' + series_name] = (
        df_with_dummies['break_dummy'] * df_with_dummies[series_name]
    )

# Linear time trend counting 1, 2, ..., number of rows
df_with_dummies['trend'] = range(1, len(df_with_dummies) + 1)

In [60]:
# Gregory and Hansen cointegration test: CPI regressed on API with a regime shift.
# Regressors: linear trend, API, the break dummy, the break x API interaction and
# the eleven monthly dummies; add_constant supplies the intercept.
X = sm.add_constant(
    df_with_dummies[[
        'trend', 'API', 'break_dummy', 'Inter_API',
        'D_April', 'D_August', 'D_December', 'D_February', 'D_July', 'D_June',
        'D_March', 'D_May', 'D_September', 'D_October', 'D_November',
    ]]
)
Y = df_with_dummies['CPI']

# Fit by OLS and print the full summary table
ols_model = sm.OLS(Y, X).fit()
print(ols_model.summary())

# Store the residuals on the frame, then run an Augmented Dickey-Fuller test on
# them with at most 12 lags (adfuller's default `regression` setting is used here).
df_with_dummies['residuals'] = ols_model.resid
adf_test = adfuller(df_with_dummies['residuals'], maxlag=12)

# NOTE(review): the p-value and critical values reported by adfuller are the
# standard ADF ones; residual-based cointegration tests normally need their own
# critical values -- confirm before interpreting.
for label, position in (('ADF statistic:', 0), ('p-value:', 1), ('Critical values:', 4)):
    print(label, adf_test[position])


                            OLS Regression Results                            
Dep. Variable:                    CPI   R-squared:                       0.960
Model:                            OLS   Adj. R-squared:                  0.956
Method:                 Least Squares   F-statistic:                     243.6
Date:                Thu, 29 Aug 2024   Prob (F-statistic):           4.72e-98
Time:                        21:13:55   Log-Likelihood:                -477.33
No. Observations:                 168   AIC:                             986.7
Df Residuals:                     152   BIC:                             1037.
Df Model:                          15                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const          57.1705      2.075     27.558      

In [59]:
# Gregory and Hansen cointegration test, reverse direction: API regressed on CPI
# with a regime shift (trend, CPI, break dummy, break x CPI interaction and the
# eleven monthly dummies, plus an intercept from add_constant).
X2 = sm.add_constant(
    df_with_dummies[[
        'trend', 'CPI', 'break_dummy', 'Inter_CPI',
        'D_April', 'D_August', 'D_December', 'D_February', 'D_July', 'D_June',
        'D_March', 'D_May', 'D_September', 'D_October', 'D_November',
    ]]
)
Y2 = df_with_dummies['API']

# Fit by OLS and print the full summary table
ols_model2 = sm.OLS(Y2, X2).fit()
print(ols_model2.summary())

# Store the residuals, then run an ADF test on them with at most 12 lags and a
# constant-plus-trend specification (regression='ct').
df_with_dummies['residuals2'] = ols_model2.resid
adf_test2 = adfuller(df_with_dummies['residuals2'], maxlag=12, regression='ct')

# NOTE(review): adfuller reports standard ADF p-values / critical values;
# residual-based cointegration tests normally need their own -- confirm.
for label, position in (('ADF statistic:', 0), ('p-value:', 1), ('Critical values:', 4)):
    print(label, adf_test2[position])


                            OLS Regression Results                            
Dep. Variable:                    API   R-squared:                       0.738
Model:                            OLS   Adj. R-squared:                  0.712
Method:                 Least Squares   F-statistic:                     28.56
Date:                Thu, 29 Aug 2024   Prob (F-statistic):           1.04e-36
Time:                        21:13:39   Log-Likelihood:                -748.31
No. Observations:                 168   AIC:                             1529.
Df Residuals:                     152   BIC:                             1579.
Df Model:                          15                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const         -20.4961     33.106     -0.619      

In [46]:
# Gregory and Hansen cointegration test: CPI regressed on Oil with a regime shift.
# The break x Oil interaction is created first because the regression needs it.
df_with_dummies['Inter_Oil'] = df_with_dummies['break_dummy'] * df_with_dummies['Oil']

# Regressors: trend, Oil, break dummy, break x Oil interaction and the eleven
# monthly dummies; add_constant supplies the intercept.
X3 = sm.add_constant(
    df_with_dummies[[
        'trend', 'Oil', 'break_dummy', 'Inter_Oil',
        'D_April', 'D_August', 'D_December', 'D_February', 'D_July', 'D_June',
        'D_March', 'D_May', 'D_September', 'D_October', 'D_November',
    ]]
)
Y3 = df_with_dummies['CPI']

# Fit by OLS and print the full summary table
ols_model3 = sm.OLS(Y3, X3).fit()
print(ols_model3.summary())

# Store the residuals of this model, then run an ADF test on them with at most
# 12 lags and a constant-plus-trend specification (regression='ct').
df_with_dummies['residuals3'] = ols_model3.resid
adf_test3 = adfuller(df_with_dummies['residuals3'], maxlag=12, regression='ct')

# NOTE(review): adfuller reports standard ADF p-values / critical values;
# residual-based cointegration tests normally need their own -- confirm.
for label, position in (('ADF statistic:', 0), ('p-value:', 1), ('Critical values:', 4)):
    print(label, adf_test3[position])


                            OLS Regression Results                            
Dep. Variable:                    CPI   R-squared:                       0.901
Model:                            OLS   Adj. R-squared:                  0.892
Method:                 Least Squares   F-statistic:                     92.50
Date:                Thu, 29 Aug 2024   Prob (F-statistic):           2.38e-68
Time:                        21:05:50   Log-Likelihood:                -553.37
No. Observations:                 168   AIC:                             1139.
Df Residuals:                     152   BIC:                             1189.
Df Model:                          15                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const          50.5925      3.864     13.095      

In [52]:
# Gregory and Hansen cointegration test: API regressed on Oil and CornPI with a
# regime shift in both slopes.
# Both interaction terms are created here. 'Inter_Corn' in particular is selected
# below but is not created by any cell above this one, so on a fresh kernel run
# from top to bottom the column selection would otherwise raise a KeyError.
df_with_dummies['Inter_Oil'] = df_with_dummies['break_dummy']*df_with_dummies['Oil']
df_with_dummies['Inter_Corn'] = df_with_dummies['break_dummy']*df_with_dummies['CornPI']

# Regressors: trend, Oil, CornPI, break dummy, the two break interactions and the
# eleven monthly dummies; add_constant supplies the intercept.
X4 = df_with_dummies[['trend','Oil', 'CornPI', 'break_dummy', 'Inter_Corn', 'Inter_Oil', 'D_April', 'D_August', 'D_December', 'D_February',  'D_July',  'D_June',  'D_March', 'D_May', 'D_September', 'D_October', 'D_November']]
X4 = sm.add_constant(X4)
Y4 = df_with_dummies['API']

# OLS estimation
ols_model4 = sm.OLS(Y4,X4).fit()
print(ols_model4.summary())

# Store the residuals of this model on the frame
df_with_dummies['residuals4'] = ols_model4.resid

# Augmented Dickey-Fuller test on the residuals: at most 12 lags, constant plus
# trend (regression='ct').
# NOTE(review): adfuller reports standard ADF p-values / critical values;
# residual-based cointegration tests normally need their own -- confirm.
adf_test4 = adfuller(df_with_dummies['residuals4'], maxlag=12, regression='ct')

# Print the results
print('ADF statistic:', adf_test4[0])
print('p-value:', adf_test4[1])
print('Critical values:', adf_test4[4])


                            OLS Regression Results                            
Dep. Variable:                    API   R-squared:                       0.760
Model:                            OLS   Adj. R-squared:                  0.733
Method:                 Least Squares   F-statistic:                     28.00
Date:                Mon, 12 Aug 2024   Prob (F-statistic):           4.75e-38
Time:                        20:48:14   Log-Likelihood:                -740.85
No. Observations:                 168   AIC:                             1518.
Df Residuals:                     150   BIC:                             1574.
Df Model:                          17                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const          88.2902     10.357      8.525      

In [11]:
# Gregory and Hansen cointegration test: API regressed on CornPI with a regime shift.
# The break x CornPI interaction is created first because the regression needs it.
df_with_dummies['Inter_Corn'] = df_with_dummies['break_dummy']*df_with_dummies['CornPI']

# Regressors: trend, CornPI, break dummy, break x CornPI interaction and the
# eleven monthly dummies; add_constant supplies the intercept.
X5 = df_with_dummies[['trend', 'CornPI', 'break_dummy', 'Inter_Corn', 'D_April', 'D_August', 'D_December', 'D_February',  'D_July',  'D_June',  'D_March', 'D_May', 'D_September', 'D_October', 'D_November']]
X5 = sm.add_constant(X5)
Y5 = df_with_dummies['API']

# OLS estimation
ols_model5 = sm.OLS(Y5,X5).fit()
print(ols_model5.summary())

# Store the residuals of this model on the frame
df_with_dummies['residuals5'] = ols_model5.resid

# Augmented Dickey-Fuller test on the residuals: at most 12 lags, constant plus
# trend (regression='ct'). The result is bound to adf_test5 so that the prints
# below report this model's test and adf_test4 is left untouched.
# NOTE(review): adfuller reports standard ADF p-values / critical values;
# residual-based cointegration tests normally need their own -- confirm.
adf_test5 = adfuller(df_with_dummies['residuals5'], maxlag=12, regression='ct')

# Print the results
print('ADF statistic:', adf_test5[0])
print('p-value:', adf_test5[1])
print('Critical values:', adf_test5[4])


                            OLS Regression Results                            
Dep. Variable:                    API   R-squared:                       0.718
Model:                            OLS   Adj. R-squared:                  0.690
Method:                 Least Squares   F-statistic:                     25.77
Date:                Wed, 28 Aug 2024   Prob (F-statistic):           2.58e-34
Time:                        10:20:39   Log-Likelihood:                -754.60
No. Observations:                 168   AIC:                             1541.
Df Residuals:                     152   BIC:                             1591.
Df Model:                          15                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const          86.9767      9.995      8.702      