In [1]:
#Necessary packages
import pandas as pd
import numpy as np
from datetime import datetime
import statsmodels.api as sm
import statsmodels.tsa.stattools as ts
from math import sqrt
import datetime 
import calendar
import sys

In [2]:
# Load the data from the Excel file
file_path = r'C:\Users\PHIRI003\OneDrive - Wageningen University & Research\Documents\WEcR Internship\Work\Dairy\Merged_data.xlsx'
df = pd.read_excel(file_path, header=0 )

df.index = df['Date']

In [3]:
# Define the function to create seasonal dummy variables
def SeasonalDummies(df, frequency='M'):
    nT = len(df)  # Number of observations
    startdate = df.index[0]  # Start date of the time series
    datetime = pd.DataFrame(data=pd.date_range(startdate, periods=nT, freq=frequency), columns=["datetime"])
    monthnumber = datetime["datetime"].dt.month  # Extract month numbers
    monthname = pd.DataFrame()  # DataFrame to store month names
    
    for i in range(nT):
        monthname.at[i, 'D'] = calendar.month_name[monthnumber[i]]  # Assign month names based on month numbers
    
    seasdum = pd.get_dummies(monthname)  # Create dummy variables for each month
    seasdum = seasdum.drop('D_January', axis=1)  # Drop January to avoid multicollinearity
    seasdum = seasdum.set_index(df.index)  # Set the index to match the original DataFrame
    seasdum = seasdum.astype(int)
    return seasdum

# Create seasonal dummies
seasdum = SeasonalDummies(df)

# Combine the data with the seasonal dummy variables
df = pd.concat([df, seasdum], axis=1)


In [8]:
#Create a break dummy
break_date = '2015-01-01'
#break_date = '2010-11-01'
#break_date = '2022-12-01'

df['break_dummy'] = (df['Date']>= break_date).astype(int)

#create interaction terms
df['Inter_API'] = df['break_dummy']*df['API'] 
df['Inter_PPI'] = df['break_dummy']*df['PPI'] 
df['Inter_CPI'] = df['break_dummy']*df['CPI'] 

#create a trend variable
df['trend'] = range(1, len(df) + 1)  

In [12]:
#print(df.head())

In [9]:
from statsmodels.tsa.stattools import adfuller

#Gregory and Handen cointegration test
X3 = df[['trend','PPI', 'CPI', 'break_dummy', 'Inter_API', 'Inter_CPI', 'D_April', 'D_August', 'D_December', 'D_February',  'D_July',  'D_June',  'D_March', 'D_May', 'D_September', 'D_October', 'D_November']] 
#X = df_with_dummies[['trend','API']] 
X3 = sm.add_constant(X3)
Y3 = df['API']

#OLS estimation
ols_model3 = sm.OLS(Y3,X3).fit()
print(ols_model3.summary())

#Cointegration test - Gregory and Hansen
# Assuming 'df_with_dummies' is already defined and 'ols_model' is fitted
df['residuals3'] = ols_model3.resid

# Perform the Augmented Dickey-Fuller test on the residuals
adf_test3 = adfuller(df['residuals3'], maxlag=12)

# Print the results
print('ADF statistic:', adf_test3[0])

                            OLS Regression Results                            
Dep. Variable:                    API   R-squared:                       0.985
Model:                            OLS   Adj. R-squared:                  0.983
Method:                 Least Squares   F-statistic:                     575.0
Date:                Fri, 30 Aug 2024   Prob (F-statistic):          3.41e-125
Time:                        14:51:25   Log-Likelihood:                -378.70
No. Observations:                 165   AIC:                             793.4
Df Residuals:                     147   BIC:                             849.3
Df Model:                          17                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const        1607.1238    153.529     10.468      

In [10]:
from statsmodels.tsa.stattools import adfuller

#Gregory and Handen cointegration test
X = df[['trend','API', 'PPI', 'break_dummy', 'Inter_API', 'Inter_PPI', 'D_April', 'D_August', 'D_December', 'D_February',  'D_July',  'D_June',  'D_March', 'D_May', 'D_September', 'D_October', 'D_November']] 
#X = df_with_dummies[['trend','API']] 
X = sm.add_constant(X)
Y = df['CPI']

#OLS estimation
ols_model = sm.OLS(Y,X).fit()
print(ols_model.summary())

#Cointegration test - Gregory and Hansen
# Assuming 'df_with_dummies' is already defined and 'ols_model' is fitted
df['residuals'] = ols_model.resid

# Perform the Augmented Dickey-Fuller test on the residuals
adf_test = adfuller(df['residuals'], maxlag=12)

# Print the results
print('ADF statistic:', adf_test[0])


                            OLS Regression Results                            
Dep. Variable:                    CPI   R-squared:                       0.978
Model:                            OLS   Adj. R-squared:                  0.976
Method:                 Least Squares   F-statistic:                     389.7
Date:                Fri, 30 Aug 2024   Prob (F-statistic):          5.06e-113
Time:                        14:51:38   Log-Likelihood:                -348.26
No. Observations:                 165   AIC:                             732.5
Df Residuals:                     147   BIC:                             788.4
Df Model:                          17                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const          88.9590     25.841      3.443      

In [11]:
from statsmodels.tsa.stattools import adfuller

#Gregory and Handen cointegration test
X2 = df[['trend','API', 'CPI', 'break_dummy', 'Inter_API', 'Inter_CPI', 'D_April', 'D_August', 'D_December', 'D_February',  'D_July',  'D_June',  'D_March', 'D_May', 'D_September', 'D_October', 'D_November']] 
#X = df_with_dummies[['trend','API']] 
X2 = sm.add_constant(X2)
Y2 = df['PPI']

#OLS estimation
ols_model2 = sm.OLS(Y2,X2).fit()
print(ols_model2.summary())

#Cointegration test - Gregory and Hansen
# Assuming 'df_with_dummies' is already defined and 'ols_model' is fitted
df['residuals2'] = ols_model2.resid

# Perform the Augmented Dickey-Fuller test on the residuals
adf_test2 = adfuller(df['residuals2'], maxlag=12)

# Print the results
print('ADF statistic:', adf_test2[0])

                            OLS Regression Results                            
Dep. Variable:                    PPI   R-squared:                       0.947
Model:                            OLS   Adj. R-squared:                  0.940
Method:                 Least Squares   F-statistic:                     153.3
Date:                Fri, 30 Aug 2024   Prob (F-statistic):           2.11e-84
Time:                        14:51:44   Log-Likelihood:                -467.81
No. Observations:                 165   AIC:                             971.6
Df Residuals:                     147   BIC:                             1028.
Df Model:                          17                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const        -311.5800    347.132     -0.898      