In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
import statsmodels.api as sm
import statsmodels.tsa.stattools as ts
from statsmodels.tsa.stattools import adfuller
from math import sqrt
import datetime 
import calendar
import sys
from statsmodels.tsa.stattools import grangercausalitytests

In [2]:
# Load your data
file_path = r'C:\Users\PHIRI003\OneDrive - Wageningen University & Research\Documents\WEcR Internship\Work\R\Modified_Book1.xlsx'
df = pd.read_excel(file_path, header=0)

# Define the start and end dates
start_date = '2010-01-01'
end_date = '2023-10-31'

# Filter the DataFrame to include only the desired date range
df = df[(df['Date'] >= start_date) & (df['Date'] <= end_date)]

In [3]:

# Define the function to create seasonal dummy variables
def SeasonalDummies(df, frequency='M'):
    nT = len(df)  # Number of observations
    startdate = df.index[0]  # Start date of the time series
    datetime = pd.DataFrame(data=pd.date_range(startdate, periods=nT, freq=frequency), columns=["datetime"])
    monthnumber = datetime["datetime"].dt.month  # Extract month numbers
    monthname = pd.DataFrame()  # DataFrame to store month names
    
    for i in range(nT):
        monthname.at[i, 'D'] = calendar.month_name[monthnumber[i]]  # Assign month names based on month numbers
    
    seasdum = pd.get_dummies(monthname['D'], prefix='D')  # Create dummy variables for each month
    seasdum = seasdum.drop('D_January', axis=1)  # Drop January to avoid multicollinearity
    seasdum = seasdum.set_index(df.index)  # Set the index to match the original DataFrame
    seasdum = seasdum.astype(int)
    return seasdum

# Create seasonal dummies
seasdum = SeasonalDummies(df)

#merge data with seasonal dummies
df_with_dummies = pd.concat([df, seasdum], axis=1) 

In [4]:
#Create a break dummy
break_date = '2015-04-30'
#break_date = '2022-05-28'
#break_date = '2008-08-30'
df_with_dummies['break_dummy'] = (df_with_dummies['Date'] >= break_date).astype(int)

#Create interaction term
df_with_dummies['interaction_term'] = df_with_dummies['break_dummy']*df_with_dummies['PPI']


df_with_dummies['trend'] = range(1, len(df_with_dummies) + 1)  # Trend variable from 1 to number of observations


#print(df_with_dummies.head)

In [5]:
#DEfine regression variables
#X = pd.concat(df_with_dummies['PPI', seasdum, 'interaction_term', 'break_+dummy'], axis=1)
X = df_with_dummies[['trend', 'PPI', 'break_dummy', 'interaction_term', 'D_April', 'D_August', 'D_December',  'D_February',  'D_July',  'D_June',  'D_March', 'D_May', 'D_September', 'D_October', 'D_November']]
X = sm.add_constant(X)  # Adds a constant term to the predictor
Y = df_with_dummies['CPI']

reg_result = sm.OLS(Y, X).fit()
print(reg_result.summary())

residuals = reg_result.resid
adf_test = adfuller(residuals, maxlag=12, regression='c')

print('ADF Statistic:', adf_test[0])
print('p-value:', adf_test[1])

                            OLS Regression Results                            
Dep. Variable:                    CPI   R-squared:                       0.985
Model:                            OLS   Adj. R-squared:                  0.984
Method:                 Least Squares   F-statistic:                     672.2
Date:                Fri, 30 Aug 2024   Prob (F-statistic):          2.93e-129
Time:                        14:32:54   Log-Likelihood:                -246.80
No. Observations:                 166   AIC:                             525.6
Df Residuals:                     150   BIC:                             575.4
Df Model:                          15                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const              -10.3635      6.256  

In [14]:
#Create interaction term
df_with_dummies['interaction_term2'] = df_with_dummies['break_dummy']*df_with_dummies['CPI']

#X = pd.concat(df_with_dummies['PPI', seasdum, 'interaction_term', 'break_+dummy'], axis=1)
X2 = df_with_dummies[[ 'CPI', 'break_dummy', 'interaction_term2', 'D_April', 'D_August', 'D_December',  'D_February',  'D_July',  'D_June',  'D_March', 'D_May', 'D_September', 'D_October', 'D_November']]
#X = df_with_dummies[['PPI', 'break_dummy', 'interaction_term']]
X2 = sm.add_constant(X2)  # Adds a constant term to the predictor
Y2 = df_with_dummies['PPI']

reg_result2 = sm.OLS(Y2, X2).fit()
print(reg_result2.summary())

residuals2 = reg_result2.resid
adf_test2 = adfuller(residuals2, maxlag=8, regression='c')

print('ADF Statistic:', adf_test2[0])
print('p-value:', adf_test2[1])

                            OLS Regression Results                            
Dep. Variable:                    PPI   R-squared:                       0.989
Model:                            OLS   Adj. R-squared:                  0.987
Method:                 Least Squares   F-statistic:                     930.5
Date:                Fri, 30 Aug 2024   Prob (F-statistic):          9.15e-139
Time:                        14:37:08   Log-Likelihood:                -227.62
No. Observations:                 166   AIC:                             485.2
Df Residuals:                     151   BIC:                             531.9
Df Model:                          14                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const                23.9745      3.52

In [9]:
#Create interaction term
df_with_dummies['interaction_term3'] = df_with_dummies['break_dummy']*df_with_dummies['Oil Price']

#X = pd.concat(df_with_dummies['PPI', seasdum, 'interaction_term', 'break_+dummy'], axis=1)
X3 = df_with_dummies[['trend', 'Oil Price', 'break_dummy', 'interaction_term3', 'D_April', 'D_August', 'D_December',  'D_February',  'D_July',  'D_June',  'D_March', 'D_May', 'D_September', 'D_October', 'D_November']]
#X = df_with_dummies[['PPI', 'break_dummy', 'interaction_term']]
X3 = sm.add_constant(X3)  # Adds a constant term to the predictor
Y3 = df_with_dummies['PPI']

reg_result3 = sm.OLS(Y3, X3).fit()
print(reg_result3.summary())

residuals3 = reg_result3.resid
adf_test3 = adfuller(residuals3, maxlag=8, regression='c')

print('ADF Statistic:', adf_test3[0])
print('p-value:', adf_test3[1])

                            OLS Regression Results                            
Dep. Variable:                    PPI   R-squared:                       0.776
Model:                            OLS   Adj. R-squared:                  0.753
Method:                 Least Squares   F-statistic:                     34.57
Date:                Fri, 30 Aug 2024   Prob (F-statistic):           4.65e-41
Time:                        14:34:13   Log-Likelihood:                -474.51
No. Observations:                 166   AIC:                             981.0
Df Residuals:                     150   BIC:                             1031.
Df Model:                          15                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const                83.4724      3.24

In [10]:
#Create interaction term
df_with_dummies['interaction_term4'] = df_with_dummies['break_dummy']*df_with_dummies['Wheat Price']

#X = pd.concat(df_with_dummies['PPI', seasdum, 'interaction_term', 'break_+dummy'], axis=1)
X4 = df_with_dummies[['trend', 'Wheat Price',  'break_dummy', 'interaction_term4','D_April', 'D_August', 'D_December',  'D_February',  'D_July',  'D_June',  'D_March', 'D_May', 'D_September', 'D_October', 'D_November']]
#X = df_with_dummies[['PPI', 'break_dummy', 'interaction_term']]
X4 = sm.add_constant(X4)  # Adds a constant term to the predictor
Y4 = df_with_dummies['PPI']

reg_result4 = sm.OLS(Y4, X4).fit()
print(reg_result4.summary())

residuals4 = reg_result4.resid
adf_test4 = adfuller(residuals4, maxlag=8, regression='c')

print('ADF Statistic:', adf_test4[0])
print('p-value:', adf_test4[1])

                            OLS Regression Results                            
Dep. Variable:                    PPI   R-squared:                       0.773
Model:                            OLS   Adj. R-squared:                  0.750
Method:                 Least Squares   F-statistic:                     34.03
Date:                Fri, 30 Aug 2024   Prob (F-statistic):           1.14e-40
Time:                        14:34:32   Log-Likelihood:                -475.52
No. Observations:                 166   AIC:                             983.0
Df Residuals:                     150   BIC:                             1033.
Df Model:                          15                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const                84.5763      3.41

In [13]:
#Wheat and Oil coint

#X = pd.concat(df_with_dummies['PPI', seasdum, 'interaction_term', 'break_+dummy'], axis=1)
X5 = df_with_dummies[['Oil Price',  'break_dummy', 'interaction_term3','D_April', 'D_August', 'D_December',  'D_February',  'D_July',  'D_June',  'D_March', 'D_May', 'D_September', 'D_October', 'D_November']]
#X = df_with_dummies[['PPI', 'break_dummy', 'interaction_term']]
X5 = sm.add_constant(X5)  # Adds a constant term to the predictor
Y5 = df_with_dummies['Wheat Price']

reg_result5 = sm.OLS(Y5, X5).fit()
print(reg_result4.summary())

residuals5 = reg_result5.resid
adf_test5 = adfuller(residuals5, maxlag=8, regression='c')

print('ADF Statistic:', adf_test5[0])
print('p-value:', adf_test5[1])

                            OLS Regression Results                            
Dep. Variable:                    PPI   R-squared:                       0.773
Model:                            OLS   Adj. R-squared:                  0.750
Method:                 Least Squares   F-statistic:                     34.03
Date:                Fri, 30 Aug 2024   Prob (F-statistic):           1.14e-40
Time:                        14:36:45   Log-Likelihood:                -475.52
No. Observations:                 166   AIC:                             983.0
Df Residuals:                     150   BIC:                             1033.
Df Model:                          15                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const                84.5763      3.41