# In [None]:
import pandas as pd
from statsmodels.tsa.stattools import adfuller
import matplotlib.pyplot as plt
from statsmodels.tsa.vector_ar.var_model import VAR
from statsmodels.tsa.stattools import grangercausalitytests


# Function to test stationarity
def test_stationarity(timeseries):
    """Run an Augmented Dickey-Fuller test on a series and print the outcome.

    The test is executed with ``autolag='AIC'``. The first four elements of
    the result are reported under the labels below, followed by one
    "Critical Value (...)" entry per item of the fifth element.

    Args:
        timeseries: One-dimensional data passed straight to ``adfuller``.

    Returns:
        None. The report is written to standard output only.
    """
    adf_result = adfuller(timeseries, autolag='AIC')

    # Headline figures come first, in the order adfuller returns them
    headline_labels = ['Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used']
    report = dict(zip(headline_labels, adf_result[0:4]))

    # Then one entry per critical value
    report.update(
        ('Critical Value (%s)' % level, threshold)
        for level, threshold in adf_result[4].items()
    )

    print('Results of Dickey-Fuller Test:')
    print(pd.Series(report))
# Tip: to check the column names, print the first few rows of the CSV once it has been loaded (e.g. with `df.head()`)


# Load the data set; the observation date is parsed as datetime and used as the index
df = pd.read_csv(r'C:\Users\nick\Downloads\Copy of oil and treasury.csv', index_col='observation_date', parse_dates=['observation_date'])

# Keep only the series analysed below. The selection is done *before* the NaN
# filter on purpose: otherwise a missing or non-numeric value in an unrelated
# CSV column would discard an observation that is complete for these series.
columns_to_use = ['DSG10', 'Real_oil', 'Real_gold']
df = df[columns_to_use]

# Convert the selected columns to numeric; values that cannot be parsed become NaN
df = df.apply(pd.to_numeric, errors='coerce')

# Drop observations where any of the selected series is missing
df = df.dropna()

# Run the stationarity test on every series in levels
for column, level_series in df.items():
    print(f'Testing stationarity for {column}')
    test_stationarity(level_series)

# First-difference the numeric columns (intended to make the data stationary).
# Differencing leaves NaN in the first row, which dropna() removes.
numeric_columns = df.select_dtypes(include=['float64', 'int64']).columns
numeric_levels = df[numeric_columns]
df_diff = numeric_levels.diff().dropna()

# Repeat the stationarity test on each differenced series
for column, diff_series in df_diff.items():
    print(f'Testing stationarity for {column} after differencing')
    test_stationarity(diff_series)
# Granger-causality tests.
# They are run on `df_diff` (the differenced data) rather than on the levels in
# `df`: the series were differenced above precisely to make them stationary,
# and that is the data the rest of the analysis (the VAR model) is built on.

maxlag = 10  # This is the maximum lag to test for, which you may change based on your dataset
test = 'ssr_chi2test'  # Key of the test of interest in each lag's result dict; could also be 'lrtest', 'params_ftest', 'ssr_ftest'

# grangercausalitytests checks whether the SECOND column Granger-causes the
# FIRST one, so each call asks whether `cause` helps predict DSG10.
# The results are stored per cause so the first test is no longer overwritten.
granger_results = {}
for cause in ['Real_oil', 'Real_gold']:
    print(f'Granger causality test: {cause} -> DSG10')
    granger_results[cause] = grangercausalitytests(df_diff[['DSG10', cause]], maxlag=maxlag, verbose=True)

# Kept for backward compatibility: `results` used to end up holding the
# outcome of the last test that was run (Real_gold).
results = granger_results['Real_gold']

# Fit a VAR model with one lag on the differenced data
model = VAR(df_diff)
model_fitted = model.fit(1)
print(model_fitted.summary())

# Number of lags used by the fitted model
lag_order = model_fitted.k_ar

# Seed the forecast with the last `lag_order` DIFFERENCED observations.
# The model was estimated on `df_diff`, so feeding it level values from `df`
# would produce forecasts on the wrong scale.
last_obs = df_diff.values[-lag_order:]

# Forecast the differences
n_forecast_steps = 10  # replace with your desired number of steps
forecast = model_fitted.forecast(y=last_obs, steps=n_forecast_steps)

# Work out the spacing of the forecast dates. An index read from CSV usually
# carries no `freq`, and pd.date_range() refuses start + periods without one.
forecast_freq = df.index.freq
if forecast_freq is None:
    forecast_freq = pd.infer_freq(df.index)
if forecast_freq is None:
    # Irregular index (e.g. rows were dropped): fall back to the typical gap
    # between consecutive observations. NOTE(review): assumes the index is
    # sorted chronologically - confirm for your data.
    forecast_freq = df.index.to_series().diff().median()

# Generate one extra date and drop the first, so that the forecast starts one
# period AFTER the last observation instead of on top of it.
forecast_index = pd.date_range(start=df.index[-1], periods=n_forecast_steps + 1, freq=forecast_freq)[1:]

# Forecasted differences as a DataFrame (columns follow the data the model was fitted on)
forecast_df = pd.DataFrame(forecast, index=forecast_index, columns=df_diff.columns)

# The last actual observation from the undifferenced series
last_actual_obs = df.iloc[-1]

# Undo the differencing: the level at step k is the last observed level plus
# the running sum of the first k forecasted differences. The Series is aligned
# with the DataFrame columns by label.
reversed_forecast_df = forecast_df.cumsum() + last_actual_obs[forecast_df.columns]

print(reversed_forecast_df['DSG10'])