In [None]:
#************************************************************
#
#      	Session 20
#			Time Series Analysis 4 - Multivariate
#
#************************************************************

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.varmax import VARMAX
from statsmodels.tsa.api import VAR
from datetime import datetime

# ---------------------------------------------------------------------------
# Load the Virginia weather data and prepare the Richmond temperature series
# ---------------------------------------------------------------------------
datadir = "J:/private/SYS4021/2021/Data/VAweather/"
VAweather = pd.read_csv(datadir + 'VirginiaWeatherData.csv')

# -999 is the missing-value sentinel in the file; convert it to NaN
VAweather = VAweather.replace(-999.0, np.nan)

# Drop the last 7 rows. The explicit copy keeps the column assignment below
# from operating on a slice of the original frame (SettingWithCopyWarning).
VAweather = VAweather.iloc[:-7].copy()

# Create a date column for plotting (first day of each Year/Month)
VAweather['date'] = pd.to_datetime(VAweather[['Year', 'Month']].assign(DAY=1))

# Impute NAs by carrying the last valid observation forward (simplified
# placeholder, since no direct equivalent of 'mnimput' exists in Python).
# DataFrame.ffill() replaces the deprecated fillna(method='ffill').
VAweather = VAweather.ffill()

# Monthly Richmond minimum and maximum temperature.
# Forward-fill cannot fill NaNs at the very start of a column, so any that
# remain are dropped here.
# NOTE(review): if the two columns have different numbers of leading NaNs the
# resulting series will differ in length and no longer line up month by month.
Richmond_Tmin = VAweather['R_Tmin'].dropna()
Richmond_Tmax = VAweather['R_Tmax'].dropna()

# Plain NumPy arrays are used as the time series from here on
Tmin_ts = Richmond_Tmin.values
Tmax_ts = Richmond_Tmax.values

# ---------------------------------------------------------------------------
# Univariate models of Richmond min and max temperature
# ---------------------------------------------------------------------------
# Fit a linear time trend to each series, holding out the last 6 observations
# as the test period used by the forecast section further below.
time_temp = np.arange(1, len(Tmin_ts) - 6 + 1)
Tmin_lm = np.polyfit(time_temp, Tmin_ts[:-6], 1)
Tmax_lm = np.polyfit(time_temp, Tmax_ts[:-6], 1)

# Residuals of the trend models (observed minus fitted trend)
Tmin_detrended = Tmin_ts[:-6] - np.polyval(Tmin_lm, time_temp)
Tmax_detrended = Tmax_ts[:-6] - np.polyval(Tmax_lm, time_temp)

# Fit univariate ARMA models to the trend residuals
e_Tmin_lm = ARIMA(Tmin_detrended, order=(2, 0, 1)).fit()
e_Tmax_lm = ARIMA(Tmax_detrended, order=(1, 0, 0)).fit()

print(e_Tmin_lm.summary())
print(e_Tmax_lm.summary())

# See if the trend residuals of the two series are correlated.
# These are the series the VARMA model is fit to later. They must be the
# trend-model residuals rather than the ARIMA innovations (`.resid`), because
# the VARMA forecasts are added directly to the trend prediction; using the
# innovations would drop the autocorrelation captured by the ARMA terms.
allResiduals = pd.DataFrame({
    'Tmin': Tmin_detrended,
    'Tmax': Tmax_detrended
})
print(allResiduals.corr())

# ---------------------------------------------------------------------------
# Simulate 50 years of monthly min and max temperature residuals from the
# two independent univariate ARMA models
# ---------------------------------------------------------------------------
np.random.seed(0)
n_sim_months = 12 * 50

# The number of simulated steps is passed as `nsimulations`; the results
# object has no `nsim` keyword. np.ravel guarantees 1-D columns for the
# DataFrame built below.
e_Tmin_sim = np.ravel(e_Tmin_lm.simulate(nsimulations=n_sim_months))
e_Tmax_sim = np.ravel(e_Tmax_lm.simulate(nsimulations=n_sim_months))

# See if the simulated residuals are correlated
allSimulations = pd.DataFrame({
    'Tmin': e_Tmin_sim,
    'Tmax': e_Tmax_sim
})
print(allSimulations.corr())

# ---------------------------------------------------------------------------
# Build a VARMA model of the min and max temperature residuals
# ---------------------------------------------------------------------------
# Grid search over AR order p = 1..3 (rows) and MA order q = 0..3 (columns).
# Each fitted result is kept so the winner does not have to be refit.
AICmatrix = np.zeros((3, 4))
candidate_fits = {}
for p in range(1, 4):
    for q in range(4):
        fit_result = VARMAX(allResiduals, order=(p, q)).fit(disp=False)
        AICmatrix[p - 1, q] = fit_result.aic
        candidate_fits[(p, q)] = fit_result

print(AICmatrix)

# Pick the model with the lowest AIC (row index i corresponds to p = i + 1)
best_row, best_q = np.unravel_index(AICmatrix.argmin(), AICmatrix.shape)
varma_model = candidate_fits[(int(best_row) + 1, int(best_q))]

# Simulate 50 years of monthly temperature residuals from the VARMA model.
# The number of steps is passed as `nsimulations` (there is no `nsim`
# keyword). The result is converted to a plain 2-D array (column 0 = Tmin,
# column 1 = Tmax) because it is indexed positionally as T_sim[:, k] later.
T_sim = np.asarray(varma_model.simulate(nsimulations=12 * 50))

# Compare correlation of simulated residuals to actual residuals
print(pd.DataFrame(T_sim, columns=allResiduals.columns).corr())
print(allResiduals.corr())

# ---------------------------------------------------------------------------
# Add the trend (mean) predictions to the simulated residuals and plot the
# simulated Tmin and Tmax series
# ---------------------------------------------------------------------------
time_50yr = np.arange(1, 50*12 + 1)
Tmin_mean = np.polyval(Tmin_lm, time_50yr)
Tmax_mean = np.polyval(Tmax_lm, time_50yr)

# The simulation may be a DataFrame (when the model was fit to one), which
# does not support [:, k] indexing; go through a plain array instead.
T_sim_values = np.asarray(T_sim)

plt.figure(figsize=(12, 6))
plt.plot(time_50yr, T_sim_values[:, 0] + Tmin_mean, label='Simulated Tmin', color='blue')
plt.plot(time_50yr, T_sim_values[:, 1] + Tmax_mean, label='Simulated Tmax', color='red')
plt.xlabel('Time Step')
plt.ylabel('Temperature')
plt.legend()
plt.show()

# ---------------------------------------------------------------------------
# Diagnostics for the VARMA model
# ---------------------------------------------------------------------------
# Work with plain arrays: the model residuals may be a DataFrame, which does
# not support [:, k] indexing. Column 0 is Tmin and column 1 is Tmax.
varma_resid = np.asarray(varma_model.resid)
modelled_series = np.asarray(allResiduals)

# Fitted values of the VARMA model (true - residual). The statsmodels results
# carry one residual per observation, so no leading observation is dropped.
Tmin_fitted = modelled_series[:, 0] - varma_resid[:, 0]
Tmax_fitted = modelled_series[:, 1] - varma_resid[:, 1]

# Fitted values of the full model = trend prediction + VARMA fitted residual
Tmin_fitted_total = np.polyval(Tmin_lm, time_temp) + Tmin_fitted
Tmax_fitted_total = np.polyval(Tmax_lm, time_temp) + Tmax_fitted

# Residuals vs Fitted
plt.figure(figsize=(12, 6))
plt.subplot(2, 1, 1)
sns.scatterplot(x=Tmin_fitted_total, y=varma_resid[:, 0])
plt.xlabel('Tmin Fitted Values')
plt.ylabel('Tmin Residuals')

plt.subplot(2, 1, 2)
sns.scatterplot(x=Tmax_fitted_total, y=varma_resid[:, 1])
plt.xlabel('Tmax Fitted Values')
plt.ylabel('Tmax Residuals')

plt.tight_layout()
plt.show()

# QQ plot of residuals (seaborn has no qqplot; use the statsmodels one)
fig, ax = plt.subplots(2, 1, figsize=(12, 8))
sm.qqplot(varma_resid[:, 0], line='s', ax=ax[0])
ax[0].set_title('Tmin Residuals QQ')
sm.qqplot(varma_resid[:, 1], line='s', ax=ax[1])
ax[1].set_title('Tmax Residuals QQ')
plt.tight_layout()
plt.show()

# ---------------------------------------------------------------------------
# Six-month-ahead forecast from the selected VARMA model, evaluated against
# the held-out final six observations
# ---------------------------------------------------------------------------
varma_fcst = varma_model.get_forecast(steps=6)
fcst_mean = varma_fcst.predicted_mean
fcst_interval = varma_fcst.conf_int()

# Time index of the test period: the six steps that follow the training window
n_obs = len(Tmin_ts)
next_6mo_time = np.arange(n_obs - 5, n_obs + 1)

# Observed values over the test period
next_6mo_Tmin_ts = Tmin_ts[-6:]
next_6mo_Tmax_ts = Tmax_ts[-6:]

# Tmin: trend prediction plus forecast residual (with interval bounds)
E_Y_pred_Tmin = np.polyval(Tmin_lm, next_6mo_time)
e_t_pred_Tmin = fcst_mean['Tmin']
e_t_pred_Tmin_lower = fcst_interval['lower Tmin']
e_t_pred_Tmin_upper = fcst_interval['upper Tmin']
next_6mo_prediction_Tmin = E_Y_pred_Tmin + e_t_pred_Tmin

# Tmax: trend prediction plus forecast residual (with interval bounds)
E_Y_pred_Tmax = np.polyval(Tmax_lm, next_6mo_time)
e_t_pred_Tmax = fcst_mean['Tmax']
e_t_pred_Tmax_lower = fcst_interval['lower Tmax']
e_t_pred_Tmax_upper = fcst_interval['upper Tmax']
next_6mo_prediction_Tmax = E_Y_pred_Tmax + e_t_pred_Tmax

# Mean squared prediction error over the test period
Tmin_sq_err = (next_6mo_prediction_Tmin - next_6mo_Tmin_ts)**2
Tmax_sq_err = (next_6mo_prediction_Tmax - next_6mo_Tmax_ts)**2
print('MSE Tmin:', np.mean(Tmin_sq_err))
print('MSE Tmax:', np.mean(Tmax_sq_err))

# ---------------------------------------------------------------------------
# Plot actual vs. predicted values over the six-month test period, with the
# forecast interval shaded around each prediction
# ---------------------------------------------------------------------------
plt.figure(figsize=(12, 6))

# Top panel: minimum temperature
plt.subplot(2, 1, 1)
plt.plot(next_6mo_time, next_6mo_Tmin_ts, label='Actual Tmin', color='black')
plt.plot(next_6mo_time, next_6mo_prediction_Tmin, label='Predicted Tmin', color='red')
plt.fill_between(next_6mo_time, E_Y_pred_Tmin + e_t_pred_Tmin_lower, E_Y_pred_Tmin + e_t_pred_Tmin_upper, color='red', alpha=0.2)
plt.xlabel('Time Step')
plt.ylabel('Richmond Tmin')
plt.title('Tmin Trend and Seasonality Model + VARMA of Residuals')
plt.legend()  # the line labels above are only shown once a legend is drawn

# Bottom panel: maximum temperature
plt.subplot(2, 1, 2)
plt.plot(next_6mo_time, next_6mo_Tmax_ts, label='Actual Tmax', color='black')
plt.plot(next_6mo_time, next_6mo_prediction_Tmax, label='Predicted Tmax', color='red')
plt.fill_between(next_6mo_time, E_Y_pred_Tmax + e_t_pred_Tmax_lower, E_Y_pred_Tmax + e_t_pred_Tmax_upper, color='red', alpha=0.2)
plt.xlabel('Time Step')
plt.ylabel('Richmond Tmax')
plt.title('Tmax Trend and Seasonality Model + VARMA of Residuals')
plt.legend()

plt.tight_layout()
plt.show()
