# In [None]:
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from sklearn.impute import SimpleImputer
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.seasonal import STL
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import acf, pacf

# Load the data
# NOTE: datadir keeps its trailing slash because the file name is appended by
# plain string concatenation.
datadir = "J:/private/SYS4021/2021/Data/VAweather/"
sourcedir = "J:/private/SYS4021/2021/R_Code"
VAweather = pd.read_csv(datadir + 'VirginiaWeatherData.csv')

# Create datetime column: one timestamp per row, pinned to day 1 of the
# row's Year/Month.
VAweather['date'] = pd.to_datetime(VAweather[['Year', 'Month']].assign(day=1))

# Replace -999s with NA and remove the last 7 rows.
# The explicit .copy() makes the trimmed frame independent of the frame it was
# sliced from, so the assignments below (imputation here, new columns later in
# the script) write to a real DataFrame rather than to a slice.
VAweather = VAweather.replace(-999, np.nan).iloc[:-7].copy()

# Impute missing values with each column's mean.
# NOTE(review): columns are selected by position (2 through 7); presumably
# these are the station measurement columns -- confirm against the CSV layout.
imputer = SimpleImputer(strategy='mean')
VAweather.iloc[:, 2:8] = imputer.fit_transform(VAweather.iloc[:, 2:8])

# Create time series of monthly Richmond precipitation and minimum temperature.
# Extracted once, after cleaning and imputation, so the series used by the
# rest of the script contain no -999 sentinels and no missing values.
Richmond_P = VAweather['R_Precip']
Richmond_Tmin = VAweather['R_Tmin']

# Boxplot: distribution of the Richmond precipitation series on the y axis
sns.boxplot(y=Richmond_P)
plt.show()

# Seasonality analysis: STL decomposition with a seasonal period of
# 12 observations; the fitted result draws its own component panels.
stl = STL(endog=Richmond_P, period=12)
res = stl.fit()
res.plot()
plt.show()

# Periodogram
from scipy.signal import periodogram

# Estimate the power spectrum of the precipitation series (NaNs dropped first).
# The sampling frequency is left at SciPy's default, so `frequencies` is in
# cycles per observation.
# NOTE(review): scipy.signal.periodogram applies no smoothing, although the
# plot title below says "Smooth" -- confirm which is intended.
precip_observed = Richmond_P.dropna()
frequencies, spectrum = periodogram(precip_observed, scaling='spectrum')

plt.plot(frequencies, spectrum)
plt.xlabel('Frequency')
plt.ylabel('Spectrum')
plt.title("Smooth Periodogram of Richmond Precip")
plt.show()

# Find the peak: the frequency with the largest spectral value, and the
# corresponding period (its reciprocal, in observations per cycle).
peak_index = np.argmax(spectrum)
max_omega_precip = frequencies[peak_index]
peak_period = 1 / max_omega_precip
print("Peak Frequency:", max_omega_precip)
print("Period:", peak_period)

# Model Trend and Seasonality
# Time index: consecutive integers 0, 1, 2, ... with one step per row.
VAweather['t'] = np.arange(VAweather.shape[0])

# Trend model: OLS of Richmond minimum temperature on an intercept and t.
trend_design = sm.add_constant(VAweather['t'])
temp_trend_model = sm.OLS(Richmond_Tmin, trend_design).fit()
print(temp_trend_model.summary())

# Plot trend model: observed series with the fitted trend line in red.
sns.lineplot(data=VAweather, x='date', y='R_Tmin')
plt.plot(VAweather['date'], temp_trend_model.fittedvalues, color='red')
plt.show()

# Seasonal model: OLS of Richmond minimum temperature on an intercept, the
# time index t, and one sine/cosine pair with a period of 12 observations.
# The design matrix is a DataFrame with named columns so that statsmodels
# labels the regressors 'const', 't', 'sin_12', 'cos_12'. With an unnamed
# NumPy array they would be 'x1', 'x2', 'x3', and the diagnostic call below
# could not look up 't' by name.
seasonal_phase = 2 * np.pi * VAweather['t'] / 12
seasonal_design = pd.DataFrame(
    {
        't': VAweather['t'],
        'sin_12': np.sin(seasonal_phase),
        'cos_12': np.cos(seasonal_phase),
    }
)
seasonal_model = sm.OLS(Richmond_Tmin, sm.add_constant(seasonal_design)).fit()
print(seasonal_model.summary())

# Plot seasonal model: observed series against the fitted values.
plt.plot(VAweather['date'], Richmond_Tmin, label='Observed')
plt.plot(VAweather['date'], seasonal_model.fittedvalues, color='red', label='Fitted')
plt.legend()
plt.show()

# Model diagnostics: regression diagnostic panels for the regressor 't'.
# plot_regress_exog adds its own 2x2 grid of axes to the figure it is given,
# so it is handed an empty figure rather than one already filled by
# plt.subplots(2, 2).
fig = plt.figure(figsize=(10, 8))
sm.graphics.plot_regress_exog(seasonal_model, 't', fig=fig)
plt.show()

# ARIMA Model
# Work on the residuals of the seasonal regression.
e_ts_temp = seasonal_model.resid

# Autocorrelation (top) and partial autocorrelation (bottom) of the residuals.
fig, ax = plt.subplots(nrows=2, ncols=1)
acf_axis, pacf_axis = ax
plot_acf(e_ts_temp, ax=acf_axis)
plot_pacf(e_ts_temp, ax=pacf_axis)
plt.show()

# AR(1) model
ar1_spec = ARIMA(e_ts_temp, order=(1, 0, 0))
ar1_model = ar1_spec.fit()
print(ar1_model.summary())

# MA(2) model
ma2_spec = ARIMA(e_ts_temp, order=(0, 0, 2))
ma2_model = ma2_spec.fit()
print(ma2_model.summary())

# ARMA(1, 2) model
arma12_spec = ARIMA(e_ts_temp, order=(1, 0, 2))
arma12_model = arma12_spec.fit()
print(arma12_model.summary())

# Auto ARIMA: non-seasonal order search, printing each candidate it tries.
from pmdarima import auto_arima
auto_model = auto_arima(e_ts_temp, seasonal=False, trace=True)
print(auto_model.summary())

# ARIMA(1,1,1) model
arima111_spec = ARIMA(e_ts_temp, order=(1, 1, 1))
arima111_model = arima111_spec.fit()
print(arima111_model.summary())
