# Chapter 5 - The ARMA Model

## Listing 5-1. Getting the Sunspot data into Python

In [None]:
import pandas as pd

# Read the sunspot CSV, keeping only the columns at positions 1 and 2.
data = pd.read_csv(
    'Ch05_Sunspots_database.csv',
    usecols=[1, 2],
)


## Listing 5-2. Aggregating the sunspot data to yearly data

In [None]:
# The year key is the first four characters of each Date entry.
data['year'] = [stamp[:4] for stamp in data.Date]

# Group the rows by year and add up the monthly sunspot numbers, leaving a
# one-column frame indexed by year.
data = data.groupby('year')[['Monthly Mean Total Sunspot Number']].sum()
data.head()


## Listing 5-3. Plotting the yearly sunspot data

In [None]:
import matplotlib.pyplot as plt

# DataFrame.plot() hands back the Axes it drew on, which is then labelled.
ax = data.plot()
ax.set(ylabel='Sunspots')
plt.show()


## Listing 5-4. Applying the ADF test to the sunspot yearly totals

In [None]:
from statsmodels.tsa.stattools import adfuller

# Run the Augmented Dickey-Fuller test on the yearly sunspot series and show
# the raw result tuple.
result = adfuller(data['Monthly Mean Total Sunspot Number'])
print(result)

# The p-value is the second entry of the result.
pvalue = result[1]

# A p-value under 0.05 is reported as stationary, anything else as not.
print('stationary' if pvalue < 0.05 else 'not stationary')


## Listing 5-7. Creating the ACF and PACF plots

In [None]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import matplotlib.pyplot as plt

# Draw the autocorrelation plot first, then the partial autocorrelation plot,
# each over 40 lags of the yearly series.
for draw_correlogram in (plot_acf, plot_pacf):
    draw_correlogram(data['Monthly Mean Total Sunspot Number'], lags=40)

plt.show()


## Listing 5-8. Fitting the ARMA(1,1) model

In [None]:
from sklearn.metrics import r2_score
from statsmodels.tsa.arima.model import ARIMA

# Fit an ARMA(1,1) model, expressed as ARIMA with order (1, 0, 1), on the
# yearly series passed as a plain list.
mod = ARIMA(
    data['Monthly Mean Total Sunspot Number'].tolist(),
    order=(1, 0, 1),
)
res = mod.fit()

# Score the model's predictions against the observed values.
pred = res.predict()
print(r2_score(data, pred))

# Overlay actual and predicted values on one chart.
plt.plot(data['Monthly Mean Total Sunspot Number'].tolist(), label='Actual Sunspots')
plt.plot(pred, label='Predicted Sunspots')
plt.legend()
plt.xlabel('Timesteps')
plt.show()


## Listing 5-9. Plotting a histogram of the residuals

In [None]:
# Histogram of the fitted model's residuals (res.resid).
ax = pd.Series(res.resid).hist()
ax.set(xlabel='Residual', ylabel='Number of occurences')
plt.show()


## Listing 5-10. Obtaining the summary table of your model’s fit

In [None]:
# Build the summary table for the fitted model held in `res`. This is left as
# a bare expression; presumably it is the last statement of a notebook cell so
# that the notebook renders the returned object as the cell output.
res.summary()

## Listing 5-11. Grid search with cross-validation for optimal p and q

In [None]:
import numpy as np
from sklearn.model_selection import TimeSeriesSplit

# Grid search over ARMA(p, q) orders with p, q in 0..12.
# For every order, each time-series fold is evaluated walk-forward: the model
# is refitted, the next value is forecast one step ahead, and the true value
# is then appended to the training data. The fold score is the R2 between the
# held-out values and the collected one-step forecasts.
data_array = data.values

# The splitter does not depend on p or q, so it is built once and reused.
tscv = TimeSeriesSplit(test_size=10)

avg_errors = []

for p in range(13):
    for q in range(13):

        # NOTE: despite the name, this collects R2 scores (one per fold).
        errors = []

        for train_index, test_index in tscv.split(data_array):

            X_train, X_test = data_array[train_index], data_array[test_index]
            X_test_orig = X_test

            fcst = []
            # One iteration per held-out observation in this fold.
            for step in range(len(X_test_orig)):

                try:
                    mod = ARIMA(X_train, order=(p, 0, q))
                    res = mod.fit()

                    # forecast(steps=1) returns a length-1 array; store the
                    # scalar so fcst stays a flat list of numbers even when
                    # the failure sentinel below is mixed in.
                    fcst.append(res.forecast(steps=1)[0])

                except Exception as exc:
                    # Catch Exception rather than everything so that Ctrl-C
                    # (KeyboardInterrupt) can still stop the long search, and
                    # report why this fit failed.
                    print('errorred', p, q, repr(exc))
                    # Sentinel forecast: drives this fold's R2 strongly
                    # negative so a failing order is never selected.
                    fcst.append(-9999999.)

                # Reveal the true value of the step just forecast.
                X_train = np.concatenate((X_train, X_test[0:1, :]))
                X_test = X_test[1:]

            errors.append(r2_score(X_test_orig, fcst))

        pq_result = [p, q, np.mean(errors)]

        print(pq_result)
        avg_errors.append(pq_result)

# Tabulate the mean score per order: rows are p, columns are q.
avg_errors = pd.DataFrame(avg_errors)
avg_errors.columns = ['p', 'q', 'error']
result = avg_errors.pivot(index='p', columns='q')


## Listing 5-12. Showing the test prediction of the final model

In [None]:
# Hold out the last 10 observations as the test set.
data_array = data.values
X_train, X_test = data_array[:-10], data_array[-10:]
X_test_orig = X_test

# Walk forward through the test set: refit ARMA(10,9), forecast one step
# ahead, then move the true value from the test set into the training data.
fcst = []
for step in range(10):
    mod = ARIMA(X_train, order=(10, 0, 9))
    res = mod.fit()
    fcst.append(res.forecast(steps=1))
    X_train = np.concatenate((X_train, X_test[:1]))
    X_test = X_test[1:]

# Compare the held-out values with the one-step-ahead forecasts.
plt.plot(X_test_orig, label='Actual Sunspots')
plt.plot(fcst, label='Predicted Sunspots')
plt.legend()
plt.xlabel('Time steps of test data')
plt.show()
