# Chapter 4 - The MA Model

## Listing 4-1. Importing Stock Price data using Yahoo Finance package

In [None]:
from pandas_datareader import data as pdr
import yfinance

# Fetch MSFT quotes for calendar year 2019 through pandas_datareader's Yahoo
# reader and keep only the 'Close' column; `data` is the series every later
# listing works on.
data = pdr.get_data_yahoo(
    'MSFT',
    start='2019-01-01',
    end='2019-12-31',
)['Close']


## Listing 4-2. Plotting the Stock Price data 

In [None]:
import matplotlib.pyplot as plt
# Line plot of the series held in `data`, with a labelled y-axis.
ax = data.plot()
ax.set(ylabel="Stock Price")
plt.show()


## Listing 4-3. Computing the differenced data and plotting it

In [None]:
# Difference the series: each value becomes its change from the previous
# observation, and the undefined first element is dropped.
# `data` is overwritten here, so every later listing sees the differenced
# series rather than the raw closing prices.
data = data.diff()
data = data.dropna()

ax = data.plot()
ax.set(ylabel="Daily Difference in Stock Price")
plt.show()


## Listing 4-4. Applying an ADF test to the differenced data 

In [None]:
from statsmodels.tsa.stattools import adfuller
# Augmented Dickey-Fuller test on the current `data`. Element 1 of the returned
# tuple is read as the p-value; below 0.05 the series is reported as
# stationary, otherwise as not stationary.
result = adfuller(data)
pvalue = result[1]
print('stationary' if pvalue < 0.05 else 'not stationary')


## Listing 4-5. Plotting the Autocorrelation Function and the Partial Autocorrelation Function

In [None]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
# Draw the autocorrelation and the partial autocorrelation of `data`, each up
# to lag 20, then display the figures.
for draw_correlogram in (plot_acf, plot_pacf):
    draw_correlogram(data, lags=20)
plt.show()


## Listing 4-6. Fitting the MA model and plotting the forecast

In [None]:
from sklearn.metrics import r2_score
from statsmodels.tsa.arima.model import ARIMA

# Fit an MA(1) model, i.e. ARIMA with order (p=0, d=0, q=1), and compare its
# in-sample predictions with the series it was fitted on.
# NOTE(review): if the cells run in order, `data` was already differenced in
# Listing 4-3, so the series built here is a second difference - confirm that
# this is intended.
orig_data = data.diff().dropna()

mod = ARIMA(orig_data, order=(0, 0, 1))
res = mod.fit()
pred = res.predict()

# Overlay the fitted series and the model's in-sample predictions.
plt.plot(orig_data)
plt.plot(pred)
plt.show()

# In-sample R2 between the fitted series and its predictions.
print(r2_score(orig_data, pred))


## Listing 4-7. Fitting the MA model on train data and evaluating the R2 score on train and test data

In [None]:
# Hold-out evaluation of the MA(1) model: the first 240 observations of the
# differenced series are used for fitting, the following 10 for testing.
orig_data = data.diff().dropna()
train = orig_data[0:240]
test = orig_data[240:250]

# Fit the MA(1) model (ARIMA order (0, 0, 1)) on the training part only.
mod = ARIMA(train, order=(0, 0, 1))
res = mod.fit()

# In-sample predictions for the training span, and a single multi-step
# forecast covering the whole test span.
pred = res.predict()
fcst = res.forecast(steps=len(test))

# R2 on the training data, then R2 on the held-out data.
print(r2_score(train, pred))
print(r2_score(test, fcst))


## Listing 4-8. Plotting the out of sample forecast of the MA(1) model (MA with order 1)

In [None]:
# Plot the held-out values and the forecast against a plain 0..n-1 x-axis
# (both are converted to lists, which discards any index they carry).
# NOTE(review): the legend says "Prices", but `test` is built from differenced
# data in the preceding listings - confirm the intended wording.
for values in (test, fcst):
    plt.plot(list(values))
plt.legend(['Actual Prices', 'Predicted Prices'])
plt.show()


## Listing 4-9. Estimating the error of the MA(1) model for 10 refitted one-step forecasts

In [None]:
import pandas as pd
# Rolling evaluation of the MA(1) model: for each of the 10 test observations
# the model is refitted on everything observed so far, a one-step-ahead
# forecast is recorded, and the true value is then added to the training data.
orig_data = data.diff().dropna()
train = orig_data[0:240]
test = orig_data[240:250]

# Import the ARIMA class from statsmodels; an MA(q) model is fitted as
# ARIMA with order (0, 0, q).
from statsmodels.tsa.arima.model import ARIMA

fcst = []
for actual in test:
    # Refit the MA(1) model on the data seen so far. The index is reset so the
    # model always receives a plain 0..n-1 index.
    mod = ARIMA(train.reset_index(drop=True), order=(0, 0, 1))
    res = mod.fit()
    fcst.extend(res.forecast(steps=1))
    # Grow the training data with the observed value. pd.concat replaces
    # Series.append, which was removed in pandas 2.0; iterating over the values
    # of `test` avoids integer-key lookups such as test[step] on a series that
    # is presumably date-indexed (deprecated positional fallback).
    train = pd.concat([train, pd.Series([actual])], ignore_index=True)

# In-sample predictions of the last refit. Not used below; computed once here
# (instead of on every iteration) so the notebook variable `pred` still
# reflects this cell.
pred = res.predict()

print(r2_score(list(test), fcst))

# NOTE(review): the legend says "Prices", but the plotted values come from
# differenced data - confirm the intended wording.
plt.plot(list(test))
plt.plot(fcst)
plt.legend(['Actual Prices', 'Predicted Prices'])
plt.show()


## Listing 4-10. Grid search to obtain the MA order that optimizes forecasting R2

In [None]:
def evaluate2(order, n_train=240, n_test=10):
    """Return the out-of-sample R2 of an MA(order) model using refitted one-step forecasts.

    The module-level series ``data`` is differenced, its first ``n_train``
    observations form the initial training data and the next ``n_test``
    observations form the test data. For every test observation the model is
    refitted on all data seen so far, a one-step-ahead forecast is recorded,
    and the true value is then added to the training data.

    Args:
        order: Number of moving-average lags q; the model is fitted as
            ARIMA with order (0, 0, q).
        n_train: Number of leading observations used for the first fit.
        n_test: Number of observations, directly after the training data,
            that are forecast one at a time.

    Returns:
        The R2 score between the test observations and their one-step
        forecasts.
    """
    series = data.diff().dropna()
    train = series[0:n_train]
    test = series[n_train:n_train + n_test]

    fcst = []
    for actual in test:
        # Refit the MA(order) model on the data seen so far. The index is
        # reset so the model always receives a plain 0..n-1 index.
        mod = ARIMA(train.reset_index(drop=True), order=(0, 0, order))
        res = mod.fit()
        fcst.extend(res.forecast(steps=1))
        # pd.concat replaces Series.append, which was removed in pandas 2.0;
        # iterating over the values of `test` avoids integer-key lookups such
        # as test[step] on a series that is presumably date-indexed.
        train = pd.concat([train, pd.Series([actual])], ignore_index=True)

    return r2_score(list(test), fcst)

# Score every MA order from 1 to 20 and collect (order, R2) pairs.
scores = [(i, evaluate2(i)) for i in range(1, 21)]

# Column 0 holds the order, column 1 the R2; print the row(s) with the
# highest R2. The original run reported:
# observe best order is 4 with R2 of 0.566
scores = pd.DataFrame(scores)
best_r2 = scores[1].max()
print(scores[scores[1] == best_r2])


## Listing 4-11. Obtaining the final forecast

In [None]:
# Final rolling forecast with the MA order chosen in the grid search (4): for
# each of the 10 test observations the model is refitted on everything observed
# so far, a one-step-ahead forecast is recorded, and the true value is then
# added to the training data.
orig_data = data.diff().dropna()
train = orig_data[0:240]
test = orig_data[240:250]

fcst = []
for actual in test:
    # Refit the MA(4) model (ARIMA order (0, 0, 4)) on the data seen so far.
    # The index is reset so the model always receives a plain 0..n-1 index.
    mod = ARIMA(train.reset_index(drop=True), order=(0, 0, 4))
    res = mod.fit()
    fcst.extend(res.forecast(steps=1))
    # Grow the training data with the observed value. pd.concat replaces
    # Series.append, which was removed in pandas 2.0; iterating over the values
    # of `test` avoids integer-key lookups such as test[step] on a series that
    # is presumably date-indexed (deprecated positional fallback).
    train = pd.concat([train, pd.Series([actual])], ignore_index=True)

# In-sample predictions of the last refit. Not used below; computed once here
# (instead of on every iteration) so the notebook variable `pred` still
# reflects this cell.
pred = res.predict()

print(r2_score(list(test), fcst))

# NOTE(review): the legend says "Prices", but the plotted values come from
# differenced data - confirm the intended wording.
plt.plot(list(test))
plt.plot(fcst)
plt.legend(['Actual Prices', 'Forecasted Prices'])
plt.show()
