In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import pmdarima as pm
import statsmodels as sm
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error

In [2]:
%%capture
# Set up progress bars for the notebook.
# The `tqdm` name imported here is the one `arima_predict` later uses to wrap
# its forecasting loop.
from tqdm.notebook import tqdm
# NOTE(review): calling .pandas() on a tqdm instance presumably registers the
# pandas `progress_apply` helpers; constructing the instance also creates a
# bar, which is likely why this cell runs under %%capture — confirm.
tqdm().pandas()

In [3]:
# CSV file names used by this notebook, in the order they are unpacked below.
# They carry no directory component, so they resolve against the current
# working directory.
files = [
    'emerging_risks_doc_count_instagram.csv',
    'emerging_risks_doc_count_twitter.csv',
    'emerging_risks_local_engagement_facebook.csv',
    'emerging_risks_local_engagement_instagram.csv',
    'emerging_risks_local_engagement_twitter.csv',
]

# Individual handles, named <metric>_<platform>_csv.
(
    count_ig_csv,
    count_tw_csv,
    engagement_fb_csv,
    engagement_ig_csv,
    engagement_tw_csv,
) = files

In [4]:
# Read the Twitter local-engagement CSV and convert its `date` column to
# pandas datetimes.
df = pd.read_csv(engagement_tw_csv)
df["date"] = pd.to_datetime(df["date"])

In [5]:
def arima_predict(ser: pd.Series, p: int, d: int, q: int, train_size: float = 0.66) -> dict:
    """Evaluate an ARIMA(p, d, q) model on ``ser`` with walk-forward validation.

    The first ``int(len(ser) * train_size)`` values seed the history. For each
    remaining (held-out) value a fresh model is fitted on the history, one
    forecast is taken, and the true observation is then appended to the
    history before the next step.

    Args:
        ser: Series to model. Only ``ser.values`` is used; the index is ignored.
        p: First component of the model ``order``.
        d: Second component of the model ``order``.
        q: Third component of the model ``order``.
        train_size: Fraction of the series used as the initial history.

    Returns:
        A dict with keys ``'test'`` (the held-out values), ``'predictions'``
        (one forecast per held-out value, in order) and ``'MSE'`` (mean squared
        error between the two).

    Raises:
        ValueError: If the split leaves the training or the test part empty,
            or if a forecast comes back as ``None``.
    """
    X = ser.values
    size = int(len(X) * train_size)
    train, test = X[:size], X[size:]

    # Fail early with a clear message: an empty training part cannot be fitted
    # and an empty test part cannot be scored.
    if len(train) == 0 or len(test) == 0:
        raise ValueError(
            f"train_size={train_size!r} splits {len(X)} observations into "
            f"{len(train)} training and {len(test)} test values; both parts must be non-empty."
        )

    history = list(train)
    predictions = []

    for obs in tqdm(test):
        # Refit from scratch on everything observed so far.
        # NOTE(review): the `disp` keyword belongs to the legacy ARIMA class
        # imported at the top of this file; confirm the installed statsmodels
        # version still provides it.
        model = ARIMA(history, order=(p, d, q))
        model_fit = model.fit(disp=False)  # disp=False means no convergence output
        output = model_fit.forecast()
        yhat = output[0]  # first element of forecast()'s return value is the prediction
        if yhat is None:
            raise ValueError("Hessian could not be inverted and no value could be forecasted.") # maybe try in R?
        predictions.append(yhat)
        # Reveal the true value only after forecasting it.
        history.append(obs)

    error = mean_squared_error(test, predictions)  # lower is better; best recorded so far: 14545 (Lorenzo)
    print(f'Test MSE: {error:.3f}')
    return {'test': test, 'predictions': predictions, 'MSE': error}

def plot_arima_predict(arima_predict_result: dict) -> None:
    """Plot the held-out observations against the model's predictions.

    Args:
        arima_predict_result: Mapping with ``"test"`` and ``"predictions"``
            sequences, such as the dict returned by ``arima_predict``.

    Returns:
        None. The figure is displayed via ``plt.show()``.
    """
    # Use an explicit Axes so the plot does not depend on pyplot's implicit
    # current figure.
    _, ax = plt.subplots()
    ax.plot(arima_predict_result["test"], label="test")
    ax.plot(arima_predict_result["predictions"], color='red', label="predictions")
    # Label everything so the figure can be read on its own.
    ax.set_xlabel("step in test period")
    ax.set_ylabel("value")
    ax.set_title("ARIMA predictions vs. test data")
    ax.legend()
    plt.show()

In [None]:
# Walk-forward ARIMA(1, 1, 0) evaluation on the Pesticides column.
# The order (p, d, q) belongs to arima_predict, not to plot_arima_predict.
plot_arima_predict(arima_predict(df.Pesticides, 1, 1, 0))

In [None]:
# Walk-forward ARIMA(7, 1, 0) evaluation on a 7-observation rolling mean of
# Pesticides. The last 3 rows are dropped before smoothing and the first 7
# smoothed values are skipped afterwards.
# NOTE(review): rolling(7) leaves only 6 leading NaNs, so skipping 7 also
# discards one valid value — confirm this is intended.
plot_arima_predict(
    arima_predict(
        df["Pesticides"].iloc[:-3].rolling(window=7).mean().iloc[7:],
        p=7,
        d=1,
        q=0,
    )
)