In [1]:
import pandas as pd
%matplotlib inline
import cufflinks as cf
from statsmodels.tsa.stattools import acf, pacf, kpss
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

cf.go_offline()

In [2]:
entire_df = pd.read_csv('target-hospital-admissions.csv')
US_df = entire_df[entire_df["location_name"] == "US"]
US_df = US_df[['date','value']] # Since all the values are in the US, only need date and value columns
US_df['date'] = pd.to_datetime(US_df['date']) # Ensuring that the strings in date column are DateTime objects
US_df.set_index('date', inplace=True) # Eliminate the leading entry column by setting date as the index column

In [25]:
training_df1 = US_df.loc["2023-11-11": "2023-12-02"]
p_values = [1, 2, 3, 4]
d_values = [0, 1]
q_values = [1, 2, 3, 4]

In [30]:
#First step
order_df1 = pd.DataFrame(columns=["AIC", "VALUE", "DIFFERENCE"])

real_forecast = US_df["value"].loc["2023-12-09"]

import warnings
warnings.filterwarnings("ignore")

for p in p_values:
    for d in d_values:
        for q in q_values:
            index = f"({p},{d},{q})"
            
            #Creating model based on looped p, d, or q
            model = ARIMA(training_df1["value"], order = (p, d, q))
            fit = model.fit()
            
        
            
            #Forecasting for forecast MSE, MAE, MAPE
            forecast = fit.forecast(steps = 1)
            new_index = ["2023-12-09"]
            forecast.index = new_index # Changing index and ensuring it is DateTime object
            forecast.index = pd.to_datetime(forecast.index)
            
                        
            data = {"AIC": fit.aic, "VALUE": forecast.values[0],
            "DIFFERENCE": abs(forecast.values[0]-real_forecast)}
            
            order_df1.loc[index] = data

In [32]:
order_df1.sort_values(by="DIFFERENCE")

Unnamed: 0,AIC,VALUE,DIFFERENCE
"(4,1,3)",62.118277,5608.736786,1569.263214
"(4,1,2)",60.129687,5602.256237,1575.743763
"(4,1,4)",64.141783,5589.031975,1588.968025
"(3,1,2)",58.132086,5582.040564,1595.959436
"(4,1,1)",58.143148,5578.919022,1599.080978
"(3,1,3)",60.130842,5578.851454,1599.148546
"(3,1,4)",62.156282,5574.26688,1603.73312
"(3,1,1)",56.16881,5559.242662,1618.757338
"(3,0,1)",67.549985,4428.166386,2749.833614
"(4,0,1)",68.07902,4278.839966,2899.160034


In [55]:
#Second step
training_df2 = training_df1.loc["2023-11-18": "2023-12-02"]
training_df2.loc[pd.to_datetime("2023-12-09")] = {"value": 5608}
training_df2.sort_index(inplace = True)



order_df2 = pd.DataFrame(columns=["AIC", "VALUE", "DIFFERENCE"])

real_forecast = US_df["value"].loc["2023-12-16"]

import warnings
warnings.filterwarnings("ignore")

for p in p_values:
    for d in d_values:
        for q in q_values:
            index = f"({p},{d},{q})"
            
            #Creating model based on looped p, d, or q
            model = ARIMA(training_df2["value"], order = (p, d, q))
            fit = model.fit()
            
        
            
            #Forecasting for forecast MSE, MAE, MAPE
            forecast = fit.forecast(steps = 1)
            new_index = ["2023-12-16"]
            forecast.index = new_index # Changing index and ensuring it is DateTime object
            forecast.index = pd.to_datetime(forecast.index)
            
                        
            data = {"AIC": fit.aic, "VALUE": forecast.values[0],
            "DIFFERENCE": abs(forecast.values[0]-real_forecast)}
            
            order_df2.loc[index] = data

In [56]:
order_df2.sort_values(by="DIFFERENCE")

Unnamed: 0,AIC,VALUE,DIFFERENCE
"(2,1,1)",54.459663,6442.802183,3443.197817
"(2,1,2)",56.015769,6304.883039,3581.116961
"(1,1,1)",54.610938,6249.288899,3636.711101
"(2,1,3)",57.829235,6234.779228,3651.220772
"(2,1,4)",59.882773,6164.745004,3721.254996
"(1,1,4)",58.010729,6128.968949,3757.031051
"(3,1,4)",61.794722,6000.726043,3885.273957
"(3,1,3)",59.553785,5997.614491,3888.385509
"(4,1,4)",63.558038,5973.899285,3912.100715
"(1,1,3)",56.878213,5928.151645,3957.848355


In [51]:
#Third step
training_df3 = training_df2.loc["2023-11-25": "2023-12-09"]
training_df3.loc[pd.to_datetime("2023-12-16")] = {"value": 4770}




order_df3 = pd.DataFrame(columns=["AIC", "VALUE", "DIFFERENCE"])

real_forecast = US_df["value"].loc["2023-12-23"]

import warnings
warnings.filterwarnings("ignore")

for p in p_values:
    for d in d_values:
        for q in q_values:
            index = f"({p},{d},{q})"
            
            #Creating model based on looped p, d, or q
            model = ARIMA(training_df3["value"], order = (p, d, q))
            fit = model.fit()
            
        
            
            #Forecasting for forecast MSE, MAE, MAPE
            forecast = fit.forecast(steps = 1)
            new_index = ["2023-12-23"]
            forecast.index = new_index # Changing index and ensuring it is DateTime object
            forecast.index = pd.to_datetime(forecast.index)
            
                        
            data = {"AIC": fit.aic, "VALUE": forecast.values[0],
            "DIFFERENCE": abs(forecast.values[0]-real_forecast)}
            
            order_df3.loc[index] = data

In [59]:
order_df3.sort_values(by = "DIFFERENCE")

Unnamed: 0,AIC,VALUE,DIFFERENCE
"(4,1,4)",61.516952,5632.571289,9501.428711
"(4,1,3)",58.341933,5560.192294,9573.807706
"(2,1,1)",50.344366,5559.220778,9574.779222
"(3,1,4)",57.111451,5419.641546,9714.358454
"(1,1,1)",48.17217,5361.486395,9772.513605
"(2,1,3)",53.183423,5358.885046,9775.114954
"(1,1,2)",49.194721,5357.947865,9776.052135
"(3,1,3)",56.131199,5357.921797,9776.078203
"(2,1,4)",55.143486,5324.648651,9809.351349
"(1,1,4)",52.804327,5323.461598,9810.538402


In [60]:
#Third step
training_df4 = training_df3.loc["2023-12-02": "2023-12-16"]
training_df4.loc[pd.to_datetime("2023-12-23")] = {"value": 5632}




order_df4 = pd.DataFrame(columns=["AIC", "VALUE", "DIFFERENCE"])

real_forecast = US_df["value"].loc["2023-12-30"]

import warnings
warnings.filterwarnings("ignore")

for p in p_values:
    for d in d_values:
        for q in q_values:
            index = f"({p},{d},{q})"
            
            #Creating model based on looped p, d, or q
            model = ARIMA(training_df4["value"], order = (p, d, q))
            fit = model.fit()
            
        
            
            #Forecasting for forecast MSE, MAE, MAPE
            forecast = fit.forecast(steps = 1)
            new_index = ["2023-12-30"]
            forecast.index = new_index # Changing index and ensuring it is DateTime object
            forecast.index = pd.to_datetime(forecast.index)
            
                        
            data = {"AIC": fit.aic, "VALUE": forecast.values[0],
            "DIFFERENCE": abs(forecast.values[0]-real_forecast)}
            
            order_df4.loc[index] = data

In [61]:
order_df4.sort_values(by = "DIFFERENCE")

Unnamed: 0,AIC,VALUE,DIFFERENCE
"(3,1,3)",57.454676,6089.597418,14940.402582
"(3,1,4)",59.141816,5862.442637,15167.557363
"(1,1,4)",55.345671,5791.224259,15238.775741
"(3,0,2)",63.281431,5750.039627,15279.960373
"(4,1,1)",47.479606,5742.701739,15287.298261
"(2,0,1)",53.271571,5736.636387,15293.363613
"(3,1,1)",42.355167,5733.034046,15296.965954
"(2,1,1)",43.259082,5731.374512,15298.625488
"(3,0,1)",62.738999,5730.21686,15299.78314
"(4,1,2)",48.776079,5729.593502,15300.406498


In [62]:
forecast = pd.Series(data = [5608.736786, 6442.802183, 5632.571289, 6089.597418], index=pd.to_datetime(["2023-12-09", "2023-12-16", "2023-12-23", "2023-12-30"]))



arima_df = pd.DataFrame(index = training_df.index)
new_index = arima_df.index.append(forecast.index)
arima_df = arima_df.reindex(new_index)
arima_df["Training Data"] = training_df.iloc[:, 0]

arima_df["Real Data"] = US_df.loc[forecast.index]
arima_df["ARIMA Prediction"] = forecast
arima_df.sort_index(inplace=True)
arima_df

Unnamed: 0,Training Data,Real Data,ARIMA Prediction
2023-11-11,2695.0,,
2023-11-18,3422.0,,
2023-11-25,4240.0,,
2023-12-02,5752.0,,
2023-12-09,,7178.0,5608.736786
2023-12-16,,9886.0,6442.802183
2023-12-23,,15134.0,5632.571289
2023-12-30,,21030.0,6089.597418


In [65]:
arima_df.iplot(yTitle = "Count", xTitle = "Date", title = "ARIMA Prediction Using Single Point Approach")