# Import necessary libraries

In [None]:
#importing important libraries
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt 
import seaborn as sns

# Read the data

Read the input hospital data and then set the index to the 'Date' column.

Aggregate the holiday columns of the 3 German states into a single School holiday column and a single Public holiday column.

Train Test Split for Model Preparation

In [None]:
# Load the processed dataset and index it by the parsed 'Date' column
# (dates in the CSV are written day-month-year).
hospital_raw = pd.read_csv('ProcessedDataset.csv')
hospital_raw['Date'] = pd.to_datetime(hospital_raw['Date'], format='%d-%m-%Y')
hospital_raw = hospital_raw.set_index('Date')

# Collapse the three per-state flags into one column each by taking the
# row-wise maximum across the state columns.
holidays_S = hospital_raw[["S_BW", "S_H", "S_RP"]].max(axis=1).to_frame('School_holiday')
holidays_P = hospital_raw[["P_BW", "P_H", "P_RP"]].max(axis=1).to_frame('Public_holiday')

# Combined holiday frame: public-holiday column first, then school-holiday.
holidays = holidays_P.assign(School_holiday=holidays_S['School_holiday'])

# Keep only the occupancy series (still indexed by date) for the rest of the notebook.
df = hospital_raw[['Occupancy']]

Train Test Split

In [None]:
# Date windows expressed once as label slices on the date index.
train_window = slice('20080101', '20121231')
eval_window = slice('20130101', '20130130')
# NOTE(review): this window starts on 2013-01-30, the same day eval_window
# ends, so that date appears in both test1 and test2 - confirm this is intended.
remainder_window = slice('20130130', '20131231')

# Occupancy series split by window; test and test1 cover the same window.
train = df.loc[train_window]
test = df.loc[eval_window]
test1 = df.loc[eval_window]
test2 = df.loc[remainder_window]

# Holiday flags over the same training and evaluation windows.
holidays_train = holidays.loc[train_window]
holidays_test = holidays.loc[eval_window]

In [None]:
# Draw the three date segments on a single axis, one colour per segment.
fig, ax = plt.subplots(figsize=(12, 5))
for segment, segment_colour in ((train, 'g'), (test1, 'b'), (test2, 'r')):
    segment.plot(ax=ax, color=segment_colour, legend=None)

# Dashed vertical markers at both ends of the blue (test1) window.
for boundary in ('2013-01-01', '2013-01-30'):
    ax.axvline(x=boundary, linewidth=1.1, color='black', linestyle='--')

# Shade each date range with the colour of the line drawn over it.
for span_start, span_end, span_colour in (
    ('2013-01-30', '2013-12-31', 'r'),
    ('2013-01-01', '2013-01-30', 'b'),
    ('2008-01-01', '2012-12-31', 'g'),
):
    ax.axvspan(span_start, span_end, facecolor=span_colour, alpha=0.3)

ax.set_title('Train and Evaluation Split')
ax.set_xlabel('')
ax.set_ylabel('Occupancy')
fig.savefig('train_test_split.png')

## Yearly Seasonality

In [None]:
# A = Train_df_A.plot(x='ds',y='y',label='Admissions',legend=True)
# D= Train_df_D.plot(x='ds',y='y',label='Discharges',legend=True,ax=A)
# Train_df_O.plot(x='ds',y='y',label='Occupancy',legend=True,ax=D,figsize=(16,6))
# plt.xlabel('')
# plt.savefig('Yearly_seasonality.png')

# Individual Plots

#### ARIMA Occupancy

In [None]:
# Hard-coded forecast values for the occupancy-only model (30 entries),
# wrapped in a single-column frame that shares the index of `test`.
ar_o_values = np.array([
    114.52319146, 116.79080668, 115.9826378 , 113.40991486,
    112.95176448, 113.268716  , 113.43769586, 113.91884535,
    114.21797002, 114.05411974, 113.86233834, 113.77674961,
    113.72470498, 113.74753996, 113.81206347, 113.83688908,
    113.82687624, 113.80854327, 113.78670402, 113.77178524,
    113.76919137, 113.77027805, 113.76894436, 113.76494484,
    113.75841658, 113.75074066, 113.7439766 , 113.73845609,
    113.73350018, 113.72862268,
])
ar_o_pred = pd.DataFrame(ar_o_values, index=test.index)

# Forecast first, actuals second - the legend labels below rely on this order.
fig, ax = plt.subplots(figsize=(8, 4))
for frame in (ar_o_pred, test):
    frame.plot(ax=ax)
ax.set_xlabel('Date')
ax.set_ylabel('Occupancy')
ax.legend(["Predictions", "Test"])
# NOTE(review): the section heading says ARIMA while this title says ARIMAX - confirm which is meant.
ax.set_title('ARIMAX Occupancy Test vs Predictions Plot')
# Saving is currently disabled:
# plt.savefig('Model1_ARIMA_O.png')

#### ARIMA (Admissions + Discharges)

In [None]:
# Read the saved 'Occupancy' column from O_df_ARIMA.csv and give it the
# same index as `test` so both frames share one x-axis.
ar_ad_pred = pd.read_csv('O_df_ARIMA.csv', usecols=['Occupancy']).set_index(test.index)

# Forecast first, actuals second - the legend labels below rely on this order.
fig, ax = plt.subplots(figsize=(8, 4))
for frame in (ar_ad_pred, test):
    frame.plot(ax=ax)
ax.set_xlabel('Date')
ax.set_ylabel('Occupancy')
ax.legend(["Predictions", "Test"])
ax.set_title('ARIMAX Aggregated Model Test vs Predictions Plot')

#### Fb Prophet Occupancy

In [None]:
# Read the saved 'yhat' column from forecast_fbprophet.csv and give it the
# same index as `test` so both frames share one x-axis.
fb_o_pred = pd.read_csv('forecast_fbprophet.csv', usecols=['yhat']).set_index(test.index)

# Forecast first, actuals second - the legend labels below rely on this order.
fig, ax = plt.subplots(figsize=(8, 4))
for frame in (fb_o_pred, test):
    frame.plot(ax=ax)
ax.set_xlabel('Date')
ax.set_ylabel('Occupancy')
ax.legend(["Predictions", "Test"])
ax.set_title('Prophet Occupancy Test vs Predictions Plot')
fig.savefig('Model2_Prophet_O.png')

#### FbProphet (Admissions + Discharges)

In [None]:
# Read the saved 'Occupancy' column from forecast_ad_fbprophet.csv and give
# it the same index as `test` so both frames share one x-axis.
fb_ad_pred = pd.read_csv('forecast_ad_fbprophet.csv', usecols=['Occupancy']).set_index(test.index)

# Forecast first, actuals second - the legend labels below rely on this order.
fig, ax = plt.subplots(figsize=(8, 4))
for frame in (fb_ad_pred, test):
    frame.plot(ax=ax)
ax.set_xlabel('Date')
ax.set_ylabel('Occupancy')
ax.legend(["Predictions", "Test"])
ax.set_title('Prophet Aggregated Model Test vs Predictions Plot')

# Comparison Plots

Comparison plot of the ARIMA Occupancy vs ARIMA (Admissions + Discharges) models

In [None]:
# Each entry pairs a frame with its line style and legend label, so the
# plotting order and the legend order cannot drift apart.
plot_spec = [
    (ar_o_pred, {}, "ARIMA Predictions"),
    (ar_ad_pred, {'color': 'orange'}, "ARIMA A+D Predictions"),
    (test, {'color': 'r', 'linewidth': 4}, "Actual"),
]

fig, ax = plt.subplots(figsize=(8, 4))
for frame, line_style, _ in plot_spec:
    frame.plot(ax=ax, **line_style)
ax.set_xlabel('Date')
ax.set_ylabel('Occupancy')
ax.legend([label for _, _, label in plot_spec]);

Comparison plot of the FBProphet Occupancy vs FBProphet (Admissions + Discharges) models

In [None]:
# Each entry pairs a frame with its line style and legend label, so the
# plotting order and the legend order cannot drift apart.
plot_spec = [
    (fb_o_pred, {}, "FB Predictions"),
    (fb_ad_pred, {'color': 'orange'}, "FB A+D Predictions"),
    (test, {'color': 'r', 'linewidth': 4}, "Actual"),
]

fig, ax = plt.subplots(figsize=(8, 4))
for frame, line_style, _ in plot_spec:
    frame.plot(ax=ax, **line_style)
ax.set_xlabel('Date')
ax.set_ylabel('Occupancy')
ax.legend([label for _, _, label in plot_spec]);

Comparison plot between FBProphet Occupancy vs ARIMA Occupancy Models

In [None]:
# Each entry pairs a frame with its line style and legend label, so the
# plotting order and the legend order cannot drift apart.
plot_spec = [
    (fb_o_pred, {}, "FB Predictions"),
    (ar_o_pred, {'color': 'orange'}, "ARIMA Predictions"),
    (test, {'color': 'r', 'linewidth': 4}, "Actual"),
]

fig, ax = plt.subplots(figsize=(8, 4))
for frame, line_style, _ in plot_spec:
    frame.plot(ax=ax, **line_style)
ax.set_xlabel('Date')
ax.set_ylabel('Occupancy')
ax.legend([label for _, _, label in plot_spec]);

Comparison plot of the FBProphet (Admissions + Discharges) vs ARIMA (Admissions + Discharges) models

In [None]:
# Each entry pairs a frame with its line style and legend label, so the
# plotting order and the legend order cannot drift apart.
plot_spec = [
    (fb_ad_pred, {}, "FB A+D Predictions"),
    (ar_ad_pred, {'color': 'orange'}, "ARIMA A+D Predictions"),
    (test, {'color': 'r', 'linewidth': 4}, "Actual"),
]

fig, ax = plt.subplots(figsize=(8, 4))
for frame, line_style, _ in plot_spec:
    frame.plot(ax=ax, **line_style)
ax.set_xlabel('Date')
ax.set_ylabel('Occupancy')
ax.legend([label for _, _, label in plot_spec]);

Combined plot of all 4 models

In [None]:
# All four forecasts plus the actual series on one axis. Each entry pairs a
# frame with its line style and legend label; the list order is the order in
# which the lines are drawn and therefore the order of the legend entries.
plot_spec = [
    (fb_o_pred, {}, "FB Predictions"),
    (ar_o_pred, {'color': 'orange'}, "ARIMA Predictions"),
    (ar_ad_pred, {'color': 'green'}, "ARIMA A+D Predictions"),
    (test, {'color': 'r', 'linewidth': 4}, "Actual"),
    (fb_ad_pred, {'color': 'blue', 'linewidth': 4}, "FB A+D Predictions"),
]

fig, ax = plt.subplots(figsize=(10, 6))
for frame, line_style, _ in plot_spec:
    frame.plot(ax=ax, **line_style)
ax.set_xlabel('Date')
ax.set_ylabel('Occupancy')
ax.legend([label for _, _, label in plot_spec]);

# MAPE Bar Plot for 4 models 

In [None]:
# MAPE scores per model; the numbers are hard-coded literals in this cell.
MAPE = pd.DataFrame(
    {
        'Model': ['ARIMAX Occupancy', 'Prophet Occupancy', 'ARIMAX Aggregated Model', 'Prophet Aggregated Model'],
        'Mape_Test': [10.61, 22.18, 16.45, 5.32],
        'Mape_Train': [25.77, 10.54, 53.19, 15.64],
    },
    columns=['Model', 'Mape_Test', 'Mape_Train'],
)

# Bar chart of the test-set MAPE only; Mape_Train is not plotted here.
plt.figure(figsize=(10, 5))
bar = sns.barplot(data=MAPE, x="Model", y="Mape_Test", palette='Set1')
# Disabled per-bar value annotation:
# for index, row in MAPE.iterrows():
#   bar.text( row.Model, row.Mape_Test, row.Mape_Test, color='black', ha="center")
# Writes the axis' current tick labels back onto it unchanged.
# NOTE(review): presumably a leftover hook for rotating/formatting labels - confirm.
bar.set_xticklabels(bar.get_xticklabels())
bar.set_ylabel('MAPE')
bar.set_title('MAPE values comparison of 4 models')

# Holidays Effect Check on 2005 data

In [None]:
# Three separate figures from 2005data.csv, each plotting one measure
# against the 'Month' column. Every plot call without `ax=` opens a new
# figure, so each title is set on the axes that call returns.
data = pd.read_csv("2005data.csv")
ax = data.plot(x='Month', y='Occupancy')
ax.set_title('Occupancy plot for 2005')
data.plot(x='Month', y='Admissions').set_title('Admissions plot for 2005')
data.plot(x='Month', y='Discharges').set_title('Discharges plot for 2005')