In [None]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt

import cufflinks as cf
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

cf.go_offline()

%matplotlib inline
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error

from neuralforecast.auto import AutoLSTM
from neuralforecast.tsdataset import TimeSeriesDataset

from datetime import datetime, timedelta

In [None]:
from statsforecast import StatsForecast
from statsforecast.models import AutoARIMA, AutoCES, AutoETS
from statsforecast.arima import arima_string

In [None]:
class FixedAUTOModelProcessor:
    """Fit one StatsForecast auto model once and score a single long forecast.

    The model is fitted on the data available up to the first reference date
    and then asked to predict far enough ahead to cover ``h`` weeks past the
    last reference date. Nothing is re-fitted in between.

    Typical call order::

        create_training_dfs -> create_fixed_model -> create_graph
        -> create_metrics -> create_metrics_df -> create_display_df

    Parameters
    ----------
    overall_df : pandas.DataFrame
        Full series; its index is sliced with the reference dates and copied
        into the ``ds`` column, so it is expected to be a sorted date index.
    dates : list of str
        Reference dates formatted as ``"%Y-%m-%d"``.
    model_type : str
        One of ``"ARIMA"``, ``"CES"`` or ``"ETS"``.
    """

    # model_type -> name of the column holding that model's point forecast.
    _FORECAST_COLUMNS = {"ARIMA": "AutoARIMA", "CES": "CES", "ETS": "AutoETS"}

    def __init__(self, overall_df, dates, model_type = "ARIMA"):
        self.overall_df = overall_df
        self.overall_df_value_col = "value"
        self.dates = dates
        self.model_type = model_type

        self.dfs = []
        self.sf = None
        self.forecast = None
        self.plotting_df = pd.DataFrame()

        self.mae = None
        self.mse = None
        self.mape = None
        self.nmse = None

        self.metrics_df = pd.DataFrame(columns = ["Reference Date", "MAE", "MSE", "MAPE", "NMSE"])
        self.display_df = pd.DataFrame(columns = ["Reference Date", "Target End Date", "Prediction"])

    def _forecast_column(self):
        """Return the forecast column name for ``self.model_type``.

        Raises
        ------
        ValueError
            If ``self.model_type`` is not a supported model type.
        """
        try:
            return self._FORECAST_COLUMNS[self.model_type]
        except KeyError:
            supported = ", ".join(sorted(self._FORECAST_COLUMNS))
            raise ValueError(
                f"Unsupported model_type {self.model_type!r}; expected one of: {supported}"
            ) from None

    def _build_model(self, season_length):
        """Instantiate the StatsForecast model matching ``self.model_type``."""
        self._forecast_column()  # validates model_type with a clear error
        if self.model_type == "ARIMA":
            return AutoARIMA(season_length=season_length)
        if self.model_type == "CES":
            return AutoCES(season_length=season_length)
        # ETS: season_length is intentionally not forwarded, matching the
        # original configuration (model string "AZN").
        return AutoETS(model = "AZN")

    def create_training_dfs(self, value_col):
        """Build one training frame per reference date.

        Each frame holds the rows of ``overall_df`` up to and including the
        reference date, with ``value_col`` renamed to ``y`` plus the ``ds`` and
        ``unique_id`` columns. ``self.dfs`` is rebuilt from scratch so that
        re-running the cell does not append duplicates.
        """
        self.overall_df_value_col = value_col
        training_frames = []
        for cutoff in self.dates:
            # .copy() so the new columns are written to an independent frame
            # rather than to a slice of overall_df (SettingWithCopyWarning).
            frame = self.overall_df.loc[:cutoff].copy()
            frame["ds"] = frame.index
            frame["unique_id"] = "series_1"
            training_frames.append(frame.rename(columns = {value_col: "y"}))
        self.dfs = training_frames

    def create_fixed_model(self, h, freq, season_length, model_name):
        """Fit on the first training frame and forecast through the last date + ``h`` weeks.

        Parameters
        ----------
        h : int
            Weeks to forecast past the last reference date.
        freq : str
            Frequency string handed to ``StatsForecast``.
        season_length : int
            Seasonal period for ARIMA / CES (not used for ETS).
        model_name : str
            Only referenced by the disabled checkpoint line below.

        Raises
        ------
        ValueError
            If no training frames exist yet or ``model_type`` is unsupported.
        """
        if not self.dfs:
            raise ValueError("No training data: call create_training_dfs() first.")

        self.sf = StatsForecast(models=[self._build_model(season_length)], freq = freq)
        self.sf.fit(self.dfs[0])

        start_date = datetime.strptime(self.dates[0], "%Y-%m-%d")
        ending_date = datetime.strptime(self.dates[-1], "%Y-%m-%d") + timedelta(weeks = h)

        # The horizon is counted in whole weeks, i.e. this assumes a weekly
        # `freq` -- TODO confirm if other frequencies are ever used.
        prediction_horizon = abs((ending_date - start_date).days) // 7

        self.forecast = self.sf.predict(h = prediction_horizon).set_index("ds")

        #self.sf.save(path = f"./{self.model_type}/fixed_models/{model_name}.ckpt")

    def create_graph(self):
        """Plot the observed series together with the fixed model's forecast.

        The plotting frame uses ``overall_df``'s index, so forecast points that
        fall outside that index are not drawn.
        """
        plotting_df = pd.DataFrame(index = self.overall_df.index)
        plotting_df["Real Data"] = self.overall_df[self.overall_df_value_col]
        plotting_df[f"Fixed {self.model_type} Model"] = self.forecast[self._forecast_column()]
        self.plotting_df = plotting_df

        self.plotting_df.iplot(xTitle = "Date", yTitle = "Count", title = f"Fixed Parameter {self.model_type} Predictions")

    def create_metrics(self):
        """Compute MAE, MSE, MAPE and NMSE (MSE / variance of actuals) for the forecast.

        Raises ``KeyError`` if a forecast date is missing from ``overall_df``.
        """
        predicted = self.forecast[self._forecast_column()]
        actual = self.overall_df[self.overall_df_value_col].loc[self.forecast.index]

        self.mae = mean_absolute_error(actual, predicted)
        self.mse = mean_squared_error(actual, predicted)
        self.mape = mean_absolute_percentage_error(actual, predicted)
        self.nmse = self.mse/np.var(actual)

    def create_metrics_df(self):
        """Store the current metrics as a one-row frame keyed by the first reference date.

        The frame is rebuilt on each call, so re-running does not add duplicate rows.
        """
        self.metrics_df = pd.DataFrame(
            [[self.dates[0], self.mae, self.mse, self.mape, self.nmse]],
            columns = ["Reference Date", "MAE", "MSE", "MAPE", "NMSE"],
        )

    def create_display_df(self):
        """Tabulate the forecast as (reference date, target end date, prediction) rows.

        The prediction is read by column name rather than by position, so the
        result does not depend on whether the forecast frame also carries a
        ``unique_id`` column.
        """
        predictions = self.forecast[self._forecast_column()]
        records = [
            (self.dates[0], target_end_date, value)
            for target_end_date, value in predictions.items()
        ]
        self.display_df = pd.DataFrame(
            records, columns = ["Reference Date", "Target End Date", "Prediction"]
        )

In [None]:
updated_df = pd.read_csv("https://raw.githubusercontent.com/cdcepi/FluSight-forecast-hub/refs/heads/main/target-data/target-hospital-admissions.csv")

In [None]:
updated_df = updated_df[updated_df["location_name"] == "US"]
updated_df = updated_df[["date", "value"]]
updated_df["date"] = pd.to_datetime(updated_df["date"])
updated_df.set_index("date", inplace = True)
updated_df.sort_values(by = "date", inplace = True)

In [None]:
# Interactive line chart of the prepared US series.
updated_df.iplot()

In [None]:
# Fixed-parameter run: the model is fitted once on data up to the first date
# and forecasts across all of the listed reference dates.
Processor = FixedAUTOModelProcessor(overall_df = updated_df, dates = ["2024-10-05", "2024-10-19", "2024-11-02", "2024-11-16", "2024-12-07"], model_type = "ARIMA")

In [None]:
# Sanity check: the reference dates stored on the processor.
Processor.dates

In [None]:
# Still an empty list at this point; create_training_dfs has not been called yet.
Processor.dfs

In [None]:
# Build one training frame per reference date (rows up to and including that date).
Processor.create_training_dfs(value_col = "value")

In [None]:
# Fit on the first training frame and forecast through the last date + h weeks.
# season_length = 52 presumably encodes yearly seasonality for weekly data -- confirm.
Processor.create_fixed_model(h = 4, freq = "W-SAT", season_length = 52, model_name = "testing")

In [None]:
# Inspect the training frame for the second reference date.
Processor.dfs[1]

In [None]:
# Forecast frame, indexed by target date ("ds").
Processor.forecast

In [None]:
# Observed series vs. the fixed model's forecast.
Processor.create_graph()

In [None]:
# Compute MAE / MSE / MAPE / NMSE over the forecast dates.
Processor.create_metrics()

In [None]:
# Copy the metrics into the summary table.
Processor.create_metrics_df()

In [None]:
Processor.metrics_df

In [None]:
# One row per forecast date: reference date, target end date, prediction.
Processor.create_display_df()

In [None]:
Processor.display_df

In [None]:
class UpdatingAUTOModelProcessor:
    """Re-fit a StatsForecast auto model at every reference date and score each forecast.

    For each reference date the model is fitted on the data available up to
    that date and produces an ``h``-step forecast, so there is one forecast
    (and one set of metrics) per reference date.

    Typical call order::

        create_training_dfs -> create_models -> create_graph
        -> calculate_metrics -> create_metrics_df -> create_display_df

    Parameters
    ----------
    overall_df : pandas.DataFrame
        Full series; its index is sliced with the reference dates and copied
        into the ``ds`` column, so it is expected to be a sorted date index.
    dates : list of str
        Reference dates formatted as ``"%Y-%m-%d"``.
    model_type : str
        One of ``"ARIMA"``, ``"CES"`` or ``"ETS"``.
    """

    # model_type -> name of the column holding that model's point forecast.
    _FORECAST_COLUMNS = {"ARIMA": "AutoARIMA", "CES": "CES", "ETS": "AutoETS"}

    def __init__(self, overall_df, dates, model_type = "ARIMA"):
        self.overall_df = overall_df
        self.overall_df_value_col = "value"
        self.dates = dates
        self.model_type = model_type

        self.dfs = []
        self.sf = None
        self.forecasts = []
        self.plotting_df = pd.DataFrame()

        self.maes = []
        self.mses = []
        self.mapes = []
        self.nmses = []

        self.metrics_df = pd.DataFrame(columns = ["Reference Date", "MAE", "MSE", "MAPE", "NMSE"])
        self.display_df = pd.DataFrame(columns = ["Reference Date", "Target End Date", "Prediction"])

    def _forecast_column(self):
        """Return the forecast column name for ``self.model_type``.

        Raises
        ------
        ValueError
            If ``self.model_type`` is not a supported model type.
        """
        try:
            return self._FORECAST_COLUMNS[self.model_type]
        except KeyError:
            supported = ", ".join(sorted(self._FORECAST_COLUMNS))
            raise ValueError(
                f"Unsupported model_type {self.model_type!r}; expected one of: {supported}"
            ) from None

    def _build_model(self, season_length):
        """Instantiate the StatsForecast model matching ``self.model_type``."""
        self._forecast_column()  # validates model_type with a clear error
        if self.model_type == "ARIMA":
            return AutoARIMA(season_length=season_length)
        if self.model_type == "CES":
            return AutoCES(season_length=season_length)
        # ETS: season_length is intentionally not forwarded, matching the
        # original configuration (model string "AZN").
        return AutoETS(model = "AZN")

    def create_training_dfs(self, value_col):
        """Build one training frame per reference date.

        Each frame holds the rows of ``overall_df`` up to and including the
        reference date, with ``value_col`` renamed to ``y`` plus the ``ds`` and
        ``unique_id`` columns. ``self.dfs`` is rebuilt from scratch so that
        re-running the cell does not append duplicates.
        """
        self.overall_df_value_col = value_col
        training_frames = []
        for cutoff in self.dates:
            # .copy() so the new columns are written to an independent frame
            # rather than to a slice of overall_df (SettingWithCopyWarning).
            frame = self.overall_df.loc[:cutoff].copy()
            frame["ds"] = frame.index
            frame["unique_id"] = "series_1"
            training_frames.append(frame.rename(columns = {value_col: "y"}))
        self.dfs = training_frames

    def create_models(self, h, freq, season_length, model_names = None):
        """Fit and forecast ``h`` steps ahead for every training frame.

        Parameters
        ----------
        h : int
            Forecast horizon, in steps of ``freq``.
        freq : str
            Frequency string handed to ``StatsForecast``.
        season_length : int
            Seasonal period for ARIMA / CES (not used for ETS).
        model_names : list of str, optional
            Only referenced by the disabled checkpoint line below.

        ``self.forecasts`` is rebuilt on each call so re-running does not
        accumulate stale forecasts.
        """
        self.sf = StatsForecast(models=[self._build_model(season_length)], freq = freq)

        forecasts = []
        for training_frame in self.dfs:
            forecasts.append(self.sf.forecast(df = training_frame, h = h).set_index("ds"))
            #self.sf.save(path = f"./{self.model_type}/updating_models/{model_names[i]}.ckpt")
        self.forecasts = forecasts

    def create_graph(self):
        """Plot the observed series with one forecast line per reference date.

        The plotting frame uses ``overall_df``'s index, so forecast points that
        fall outside that index are not drawn.
        """
        column = self._forecast_column()
        plotting_df = pd.DataFrame(index = self.overall_df.index)
        plotting_df["Real Data"] = self.overall_df[self.overall_df_value_col]

        for reference_date, forecast in zip(self.dates, self.forecasts):
            english_date = datetime.strptime(reference_date, "%Y-%m-%d").strftime("%B %d, %Y")
            plotting_df[f"{english_date} Model"] = forecast[column]
        self.plotting_df = plotting_df

        # Title previously read "Fixed Parameter ...", copied from the fixed-model
        # class; these forecasts come from models re-fitted at every date.
        self.plotting_df.iplot(xTitle = "Date", yTitle = "Count", title = f"Updating Parameter {self.model_type} Predictions")

    def calculate_metrics(self):
        """Compute MAE, MSE, MAPE and NMSE (MSE / variance of actuals) per forecast.

        The metric lists are reset first, so they always line up one-to-one
        with ``self.forecasts``. Raises ``KeyError`` if a forecast date is
        missing from ``overall_df``.
        """
        column = self._forecast_column()
        observed = self.overall_df[self.overall_df_value_col]

        self.maes, self.mses, self.mapes, self.nmses = [], [], [], []
        for forecast in self.forecasts:
            actual = observed.loc[forecast.index]
            predicted = forecast[column]

            mse = mean_squared_error(actual, predicted)
            self.maes.append(mean_absolute_error(actual, predicted))
            self.mses.append(mse)
            self.mapes.append(mean_absolute_percentage_error(actual, predicted))
            self.nmses.append(mse/np.var(actual))

    def create_metrics_df(self):
        """Build the per-reference-date metrics table from the metric lists.

        The frame is rebuilt on each call, so re-running does not add duplicate rows.
        """
        self.metrics_df = pd.DataFrame({
            "Reference Date": list(self.dates),
            "MAE": self.maes,
            "MSE": self.mses,
            "MAPE": self.mapes,
            "NMSE": self.nmses,
        })

    def create_display_df(self):
        """Tabulate every forecast as (reference date, target end date, prediction) rows.

        Predictions are read by column name rather than by position, so the
        result does not depend on whether the forecast frames also carry a
        ``unique_id`` column.
        """
        column = self._forecast_column()
        records = [
            (reference_date, target_end_date, value)
            for reference_date, forecast in zip(self.dates, self.forecasts)
            for target_end_date, value in forecast[column].items()
        ]
        self.display_df = pd.DataFrame(
            records, columns = ["Reference Date", "Target End Date", "Prediction"]
        )

In [None]:
updated_df = pd.read_csv("https://raw.githubusercontent.com/cdcepi/FluSight-forecast-hub/refs/heads/main/target-data/target-hospital-admissions.csv")

In [None]:
updated_df = updated_df[updated_df["location_name"] == "US"]
updated_df = updated_df[["date", "value"]]
updated_df["date"] = pd.to_datetime(updated_df["date"])
updated_df.set_index("date", inplace = True)
updated_df.sort_values(by = "date", inplace = True)

In [None]:
UProcessor = UpdatingAUTOModelProcessor(overall_df = updated_df, dates = ["2024-10-05", "2024-10-19", "2024-11-02", "2024-11-16", "2024-12-07"], model_type = "ETS")

In [None]:
UProcessor.create_training_dfs(value_col = "value")

In [None]:
UProcessor.create_models(h = 4, freq = "W-SAT", season_length = 52, model_name = "test")

In [None]:
UProcessor.forecasts[4]

In [None]:
UProcessor.create_graph()

In [None]:
UProcessor.calculate_metrics()

In [None]:
UProcessor.create_metrics_df()

In [None]:
UProcessor.create_display_df()

In [None]:
UProcessor.metrics_df

In [None]:
UProcessor.display_df

In [None]:
# Show the docstring of StatsForecast.save (needed for the disabled checkpoint
# lines in the processor classes). Uses help() instead of the IPython-only `?`
# suffix so the cell is valid Python when the notebook is exported as a script.
help(StatsForecast.save)