In [None]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt

import cufflinks as cf
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

cf.go_offline()

%matplotlib inline
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error

from neuralforecast.auto import AutoLSTM
from neuralforecast.tsdataset import TimeSeriesDataset

from datetime import datetime, timedelta

In [None]:
class FixedModelLSTMProcessor:
    """Tune one AutoLSTM on the first training window and reuse it for every reference date.

    Intended call order (each step stores its result on the instance):
    ``create_training_dfs`` -> ``create_fixed_model`` -> ``create_graph`` /
    ``calculate_metrics`` -> ``create_metrics_df`` / ``create_display_df``.

    Every ``create_*`` / ``calculate_*`` step rebuilds its output from scratch,
    so re-running a notebook cell never leaves duplicated entries that would
    fall out of step with ``self.dates``.
    """

    def __init__(self, overall_df, dates):
        """Store the observed series and the reference dates.

        Args:
            overall_df: DataFrame with the full observed series. It is sliced
                with ``.loc[:date]`` and looked up by forecast dates, so its
                index must be compatible with the date strings in ``dates``.
            dates: Sequence of reference dates as ``"%Y-%m-%d"`` strings.
        """
        self.overall_df = overall_df
        self.overall_df_value_col = "value"
        self.dates = dates
        self.dfs = []
        self.forecasts = []
        self.plotting_df = pd.DataFrame()

        self.maes = []
        self.mses = []
        self.mapes = []
        self.nmses = []

        self.metrics_df = pd.DataFrame(columns = ["Reference Date", "MAE", "MSE", "MAPE", "NMSE"])
        self.display_df = pd.DataFrame(columns = ["Reference Date", "Target End Date", "Prediction"])

    def _english_date(self, i):
        """Return ``self.dates[i]`` reformatted as e.g. "October 05, 2024"."""
        return datetime.strptime(self.dates[i], "%Y-%m-%d").strftime("%B %d, %Y")

    def _display_metric(self, label, values):
        """Print one ``"<label> for <date> model: <value>"`` line per entry of ``values``."""
        for i, value in enumerate(values):
            print(f"{label} for {self._english_date(i)} model: {value}")

    def create_training_dfs(self, value_col):
        """Build one training frame per reference date and store them in ``self.dfs``.

        Each frame holds the rows of ``overall_df`` up to and including the
        reference date, with ``value_col`` renamed to ``y`` and the extra
        columns ``ds`` (copy of the index) and ``unique_id`` ("series_1").

        Args:
            value_col: Name of the column in ``overall_df`` holding the target.
        """
        self.overall_df_value_col = value_col
        # ``assign`` returns a new frame, so nothing is ever written onto a
        # slice of ``overall_df`` (no SettingWithCopyWarning, no aliasing).
        self.dfs = [
            self.overall_df.loc[:date]
            .assign(ds = lambda frame: frame.index, unique_id = "series_1")
            .rename(columns = {value_col: "y"})
            for date in self.dates
        ]

    def create_fixed_model(self, h, model_name):
        """Fit one AutoLSTM on the first training frame and forecast every window with it.

        Hyperparameters are tuned once (optuna backend) on ``self.dfs[0]``; the
        same fitted model then predicts from each training frame. Forecasts are
        stored in ``self.forecasts`` as single-column (column ``0``) frames
        indexed by the ``h`` weekly dates following the reference date. The
        model is saved to ``./AutoLSTM/fixed_models/<model_name>.ckpt``.

        Args:
            h: Forecast horizon, in weekly steps.
            model_name: File stem used for the saved checkpoint.
        """
        # Creating AutoLSTM model with hyperparameter tuning by the optuna
        # backend. This is based upon the first training dataframe only.
        initial_dataset, *_ = TimeSeriesDataset.from_df(self.dfs[0])
        LSTMmodel = AutoLSTM(h = h, backend = "optuna")
        LSTMmodel.fit(dataset = initial_dataset)

        # Start from an empty list so a re-run replaces earlier forecasts
        # instead of appending to them.
        self.forecasts = []
        for i, training_df in enumerate(self.dfs):
            dataset, *_ = TimeSeriesDataset.from_df(training_df)
            y_hat = LSTMmodel.predict(dataset = dataset)
            start_date = datetime.strptime(self.dates[i], "%Y-%m-%d")
            horizon_dates = [start_date + timedelta(weeks=j) for j in range(1, h+1)]

            fc = pd.DataFrame(y_hat.ravel(), index = pd.to_datetime(horizon_dates))
            self.forecasts.append(fc)

        LSTMmodel.save(path=f'./AutoLSTM/fixed_models/{model_name}.ckpt')

    def create_graph(self):
        """Plot the observed series together with every stored forecast.

        ``self.plotting_df`` is rebuilt on the index of ``overall_df``; each
        forecast is aligned to that index, so forecast dates that are absent
        from ``overall_df`` do not appear in the plot.
        """
        plotting_df = pd.DataFrame(index = self.overall_df.index)
        plotting_df["Real Data"] = self.overall_df[self.overall_df_value_col]

        for i, fc in enumerate(self.forecasts):
            plotting_df[f"{self._english_date(i)} Model"] = fc[0]

        self.plotting_df = plotting_df
        self.plotting_df.iplot(xTitle = "Date", yTitle = "Count", title = "Fixed Parameter LSTM Predictions")

    def calculate_metrics(self):
        """Compute MAE, MSE, MAPE and NMSE for every forecast.

        NMSE is the MSE divided by ``np.var`` of the observed values over the
        forecast dates. Results replace ``self.maes``, ``self.mses``,
        ``self.mapes`` and ``self.nmses``.

        Raises:
            KeyError: If a forecast date is not present in ``overall_df``
                (raised by the ``.loc`` lookup).
        """
        observed = self.overall_df[self.overall_df_value_col]
        maes, mses, mapes, nmses = [], [], [], []

        for fc in self.forecasts:
            # Look the observed values up once per forecast instead of once per metric.
            actual = observed.loc[fc.index]
            predicted = fc[0]

            mse = mean_squared_error(actual, predicted)
            maes.append(mean_absolute_error(actual, predicted))
            mses.append(mse)
            mapes.append(mean_absolute_percentage_error(actual, predicted))
            nmses.append(mse/np.var(actual))

        self.maes, self.mses, self.mapes, self.nmses = maes, mses, mapes, nmses

    def display_maes(self):
        """Print the mean absolute error of every forecast."""
        self._display_metric("Mean Absolute Error", self.maes)

    def display_mses(self):
        """Print the mean squared error of every forecast."""
        self._display_metric("Mean Squared Error", self.mses)

    def display_mapes(self):
        """Print the mean absolute percentage error of every forecast."""
        self._display_metric("Mean Absolute Percentage Error", self.mapes)

    def display_nmses(self):
        """Print the normalized mean squared error of every forecast."""
        self._display_metric("Normalized Mean Square Error", self.nmses)

    def create_metrics_df(self):
        """Rebuild ``self.metrics_df`` with one row of metrics per reference date.

        Raises:
            IndexError: If the metric lists are shorter than ``self.dates``
                (i.e. ``calculate_metrics`` has not been run).
        """
        rows = [
            [self.dates[i], self.maes[i], self.mses[i], self.mapes[i], self.nmses[i]]
            for i in range(len(self.dates))
        ]
        # Build the frame in one go rather than enlarging it row by row.
        self.metrics_df = pd.DataFrame(rows, columns = ["Reference Date", "MAE", "MSE", "MAPE", "NMSE"])

    def create_display_df(self):
        """Rebuild ``self.display_df`` with one row per (reference date, target date) prediction."""
        rows = [
            [self.dates[i], target_end_date, value]
            for i, fc in enumerate(self.forecasts)
            for target_end_date, value in fc[0].items()
        ]
        self.display_df = pd.DataFrame(rows, columns = ["Reference Date", "Target End Date", "Prediction"])
                
            

In [None]:
updated_df = pd.read_csv("https://raw.githubusercontent.com/cdcepi/FluSight-forecast-hub/refs/heads/main/target-data/target-hospital-admissions.csv")

In [None]:
updated_df = updated_df[updated_df["location_name"] == "US"]
updated_df = updated_df[["date", "value"]]
updated_df["date"] = pd.to_datetime(updated_df["date"])
updated_df.set_index("date", inplace = True)
updated_df.sort_values(by = "date", inplace = True)

In [None]:
# Plot the cleaned series.
updated_df.iplot()

In [None]:
# Fixed-model experiment: one processor over five reference dates.
Processor = FixedModelLSTMProcessor(overall_df = updated_df, dates = ["2024-10-05", "2024-10-19", "2024-11-02", "2024-11-16", "2024-12-07"])

In [None]:
Processor.dates

In [None]:
# Still the empty list from __init__ when cells run in order: create_training_dfs is called in the next cell.
Processor.dfs

In [None]:
# Build one training frame per reference date.
Processor.create_training_dfs(value_col = "value")

In [None]:
# Training frame for the first reference date.
Processor.dfs[0]

In [None]:
# Tune/fit AutoLSTM on the first training frame, forecast 4 steps ahead for every reference date, save the model.
Processor.create_fixed_model(h = 4, model_name = "test_model")

In [None]:
# Forecast for the third reference date.
Processor.forecasts[2]

In [None]:
# Plot observed data together with all forecasts.
Processor.create_graph()

In [None]:
# Last 16 rows of the frame behind the plot.
Processor.plotting_df.iloc[-16:]

In [None]:
# Forecast for the second reference date.
Processor.forecasts[1]

In [None]:
Processor.overall_df

In [None]:
# Observed values on the forecast dates of the second reference date (same lookup calculate_metrics performs).
Processor.overall_df["value"].loc[Processor.forecasts[1].index]

In [None]:
# Compute MAE / MSE / MAPE / NMSE for every forecast.
Processor.calculate_metrics()

In [None]:
Processor.maes

In [None]:
Processor.mses

In [None]:
Processor.mapes

In [None]:
# One row per (reference date, target end date) prediction.
Processor.create_display_df()

In [None]:
Processor.display_df

In [None]:
# One row of metrics per reference date.
Processor.create_metrics_df()

In [None]:
Processor.metrics_df

In [None]:
class UpdatingModelLSTMProcessor:
    """Tune and fit a separate AutoLSTM for every reference date.

    Intended call order (each step stores its result on the instance):
    ``create_training_dfs`` -> ``create_models`` -> ``create_graph`` /
    ``calculate_metrics`` -> ``create_metrics_df`` / ``create_display_df``.

    Every ``create_*`` / ``calculate_*`` step rebuilds its output from scratch,
    so re-running a notebook cell never leaves duplicated entries that would
    fall out of step with ``self.dates``.
    """

    def __init__(self, overall_df, dates):
        """Store the observed series and the reference dates.

        Args:
            overall_df: DataFrame with the full observed series. It is sliced
                with ``.loc[:date]`` and looked up by forecast dates, so its
                index must be compatible with the date strings in ``dates``.
            dates: Sequence of reference dates as ``"%Y-%m-%d"`` strings.
        """
        self.overall_df = overall_df
        self.overall_df_value_col = "value"
        self.dates = dates
        self.dfs = []
        self.forecasts = []
        self.plotting_df = pd.DataFrame()

        self.maes = []
        self.mses = []
        self.mapes = []
        self.nmses = []

        self.metrics_df = pd.DataFrame(columns = ["Reference Date", "MAE", "MSE", "MAPE", "NMSE"])
        self.display_df = pd.DataFrame(columns = ["Reference Date", "Target End Date", "Prediction"])

    def _english_date(self, i):
        """Return ``self.dates[i]`` reformatted as e.g. "October 05, 2024"."""
        return datetime.strptime(self.dates[i], "%Y-%m-%d").strftime("%B %d, %Y")

    def _display_metric(self, label, values):
        """Print one ``"<label> for <date> model: <value>"`` line per entry of ``values``."""
        for i, value in enumerate(values):
            print(f"{label} for {self._english_date(i)} model: {value}")

    def create_training_dfs(self, value_col):
        """Build one training frame per reference date and store them in ``self.dfs``.

        Each frame holds the rows of ``overall_df`` up to and including the
        reference date, with ``value_col`` renamed to ``y`` and the extra
        columns ``ds`` (copy of the index) and ``unique_id`` ("series_1").

        Args:
            value_col: Name of the column in ``overall_df`` holding the target.
        """
        self.overall_df_value_col = value_col
        # ``assign`` returns a new frame, so nothing is ever written onto a
        # slice of ``overall_df`` (no SettingWithCopyWarning, no aliasing).
        self.dfs = [
            self.overall_df.loc[:date]
            .assign(ds = lambda frame: frame.index, unique_id = "series_1")
            .rename(columns = {value_col: "y"})
            for date in self.dates
        ]

    def create_models(self, h, model_names):
        """Tune, fit, forecast with and save one AutoLSTM per training frame.

        Forecasts are stored in ``self.forecasts`` as single-column (column
        ``0``) frames indexed by the ``h`` weekly dates following the reference
        date. Model ``i`` is saved to
        ``./AutoLSTM/fixed_models/<model_names[i]>.ckpt``.

        Args:
            h: Forecast horizon, in weekly steps.
            model_names: Checkpoint file stems, one per training frame (extra
                names are ignored).

        Raises:
            IndexError: If fewer names than training frames are supplied.
        """
        # Fail before any model is trained rather than after fitting one for
        # which no name exists.
        if len(model_names) < len(self.dfs):
            raise IndexError(
                f"model_names has {len(model_names)} entries but {len(self.dfs)} training dataframes need a name"
            )

        # Start from an empty list so a re-run replaces earlier forecasts
        # instead of appending to them.
        self.forecasts = []
        for i, training_df in enumerate(self.dfs):
            dataset, *_ = TimeSeriesDataset.from_df(training_df)
            LSTMmodel = AutoLSTM(h = h, backend = "optuna")
            LSTMmodel.fit(dataset = dataset)
            y_hat = LSTMmodel.predict(dataset = dataset)
            start_date = datetime.strptime(self.dates[i], "%Y-%m-%d")
            horizon_dates = [start_date + timedelta(weeks=j) for j in range(1, h+1)]

            fc = pd.DataFrame(y_hat.ravel(), index = pd.to_datetime(horizon_dates))
            self.forecasts.append(fc)
            LSTMmodel.save(path=f'./AutoLSTM/fixed_models/{model_names[i]}.ckpt')

    def create_graph(self):
        """Plot the observed series together with every stored forecast.

        ``self.plotting_df`` is rebuilt on the index of ``overall_df``; each
        forecast is aligned to that index, so forecast dates that are absent
        from ``overall_df`` do not appear in the plot.
        """
        plotting_df = pd.DataFrame(index = self.overall_df.index)
        plotting_df["Real Data"] = self.overall_df[self.overall_df_value_col]

        for i, fc in enumerate(self.forecasts):
            plotting_df[f"{self._english_date(i)} Model"] = fc[0]

        self.plotting_df = plotting_df
        self.plotting_df.iplot(xTitle = "Date", yTitle = "Count", title = "Updating Parameter LSTM Predictions")

    def calculate_metrics(self):
        """Compute MAE, MSE, MAPE and NMSE for every forecast.

        NMSE is the MSE divided by ``np.var`` of the observed values over the
        forecast dates. Results replace ``self.maes``, ``self.mses``,
        ``self.mapes`` and ``self.nmses``.

        Raises:
            KeyError: If a forecast date is not present in ``overall_df``
                (raised by the ``.loc`` lookup).
        """
        observed = self.overall_df[self.overall_df_value_col]
        maes, mses, mapes, nmses = [], [], [], []

        for fc in self.forecasts:
            # Look the observed values up once per forecast instead of once per metric.
            actual = observed.loc[fc.index]
            predicted = fc[0]

            mse = mean_squared_error(actual, predicted)
            maes.append(mean_absolute_error(actual, predicted))
            mses.append(mse)
            mapes.append(mean_absolute_percentage_error(actual, predicted))
            nmses.append(mse/np.var(actual))

        self.maes, self.mses, self.mapes, self.nmses = maes, mses, mapes, nmses

    def display_maes(self):
        """Print the mean absolute error of every forecast."""
        self._display_metric("Mean Absolute Error", self.maes)

    def display_mses(self):
        """Print the mean squared error of every forecast."""
        self._display_metric("Mean Squared Error", self.mses)

    def display_mapes(self):
        """Print the mean absolute percentage error of every forecast."""
        self._display_metric("Mean Absolute Percentage Error", self.mapes)

    def display_nmses(self):
        """Print the normalized mean squared error of every forecast."""
        self._display_metric("Normalized Mean Square Error", self.nmses)

    def create_metrics_df(self):
        """Rebuild ``self.metrics_df`` with one row of metrics per reference date.

        Raises:
            IndexError: If the metric lists are shorter than ``self.dates``
                (i.e. ``calculate_metrics`` has not been run).
        """
        rows = [
            [self.dates[i], self.maes[i], self.mses[i], self.mapes[i], self.nmses[i]]
            for i in range(len(self.dates))
        ]
        # Build the frame in one go rather than enlarging it row by row.
        self.metrics_df = pd.DataFrame(rows, columns = ["Reference Date", "MAE", "MSE", "MAPE", "NMSE"])

    def create_display_df(self):
        """Rebuild ``self.display_df`` with one row per (reference date, target date) prediction."""
        rows = [
            [self.dates[i], target_end_date, value]
            for i, fc in enumerate(self.forecasts)
            for target_end_date, value in fc[0].items()
        ]
        self.display_df = pd.DataFrame(rows, columns = ["Reference Date", "Target End Date", "Prediction"])
            
            

In [None]:
# Updating-model experiment: same series and reference dates as the fixed-model run.
UpdatingProcessor = UpdatingModelLSTMProcessor(overall_df = updated_df, dates = ["2024-10-05", "2024-10-19", "2024-11-02", "2024-11-16", "2024-12-07"])

In [None]:
UpdatingProcessor.dates

In [None]:
# Still the empty list from __init__ when cells run in order: create_training_dfs is called in the next cell.
UpdatingProcessor.dfs

In [None]:
# Build one training frame per reference date.
UpdatingProcessor.create_training_dfs(value_col = "value")

In [None]:
# Training frame for the third reference date.
UpdatingProcessor.dfs[2]

In [None]:
# Tune/fit one AutoLSTM per reference date, forecast 4 steps ahead with each, save each under its own name.
UpdatingProcessor.create_models(h = 4, model_names = ["test_1", "test_2", "test_3", "test_4", "test_5"])

In [None]:
# Forecast for the second reference date.
UpdatingProcessor.forecasts[1]

In [None]:
# Plot observed data together with all forecasts.
UpdatingProcessor.create_graph()

In [None]:
# Compute MAE / MSE / MAPE / NMSE for every forecast.
UpdatingProcessor.calculate_metrics()

In [None]:
UpdatingProcessor.maes

In [None]:
# One row per (reference date, target end date) prediction.
UpdatingProcessor.create_display_df()

In [None]:
UpdatingProcessor.display_df

In [None]:
# One row of metrics per reference date.
UpdatingProcessor.create_metrics_df()

In [None]:
UpdatingProcessor.metrics_df

In [None]:
# Fixed-model metrics again, for comparison with the updating-model table above.
Processor.metrics_df