In [1]:
import os

import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt

import cufflinks as cf
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

# Enable cufflinks' offline plotting mode before any .iplot() call is made.
cf.go_offline()

# IPython magic: show matplotlib figures inline in the cell output.
%matplotlib inline

In [2]:
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error

In [3]:
from nixtla import NixtlaClient

In [4]:
# Credentials must not live in the notebook: read the TimeGPT key from the
# NIXTLA_API_KEY environment variable. A missing variable raises KeyError here
# instead of surfacing later as a failed API call.
# NOTE(review): the key that used to be hard-coded in this cell has been exposed
# and should be revoked and replaced.
nixtla_client = NixtlaClient(api_key=os.environ["NIXTLA_API_KEY"])

In [5]:
# Confirm the client's API key is accepted before any forecast request is made;
# the call returns True for a valid key.
nixtla_client.validate_api_key()

INFO:nixtla.nixtla_client:Happy Forecasting! :)


True

In [6]:
# Load the admissions CSV and reduce it to the national ("US") series:
# keep only the date/value columns, parse the dates, index by date and
# order the rows chronologically.
entire_df = (
    pd.read_csv("flu-hospital-admissions.csv")
    .loc[lambda frame: frame["location_name"] == "US", ["date", "value"]]
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
    .set_index("date")
    .sort_values(by="date")
)
entire_df

Unnamed: 0_level_0,value
date,Unnamed: 1_level_1
2022-02-05,1095.0
2022-02-12,1164.0
2022-02-19,1493.0
2022-02-26,1603.0
2022-03-05,1791.0
...,...
2024-11-30,4348.0
2024-12-07,6342.0
2024-12-14,9118.0
2024-12-21,14667.0


In [7]:
#5 different training windows, with the final week being 10/7/23, 10/14/23, 10/21/23, 10/28/23, 11/4/23

In [8]:
# One training window per cutoff date; each window runs from the start of the
# series up to and including its cutoff (label slicing with .loc is inclusive).
oct7df, oct14df, oct21df, oct28df, nov4df = (
    entire_df.loc[:cutoff]
    for cutoff in (
        "2023-10-07",
        "2023-10-14",
        "2023-10-21",
        "2023-10-28",
        "2023-11-04",
    )
)

In [37]:
# Display the training window whose cutoff is 2023-10-07.
oct7df

Unnamed: 0_level_0,value
date,Unnamed: 1_level_1
2022-02-05,1095.0
2022-02-12,1164.0
2022-02-19,1493.0
2022-02-26,1603.0
2022-03-05,1791.0
...,...
2023-09-09,804.0
2023-09-16,821.0
2023-09-23,884.0
2023-09-30,1026.0


In [9]:
# Oct 7 window: request a TimeGPT forecast for the 13 steps that follow the
# last training point, then index the returned frame by its date column.
oct7forecast = nixtla_client.forecast(
    df=oct7df,
    time_col="date",
    target_col="value",
    h=13,
).set_index("date")

INFO:nixtla.nixtla_client:Validating inputs...
INFO:nixtla.nixtla_client:Inferred freq: W-SAT
INFO:nixtla.nixtla_client:Preprocessing dataframes...
INFO:nixtla.nixtla_client:Querying model metadata...
INFO:nixtla.nixtla_client:Restricting input...
INFO:nixtla.nixtla_client:Calling Forecast Endpoint...


In [10]:
# Build the plotting frame for the Oct 7 window in one step. Its index is every
# date of the full series up to 2024-01-06; each column is aligned on that
# index, so dates a series does not cover are left as NaN.
oct7plottingdf = pd.DataFrame(
    {
        "TimeGPT Forecast": oct7forecast["TimeGPT"],
        "Training Data": oct7df["value"],
        # Held-out observations for the forecast horizon.
        "Real Data": entire_df.loc["2023-10-14":"2024-01-06", "value"],
    },
    index=entire_df.loc[:"2024-01-06"].index,
)

oct7plottingdf

Unnamed: 0_level_0,TimeGPT Forecast,Training Data,Real Data
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-02-05,,1095.0,
2022-02-12,,1164.0,
2022-02-19,,1493.0,
2022-02-26,,1603.0,
2022-03-05,,1791.0,
...,...,...,...
2023-12-09,21020.453,,7510.0
2023-12-16,19229.861,,10289.0
2023-12-23,16906.191,,15727.0
2023-12-30,15053.542,,21685.0


In [11]:
# Interactive chart of the Oct 7 forecast alongside the training and real series.
oct7plottingdf.iplot(
    xTitle="Date",
    yTitle="Count",
    title="TimeGPT Prediction Given Oct. 7 As Final Point",
)


DatetimeIndex.format is deprecated and will be removed in a future version. Convert using index.astype(str) or index.map(formatter) instead.



In [12]:
# Error metrics for the Oct 7 forecast against the held-out observations.
# The two inputs are compared element by element, so they must cover the same
# dates in the same order. MAPE is reported as a ratio (1.0 == 100%).
oct7_actual = entire_df["2023-10-14":"2024-01-06"]
oct7_predicted = oct7forecast["TimeGPT"]
print("MAE:", mean_absolute_error(oct7_actual, oct7_predicted))
print("MAPE:", mean_absolute_percentage_error(oct7_actual, oct7_predicted))
print("MSE:", mean_squared_error(oct7_actual, oct7_predicted))

MAE: 6067.5930307692315
MAPE: 1.2415618369086314
MSE: 51881445.34835239


In [13]:
# Oct 14 window: request a TimeGPT forecast for the 12 steps that follow the
# last training point, then index the returned frame by its date column.
oct14forecast = nixtla_client.forecast(
    df=oct14df,
    time_col="date",
    target_col="value",
    h=12,
).set_index("date")

INFO:nixtla.nixtla_client:Validating inputs...
INFO:nixtla.nixtla_client:Inferred freq: W-SAT
INFO:nixtla.nixtla_client:Preprocessing dataframes...
INFO:nixtla.nixtla_client:Restricting input...
INFO:nixtla.nixtla_client:Calling Forecast Endpoint...


In [14]:
# Build the plotting frame for the Oct 14 window in one step. Its index is every
# date of the full series up to 2024-01-06; each column is aligned on that
# index, so dates a series does not cover are left as NaN.
oct14plottingdf = pd.DataFrame(
    {
        "TimeGPT Forecast": oct14forecast["TimeGPT"],
        "Training Data": oct14df["value"],
        # Held-out observations for the forecast horizon.
        "Real Data": entire_df.loc["2023-10-21":"2024-01-06", "value"],
    },
    index=entire_df.loc[:"2024-01-06"].index,
)

oct14plottingdf

Unnamed: 0_level_0,TimeGPT Forecast,Training Data,Real Data
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-02-05,,1095.0,
2022-02-12,,1164.0,
2022-02-19,,1493.0,
2022-02-26,,1603.0,
2022-03-05,,1791.0,
...,...,...,...
2023-12-09,19843.283,,7510.0
2023-12-16,20061.887,,10289.0
2023-12-23,18484.574,,15727.0
2023-12-30,16346.734,,21685.0


In [15]:
# Interactive chart of the Oct 14 forecast alongside the training and real series.
oct14plottingdf.iplot(
    xTitle="Date",
    yTitle="Count",
    title="TimeGPT Prediction Given Oct. 14 As Final Point",
)


DatetimeIndex.format is deprecated and will be removed in a future version. Convert using index.astype(str) or index.map(formatter) instead.



In [16]:
# Error metrics for the Oct 14 forecast against the held-out observations.
# The two inputs are compared element by element, so they must cover the same
# dates in the same order. MAPE is reported as a ratio (1.0 == 100%).
oct14_actual = entire_df["2023-10-21":"2024-01-06"]
oct14_predicted = oct14forecast["TimeGPT"]
print("MAE:", mean_absolute_error(oct14_actual, oct14_predicted))
print("MAPE:", mean_absolute_percentage_error(oct14_actual, oct14_predicted))
print("MSE:", mean_squared_error(oct14_actual, oct14_predicted))

MAE: 6581.176041666667
MAPE: 1.292987794055154
MSE: 56925383.300325714


In [17]:
# Oct 21 window: request a TimeGPT forecast for the 11 steps that follow the
# last training point, then index the returned frame by its date column.
oct21forecast = nixtla_client.forecast(
    df=oct21df,
    time_col="date",
    target_col="value",
    h=11,
).set_index("date")

INFO:nixtla.nixtla_client:Validating inputs...
INFO:nixtla.nixtla_client:Inferred freq: W-SAT
INFO:nixtla.nixtla_client:Preprocessing dataframes...
INFO:nixtla.nixtla_client:Restricting input...
INFO:nixtla.nixtla_client:Calling Forecast Endpoint...


In [18]:
# Build the plotting frame for the Oct 21 window in one step. Its index is every
# date of the full series up to 2024-01-06; each column is aligned on that
# index, so dates a series does not cover are left as NaN.
oct21plottingdf = pd.DataFrame(
    {
        "TimeGPT Forecast": oct21forecast["TimeGPT"],
        "Training Data": oct21df["value"],
        # Held-out observations for the forecast horizon.
        "Real Data": entire_df.loc["2023-10-28":"2024-01-06", "value"],
    },
    index=entire_df.loc[:"2024-01-06"].index,
)

oct21plottingdf

Unnamed: 0_level_0,TimeGPT Forecast,Training Data,Real Data
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-02-05,,1095.0,
2022-02-12,,1164.0,
2022-02-19,,1493.0,
2022-02-26,,1603.0,
2022-03-05,,1791.0,
...,...,...,...
2023-12-09,18295.277,,7510.0
2023-12-16,17735.814,,10289.0
2023-12-23,17067.623,,15727.0
2023-12-30,15743.491,,21685.0


In [19]:
# Interactive chart of the Oct 21 forecast alongside the training and real series.
oct21plottingdf.iplot(
    xTitle="Date",
    yTitle="Count",
    title="TimeGPT Prediction Given Oct. 21 As Final Point",
)


DatetimeIndex.format is deprecated and will be removed in a future version. Convert using index.astype(str) or index.map(formatter) instead.



In [20]:
# Error metrics for the Oct 21 forecast against the held-out observations.
# The two inputs are compared element by element, so they must cover the same
# dates in the same order. MAPE is reported as a ratio (1.0 == 100%).
oct21_actual = entire_df["2023-10-28":"2024-01-06"]
oct21_predicted = oct21forecast["TimeGPT"]
print("MAE:", mean_absolute_error(oct21_actual, oct21_predicted))
print("MAPE:", mean_absolute_percentage_error(oct21_actual, oct21_predicted))
print("MSE:", mean_squared_error(oct21_actual, oct21_predicted))

MAE: 6056.500318181817
MAPE: 1.0757765185400536
MSE: 48114943.66000927


In [41]:
# Display the training window whose cutoff is 2023-10-28.
oct28df

Unnamed: 0_level_0,value
date,Unnamed: 1_level_1
2022-02-05,1095.0
2022-02-12,1164.0
2022-02-19,1493.0
2022-02-26,1603.0
2022-03-05,1791.0
...,...
2023-09-30,1026.0
2023-10-07,1110.0
2023-10-14,1215.0
2023-10-21,1470.0


In [21]:
# Oct 28 window: request a TimeGPT forecast for the 10 steps that follow the
# last training point, then index the returned frame by its date column.
oct28forecast = nixtla_client.forecast(
    df=oct28df,
    time_col="date",
    target_col="value",
    h=10,
).set_index("date")

INFO:nixtla.nixtla_client:Validating inputs...
INFO:nixtla.nixtla_client:Inferred freq: W-SAT
INFO:nixtla.nixtla_client:Preprocessing dataframes...
INFO:nixtla.nixtla_client:Restricting input...
INFO:nixtla.nixtla_client:Calling Forecast Endpoint...


In [22]:
# Build the plotting frame for the Oct 28 window in one step. Its index is every
# date of the full series up to 2024-01-06; each column is aligned on that
# index, so dates a series does not cover are left as NaN.
oct28plottingdf = pd.DataFrame(
    {
        "TimeGPT Forecast": oct28forecast["TimeGPT"],
        "Training Data": oct28df["value"],
        # Held-out observations for the forecast horizon.
        "Real Data": entire_df.loc["2023-11-04":"2024-01-06", "value"],
    },
    index=entire_df.loc[:"2024-01-06"].index,
)

oct28plottingdf

Unnamed: 0_level_0,TimeGPT Forecast,Training Data,Real Data
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-02-05,,1095.0,
2022-02-12,,1164.0,
2022-02-19,,1493.0,
2022-02-26,,1603.0,
2022-03-05,,1791.0,
...,...,...,...
2023-12-09,13883.653,,7510.0
2023-12-16,12983.673,,10289.0
2023-12-23,12255.561,,15727.0
2023-12-30,12273.593,,21685.0


In [23]:
# Interactive chart of the Oct 28 forecast alongside the training and real series.
oct28plottingdf.iplot(
    xTitle="Date",
    yTitle="Count",
    title="TimeGPT Prediction Given Oct. 28 As Final Point",
)


DatetimeIndex.format is deprecated and will be removed in a future version. Convert using index.astype(str) or index.map(formatter) instead.



In [24]:
# Error metrics for the Oct 28 forecast against the held-out observations.
# The two inputs are compared element by element, so they must cover the same
# dates in the same order. MAPE is reported as a ratio (1.0 == 100%).
oct28_actual = entire_df["2023-11-04":"2024-01-06"]
oct28_predicted = oct28forecast["TimeGPT"]
print("MAE:", mean_absolute_error(oct28_actual, oct28_predicted))
print("MAPE:", mean_absolute_percentage_error(oct28_actual, oct28_predicted))
print("MSE:", mean_squared_error(oct28_actual, oct28_predicted))

MAE: 4947.28966
MAPE: 0.6654331163499977
MSE: 32616235.399480253


In [25]:
# Nov 4 window: request a TimeGPT forecast for the 9 steps that follow the
# last training point, then index the returned frame by its date column.
nov4forecast = nixtla_client.forecast(
    df=nov4df,
    time_col="date",
    target_col="value",
    h=9,
).set_index("date")

INFO:nixtla.nixtla_client:Validating inputs...
INFO:nixtla.nixtla_client:Inferred freq: W-SAT
INFO:nixtla.nixtla_client:Preprocessing dataframes...
INFO:nixtla.nixtla_client:Restricting input...
INFO:nixtla.nixtla_client:Calling Forecast Endpoint...


In [26]:
# Build the plotting frame for the Nov 4 window in one step. Its index is every
# date of the full series up to 2024-01-06; each column is aligned on that
# index, so dates a series does not cover are left as NaN.
nov4plottingdf = pd.DataFrame(
    {
        "TimeGPT Forecast": nov4forecast["TimeGPT"],
        "Training Data": nov4df["value"],
        # Held-out observations for the forecast horizon.
        "Real Data": entire_df.loc["2023-11-11":"2024-01-06", "value"],
    },
    index=entire_df.loc[:"2024-01-06"].index,
)

nov4plottingdf

Unnamed: 0_level_0,TimeGPT Forecast,Training Data,Real Data
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-02-05,,1095.0,
2022-02-12,,1164.0,
2022-02-19,,1493.0,
2022-02-26,,1603.0,
2022-03-05,,1791.0,
...,...,...,...
2023-12-09,10901.648,,7510.0
2023-12-16,10511.217,,10289.0
2023-12-23,9889.516,,15727.0
2023-12-30,8929.090,,21685.0


In [27]:
# Interactive chart of the Nov 4 forecast alongside the training and real series.
nov4plottingdf.iplot(
    xTitle="Date",
    yTitle="Count",
    title="TimeGPT Prediction Given Nov. 4 As Final Point",
)


DatetimeIndex.format is deprecated and will be removed in a future version. Convert using index.astype(str) or index.map(formatter) instead.



In [28]:
# Error metrics for the Nov 4 forecast against the held-out observations.
# The two inputs are compared element by element, so they must cover the same
# dates in the same order. MAPE is reported as a ratio (1.0 == 100%).
nov4_actual = entire_df["2023-11-11":"2024-01-06"]
nov4_predicted = nov4forecast["TimeGPT"]
print("MAE:", mean_absolute_error(nov4_actual, nov4_predicted))
print("MAPE:", mean_absolute_percentage_error(nov4_actual, nov4_predicted))
print("MSE:", mean_squared_error(nov4_actual, nov4_predicted))

MAE: 4845.935
MAPE: 0.4748514606259648
MSE: 41163965.956501216


In [29]:
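#Visually, the graphs look worse, but MAE, MAPE, and MSE generally go down as the training window expands
#(not strictly: all three are higher for the Oct 14 window than for Oct 7, and MSE rises again for Nov 4).
#Could this be because the later windows have fewer forecast points to account for in the MAE, MAPE, and MSE?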