# Probabilistic Forecasting with `sktime`: Distribution Forecast

## Notebook Goals
* Provide an example workflow for a distribution forecast of blood glucose level (BGL) data using `sktime`.
* Demonstrate how to use Optuna for a single forecasting type.

In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and edits to local project code are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Data Load

In [10]:
import sys
import os

# Make the directory two levels above the kernel's working directory
# importable (it is expected to contain the `src` package used by later cells).
# `__file__` is not defined inside a notebook, so the path is resolved from the
# current working directory; this is the same location the previous
# `os.path.dirname("__file__")` expression evaluated to, just stated explicitly.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))

# Guard the append so re-running this cell does not add duplicate entries.
if project_root not in sys.path:
    sys.path.append(project_root)


In [13]:
from src.data.data_loader import load_data

# Columns that identify a row: record id, participant number, time of day.
id_columns = ["id", "p_num", "time"]

# Columns carrying the "-0:00" suffix, one per recorded signal.
reading_columns = [
    "bg-0:00",
    "insulin-0:00",
    "carbs-0:00",
    "hr-0:00",
    "steps-0:00",
    "cals-0:00",
    "activity-0:00",
]

# Full selection handed to the loader (same order as before).
keep_columns = id_columns + reading_columns

# Read the "train" split of the "kaggle_brisT1D" source, restricted to the
# selected columns.
df = load_data(
    data_source_name="kaggle_brisT1D",
    dataset_type="train",
    keep_columns=keep_columns,
)

# Preview the first five rows as plain text.
print(df.head(5))

      id p_num      time  bg-0:00  insulin-0:00  carbs-0:00  hr-0:00  \
0  p01_0   p01  06:10:00     15.1        0.0417         NaN      NaN   
1  p01_1   p01  06:25:00     14.4        0.0417         NaN      NaN   
2  p01_2   p01  06:40:00     13.9        0.0417         NaN      NaN   
3  p01_3   p01  06:55:00     13.8        0.0417         NaN      NaN   
4  p01_4   p01  07:10:00     13.4        0.0417         NaN      NaN   

   steps-0:00  cals-0:00 activity-0:00  
0         NaN        NaN           NaN  
1         NaN        NaN           NaN  
2         NaN        NaN           NaN  
3         NaN        NaN           NaN  
4         NaN        NaN           NaN  


  return pd.read_csv(file_path, usecols=keep_columns)


In [15]:
from sktime.forecasting.arima import ARIMA

# step 1: data specification - the "bg-0:00" column is the target series
# NOTE(review): `df` is the full training frame with a `p_num` column, so `y`
# may concatenate several participants' series - confirm that fitting a single
# ARIMA on it is intended.
y = df["bg-0:00"]
# step 2: specifying forecasting horizon - the three steps following the end of `y`
fh = [1, 2, 3]
# step 3: specifying the forecasting algorithm (default ARIMA settings)
forecaster = ARIMA()
# step 4: fitting the forecaster
# Pass the `fh` defined in step 2 instead of repeating the literal, so the
# horizon is specified in exactly one place.
forecaster.fit(y, fh=fh)
# step 5: querying point predictions for the horizon supplied at fit time
y_pred = forecaster.predict()

# for probabilistic forecasting:
#   call a probabilistic forecasting method after or instead of step 5
# Here: lower/upper bounds of the 95% prediction interval for each horizon step.
y_pred_int = forecaster.predict_interval(coverage=0.95)
y_pred_int

Unnamed: 0_level_0,bg-0:00,bg-0:00
Unnamed: 0_level_1,0.95,0.95
Unnamed: 0_level_2,lower,upper
177024,10.633607,12.483572
177025,10.217717,12.817674
177026,9.895042,13.059583


In [None]:
import warnings

warnings.simplefilter(action="ignore", category=FutureWarning)