# Inference Pipeline
Predicts delays for the next hour given the delays from the last three 30-minute intervals.

### Imports

In [3]:
from datetime import date
import hopsworks
import os
import pandas as pd
from pathlib import Path
import sys

# Locate the project root: start from the directory the notebook was launched
# in and step out of 'pipeline' and then 'src' when it was started inside them.
root_dir = Path().absolute()
for subdir in ('pipeline', 'src'):
    if root_dir.name == subdir:
        root_dir = root_dir.parent
root_dir = str(root_dir)

# Switch the working directory to the project root and report where we ended up.
os.chdir(root_dir)
print(f"Root dir: {Path.cwd()}")

Root dir: C:\Users\royli\Desktop\Courses\ID2223_Scalable_Machine_Learning_and_Deep_Learning\Project


### Connect to Hopsworks

In [None]:
# Name of the Hopsworks project to log in to.
# Set this to None if the project in Hopsworks is your main project.
project_name = 'metro_delay_prediction'

# Name the project explicitly only when one is configured; otherwise log in
# without a project argument.
login_kwargs = {'project': f'{project_name}'} if project_name else {}
project = hopsworks.login(**login_kwargs)

### Retrieve Model from Model Registry

In [None]:
# Which registered model to fetch from the project's model registry.
model_spec = {'name': 'model', 'version': 1}

mr = project.get_model_registry()
retrieved_model = mr.get_model(**model_spec)

# Feature view associated with the model; used later to fetch batch data.
fv = retrieved_model.get_feature_view()
# Download the model artifacts; the returned location is used later as the
# directory that holds model.json.
saved_dir = retrieved_model.download()

### Load Model

In [None]:
# XGBRegressor is not imported by the notebook's Imports cell, so bring it into
# scope here; without this import the constructor below raises NameError.
from xgboost import XGBRegressor

# Create an empty regressor and restore the trained model from the JSON file
# inside the directory downloaded from the model registry.
xgboost_model = XGBRegressor()
xgboost_model.load_model(f'{saved_dir}/model.json')

# Display the model parameters
xgboost_model

### Fetch Recent Delays with Feature View

In [None]:
# Fetch the batch of feature rows through the model's feature view.
# NOTE(review): assumes get_batch_data() returns a pandas DataFrame — confirm.
batch_df = fv.get_batch_data()
# pandas has no `sort_values_by`; `sort_values(by=...)` orders the rows by
# timestamp (ascending) so the newest rows come last.
batch_df = batch_df.sort_values(by='timestamp')
# Keep the three newest rows.
# NOTE(review): presumably one row per metro line at the newest timestamp — verify.
latest = batch_df.tail(3)

# Display the most recent feature rows (blue, red, and green lines)
latest

### Predict the Delay of the Next 30 Minutes

In [None]:
# Columns passed to the model, in this order.
features = ['line', 'day', 'delay_60', 'delay_30', 'delay_current']

# Drop rows that are missing any of the three delay values before predicting.
valid = latest.dropna(subset=['delay_60', 'delay_30', 'delay_current'])
X = valid[features]
# XGBRegressor exposes `predict`, not `pred`; yields one prediction per row of X.
preds = xgboost_model.predict(X)