### 0. Imports and Variable Setup

In [80]:

import os
import warnings
import hopsworks
import matplotlib.pyplot as plt
import datetime
import pandas as pd
from joblib import dump, load
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor  # For regression tasks
from sklearn.metrics import mean_squared_error, accuracy_score, r2_score


In [81]:

# Resolve the Hopsworks API key without hard-coding it in the notebook.
def resolve_hopsworks_api_key(key_file="hopsworks-api-key.txt"):
    """Return the Hopsworks API key, or None when no source provides one.

    Args:
        key_file: Path of a local text file holding the key.

    Returns:
        The contents of ``key_file`` (trailing whitespace stripped) when the
        file exists and is non-empty; otherwise the value of the
        ``HOPSWORKS_API_KEY`` environment variable (e.g. injected from GitHub
        Secrets); otherwise None.
    """
    if os.path.isfile(key_file):
        with open(key_file, "r") as file:
            file_key = file.read().rstrip()
        if file_key:
            return file_key
    # A missing or empty key file is not an error: fall back to the
    # environment so CI runs that only provide the secret keep working.
    return os.getenv("HOPSWORKS_API_KEY") or None


HOPSWORKS_API_KEY = resolve_hopsworks_api_key()
if HOPSWORKS_API_KEY is not None:
    # Export the key so the later login call can pick it up from the environment.
    os.environ["HOPSWORKS_API_KEY"] = HOPSWORKS_API_KEY

In [82]:
# Log in to the Hopsworks project, then fetch its feature store handle.
PROJECT_NAME = "ScalableMLandDeepLcourse"

project = hopsworks.login(project=PROJECT_NAME)
fs = project.get_feature_store()

2025-01-07 13:47:33,725 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-01-07 13:47:33,740 INFO: Initializing external client
2025-01-07 13:47:33,743 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-01-07 13:47:34,807 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1170582


In [83]:
# Create the local directories for the model artifacts and their images.
model_dir = "trafic_pred_model"
images_dir = model_dir + "/images"

# makedirs also creates model_dir as the parent of images_dir, and
# exist_ok=True keeps the cell safe to re-run without a separate
# check-then-create step.
os.makedirs(images_dir, exist_ok=True)

## 4. Batch Inference

In [84]:
# Registry coordinates of the model used for batch inference.
MODEL_NAME = "trafic_pred_xgboost"
MODEL_VERSION = 1

mr = project.get_model_registry()
retrieved_model = mr.get_model(name=MODEL_NAME, version=MODEL_VERSION)

# Fetch the stored artifacts; the returned value is used below as the local
# directory that contains them.
saved_model_dir = retrieved_model.download()

Downloading model artifact (1 dirs, 2 files)... DONE

In [85]:
# The downloaded directory is expected to contain 'xgboost_model.joblib'
# (the file name joined below).
model_file_path = os.path.join(saved_model_dir, "xgboost_model.joblib")

# Load the model with joblib.
# NOTE(review): joblib.load deserialises arbitrary Python objects — only load
# artifacts that come from a registry you trust.
trafic_pred_xgboost = load(model_file_path)

In [86]:
# Inference timestamp: the current (naive, local) time truncated to the top
# of the hour, then moved three hours into the past.
current_hour = datetime.datetime.now().replace(minute=0, second=0, microsecond=0)
today = current_hour - datetime.timedelta(hours=3)
today

datetime.datetime(2025, 1, 7, 10, 0)

In [87]:
# Handles to the traffic and weather/holiday feature groups (both version 1).
trafic_fg = fs.get_feature_group(name='stockholm_traffic', version=1)
weather_fg = fs.get_feature_group(name="stockholm_weather_holiday", version=1)

# Only rows whose `date` equals the inference timestamp are read.
trafic_df = trafic_fg.filter(trafic_fg.date == today).read()
weather_df = weather_fg.filter(weather_fg.date == today).read()

# The filter is an exact timestamp match, so a missing ingestion for that hour
# yields empty frames. Fail fast here with a clear message instead of letting
# the encoding / merge / prediction cells run on no data.
if trafic_df.empty or weather_df.empty:
    raise ValueError(
        f"No data to run batch inference for {today}: "
        f"{len(trafic_df)} traffic row(s), {len(weather_df)} weather row(s)."
    )


Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.36s) 
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.65s) 


In [88]:

# Label-encode the raw "frc" column into an integer "FRC" column and drop the
# raw column afterwards.
# NOTE(review): the encoder is fitted on this inference batch, so the codes
# depend on which values are present in it — confirm they line up with the
# encoding used when the model was trained.
label_encoder_FRC = LabelEncoder()
frc_codes = label_encoder_FRC.fit_transform(trafic_df["frc"])
trafic_df = trafic_df.assign(FRC=frc_codes).drop(columns=["frc"])

In [89]:
# Label-encode the "coordinates" strings into an integer "SEG" column; the
# original "coordinates" column is kept.
# NOTE(review): the encoder is fitted on this inference batch, so a SEG code
# depends on which coordinates are present — confirm the codes match the ones
# the model was trained with.
label_encoder_coor = LabelEncoder()
seg_codes = label_encoder_coor.fit_transform(trafic_df["coordinates"])
trafic_df = trafic_df.assign(SEG=seg_codes)

In [90]:
# Inner-join the traffic rows with the weather rows on the shared "date" column.
merged_df = trafic_df.merge(weather_df, how="inner", on="date")

In [91]:
# Derive the hour-of-day column from the "date" timestamp.
# NOTE(review): the recorded preview output of merged_df in this notebook
# labels this column "heure", not "hour" — confirm which name the trained
# model expects before relying on this cell.
hour_of_day = pd.to_datetime(merged_df["date"]).dt.hour
merged_df = merged_df.assign(hour=hour_of_day)

In [92]:
# The raw timestamp is no longer needed once the hour column exists.
merged_df = merged_df.drop(columns=["date"])

In [93]:
# Preview the merged traffic + weather frame before it is passed to the model.
merged_df

Unnamed: 0,currentspeed,freeflowspeed,currenttraveltime,freeflowtraveltime,confidence,roadclosure,coordinates,relativespeed,FRC,SEG,temperature_2m_max,temperature_2m_min,precipitation_sum,wind_speed_10m_max,wind_direction_10m_dominant,holiday_status,heure
0,16,16,83,83,1.00,False,LINESTRING (18.035570345361634 59.341354559504...,1.000000,6,10,4.026,0.676,2.7,31.68,176.159103,0,11
1,10,12,136,113,0.72,False,LINESTRING (18.030286393601045 59.340611581406...,0.833333,5,1,4.026,0.676,2.7,31.68,176.159103,0,11
2,10,18,139,77,0.70,False,"LINESTRING (18.066489509788 59.34369878970523,...",0.555556,5,154,4.026,0.676,2.7,31.68,176.159103,0,11
3,17,17,29,29,1.00,False,LINESTRING (18.043447993240193 59.343878503496...,1.000000,5,42,4.026,0.676,2.7,31.68,176.159103,0,11
4,18,18,17,17,1.00,False,LINESTRING (18.060907832826672 59.345399301387...,1.000000,3,127,4.026,0.676,2.7,31.68,176.159103,0,11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
160,13,22,153,90,0.99,False,LINESTRING (18.038586489399563 59.342495815378...,0.590909,3,24,4.026,0.676,2.7,31.68,176.159103,0,11
161,60,60,28,28,1.00,False,LINESTRING (18.056384287322032 59.351407464362...,1.000000,1,99,4.026,0.676,2.7,31.68,176.159103,0,11
162,75,75,97,97,1.00,False,LINESTRING (18.010798803998824 59.341308954632...,1.000000,0,0,4.026,0.676,2.7,31.68,176.159103,0,11
163,14,26,67,36,1.00,False,LINESTRING (18.063042871203066 59.345046598587...,0.538462,2,140,4.026,0.676,2.7,31.68,176.159103,0,11


In [94]:
# Pass every column except the raw "coordinates" string to the model.
# "predicted_rs" is excluded too: it does not exist on the first run, but on a
# re-run of this cell it would otherwise be fed back to the model as a feature.
# NOTE(review): "relativespeed" is among the inputs and is also stored next to
# the prediction for monitoring — confirm this matches the training features.
feature_mask = ~merged_df.columns.isin(["coordinates", "predicted_rs"])
merged_df['predicted_rs'] = trafic_pred_xgboost.predict(merged_df.loc[:, feature_mask])

In [95]:
# Keep only the segment id, its geometry, the prediction and the observed value.
monitoring_columns = ["SEG", "coordinates", "predicted_rs", "relativespeed"]
batch_df = merged_df.loc[:, monitoring_columns]

In [96]:
# Preview the rows that will be written to the monitoring feature group.
batch_df

Unnamed: 0,SEG,coordinates,predicted_rs,relativespeed
0,10,LINESTRING (18.035570345361634 59.341354559504...,0.923090,1.000000
1,1,LINESTRING (18.030286393601045 59.340611581406...,0.876741,0.833333
2,154,"LINESTRING (18.066489509788 59.34369878970523,...",0.710897,0.555556
3,42,LINESTRING (18.043447993240193 59.343878503496...,0.764532,1.000000
4,127,LINESTRING (18.060907832826672 59.345399301387...,0.778615,1.000000
...,...,...,...,...
160,24,LINESTRING (18.038586489399563 59.342495815378...,0.672489,0.590909
161,99,LINESTRING (18.056384287322032 59.351407464362...,0.941734,1.000000
162,0,LINESTRING (18.010798803998824 59.341308954632...,0.967534,1.000000
163,140,LINESTRING (18.063042871203066 59.345046598587...,0.629251,0.538462


In [100]:
# Fetch the monitoring feature group, creating it on first use.
# NOTE(review): the only primary key is "SEG" and batch_df carries no
# timestamp column — confirm how successive batches are meant to be told apart.
monitor_fg = fs.get_or_create_feature_group(
    name="rs_predictions",
    version=1,
    primary_key=["SEG"],
    description="Trafic prediction monitoring",
)

In [101]:
# Upload the prediction batch to the monitoring feature group; the value
# returned by insert() is displayed as this cell's output.
monitor_fg.insert(batch_df)

Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/1170582/fs/1161285/fg/1394717


Uploading Dataframe: 100.00% |██████████| Rows 165/165 | Elapsed Time: 00:01 | Remaining Time: 00:00


Launching job: rs_predictions_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1170582/jobs/named/rs_predictions_1_offline_fg_materialization/executions


(Job('rs_predictions_1_offline_fg_materialization', 'SPARK'), None)