### 0. Imports and Variable Setup

In [25]:

import os
import warnings
import hopsworks
import matplotlib.pyplot as plt
import datetime
import pandas as pd
from joblib import dump, load
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor  # For regression tasks
from sklearn.metrics import mean_squared_error, accuracy_score, r2_score


In [26]:

# Read the Hopsworks API key from the process environment (expected to be
# injected from GitHub Secrets when run in CI). The value is None when the
# variable is not set.
HOPSWORKS_API_KEY = os.environ.get('HOPSWORKS_API_KEY')

# Local alternative (disabled): load the key from a file into the environment.
#with open('hopsworks-api-key.txt', 'r') as file:
#    os.environ["HOPSWORKS_API_KEY"] = file.read().rstrip()

In [27]:
# Log in to the Hopsworks project and get a handle to its feature store.
# NOTE(review): no API key is passed explicitly here; presumably the client
# picks it up from the HOPSWORKS_API_KEY environment variable - confirm.
project = hopsworks.login(project="ScalableMLandDeepLcourse")
fs = project.get_feature_store()

2025-01-03 18:47:01,510 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-01-03 18:47:01,518 INFO: Initializing external client
2025-01-03 18:47:01,520 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-01-03 18:47:02,629 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1170582


In [28]:
# Create the local directories for the model artifacts and their images.
# os.makedirs(..., exist_ok=True) creates any missing parent (model_dir) and is
# a no-op when the directory already exists, which avoids the check-then-create
# race of os.path.exists() followed by os.mkdir().
model_dir = "trafic_pred_model"
images_dir = model_dir + "/images"
os.makedirs(images_dir, exist_ok=True)

## 4. Batch Inference

In [29]:
# Look up version 1 of the registered traffic model in the project's model registry.
mr = project.get_model_registry()
retrieved_model = mr.get_model(name="trafic_pred_xgboost", version=1)

# Fetch the model artifacts; the returned value is used below as the local
# directory that contains the serialized model file.
saved_model_dir = retrieved_model.download()

Downloading model artifact (1 dirs, 2 files)... DONE

In [31]:
# The downloaded artifact directory is expected to contain 'xgboost_model.joblib'.
model_file_path = os.path.join(saved_model_dir, "xgboost_model.joblib")

# Deserialize the model with joblib.
# NOTE(review): joblib.load unpickles the file, so only load artifacts that
# come from a trusted model registry.
trafic_pred_xgboost = load(model_file_path)

In [32]:
# Reference timestamp for this batch: the current time truncated to the start
# of the hour, then shifted back by three hours.
# NOTE(review): datetime.now() is timezone-naive local time - confirm this
# matches the convention used for `date` in the feature groups.
today = datetime.datetime.now().replace(minute=0, second=0, microsecond=0)
today -= datetime.timedelta(hours=3)
today

datetime.datetime(2025, 1, 3, 15, 0)

In [33]:
# Handles to the traffic and the weather/holiday feature groups (version 1 of each).
trafic_fg = fs.get_feature_group(name="stockholm_traffic", version=1)
weather_fg = fs.get_feature_group(name="stockholm_weather_holiday", version=1)

# Read only the rows whose `date` equals the reference timestamp `today`.
trafic_df = trafic_fg.filter(trafic_fg.date == today).read()
weather_df = weather_fg.filter(weather_fg.date == today).read()


Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.20s) 
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.52s) 


In [34]:

# Label-encode the `frc` column into integer codes stored in a new `FRC`
# column, then drop the original `frc` column.
# NOTE(review): the encoder is fitted on this batch only - confirm the
# resulting codes match the encoding the model was trained with.
label_encoder_FRC = LabelEncoder()
trafic_df = (
    trafic_df
    .assign(FRC=label_encoder_FRC.fit_transform(trafic_df["frc"]))
    .drop(columns=["frc"])
)

In [35]:
# Label-encode each distinct `coordinates` value into an integer segment id `SEG`.
# NOTE(review): codes are derived from the segments present in this batch, so
# the coordinates -> SEG mapping may differ between batches - confirm this is
# acceptable for a column later used as a primary key.
label_encoder_coor = LabelEncoder()
trafic_df = trafic_df.assign(
    SEG=label_encoder_coor.fit_transform(trafic_df["coordinates"])
)

In [36]:
# Inner-join traffic rows with the weather/holiday rows that share the same `date`.
merged_df = trafic_df.merge(weather_df, on="date", how="inner")

In [37]:
# Derive the hour of day (`heure`) from the `date` column.
merged_df = merged_df.assign(heure=pd.to_datetime(merged_df['date']).dt.hour)

In [38]:
# `date` is no longer needed once the hour has been extracted; `columns=`
# already selects the column axis, so no separate `axis` argument is required.
merged_df = merged_df.drop(columns=["date"])

In [39]:
# Display the merged feature frame for a visual sanity check.
merged_df

Unnamed: 0,currentspeed,freeflowspeed,currenttraveltime,freeflowtraveltime,confidence,roadclosure,coordinates,relativespeed,FRC,SEG,temperature_2m_max,temperature_2m_min,precipitation_sum,wind_speed_10m_max,wind_direction_10m_dominant,holiday_status,heure
0,72,72,31,31,1.00,False,LINESTRING (18.03548585577761 59.3491852516705...,1.000000,0,9,-4.724,-9.874,0.0,20.16,299.500122,0,16
1,12,12,86,86,1.00,False,LINESTRING (18.05543076201704 59.3469348599300...,1.000000,6,89,-4.724,-9.874,0.0,20.16,299.500122,0,16
2,22,27,88,72,1.00,False,LINESTRING (18.045435510120683 59.336678095802...,0.814815,3,48,-4.724,-9.874,0.0,20.16,299.500122,0,16
3,25,37,46,31,1.00,False,LINESTRING (18.0666464190154 59.34681285368509...,0.675676,1,157,-4.724,-9.874,0.0,20.16,299.500122,0,16
4,17,24,43,31,0.96,False,LINESTRING (18.06021984621421 59.3460940121944...,0.708333,5,123,-4.724,-9.874,0.0,20.16,299.500122,0,16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
161,23,23,23,23,1.00,False,LINESTRING (18.057828656876865 59.344644274453...,1.000000,3,106,-4.724,-9.874,0.0,20.16,299.500122,0,16
162,21,34,25,15,1.00,False,LINESTRING (18.037363402088488 59.340838199356...,0.617647,2,17,-4.724,-9.874,0.0,20.16,299.500122,0,16
163,23,23,30,30,1.00,False,LINESTRING (18.055611811125573 59.338421532941...,1.000000,5,93,-4.724,-9.874,0.0,20.16,299.500122,0,16
164,15,19,273,216,1.00,False,LINESTRING (18.077163360565237 59.332712475417...,0.789474,2,165,-4.724,-9.874,0.0,20.16,299.500122,0,16


In [40]:
# Predict one value per row and store it in `predicted_rs`.
# `coordinates` is excluded from the model input, as before. `predicted_rs` is
# excluded too (ignored when absent) so that re-running this cell does not feed
# the previous predictions back into the model as an extra feature.
# NOTE(review): the input still contains `relativespeed` - confirm that the
# column set and order match what the model was trained on.
inference_features = merged_df.drop(
    columns=['coordinates', 'predicted_rs'], errors='ignore'
)
merged_df['predicted_rs'] = trafic_pred_xgboost.predict(inference_features)

In [41]:
# Keep only the segment id, its geometry string and the prediction for monitoring.
batch_df = merged_df.loc[:, ['SEG', 'coordinates', 'predicted_rs']]

In [43]:
# Display the prediction batch before it is written to the feature store.
batch_df

Unnamed: 0,SEG,coordinates,predicted_rs
0,9,LINESTRING (18.03548585577761 59.3491852516705...,0.922732
1,89,LINESTRING (18.05543076201704 59.3469348599300...,0.844444
2,48,LINESTRING (18.045435510120683 59.336678095802...,0.784221
3,157,LINESTRING (18.0666464190154 59.34681285368509...,0.685033
4,123,LINESTRING (18.06021984621421 59.3460940121944...,0.610479
...,...,...,...
161,106,LINESTRING (18.057828656876865 59.344644274453...,0.742221
162,17,LINESTRING (18.037363402088488 59.340838199356...,0.601197
163,93,LINESTRING (18.055611811125573 59.338421532941...,0.719355
164,165,LINESTRING (18.077163360565237 59.332712475417...,0.790020


In [44]:
# Fetch the monitoring feature group for the predictions (version 1, keyed on
# `SEG`), creating it if it does not exist yet.
monitor_fg = fs.get_or_create_feature_group(
    name="rs_predictions",
    version=1,
    primary_key=["SEG"],
    description="Trafic prediction monitoring",
)

In [45]:
# Write the batch predictions into the monitoring feature group; the call's
# return value is displayed as the cell output.
monitor_fg.insert(batch_df)


Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/1170582/fs/1161285/fg/1394512


Uploading Dataframe: 100.00% |██████████| Rows 166/166 | Elapsed Time: 00:01 | Remaining Time: 00:00


Launching job: rs_predictions_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1170582/jobs/named/rs_predictions_1_offline_fg_materialization/executions


(Job('rs_predictions_1_offline_fg_materialization', 'SPARK'), None)