### 0. Imports and Variable Setup

In [1]:

import os
import warnings
import hopsworks
import matplotlib.pyplot as plt
import datetime
import pandas as pd
from joblib import dump, load
from xgboost import XGBRegressor , plot_importance
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor  # For regression tasks
from sklearn.metrics import mean_squared_error, accuracy_score, r2_score


In [2]:

# Make the Hopsworks API key available through the HOPSWORKS_API_KEY
# environment variable.
#
# A local key file takes precedence, exactly as before. When the file is
# absent, a HOPSWORKS_API_KEY that is already set in the environment (for
# example injected by CI secrets) is used unchanged, so the notebook no longer
# requires the file in that setup. If neither source is available, fail early
# with an explicit message instead of a bare "No such file" error.
api_key_path = 'hopsworks-api-key.txt'

if os.path.exists(api_key_path):
    with open(api_key_path, 'r') as key_file:
        # rstrip() removes the trailing newline most editors append to the file.
        os.environ["HOPSWORKS_API_KEY"] = key_file.read().rstrip()
elif not os.environ.get("HOPSWORKS_API_KEY"):
    raise FileNotFoundError(
        f"No Hopsworks API key found: create '{api_key_path}' or set the "
        "HOPSWORKS_API_KEY environment variable."
    )

In [3]:
# Log in to the Hopsworks project, then grab a handle on its feature store.
project_name = "ScalableMLandDeepLcourse"
project = hopsworks.login(project=project_name)
fs = project.get_feature_store()

2024-12-25 13:26:33,593 INFO: Initializing external client
2024-12-25 13:26:33,594 INFO: Base URL: https://c.app.hopsworks.ai:443
2024-12-25 13:26:35,006 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1170582


In [4]:
# Local directories for the model artifacts and their images.
# os.makedirs(..., exist_ok=True) creates the parent directory as well and is
# a no-op when the directories already exist, which avoids the race between
# an os.path.exists() check and the following os.mkdir() call.
model_dir = "trafic_pred_model"
images_dir = model_dir + "/images"
os.makedirs(images_dir, exist_ok=True)

## 4. Batch Inference

In [5]:
# Look up the registered model (pinned to version 4) in the project's model
# registry and download its artifacts; download() returns the local directory.
mr = project.get_model_registry()
retrieved_model = mr.get_model(name="trafic_pred_tree", version=4)
saved_model_dir = retrieved_model.download()

Downloading model artifact (0 dirs, 1 files)... DONE

In [6]:
# The downloaded artifact directory is expected to contain the serialized
# model under the name 'decision_tree_model.joblib'.
model_file_path = os.path.join(
    saved_model_dir,
    "decision_tree_model.joblib",
)

# Deserialize the model with joblib.
# NOTE(review): joblib.load unpickles the file, so only load artifacts that
# come from a trusted registry.
trafic_pred_tree = load(model_file_path)

In [7]:
# Target timestamp for the batch: the current local time truncated to the
# start of the hour, shifted three hours into the past.
today = (
    datetime.datetime.now().replace(minute=0, second=0, microsecond=0)
    - datetime.timedelta(hours=3)
)
today

datetime.datetime(2024, 12, 25, 10, 0)

In [8]:
# Handles on the traffic and weather/holiday feature groups (both version 1).
trafic_fg = fs.get_feature_group(name="stockholm_traffic", version=1)
weather_fg = fs.get_feature_group(name="stockholm_weather_holiday", version=1)

# Read only the rows whose `date` equals the target timestamp computed above.
trafic_df = trafic_fg.filter(trafic_fg.date == today).read()
weather_df = weather_fg.filter(weather_fg.date == today).read()


Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.21s) 
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.58s) 


In [9]:

# Replace the raw `frc` column with an integer-encoded `FRC` column.
# NOTE(review): the encoder is fitted on this inference batch only; confirm
# that the resulting codes match the encoding used when the model was trained.
label_encoder_FRC = LabelEncoder()

trafic_df = (
    trafic_df
    .assign(FRC=label_encoder_FRC.fit_transform(trafic_df["frc"]))
    .drop(columns=["frc"])  # remove the original frc column
)


In [10]:
# Replace the raw `coordinates` column with an integer-encoded `SEG` column.
# NOTE(review): the encoder is fitted on this inference batch only, so SEG
# values depend on which coordinates appear in the batch; confirm they line up
# with the encoding used at training time.
label_encoder_coor = LabelEncoder()

trafic_df = (
    trafic_df
    .assign(SEG=label_encoder_coor.fit_transform(trafic_df["coordinates"]))
    .drop(columns=["coordinates"])  # remove the original coordinates column
)

In [11]:
# Attach the weather/holiday features to every traffic row sharing the same
# `date`; the inner join drops rows without a match on either side.
merged_df = trafic_df.merge(weather_df, on="date", how="inner")

In [12]:
# Derive the hour-of-day feature ('heure') from the `date` column.
merged_df = merged_df.assign(heure=pd.to_datetime(merged_df["date"]).dt.hour)

In [13]:
# The raw timestamp is no longer needed once the hour has been extracted.
merged_df = merged_df.drop(columns="date")

In [16]:
# Display the merged frame.
# NOTE(review): the recorded output below already contains `predicted_rs`, and
# this cell is numbered In [16] while the prediction cell is In [15], so it was
# executed after the prediction cell rather than in document order.
merged_df

Unnamed: 0,currentspeed,freeflowspeed,currenttraveltime,freeflowtraveltime,confidence,roadclosure,relativespeed,FRC,SEG,temperature_2m_max,temperature_2m_min,precipitation_sum,wind_speed_10m_max,wind_direction_10m_dominant,holiday_status,heure,predicted_rs
0,72,72,31,31,1.00,False,1.000000,0,9,8.276,3.026,0.1,19.799999,240.155823,3,11,1.000000
1,12,12,86,86,1.00,False,1.000000,6,89,8.276,3.026,0.1,19.799999,240.155823,3,11,1.000000
2,28,28,69,69,1.00,False,1.000000,3,48,8.276,3.026,0.1,19.799999,240.155823,3,11,1.000000
3,38,38,30,30,1.00,False,1.000000,1,157,8.276,3.026,0.1,19.799999,240.155823,3,11,0.783784
4,17,24,43,31,0.91,False,0.708333,5,123,8.276,3.026,0.1,19.799999,240.155823,3,11,0.541667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
161,73,73,34,34,1.00,False,1.000000,0,6,8.276,3.026,0.1,19.799999,240.155823,3,11,1.000000
162,26,26,93,93,1.00,False,1.000000,3,49,8.276,3.026,0.1,19.799999,240.155823,3,11,0.750000
163,23,31,74,55,1.00,False,0.741935,1,131,8.276,3.026,0.1,19.799999,240.155823,3,11,0.656250
164,10,15,35,23,0.89,False,0.666667,4,121,8.276,3.026,0.1,19.799999,240.155823,3,11,0.666667


In [15]:
# Predict using the input features only. Any `predicted_rs` column left over
# from a previous run of this cell is dropped first, so re-running the cell
# does not feed earlier predictions back into the model as an extra feature.
# errors="ignore" makes the drop a no-op on the first run.
feature_df = merged_df.drop(columns=["predicted_rs"], errors="ignore")
merged_df['predicted_rs'] = trafic_pred_tree.predict(feature_df)

In [20]:
# Keep only the segment id and its prediction for the monitoring table.
batch_df = merged_df.loc[:, ["SEG", "predicted_rs"]]

In [22]:
# Fetch the monitoring feature group, creating it on first use.
# NOTE(review): the primary key `SEG` is produced by a label encoder fitted on
# each batch; confirm that it identifies the same segment across runs.
monitor_fg = fs.get_or_create_feature_group(
    name="rs_predictions",
    version=1,
    primary_key=["SEG"],
    description="Trafic prediction monitoring",
)

In [23]:
# Write the batch of (SEG, predicted_rs) rows to the monitoring feature group.
# The recorded output shows the upload followed by the launch of an offline
# materialization job.
monitor_fg.insert(batch_df)


Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/1170582/fs/1161285/fg/1394210


Uploading Dataframe: 100.00% |██████████| Rows 166/166 | Elapsed Time: 00:00 | Remaining Time: 00:00


Launching job: rs_predictions_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1170582/jobs/named/rs_predictions_1_offline_fg_materialization/executions


(Job('rs_predictions_1_offline_fg_materialization', 'SPARK'), None)