In [None]:

import hopsworks
import pandas as pd

# Authenticate with Hopsworks; the returned project handle is the entry
# point for the registry and dataset APIs obtained below.
project = hopsworks.login()

# Handles to the project's model registry and dataset API
mr = project.get_model_registry()
dataset_api = project.get_dataset_api()

# Ask the registry for the "ranking_model" version with the maximal "fscore"
ranking_model = mr.get_best_model(name="ranking_model", metric="fscore", direction="max")
ranking_model  # trailing expression so the notebook cell displays the model


In [None]:

%%writefile event_ranking_transformer.py

import os
import pandas as pd
import hopsworks

class Transformer(object):
    """Pre/post-processing transformer for the event ranking deployment.

    ``preprocess`` turns a request holding a user id and a query embedding
    into the feature matrix expected by the ranking model; ``postprocess``
    turns the model's scores into a ranking sorted by descending score.
    """

    def __init__(self):
        """Connect to Hopsworks and load feature views and the model schema."""
        # Connect to Hopsworks
        project = hopsworks.connection().get_project()
        self.fs = project.get_feature_store()

        # Feature views
        self.events_fv = self.fs.get_feature_view(name="events", version=1)
        self.users_fv = self.fs.get_feature_view(name="users", version=1)
        self.candidate_index = self.fs.get_feature_view(name="candidate_embeddings", version=1)

        # Column names (in order) that the ranking model expects as input
        mr = project.get_model_registry()
        model = mr.get_model(name="ranking_model", version=1)
        input_schema = model.model_schema["input_schema"]["columnar_schema"]
        self.ranking_model_feature_names = [feat["name"] for feat in input_schema]

    @staticmethod
    def _validate_sql_string(value):
        """Raise ``ValueError`` if *value* could break out of a quoted SQL literal.

        The feature store ``sql`` call used here takes a plain query string,
        so the request-supplied value is checked before being interpolated.
        A quote ends a single-quoted literal and a backslash can escape the
        closing quote, so both are rejected.
        """
        text = str(value)
        if "'" in text or "\\" in text:
            raise ValueError("user_id contains characters that are not allowed")

    def _unseen_candidates(self, query_emb, user_id):
        """Return the nearest-neighbour event ids the user has not interacted with."""
        neighbors = self.candidate_index.find_neighbors(query_emb, k=100)
        # The first element of each neighbour is used as the event id.
        neighbor_ids = [n[0] for n in neighbors]

        self._validate_sql_string(user_id)
        seen_events = self.fs.sql(
            f"SELECT event_id FROM interactions_1 WHERE user_id = '{user_id}'",
            online=True
        ).values.reshape(-1).tolist()

        # A set gives O(1) membership tests instead of scanning a list.
        seen_lookup = set(seen_events)
        return [eid for eid in neighbor_ids if eid not in seen_lookup]

    def _fetch_events(self, candidate_ids):
        """Fetch event features for *candidate_ids* as a DataFrame with named columns."""
        # return_type="pandas" keeps the feature names as column labels, which
        # the filters and merge in preprocess rely on; a single batched call
        # also avoids one round-trip per candidate.
        return self.events_fv.get_feature_vectors(
            entry=[{"event_id": eid} for eid in candidate_ids],
            return_type="pandas",
        )

    @staticmethod
    def _filter_events(events_df, weather_filter):
        """Keep future events, optionally restricted to weather-compatible ones."""
        events_df["start_time"] = pd.to_datetime(events_df["start_time"])
        # Build "now" in the column's timezone so tz-aware and naive columns
        # can both be compared (tz is None for a naive column).
        current_time = pd.Timestamp.now(tz=events_df["start_time"].dt.tz)
        events_df = events_df[events_df["start_time"] > current_time]

        if weather_filter:
            # NOTE(review): assumes event_indoor_capability is a boolean
            # column; `~` on a non-boolean column would misbehave - confirm.
            indoor = events_df["event_indoor_capability"]
            weather = events_df["weather_condition"]
            events_df = events_df[
                (indoor & weather.isin(["Rain", "Cloudy"])) |
                (~indoor & weather.isin(["Clear", "Windy"]))
            ]
        return events_df

    def preprocess(self, inputs):
        """Build the ranking model input for the first instance of a request.

        Args:
            inputs: Request dict; ``inputs["instances"][0]`` must provide
                ``"user_id"`` and ``"query_emb"`` and may provide
                ``"weather"`` (defaults to ``True``) to toggle the weather
                compatibility filter.

        Returns:
            ``{"inputs": [{"ranking_features": [...], "event_ids": [...]}]}``
            where both lists are aligned row by row. Both lists are empty
            when no unseen candidate remains.

        Raises:
            ValueError: If ``user_id`` contains a quote or a backslash.
        """
        request = inputs["instances"][0]
        user_id = request["user_id"]
        weather_filter = request.get("weather", True)

        candidate_ids = self._unseen_candidates(request["query_emb"], user_id)
        if not candidate_ids:
            # Nothing to rank; avoid querying the feature store with no keys.
            return {"inputs": [{"ranking_features": [], "event_ids": []}]}

        candidate_df = pd.DataFrame({"event_id": candidate_ids})
        events_df = self._filter_events(
            self._fetch_events(candidate_ids), weather_filter
        )

        # The inner merge keeps only candidates that survived the filters.
        merged_df = candidate_df.merge(events_df, on="event_id", how="inner")

        # Broadcast the user's features onto every candidate row
        user_feats = self.users_fv.get_feature_vector({"user_id": user_id}, return_type="pandas")
        for col in ["user_id", "user_city", "age", "user_weather_preference"]:
            merged_df[col] = user_feats[col].values[0]

        # Select features for the ranking model, in the model's column order
        model_inputs = merged_df[self.ranking_model_feature_names]

        return {
            "inputs": [{
                "ranking_features": model_inputs.values.tolist(),
                "event_ids": merged_df["event_id"].tolist()
            }]
        }

    def postprocess(self, outputs):
        """Pair scores with event ids and sort them in descending order.

        Args:
            outputs: Dict whose ``"predictions"`` entry holds aligned
                ``"scores"`` and ``"event_ids"`` sequences.

        Returns:
            ``{"ranking": [(score, event_id), ...]}`` sorted from highest to
            lowest; ties on score are ordered by event id, also descending.
        """
        preds = outputs["predictions"]
        ranking = list(zip(preds["scores"], preds["event_ids"]))
        ranking.sort(reverse=True)
        return {"ranking": ranking}
