In [None]:
# pip install "hopsworks[python]"

import hopsworks
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

from hopsworks.hsfs.builtin_transformations import label_encoder  # For transformations

# Connect to Hopsworks: log in to obtain a project handle, then fetch that
# project's feature store handle. Later cells reuse both names (`project` for
# the model registry and model serving, `fs` for feature groups and views).
project = hopsworks.login()
fs = project.get_feature_store()


In [None]:
# Generate base data
def generate_base_data(n_samples=1000, seed=None):
    """Build a synthetic table of base procrastination features.

    Args:
        n_samples: Number of rows (one per user) to generate.
        seed: Optional integer seed. When ``None`` (the default) values are
            drawn from NumPy's global random state, exactly as before this
            parameter existed; pass an integer to get reproducible data that
            does not depend on, or disturb, the global state.

    Returns:
        pandas.DataFrame with ``n_samples`` rows and the columns
        ``user_id`` (0 .. n_samples - 1, the common key),
        ``procrastination_level`` (integers 1-10),
        ``coffee_cups`` (integers 0-7) and
        ``task_completion`` (integers 0-100).
    """
    # `np.random` and `RandomState` expose the same randint API, so the
    # unseeded path keeps using the global generator.
    rng = np.random.RandomState(seed) if seed is not None else np.random
    return pd.DataFrame({
        'user_id': range(n_samples),  # Common key
        'procrastination_level': rng.randint(1, 11, n_samples),
        'coffee_cups': rng.randint(0, 8, n_samples),
        'task_completion': rng.randint(0, 101, n_samples)
    })

# Generate additional features
def generate_extra_data(n_samples=1000, seed=None):
    """Build a synthetic table of additional procrastination features.

    Args:
        n_samples: Number of rows (one per user) to generate.
        seed: Optional integer seed. When ``None`` (the default) values are
            drawn from NumPy's global random state, exactly as before this
            parameter existed; pass an integer to get reproducible data that
            does not depend on, or disturb, the global state.

    Returns:
        pandas.DataFrame with ``n_samples`` rows and the columns
        ``user_id`` (0 .. n_samples - 1, the common key),
        ``last_minute_panic`` (0 or 1) and
        ``zodiac_sign`` (one of 'Aries', 'Taurus', 'Gemini', 'Cancer').
    """
    # `np.random` and `RandomState` expose the same randint/choice API, so the
    # unseeded path keeps using the global generator.
    rng = np.random.RandomState(seed) if seed is not None else np.random
    return pd.DataFrame({
        'user_id': range(n_samples),  # Common key
        'last_minute_panic': rng.randint(0, 2, n_samples),
        'zodiac_sign': rng.choice(['Aries', 'Taurus', 'Gemini', 'Cancer'], n_samples)
    })

# Build both synthetic tables with the generators' default row count. Each
# table carries a `user_id` column, which later cells use as the primary key
# and as the join key.
base_data = generate_base_data()
extra_data = generate_extra_data()


In [None]:
# Create (or fetch, if it already exists) the first feature group, keyed on
# user_id and enabled for online serving.
base_fg = fs.get_or_create_feature_group(
    name='procrastinator_base',
    version=1,
    primary_key=['user_id'],
    description='Base features for procrastination prediction',
    online_enabled=True
)
# Wait for the ingestion job to finish before moving on. Without this the
# call can return while data is still being written, and the later training
# split may read an empty or partial feature group.
base_fg.insert(base_data, write_options={"wait_for_job": True})

# Create (or fetch) the second feature group with the same key and settings.
extra_fg = fs.get_or_create_feature_group(
    name='procrastinator_extra',
    version=1,
    primary_key=['user_id'],
    description='Additional procrastination features',
    online_enabled=True
)
# Same reasoning as above: block until the data is fully written.
extra_fg.insert(extra_data, write_options={"wait_for_job": True})

In [None]:
# Transformations applied by the feature view: label-encode the categorical
# zodiac sign. The column is addressed by its prefixed name because the join
# below adds the "extra_" prefix to the second group's features.
transformation_functions = [label_encoder('extra_zodiac_sign')]

# Join every feature of both groups on the shared user_id key.
query = base_fg.select_all().join(extra_fg.select_all(), on=['user_id'], prefix="extra_")

# Create (or fetch) the feature view over the joined query, with
# task_completion as the label.
fv = fs.get_or_create_feature_view(
    name='procrastinator_view',
    version=1,
    query=query,
    labels=['task_completion'],
    transformation_functions=transformation_functions,
    description='Combined view with transformations',
)

In [None]:
# Get training data: ask the feature view for a train/test split with
# test_size=0.2 and unpack the result into features (X_*) and labels (y_*)
# for each split.
X_train, X_test, y_train, y_test = fv.train_test_split(test_size=0.2)

In [None]:
# 6. Train model
# The labels are flattened to 1-D before fitting: scikit-learn expects a 1-D
# target for single-output regression and emits a DataConversionWarning when
# given a single-column table (which y_train presumably is -- np.ravel is a
# no-op if it is already 1-D).
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, np.ravel(y_train))

# 7. Evaluate model on the held-out split
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

In [None]:
import os
from hsml.schema import Schema
from hsml.model_schema import ModelSchema
from joblib import dump

# 8. Save model
# Write the fitted estimator into a local directory; that directory is what
# gets uploaded to the registry at the end of this cell.
model_dir = "procrastinator_model"
os.makedirs(model_dir, exist_ok=True)
dump(model, os.path.join(model_dir, 'model.joblib'))

# Describe the model's inputs and outputs from the training features/labels.
input_schema = Schema(X_train)
output_schema = Schema(y_train)
model_schema = ModelSchema(output_schema=output_schema, input_schema=input_schema)

# Create the registry entry (with its metric, schema and an example input)
# and upload the saved artifacts.
mr = project.get_model_registry()
procrastination_model = mr.sklearn.create_model(
    name="procrastinator_model",
    description="Predicts task completion based on procrastination",
    metrics={"mse": mse},
    model_schema=model_schema,
    input_example=X_test.sample().to_numpy(),  # one randomly drawn test row
)
procrastination_model.save(model_dir)


In [None]:
# 9. Create model serving endpoint: fetch the project's model-serving handle
# and create a deployment named "procrastinator2025" from the registered model.
# NOTE(review): `ms` is not referenced by any later cell shown here.
ms = project.get_model_serving()
deployment = procrastination_model.deploy(name="procrastinator2025")

In [None]:
# Start the deployment created in the previous cell; the prediction cell
# below sends its request to this deployment.
deployment.start()

In [None]:
# 10. Try to predict something.
# The payload is named `prediction_request` rather than `input` so that the
# built-in input() function is not shadowed for the rest of the kernel session.
# NOTE(review): the values are presumably expected in the same order as the
# training feature columns -- confirm against X_train.columns.
prediction_request = {
    "instances": [
        [
            9,   # procrastination_level
            10,  # coffee_cups
            0,   # last_minute_panic
            1,   # zodiac_sign (label-encoded value)
        ]
    ]
}

# Make the prediction
predictions = deployment.predict(prediction_request)
prediction_value = predictions['predictions'][0]

# Output the prediction result
print(f"Predicted task completion: {prediction_value:.2f}%")
