In [5]:
import hopsworks
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# 1. Connect to Hopsworks
# No credential is embedded in the notebook. Called without `api_key_value`,
# the client is documented to resolve the key itself (HOPSWORKS_API_KEY
# environment variable / locally cached key file, otherwise an interactive
# prompt) -- NOTE(review): confirm against the installed hopsworks version.
project = hopsworks.login()
# Handle to the project's feature store; used by the cells below.
fs = project.get_feature_store()

Connection closed.
Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/25749
Connected. Call `.close()` to terminate connection gracefully.


In [None]:
# 2. Generate the very important data
def generate_data(n_samples=1000, seed=None):
    """Build a synthetic procrastination dataset of random integers.

    Every column is drawn independently and uniformly, so the label
    (`task_completion`) carries no real signal about the features.

    Args:
        n_samples: Number of rows to generate.
        seed: Optional seed. When given, a private `RandomState(seed)` is used
            so the same frame is produced on every call. When `None`
            (default), values are drawn from NumPy's global generator exactly
            as before, so an earlier `np.random.seed(...)` is still honoured.

    Returns:
        pandas.DataFrame with integer columns `procrastination_level` (1-10),
        `coffee_cups` (0-7), `last_minute_panic` (0/1), `zodiac_sign` (1-12)
        and `task_completion` (0-100).
    """
    # `np.random` (module) and `RandomState` expose the same `randint` API.
    rng = np.random if seed is None else np.random.RandomState(seed)
    # The upper bound of `randint` is exclusive, hence 11 / 8 / 2 / 13 / 101.
    data = pd.DataFrame({
        'procrastination_level': rng.randint(1, 11, n_samples),    # 1-10 scale
        'coffee_cups': rng.randint(0, 8, n_samples),               # Number of coffee cups
        'last_minute_panic': rng.randint(0, 2, n_samples),         # 1 for True, 0 for False
        'zodiac_sign': rng.randint(1, 13, n_samples),              # Zodiac sign as integer (1-12)
        'task_completion': rng.randint(0, 101, n_samples)          # Task completion as integer (0-100%)
    })
    return data

# Materialise the synthetic dataset (default size: 1000 rows) used by the cells below.
data = generate_data()

In [12]:
# 3. Create feature group
# Fetch version 1 of the feature group if it exists, otherwise define it.
# NOTE(review): the primary key is made of the four feature columns, whose
# randomly generated values can repeat across rows -- confirm how the feature
# store treats rows that share a primary key.
fg = fs.get_or_create_feature_group(
    name='procrastinator_integer',
    version=1,
    description='Features for predicting task completion based on procrastination',
    primary_key=[
        'procrastination_level',
        'coffee_cups',
        'last_minute_panic',
        'zodiac_sign',
    ],
)
# Upload the generated frame; `wait=True` is passed so the call blocks on the insert.
fg.insert(data, wait=True)

Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/25749/fs/25669/fg/1184082


Uploading Dataframe: 0.00% |          | Rows 0/1000 | Elapsed Time: 00:00 | Remaining Time: ?

Launching job: procrastinator_integer_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/25749/jobs/named/procrastinator_integer_1_offline_fg_materialization/executions


(<hsfs.core.job.Job at 0x310616470>, None)

In [13]:
# 4. Create a Feature view (mapping of the features for the model)
# Every column of the feature group is selected; `task_completion` is
# declared as the label so it is separated from the model inputs.
fv = fs.get_or_create_feature_view(
    name='procrastinator_integer_view',
    version=1,
    query=fg.select_all(),
    labels=['task_completion'],
    description='Feature view for procrastination prediction',
)

Feature view created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/25749/fs/25669/fv/procrastinator_integer_view/version/1


In [14]:
# 5. Get training data
# Read the feature view and hold out 20% of the rows for evaluation.
(
    X_train,
    X_test,
    y_train,
    y_test,
) = fv.train_test_split(test_size=0.2)

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.05s) 



In [None]:
# 6. Train model
# A fixed `random_state` keeps the forest itself reproducible for a given training set.
model = RandomForestRegressor(n_estimators=100, random_state=42)
# scikit-learn expects a 1-D target of shape (n_samples,) for single-output
# regression. The labels presumably arrive as a one-column frame (TODO confirm),
# which makes `fit` emit a column-vector DataConversionWarning; flattening is a
# no-op if they are already 1-D.
model.fit(X_train, np.ravel(y_train))

# 7. Evaluate model
# Score the held-out split with mean squared error (lower is better).
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

In [None]:
import os
from hsml.schema import Schema
from hsml.model_schema import ModelSchema
from joblib import dump

# Describe the model's inputs and outputs from the training frames.
input_schema = Schema(X_train)
output_schema = Schema(y_train)
model_schema = ModelSchema(
    input_schema=input_schema,
    output_schema=output_schema,
)

# 8. Save model
# Serialise the fitted estimator into a local directory that is uploaded below.
model_dir = "procrastinator_integers_model"
os.makedirs(model_dir, exist_ok=True)  # safe to re-run: an existing directory is reused
dump(model, os.path.join(model_dir, 'model.joblib'))

# Register the model with its metric, schema and a single-row input example
# sampled from the test features, then upload the local directory.
mr = project.get_model_registry()
procrastination_model = mr.sklearn.create_model(
    name="procrastinator_integers_model",
    description="Predicts task completion based on procrastination",
    metrics={"mse": mse},
    model_schema=model_schema,
    input_example=X_test.sample().to_numpy(),
)
procrastination_model.save(model_dir)


In [23]:
# 9. Create model serving endpoint
# Obtain the project's model-serving handle, then create a deployment for the
# registered model. The deployment is only created here, not started.
ms = project.get_model_serving()
deployment = procrastination_model.deploy(
    name="procrastinator40001k",
)

Connected. Call `.close()` to terminate connection gracefully.
Deployment created, explore it at https://c.app.hopsworks.ai:443/p/25749/deployments/299009
Before making predictions, start the deployment by using `.start()`


In [24]:
# Start the deployment created above; it has to be started before `.predict()` can be used.
deployment.start()

  0%|          | 0/5 [00:00<?, ?it/s]

Start making predictions by using `.predict()`


In [25]:
# 10 Try to predict something.
# The payload is named `request_payload` rather than `input` so the Python
# builtin `input()` is not shadowed for the rest of the kernel session.
# One instance = one row of feature values; presumably the order must follow
# the column order of the training features -- confirm against X_train.columns.
request_payload = {
    "instances": [
        [7,                     # procrastination_level
         1,                     # coffee_cups
         0,                     # last_minute_panic
         4                      # zodiac_sign
        ]
    ]
}

# Make the prediction
predictions = deployment.predict(request_payload)
# Only one instance was sent, so only the first prediction is read.
prediction_value = predictions['predictions'][0]

# Output the prediction result
print(f"Predicted task completion: {prediction_value:.2f}%")


Predicted task completion: 5430.00%
