Commit fa3eb50

feat: upload a batch of inferences
1 parent eff6bf0 commit fa3eb50

5 files changed: 125 additions, 12 deletions
Lines changed: 54 additions & 0 deletions
@@ -0,0 +1,54 @@
+import os
+
+import pandas as pd
+from openlayer import Openlayer
+from openlayer.lib import data
+from openlayer.types.inference_pipelines import data_stream_params
+
+os.environ["OPENLAYER_API_KEY"] = "YOUR_API_KEY"
+pipeline_id = "YOUR_INFERENCE_PIPELINE_ID"
+
+df = pd.DataFrame(
+    {
+        "CreditScore": [600],
+        "Geography": ["France"],
+        "Gender": ["Male"],
+        "Age": [40],
+        "Tenure": [5],
+        "Balance": [100000],
+        "NumOfProducts": [1],
+        "HasCrCard": [1],
+        "IsActiveMember": [1],
+        "EstimatedSalary": [50000],
+        "AggregateRate": [0.5],
+        "Year": [2020],
+        "Prediction": [0],
+    }
+)
+
+config = data_stream_params.ConfigTabularClassificationData(
+    categorical_feature_names=["Gender", "Geography"],
+    class_names=["Retained", "Exited"],
+    feature_names=[
+        "CreditScore",
+        "Geography",
+        "Gender",
+        "Age",
+        "Tenure",
+        "Balance",
+        "NumOfProducts",
+        "HasCrCard",
+        "IsActiveMember",
+        "EstimatedSalary",
+        "AggregateRate",
+        "Year",
+    ],
+    predictions_column_name="Prediction",
+)
+
+data.upload_batch_inferences(
+    client=Openlayer(),
+    inference_pipeline_id=pipeline_id,
+    dataset_df=df,
+    config=config,
+)
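
Note: the example above uploads a single row, but upload_batch_inferences accepts an arbitrarily long DataFrame. A minimal sketch of a multi-row batch, reusing the pipeline_id and config defined above (the feature values here are made up for illustration):

import pandas as pd

# Each list supplies one value per inference in the batch.
batch_df = pd.DataFrame(
    {
        "CreditScore": [600, 720, 580],
        "Geography": ["France", "Spain", "Germany"],
        "Gender": ["Male", "Female", "Male"],
        "Age": [40, 35, 52],
        "Tenure": [5, 2, 8],
        "Balance": [100000, 0, 50000],
        "NumOfProducts": [1, 2, 1],
        "HasCrCard": [1, 0, 1],
        "IsActiveMember": [1, 1, 0],
        "EstimatedSalary": [50000, 90000, 40000],
        "AggregateRate": [0.5, 0.3, 0.7],
        "Year": [2020, 2020, 2021],
        "Prediction": [0, 1, 0],
    }
)

# The same config applies; only the DataFrame grows.
data.upload_batch_inferences(
    client=Openlayer(),
    inference_pipeline_id=pipeline_id,
    dataset_df=batch_df,
    config=config,
)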

examples/monitoring/upload_reference_dataset.py

Lines changed: 0 additions & 1 deletion
@@ -51,5 +51,4 @@
     inference_pipeline_id=pipeline_id,
     dataset_df=df,
     config=config,
-    storage_type=data.StorageType.FS,
 )
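
Note: storage_type remains an optional parameter on upload_reference_dataframe (it still defaults to None in the signature further below), so the example simply drops the explicit argument. Callers that need a specific storage backend can presumably still pass one:

data.upload_reference_dataframe(
    client=Openlayer(),
    inference_pipeline_id=pipeline_id,
    dataset_df=df,
    config=config,
    storage_type=data.StorageType.FS,  # optional; omitted in the updated example
)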

src/openlayer/lib/data/__init__.py

Lines changed: 2 additions & 1 deletion
@@ -1,6 +1,7 @@
 """Data upload functions."""
 
-__all__ = ["upload_reference_dataframe", "StorageType"]
+__all__ = ["StorageType", "upload_reference_dataframe", "upload_batch_inferences"]
 
 from ._upload import StorageType
 from .reference_dataset import upload_reference_dataframe
+from .batch_inferences import upload_batch_inferences
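
Note: with this re-export in place, both upload helpers and StorageType share one public namespace, which is what the example script above relies on:

# All three names are now importable from openlayer.lib.data:
from openlayer.lib.data import (
    StorageType,
    upload_batch_inferences,
    upload_reference_dataframe,
)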
src/openlayer/lib/data/batch_inferences.py

Lines changed: 66 additions & 0 deletions
@@ -0,0 +1,66 @@
+"""Upload a batch of inferences to the Openlayer platform."""
+
+import os
+import tarfile
+import tempfile
+import time
+from typing import Optional
+import httpx
+
+import pandas as pd
+
+from ... import Openlayer
+from ..._utils import maybe_transform
+from ...types.inference_pipelines import data_stream_params
+from .. import utils
+from . import StorageType, _upload
+
+
+def upload_batch_inferences(
+    client: Openlayer,
+    inference_pipeline_id: str,
+    dataset_df: pd.DataFrame,
+    config: data_stream_params.Config,
+    storage_type: Optional[StorageType] = None,
+) -> None:
+    """Uploads a batch of inferences to the Openlayer platform."""
+    uploader = _upload.Uploader(client, storage_type)
+    object_name = f"batch_data_{time.time()}_{inference_pipeline_id}.tar.gz"
+
+    # Fetch presigned url
+    presigned_url_response = client.storage.presigned_url.create(
+        object_name=object_name,
+    )
+
+    # Write dataset and config to temp directory
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        temp_file_path = f"{tmp_dir}/dataset.csv"
+        dataset_df.to_csv(temp_file_path, index=False)
+
+        # Copy relevant files to tmp dir
+        config["label"] = "production"
+        utils.write_yaml(
+            maybe_transform(config, data_stream_params.Config),
+            f"{tmp_dir}/dataset_config.yaml",
+        )
+
+        tar_file_path = os.path.join(tmp_dir, object_name)
+        with tarfile.open(tar_file_path, mode="w:gz") as tar:
+            tar.add(tmp_dir, arcname=os.path.basename("monitoring_data"))
+
+        # Upload to storage
+        uploader.upload(
+            file_path=tar_file_path,
+            object_name=object_name,
+            presigned_url_response=presigned_url_response,
+        )
+
+    # Notify the backend
+    client.post(
+        f"/inference-pipelines/{inference_pipeline_id}/data",
+        cast_to=httpx.Response,
+        body={
+            "storageUri": presigned_url_response.storage_uri,
+            "performDataMerge": False,
+        },
+    )
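
Note: a standalone sketch of the packaging step above, to make the archive layout concrete. os.path.basename("monitoring_data") is a no-op (it returns the string unchanged), so the files are nested under monitoring_data/ in the tarball; and although the in-progress .tar.gz lives inside tmp_dir, adding tmp_dir is safe because Python's tarfile skips the archive it is currently writing. The file names below match what the helper writes; the contents are placeholders:

import os
import tarfile
import tempfile

with tempfile.TemporaryDirectory() as tmp_dir:
    # Stand-ins for the CSV and YAML the helper produces.
    for name in ("dataset.csv", "dataset_config.yaml"):
        with open(os.path.join(tmp_dir, name), "w", encoding="utf-8") as f:
            f.write("placeholder\n")

    tar_file_path = os.path.join(tmp_dir, "batch_data_example.tar.gz")
    with tarfile.open(tar_file_path, mode="w:gz") as tar:
        # tarfile skips tar_file_path itself while adding its parent dir.
        tar.add(tmp_dir, arcname="monitoring_data")

    with tarfile.open(tar_file_path) as tar:
        print(tar.getnames())
        # ['monitoring_data', 'monitoring_data/dataset.csv',
        #  'monitoring_data/dataset_config.yaml']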

src/openlayer/lib/data/reference_dataset.py

Lines changed: 3 additions & 10 deletions
@@ -1,7 +1,6 @@
 """Upload reference datasets to the Openlayer platform."""
 
 import os
-import shutil
 import tarfile
 import tempfile
 import time
@@ -23,9 +22,7 @@ def upload_reference_dataframe(
     config: data_stream_params.Config,
     storage_type: Optional[StorageType] = None,
 ) -> None:
-    """Upload a reference dataset to the Openlayer platform and update the
-    inference pipeline with the new reference dataset.
-    """
+    """Uploads a reference dataset to the Openlayer platform."""
     uploader = _upload.Uploader(client, storage_type)
     object_name = f"reference_dataset_{time.time()}_{inference_pipeline_id}.tar.gz"
 
@@ -40,14 +37,11 @@ def upload_reference_dataframe(
         dataset_df.to_csv(temp_file_path, index=False)
 
         # Copy relevant files to tmp dir
-        folder_path = os.path.join(tmp_dir, "reference")
-        os.mkdir(folder_path)
         config["label"] = "reference"
         utils.write_yaml(
             maybe_transform(config, data_stream_params.Config),
-            f"{folder_path}/dataset_config.yaml",
+            f"{tmp_dir}/dataset_config.yaml",
         )
-        shutil.copy(temp_file_path, folder_path)
 
         tar_file_path = os.path.join(tmp_dir, object_name)
         with tarfile.open(tar_file_path, mode="w:gz") as tar:
@@ -61,8 +55,7 @@ def upload_reference_dataframe(
         )
 
     # Notify the backend
-    response = client.inference_pipelines.update(
+    client.inference_pipelines.update(
         inference_pipeline_id=inference_pipeline_id,
         reference_dataset_uri=presigned_url_response.storage_uri,
     )
-    print(response)
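
Note: this change flattens the reference upload's temp layout to match the new batch helper, and drops the debug print(response) from the backend notification. A before/after sketch of tmp_dir just prior to tarring, reconstructed from the removed lines:

# before:                              # after:
#   tmp_dir/dataset.csv                  tmp_dir/dataset.csv
#   tmp_dir/reference/                   tmp_dir/dataset_config.yaml
#       dataset.csv   (shutil copy)
#       dataset_config.yaml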
