Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 7 additions & 9 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,8 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.9"]
ml-deps:
- "torch==2.1.2+cpu torchvision==0.16.2+cpu torchdata==0.7.1 tensorflow-cpu==2.15.1 "
- "torch==2.3.1+cpu torchvision==0.18.1+cpu torchdata==0.8.0 'tensorflow-cpu<2.16'"
python-version: ["3.12"]

env:
run_coverage: ${{ github.ref == 'refs/heads/master' }}

Expand All @@ -28,14 +26,13 @@ jobs:
uses: actions/cache@v4
with:
path: ~/.cache/pip
key: ${{ runner.os }}:ml-deps=[${{ matrix.ml-deps }}]
key: ${{ runner.os }} # :ml-deps=[${{ matrix.ml-deps }}]

- name: Install dependencies
run: |
pip install --upgrade pip
pip install --extra-index-url https://download.pytorch.org/whl/cpu ${{ matrix.ml-deps }}
pip install pytest-mock pytest-cov scikit-learn==1.0.2
pip install -e .[cloud]
pip install --upgrade pip setuptools wheel
pip install pytest-mock pytest-cov
pip install -e .[full]

- name: Run pre-commit hooks
run: |
Expand All @@ -45,6 +42,7 @@ jobs:
- name: Run notebook examples
run: |
pip install pytest-xdist nbmake matplotlib idx2numpy "numpy<2"
rm -rf examples/data/openml
pytest --disable-warnings --nbmake examples/{models,readers}
# Run tiledb-cloud in parallel
if [[ "${{ secrets.TILEDB_API_TOKEN }}" != "" ]]; then
Expand Down
2 changes: 1 addition & 1 deletion examples/cloud/serverless_training/pytorch/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Firstly, we ingest all training images and labels in TileDB arrays and register
and finally, we obtain predictions from the trained model in a serverless fashion. To run the example, you will need a TileDB-Cloud account as described
[here](https://docs.tiledb.com/cloud/tutorials/start-here). After signing up, export your username and password
as environment variables (**TILEDB_USER_NAME**, **TILEDB_PASSWD**) in order to run the ingestion, model training and prediction UDFs. Moreover,
please add your TileDB namespace and your **S3** bucket in each script.
please add your TileDB workspace and teamspace, as well as your **S3** bucket, in each script.

# Steps

Expand Down
24 changes: 16 additions & 8 deletions examples/cloud/serverless_training/pytorch/data_ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,25 @@
from typing import Any

import numpy as np
import tiledb.cloud
import tiledb.client

# Your TileDB username and password, exported as environment variables
TILEDB_USER_NAME = os.environ.get("TILEDB_USER_NAME")
TILEDB_PASSWD = os.environ.get("TILEDB_PASSWD")

# Your TileDB namespace
TILEDB_NAMESPACE = "your_tiledb_namespace"
# Your TileDB workspace/teamspace
TILEDB_WORKSPACE = "your_tiledb_WORKSPACE"
TILEDB_TEAMSPACE = "your_tiledb_TEAMSPACE"

# Your S3 bucket
S3_BUCKET = "your_s3_bucket"

IMAGES_URI = "tiledb://{}/s3://{}/mnist_images".format(TILEDB_NAMESPACE, S3_BUCKET)
LABELS_URI = "tiledb://{}/s3://{}/mnist_labels".format(TILEDB_NAMESPACE, S3_BUCKET)
IMAGES_URI = (
f"tiledb://{TILEDB_WORKSPACE}/{TILEDB_TEAMSPACE}/s3://{S3_BUCKET}/mnist_images"
)
LABELS_URI = (
f"tiledb://{TILEDB_WORKSPACE}/{TILEDB_TEAMSPACE}/s3://{S3_BUCKET}/mnist_labels"
)


# Let's define an ingestion function
Expand Down Expand Up @@ -62,8 +67,11 @@ def mnist_ingest(ingestion_func: Any) -> None:
ingestion_func(data=labels, batch_size=64, uri=LABELS_URI)


tiledb.cloud.login(username=TILEDB_USER_NAME, password=TILEDB_PASSWD)
tiledb.client.configure(
username=TILEDB_USER_NAME, password=TILEDB_PASSWD, workspace=TILEDB_WORKSPACE
)
tiledb.client.login()

tiledb.cloud.udf.exec(mnist_ingest, ingestion_func=ingest_in_tiledb)
tiledb.client.udf.exec(mnist_ingest, ingestion_func=ingest_in_tiledb)

print(tiledb.cloud.last_udf_task().logs)
print(tiledb.client.last_udf_task().logs)
Original file line number Diff line number Diff line change
@@ -1,21 +1,28 @@
import os
from typing import List

import tiledb.cloud
import tiledb.client

# Your TileDB username and password, exported as environment variables
TILEDB_USER_NAME = os.environ.get("TILEDB_USER_NAME")
TILEDB_PASSWD = os.environ.get("TILEDB_PASSWD")

# Your TileDB workspace and teamspace
TILEDB_NAMESPACE = "your_tiledb_namespace"
TILEDB_WORKSPACE = "your_tiledb_WORKSPACE"
TILEDB_TEAMSPACE = "your_tiledb_TEAMSPACE"

# Your S3 bucket
S3_BUCKET = "your_s3_bucket"

IMAGES_URI = "tiledb://{}/s3://{}/mnist_images".format(TILEDB_NAMESPACE, S3_BUCKET)
LABELS_URI = "tiledb://{}/s3://{}/mnist_labels".format(TILEDB_NAMESPACE, S3_BUCKET)
MODEL_URI = "tiledb://{}/s3://{}/mnist_model".format(TILEDB_NAMESPACE, S3_BUCKET)
IMAGES_URI = (
f"tiledb://{TILEDB_WORKSPACE}/{TILEDB_TEAMSPACE}/s3://{S3_BUCKET}/mnist_images"
)
LABELS_URI = (
f"tiledb://{TILEDB_WORKSPACE}/{TILEDB_TEAMSPACE}/s3://{S3_BUCKET}/mnist_labels"
)
MODEL_URI = (
f"tiledb://{TILEDB_WORKSPACE}/{TILEDB_TEAMSPACE}/s3://{S3_BUCKET}/mnist_model"
)

IO_BATCH_SIZE = 20000

Expand Down Expand Up @@ -70,8 +77,11 @@ def forward(self, x: torch.Tensor) -> Any:
return [np.argmax(pred) for pred in output.numpy()]


tiledb.cloud.login(username=TILEDB_USER_NAME, password=TILEDB_PASSWD)
tiledb.client.configure(
username=TILEDB_USER_NAME, password=TILEDB_PASSWD, workspace=TILEDB_WORKSPACE
)
tiledb.client.login()

predictions = tiledb.cloud.udf.exec(predict)
predictions = tiledb.client.udf.exec(predict)

print(predictions)
41 changes: 28 additions & 13 deletions examples/cloud/serverless_training/pytorch/model_training.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
import os

import tiledb.cloud
import tiledb.client

from tiledb.ml.readers.types import ArrayParams

# Your TileDB username and password, exported as environment variables
TILEDB_USER_NAME = os.environ.get("TILEDB_USER_NAME")
TILEDB_PASSWD = os.environ.get("TILEDB_PASSWD")

# Your TileDB namespace
TILEDB_NAMESPACE = "your_tiledb_namespace"

# Your S3 bucket
S3_BUCKET = "your_s3_bucket"
TILEDB_WORKSPACE = "TileDB-Inc."
TILEDB_TEAMSPACE = "tiledb-ml-ts"

IMAGES_URI = "tiledb://{}/s3://{}/mnist_images".format(TILEDB_NAMESPACE, S3_BUCKET)
LABELS_URI = "tiledb://{}/s3://{}/mnist_labels".format(TILEDB_NAMESPACE, S3_BUCKET)
MODEL_URI = "tiledb://{}/s3://{}/mnist_model".format(TILEDB_NAMESPACE, S3_BUCKET)
IMAGES_URI = f"tiledb://{TILEDB_WORKSPACE}/{TILEDB_TEAMSPACE}/mnist_images"
LABELS_URI = f"tiledb://{TILEDB_WORKSPACE}/{TILEDB_TEAMSPACE}/mnist_labels"
MODEL_URI = f"tiledb://{TILEDB_WORKSPACE}/{TILEDB_TEAMSPACE}/mnist_model"

# The size of each slice read from the image and label TileDB arrays.
IO_BATCH_SIZE = 20000
Expand Down Expand Up @@ -49,8 +49,20 @@ def forward(self, x: torch.Tensor) -> Any:
logits = self.linear_relu_stack(x)
return logits

def do_random_noise(img: np.ndarray, mag: float = 0.1) -> np.ndarray:
    """Return a noisy version of ``img``, clipped to the range [0, 1].

    One uniform sample from ``np.random.uniform(-1, 1)`` is drawn per
    element of ``img``, scaled by ``mag`` and added to the image. The
    input array itself is not modified; a new array is returned.

    :param img: Array to perturb.
    :param mag: Scale factor applied to the noise before it is added.
    :return: New array with the shape of ``img`` and every value in [0, 1].
    """
    perturbation = np.random.uniform(-1, 1, img.shape) * mag
    # Clip after adding so the perturbed values stay within [0, 1].
    return np.clip(img + perturbation, 0, 1)

with tiledb.open(IMAGES_URI) as x, tiledb.open(LABELS_URI) as y:
train_loader = PyTorchTileDBDataLoader(x, y, batch_size=IO_BATCH_SIZE)
train_loader = PyTorchTileDBDataLoader(
ArrayParams(x, fn=do_random_noise),
ArrayParams(y),
batch_size=IO_BATCH_SIZE,
num_workers=0,
shuffle_buffer_size=256,
)

net = Net(shape=(28, 28))
criterion = nn.CrossEntropyLoss()
Expand Down Expand Up @@ -95,7 +107,7 @@ def forward(self, x: torch.Tensor) -> Any:

model = PyTorchTileDBModel(
uri="mnist_model",
namespace=TILEDB_NAMESPACE,
teamspace=TILEDB_TEAMSPACE,
model=net,
optimizer=optimizer,
)
Expand All @@ -104,8 +116,11 @@ def forward(self, x: torch.Tensor) -> Any:
model.save()


tiledb.cloud.login(username=TILEDB_USER_NAME, password=TILEDB_PASSWD)
# tiledb.client.configure(
# username=TILEDB_USER_NAME, password=TILEDB_PASSWD, workspace=TILEDB_WORKSPACE
# )
tiledb.client.login()

tiledb.cloud.udf.exec(train)
tiledb.client.udf.exec(train)

print(tiledb.cloud.last_udf_task().logs)
print(tiledb.client.last_udf_task().logs)
Loading
Loading