## Data Owner Creates Dataset

In [None]:
import requests
import zipfile
import io
from pathlib import Path

from syft_rds.client.rds_client import init_session

In [None]:
# Host to connect to and the SyftBox client config file used for this session.
HOST_EMAIL = "khoa@openmined.org"
CONFIG = "~/.syftbox/config_stage.json"
rds_client = init_session(HOST_EMAIL, syftbox_client_config_path=CONFIG)

# Echo the session details so it is obvious which host/server this notebook talks to.
print(f"host: {rds_client.host}")
print(f"me: {rds_client.email}")
print(f"is admin?: {rds_client.is_admin}")
# NOTE(review): reads the private attribute `_syftbox_client`; may break if syft_rds internals change.
print(f"server = {rds_client._syftbox_client.config.server_url}")
# Stop here unless this is an admin session; presumably the create/delete
# calls in later cells need admin rights — confirm against syft_rds.
assert rds_client.is_admin

In [None]:
# Display `rds_client.datasets` as the cell output (presumably the datasets
# currently registered on the host — confirm against syft_rds).
rds_client.datasets

In [None]:
# Fetch the sample archives once. An existing './data' directory is taken to
# mean the data was already downloaded, so the whole step is skipped.
if not Path("./data").exists():
    print("--- downloading some data into './data'")
    sample_archive_urls = (
        "https://raw.githubusercontent.com/khoaguin/datasets/main/census.zip",
        "https://raw.githubusercontent.com/khoaguin/datasets/main/med_MNIST.zip",
    )

    # Download everything before extracting anything: if a request fails,
    # './data' is not created, so re-running the cell retries the download
    # instead of silently skipping it with only part of the data present.
    payloads = []
    for url in sample_archive_urls:
        r = requests.get(url, timeout=60)  # do not hang forever on a stalled connection
        r.raise_for_status()  # surface HTTP errors instead of feeding an error page to zipfile
        payloads.append(r.content)

    for payload in payloads:
        # Context manager closes the archive as soon as extraction finishes.
        with zipfile.ZipFile(io.BytesIO(payload)) as z:
            z.extractall("./data/")

### First Dataset

In [None]:
# Create the "Census" dataset from the files extracted under ./data/census.
data = rds_client.dataset.create(
    name="Census",  # must be unique; an exception is thrown if the name already exists
    path="./data/census/private",  # must exist
    mock_path="./data/census/mock",
    summary="Sample census dataset, 10 rows for mock and 10 rows for private",
    description_path="./data/census/README.md",
)
# Last expression of the cell: shows whatever `create` returned.
data

In [None]:
# Look the dataset up again by name; the following cells inspect this handle.
census_data = rds_client.dataset.get(name="Census")
census_data

In [None]:
# Show the mock-data location (a path-like value; a later cell joins "data.csv" onto it).
census_data.get_mock_path()

In [None]:
# Presumably renders a summary of the dataset; exact output is defined by syft_rds — confirm.
census_data.describe()

In [None]:
import pandas as pd

# Load the mock CSV into a DataFrame; as the cell's last expression it is
# rendered as the output.
census_mock_csv = census_data.get_mock_path() / "data.csv"
pd.read_csv(census_mock_csv)

In [None]:
# Show the value returned by get_readme_path() (presumably where the README given
# as `description_path` is stored — confirm).
census_data.get_readme_path()

In [None]:
# Show the dataset description (presumably the README contents — confirm).
census_data.get_description()

In [None]:
# Show the value returned by get_private_path(); this session asserted admin status earlier.
census_data.get_private_path()

### Second Dataset

In [None]:
# Create the "MedMNIST" dataset from the files extracted under ./data/med_MNIST.
data2 = rds_client.dataset.create(
    name="MedMNIST",  # must be unique; an exception is thrown if the name already exists
    path="./data/med_MNIST/private",  # must exist
    mock_path="./data/med_MNIST/mock",
    summary="Sample Medical MNIST dataset, 5 images for each class",
    description_path="./data/med_MNIST/README.md",
)
# Last expression of the cell: shows whatever `create` returned.
data2

In [None]:
# Look the dataset up again by name; the following cells inspect this handle.
medMnist_data = rds_client.dataset.get(name="MedMNIST")
medMnist_data

In [None]:
# Show the mock-data location (a path-like value; a later cell joins "test" onto it).
medMnist_data.get_mock_path()

In [None]:
# Presumably renders a summary of the dataset; exact output is defined by syft_rds — confirm.
medMnist_data.describe()

In [None]:
!uv pip install matplotlib
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

# show the first image
test_dir = medMnist_data.get_mock_path() / "test"
first_subdir = next(d for d in test_dir.iterdir() if d.is_dir())
first_jpeg = next(first_subdir.glob("*.jpeg"))
img = np.asarray(Image.open(first_jpeg))
plt.imshow(img)

In [None]:
# Show the dataset description (presumably the README contents — confirm).
medMnist_data.get_description()

In [None]:
# Show the value returned by get_private_path(); this session asserted admin status earlier.
medMnist_data.get_private_path()

### Delete Dataset

In [None]:
# Clean up: delete the "Census" dataset created above, so the notebook can be
# re-run without hitting the unique-name check noted on `create`.
rds_client.dataset.delete(name="Census")

In [None]:
# Clean up: delete the "MedMNIST" dataset created above, so the notebook can be
# re-run without hitting the unique-name check noted on `create`.
rds_client.dataset.delete(name="MedMNIST")