## Data Owner Creates Dataset

In [None]:
from pathlib import Path
import requests
import zipfile
import io

from syft_rds.client.rds_client import init_session
from syft_rds.orchestra import setup_rds_server, remove_rds_stack_dir

# Runs before any server is set up, with the same key/root_dir the setup
# cells use — presumably to clear a stack directory left by a previous run.
remove_rds_stack_dir(key="rds", root_dir=Path("."))

In [None]:
# Set up the data owner's (DO) server and open a session against it.
# The DO connects to its own address, so `host` equals the DO's email.
do_stack = setup_rds_server(email="do@openmined.org", key="rds", root_dir=Path("."))
do = init_session(host="do@openmined.org", syftbox_client=do_stack.client)

# Print the session details for a quick visual sanity check.
print(f"host: {do.host}")
print(f"me: {do.email}")
print(f"is admin?: {do.is_admin}")
# NOTE(review): reads a private attribute of the session object.
print(f"server = {do._syftbox_client.config.server_url}")
# The rest of the notebook creates and deletes datasets through `do`,
# so stop here if the session is not an admin session.
assert do.is_admin

In [None]:
# Display the datasets currently returned for the DO session
# (this cell runs before any dataset is created in this notebook).
do.dataset.get_all()

In [None]:
# Fetch the sample datasets once; skipped when './data' already exists.
if not Path("./data").exists():
    print("--- downloading some data into './data'")
    # Download every archive BEFORE extracting anything: if a download fails,
    # './data' has not been created yet, so re-running the cell retries
    # instead of being skipped by the existence check above.
    archives = []
    for url in (
        "https://raw.githubusercontent.com/khoaguin/datasets/main/census.zip",
        "https://raw.githubusercontent.com/khoaguin/datasets/main/med_MNIST.zip",
    ):
        # Timeout so a stalled connection cannot hang the notebook forever.
        r = requests.get(url, timeout=60)
        # Surface HTTP errors here instead of as an opaque BadZipFile when
        # the response body is an error page rather than a zip archive.
        r.raise_for_status()
        archives.append(r.content)

    for content in archives:
        # The context manager closes the archive once extraction finishes.
        with zipfile.ZipFile(io.BytesIO(content)) as z:
            z.extractall("./data/")

### First Dataset

In [None]:
# Register the Census dataset with the DO's server from the downloaded files.
data = do.dataset.create(
    name="Census",  # Must be unique; an exception is raised if the name already exists.
    path="./data/census/private",  # Private data directory; must exist.
    mock_path="./data/census/mock",
    summary="Sample census dataset, 10 rows for mock and 10 rows for private",
    description_path="./data/census/README.md",
)
# Last expression of the cell: display the object returned by create().
data

In [None]:
# Look the dataset up again by name and display it.
census_data = do.dataset.get(name="Census")
census_data

In [None]:
# Display the dataset's mock-data path.
census_data.get_mock_path()

In [None]:
# Display whatever describe() reports for the Census dataset.
census_data.describe()

In [None]:
import pandas as pd

# Build the path to the mock CSV first, then load it; the resulting
# DataFrame is the cell's last expression and is rendered as its output.
mock_csv_path = census_data.get_mock_path() / "data.csv"
pd.read_csv(mock_csv_path)

In [None]:
# Display the path of the dataset's README.
census_data.get_readme_path()

In [None]:
# Display the dataset's description.
census_data.get_description()

In [None]:
# Display the private-data path; this call is made from the DO (admin) session.
census_data.get_private_path()

### Second Dataset

In [None]:
# Register the MedMNIST dataset with the DO's server from the downloaded files.
data2 = do.dataset.create(
    name="MedMNIST",  # Must be unique; an exception is raised if the name already exists.
    path="./data/med_MNIST/private",  # Private data directory; must exist.
    mock_path="./data/med_MNIST/mock",
    summary="Sample Medical MNIST dataset, 5 images for each class",
    description_path="./data/med_MNIST/README.md",
)
# Last expression of the cell: display the object returned by create().
data2

In [None]:
# Look the dataset up again by name and display it.
medMnist_data = do.dataset.get(name="MedMNIST")
medMnist_data

In [None]:
# Display the dataset's mock-data path.
medMnist_data.get_mock_path()

In [None]:
# Display whatever describe() reports for the MedMNIST dataset.
medMnist_data.describe()

In [None]:
# IPython shell escape: install matplotlib with `uv` before importing it
# (requires the `uv` executable to be available to the kernel).
!uv pip install matplotlib
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

# show the first image
test_dir = medMnist_data.get_mock_path() / "test"
# iterdir() yields entries in arbitrary order, so "first" is just whichever
# sub-directory comes back first; next() raises StopIteration if there is none.
first_subdir = next(d for d in test_dir.iterdir() if d.is_dir())
# Likewise raises StopIteration if that sub-directory has no *.jpeg file.
first_jpeg = next(first_subdir.glob("*.jpeg"))
# Convert the PIL image to a NumPy array and render it with matplotlib.
img = np.asarray(Image.open(first_jpeg))
plt.imshow(img)

In [None]:
# Display the dataset's description.
medMnist_data.get_description()

In [None]:
# Display the private-data path; this call is made from the DO (admin) session.
medMnist_data.get_private_path()

## Data Scientist (DS) Lists Datasets

In [None]:
# NOTE(review): this repeats the import from the first cell, and the function
# is not called directly here — the session below comes from
# `ds_stack.init_session` instead.
from syft_rds.client.rds_client import init_session

# Set up a second stack for the data scientist (DS), using the same key and
# root_dir as the DO's stack.
ds_stack = setup_rds_server(email="ds@openmined.org", key="rds", root_dir=Path("."))

# The DS connects to the DO's host rather than to its own address.
ds = ds_stack.init_session(host="do@openmined.org")
print(f"host: {ds.host}")
print(f"me: {ds.email}")
print(f"is admin?: {ds.is_admin}")
# A session on someone else's host must not be an admin session.
assert not ds.is_admin

In [None]:
# List the datasets through the DS session and display the result.
datasets = ds.dataset.get_all()
datasets

In [None]:
# Two datasets were created by the DO above: "Census" and "MedMNIST".
assert len(datasets) == 2

## DO Deletes Datasets

In [None]:
# Delete both datasets by name, using the DO (admin) session.
do.dataset.delete(name="Census")
do.dataset.delete(name="MedMNIST")

## Cleanup

In [None]:
# Same call as at the top of the notebook, with the key/root_dir used by the
# setup cells — presumably removes the stack directory they created.
remove_rds_stack_dir(key="rds", root_dir=Path("."))

In [None]:
# Remove the downloaded sample data. shutil.rmtree replaces the `rm -rf`
# shell escape so the cell also works where `rm` is unavailable (e.g. Windows).
import shutil

# ignore_errors=True mirrors `rm -f`: a missing './data' is not an error.
shutil.rmtree("./data", ignore_errors=True)