In [None]:
import syft_datasets as syd

In [None]:
# For demonstration purposes, we create a random mock and private dataset.

from pathlib import Path
import random
import pandas as pd

# Fixed seed so that Restart & Run All regenerates identical CSV files.
RANDOM_SEED = 42

num_mock_rows = 100
num_private_rows = 100


def make_random_records(num_rows):
    """Build random demo records as a column-oriented dict.

    Values are drawn from the global ``random`` state, so seed it first
    when reproducible output is needed.

    Args:
        num_rows: Number of values to generate for each column.

    Returns:
        A dict with the keys "age", "height" and "income", each mapping to
        a list of ``num_rows`` values: integer ages from 0 to 100, float
        heights between 150 and 200, and integer incomes from 20000 to
        100000.
    """
    return {
        "age": [random.randint(0, 100) for _ in range(num_rows)],
        "height": [random.uniform(150, 200) for _ in range(num_rows)],
        "income": [random.randint(20000, 100000) for _ in range(num_rows)],
    }


# Seed once; the mock and private sets are drawn consecutively from the same
# stream, so they differ from each other but are stable across runs.
random.seed(RANDOM_SEED)
mock_data = make_random_records(num_mock_rows)
private_data = make_random_records(num_private_rows)

mock_df = pd.DataFrame(mock_data)
private_df = pd.DataFrame(private_data)

# Write both frames as CSV (without the index column) into ./data,
# creating the directory if it does not exist yet.
data_dir = Path("./data")
data_dir.mkdir(parents=True, exist_ok=True)
mock_df.to_csv(data_dir / "mock_data.csv", index=False)
private_df.to_csv(data_dir / "private_data.csv", index=False)

In [None]:
# Write a minimal README file into the data directory.
readme_path = data_dir / "README.md"
readme_content = "\n# My example dataset\n"
readme_path.write_text(readme_content)

# Creating a dataset

In [None]:
# Create the dataset from the files written above.
# NOTE(review): FileExistsError is presumably raised by syd.create when a
# dataset with this name already exists (e.g. on a re-run) — confirm. It is
# reported instead of being silently ignored; the dataset is fetched by name
# in a later cell either way.
try:
    my_dataset = syd.create(
        name="my-dataset-2",
        mock_path=data_dir / "mock_data.csv",
        private_path=data_dir / "private_data.csv",
        readme_path=readme_path,
        summary="This is a mock dataset for demonstration purposes.",
        tags=["example", "testing"],
    )
except FileExistsError:
    print("Dataset 'my-dataset-2' already exists; skipping creation.")
else:
    # Only reached when creation succeeded, so my_dataset is always bound here.
    print(f"Dataset created: {my_dataset.name}")

# Getting and searching datasets

In [None]:
# Getting a dataset by name: look up "my-dataset-2" and call its describe()
# method. describe() is the cell's last expression, so any value it returns
# is displayed as the cell output.
my_dataset = syd.get(name="my-dataset-2")
my_dataset.describe()

In [None]:
# Take the owner of the dataset fetched above as the current user.
# NOTE(review): assumes that dataset is owned by the logged-in user — confirm.
current_user = my_dataset.owner
print(f"Currently logged in as '{current_user}'")

In [None]:
# Getting all my datasets: query with datasite=current_user, ordered by
# "created_at" with sort_order "desc" and capped at 10 results.
# NOTE(review): presumably this returns the newest datasets first — confirm
# against the syft_datasets API.
my_datasets = syd.get_all(
    datasite=current_user,
    order_by="created_at",
    sort_order="desc",
    limit=10,
)

# Bare expression so the result is rendered as the cell output.
my_datasets

In [None]:
# Delete the demo dataset by name.
# NOTE(review): presumably this cleanup lets the notebook be re-run from scratch — confirm.
syd.delete(name="my-dataset-2")

In [None]:
# List datasets with no filters applied.
# NOTE(review): presumably used to check that the dataset deleted above is gone — verify.
syd.get_all()