In [None]:
# !uv pip install ucimlrepo

In [None]:
# third party
from ucimlrepo import fetch_ucirepo

# Download dataset no. 17 from the UCI ML Repository (per the variable name,
# Breast Cancer Wisconsin Diagnostic).
breast_cancer_wisconsin_diagnostic = fetch_ucirepo(id=17)

# Features and targets, exposed as pandas DataFrames.
X, y = (
    breast_cancer_wisconsin_diagnostic.data.features,
    breast_cancer_wisconsin_diagnostic.data.targets,
)

# Dataset-level metadata and the per-variable information.
metadata, variables = (
    breast_cancer_wisconsin_diagnostic.metadata,
    breast_cancer_wisconsin_diagnostic.variables,
)

In [None]:
# Preview the first 10 feature rows (the argument is the number of rows).
X.head(10)

In [None]:
# Preview the first 10 target rows.
y.head(10)

In [None]:
# Show the dtype of each feature column.
X.dtypes

In [None]:
# third party
import numpy as np

# Single seed for the notebook: it seeds NumPy's global generator here and is
# reused wherever a `random_state` is needed, so the mock data is reproducible.
SEED = 12345
np.random.seed(seed=SEED)

In [None]:
# Mock features: every column is shifted by its own mean plus one draw from
# U[0, 1) taken from NumPy's global (seeded) generator.
# NOTE(review): the offset is a single constant per column, so differences
# between rows of the real data carry over unchanged into the mock — confirm
# this level of disclosure is acceptable.
X_mock = X.apply(lambda column: column + column.mean() + np.random.uniform())

In [None]:
# Dimensions (rows, columns) of the mock features.
X_mock.shape

In [None]:
# Show the dtype of each target column.
y.dtypes

In [None]:
# Mock targets: all rows of the real targets in a seeded random order, with
# the original index discarded and renumbered from 0.
# NOTE(review): this is a permutation of the real labels, so their value
# counts are identical in the mock — confirm that is intended.
y_mock = (
    y.sample(frac=1, random_state=SEED)
    .reset_index(drop=True)
)

In [None]:
# syft absolute
import syft as sy

# Version specifier for the syft releases this notebook is written against.
SYFT_VERSION = ">=0.9.0,<0.9.1"
# presumably checks the installed syft against the specifier — verify in the syft docs
sy.requires(SYFT_VERSION)

In [None]:
# Asset pairing the real feature table with its mock counterpart.
features_asset = sy.Asset(
    name="Breast Cancer Data: Features",
    data=X,
    mock=X_mock,
)

In [None]:
# Asset pairing the real target table with its mock counterpart.
targets_asset = sy.Asset(
    name="Breast Cancer Data: Targets",
    data=y,
    mock=y_mock,
)

In [None]:
# First 3 rows of the real data held by the features asset.
features_asset.data.head(3)

In [None]:
# First 3 rows of the mock data held by the features asset, for comparison.
features_asset.mock.head(3)

In [None]:
# Dataset description: the abstract, a newline, then the summary found under
# the "additional_info" entry of the metadata.
description = "{}\n{}".format(
    metadata["abstract"],
    metadata["additional_info"]["summary"],
)

In [None]:
# Print (rather than echo) so the embedded newline renders as a line break.
print(description)

In [None]:
# Citation assembled from the "intro_paper" entry of the metadata, laid out as
# "<authors> - <title>, <published_in>, <year>".
paper = metadata["intro_paper"]

citation = "{authors} - {title}, {published_in}, {year}".format_map(paper)

In [None]:
# Show the assembled citation string.
print(citation)

In [None]:
# ucimlrepo reports the DOI as a bare identifier rather than a link, so turn
# it into a resolvable https://doi.org/ address before using it as the
# dataset URL. A missing DOI or a value that is already an absolute URL is
# passed through unchanged.
dataset_doi = metadata["dataset_doi"]
if dataset_doi and not str(dataset_doi).startswith(("http://", "https://")):
    dataset_url = f"https://doi.org/{dataset_doi}"
else:
    dataset_url = dataset_doi

# Dataset record built from the name in the metadata plus the description and
# citation strings prepared earlier in the notebook.
breast_cancer_dataset = sy.Dataset(
    name=metadata["name"],
    description=description,
    citation=citation,
    url=dataset_url,
)

In [None]:
# Display the dataset object as it stands at this point in the notebook.
breast_cancer_dataset

In [None]:
# Attach the features asset to the dataset.
breast_cancer_dataset.add_asset(features_asset)

In [None]:
# Attach the targets asset to the dataset.
breast_cancer_dataset.add_asset(targets_asset)

In [None]:
# Display the dataset again, after the add_asset calls.
breast_cancer_dataset

In [None]:
# Port and name under which the data site is launched.
DATA_SITE_PORT = 8083
DATA_SITE_NAME = "cancer-research_centre"

# NOTE(review): reset=True presumably discards any state left over from an
# earlier launch under the same name — confirm against the syft docs.
data_site = sy.orchestra.launch(
    name=DATA_SITE_NAME,
    port=DATA_SITE_PORT,
    reset=True,
)

In [None]:
# Log in to the data site listening on DATA_SITE_PORT and display the client.
# NOTE(review): the e-mail and password are hardcoded here; presumably these
# are development defaults — do not reuse them outside a local demo.
client = sy.login(
    email="info@openmined.org",
    password="changethis",
    port=DATA_SITE_PORT,
)
client

In [None]:
# Upload the assembled dataset through the logged-in client.
client.upload_dataset(dataset=breast_cancer_dataset)

In [None]:
# Display the datasets exposed by the client.
client.datasets