### In this session we will go through the process of uploading a dataset

### Import packages

In [None]:
import os

import numpy as np
import pandas as pd
import syft as sy

### Load dataset

In [None]:
# Remote location of the pickled TissueMNIST subset used in this session.
DATASET_URL = "https://github.com/OpenMined/datasets/blob/main/TissueMNIST/subsets/TissueMNIST-04b0fe07281c47d79276a4f4941dbdca.pkl?raw=true"

# NOTE(review): unpickling can execute arbitrary code, so only load pickles
# from sources you trust.
data_subset = pd.read_pickle(DATASET_URL)

In [None]:
# Preview the first five rows to see which columns the subset contains
data_subset.head(n=5)

In [None]:
# Check the shape of the data (for a DataFrame this is (rows, columns))
data_subset.shape

In [None]:
# Pull the three columns used in the rest of the session out of `data_subset`.

# Patient identifiers are kept as-is; they are turned into data subjects later.
patient_ids = data_subset["patient_ids"]

# Labels are cast to 64-bit integers.
labels = data_subset["labels"].astype(np.int64)

# The entries of the "images" column are stacked into a single array
# (presumably one image per row - confirm) and cast to 64-bit integers.
images = np.stack(
    data_subset["images"]
).astype(np.int64)

### Begin here...

### Log into the domain node

In [None]:
### Use the API: sy.login
# Credentials and port are read from the environment so that real secrets
# never have to be written into the notebook. The fallbacks are the values
# this notebook originally hardcoded, so running it without any of the
# variables set behaves exactly as before.
domain_client = sy.login(
    email=os.environ.get("SYFT_DOMAIN_EMAIL", "info@openmined.org"),
    password=os.environ.get("SYFT_DOMAIN_PASSWORD", "changethis"),
    # Environment values are strings; the original call passed an int port.
    port=int(os.environ.get("SYFT_DOMAIN_PORT", "8081")),
)

### Add differential privacy (DP) metadata to the data

In [None]:
### Before DP metadata can be added to the images, we need a data subject
### array: an array with the same shape as the data that records which
### data subject (here, which patient) each data point belongs to.

# Create data subject array for images
# Exercise hint: sy.DataSubjectArray.from_objs
# (the solution below fills the array by hand with the
# sy.DataSubjectArray constructor instead)

# One single-member DataSubjectArray per entry of `patient_ids`, in order.
image_subjects = [
    sy.DataSubjectArray([str(patient_id)]) for patient_id in patient_ids
]

# Object-dtype placeholder with the same shape as `images`; index `row` along
# the first axis is then overwritten with that row's data subject.
image_data_subjects = np.ones(images.shape).astype(object)
for row, subject in enumerate(image_subjects):
    image_data_subjects[row] = subject

In [None]:
# Create a Syft image tensor
# use the API: sy.Tensor
# Wraps the stacked int64 `images` array; DP metadata is attached to this
# tensor in the next step.
image_tensor = sy.Tensor(images)

In [None]:
# Attach DP metadata to the image tensor by calling
# `annotated_with_dp_metadata` on it.
# min_val / max_val are the bounds declared for the tensor being annotated;
# 0-255 is presumably the 8-bit pixel range - confirm against the dataset.
IMAGE_MIN_VAL = 0
IMAGE_MAX_VAL = 255

dp_annotated_image_tensor = image_tensor.annotated_with_dp_metadata(
    min_val=IMAGE_MIN_VAL,
    max_val=IMAGE_MAX_VAL,
    data_subjects=image_data_subjects,
)

In [None]:
# Inspect the shape of the data subject array (it was created from images.shape)
image_data_subjects.shape

In [None]:
# Create data subject array for labels
# Exercise hint: sy.DataSubjectArray.from_objs
# (the solution below fills the array by hand with the
# sy.DataSubjectArray constructor instead)

# One single-member DataSubjectArray per entry of `patient_ids`, in order.
label_subjects = [
    sy.DataSubjectArray([str(patient_id)]) for patient_id in patient_ids
]

# Object-dtype placeholder with the same shape as `labels`; index `row` is
# then overwritten with that row's data subject.
label_data_subjects = np.ones(labels.shape).astype(object)
for row, subject in enumerate(label_subjects):
    label_data_subjects[row] = subject

In [None]:
# Create a Syft label tensor
# use the API: sy.Tensor
# Wraps the int64 `labels` column; DP metadata is attached to this tensor
# in the next step.
label_tensor = sy.Tensor(labels)

In [None]:
### Attach DP metadata to the label tensor by calling
### `annotated_with_dp_metadata` on it.
# min_val / max_val are the bounds declared for the tensor being annotated;
# 0-7 presumably means eight classes indexed from zero - confirm against the
# dataset.
LABEL_MIN_VAL = 0
LABEL_MAX_VAL = 7

dp_annotated_label_tensor = label_tensor.annotated_with_dp_metadata(
    min_val=LABEL_MIN_VAL,
    max_val=LABEL_MAX_VAL,
    data_subjects=label_data_subjects,
)

### Upload the dataset

In [None]:
### Upload everything by calling `.load_dataset` on the `domain_client` variable
### domain_client.load_dataset()

## name:        the name given to the dataset
## description: a short free-text description
## assets:      the data, passed as a dict from asset name to annotated tensor
dataset_assets = {
    "images": dp_annotated_image_tensor,
    "labels": dp_annotated_label_tensor,
}

# run this cell
domain_client.load_dataset(
    name="OMTissueMNISTDataset",
    assets=dataset_assets,
    description="Tissue Specimen Dataset upload from OM.",
)

In [None]:
# Display the domain's datasets (presumably the upload from the previous cell
# now appears here - confirm in the output)
domain_client.datasets

### Congratulations !!!