In [None]:
import os
from os import path
import urllib.request
import gzip
import struct
import array

import jax
import syft as sy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# local module import
from mnist_dataset import mnist, mnist_raw


# Print the installed syft version; the "=" specifier echoes the expression text next to its value.
print(f"{sy.__version__ = }")

## 1. Launch the domain, upload the data

In [None]:
# Launch the "dk-domain" node through syft's orchestra helper (dev mode on, reset requested).
node = sy.orchestra.launch(
    name="dk-domain",
    dev_mode=True,
    reset=True,
)

# Log in to obtain the client used by the data owner for the rest of the notebook.
# NOTE(review): these look like default dev credentials -- do not reuse outside a local demo.
root_client = node.login(
    email="info@openmined.org",
    password="changethis",
)

### Load the MNIST dataset

Let's load the raw MNIST images and labels with the `mnist_raw` function from [`mnist_dataset.py`](./mnist_dataset.py) and display one of them.

In [None]:
# mnist_raw() is unpacked into four values; only the first two (images, labels) are kept.
# NOTE(review): the discarded pair is presumably the test split -- confirm in mnist_dataset.py.
train_images, train_labels, _, _ = mnist_raw()

In [None]:
# Show the first raw training image with a grayscale colormap and a title so the
# figure stands on its own. Assumes each raw image is a 2-D single-channel array
# (cmap is ignored by matplotlib for RGB(A) input) -- TODO confirm.
# The trailing semicolon keeps the returned object's repr out of the cell output.
plt.imshow(train_images[0], cmap="gray")
plt.title("First raw MNIST training image");

In [None]:
# Display the first entry of train_labels (same index as the image plotted in the previous cell).
train_labels[0]

In [None]:
# Report the shapes of the raw image and label arrays.
print(f"{train_images.shape = }")
print(f"{train_labels.shape = }")

### Processing: flattening the MNIST images and applying one-hot encoding to the labels

In [None]:
# Rebind train_images / train_labels to the output of mnist(), replacing the raw arrays loaded earlier.
# NOTE(review): per the heading above, mnist() is expected to return flattened images and
# one-hot labels -- confirm in mnist_dataset.py.
train_images, train_labels, _, _ = mnist()

### Get a subset of MNIST

In [None]:
# Number of leading training examples kept as the private subset (also the mock data row count).
num_samples = 1_000

In [None]:
# Keep only the first num_samples rows of each array; every column is retained.
train_images, train_labels = (
    train_images[:num_samples, :],
    train_labels[:num_samples, :],
)

In [None]:
# Re-check the shapes after subsetting the arrays to their first num_samples rows.
print(f"{train_images.shape = }")
print(f"{train_labels.shape = }")

The `train_images` and `train_labels` arrays are the private data. Let's create mock data with the same shapes.

In [None]:
# Mock images: uniform random floats in [0, 1), one row per private sample and 784 columns
# (presumably 28 * 28 flattened pixels -- the shape is checked against train_images below).
# A seeded generator is used so the mock data is identical on every Restart & Run All.
mock_images = np.random.default_rng(seed=0).random((num_samples, 784))
mock_images.shape

In [None]:
# Mock labels: draw one random class id in [0, 10) per sample, then index the rows of a
# 10x10 identity matrix to turn each id into a one-hot vector.
# A seeded generator is used so the mock labels are identical on every Restart & Run All.
mock_labels = np.eye(10)[np.random.default_rng(seed=1).integers(10, size=num_samples)]
mock_labels.shape

In [None]:
# Sanity check: each mock array must have exactly the same shape as the private array it stands in for.
assert mock_labels.shape == train_labels.shape
assert mock_images.shape == train_images.shape

### The DO uploads the data

In [None]:
# Describe the dataset that will be uploaded to the domain.
dataset = sy.Dataset(
    name="MNIST data",
    description="Contains the flattened training images and one-hot encoded training labels.",
    url="https://storage.googleapis.com/cvdf-datasets/mnist/",
)

# Record who uploaded the data.
dataset.add_contributor(
    role=sy.roles.UPLOADER,
    name="Alice",
    email="alice@openmined.com",
    note="Alice is the data engineer at the OpenMined",
)

# Show the contributor list as the cell output.
dataset.contributors

In [None]:
# Each asset pairs the private array (data) with its same-shaped mock counterpart (mock).
asset_mnist_train_input = sy.Asset(
    name="MNIST training images",
    description="The training images of the MNIST dataset",
    data=train_images,
    mock=mock_images,
)
asset_mnist_train_labels = sy.Asset(
    name="MNIST training labels",
    description="The training labels of MNIST dataset",
    data=train_labels,
    mock=mock_labels,
)

# Attach the images first and the labels second; later cells index the assets in this order.
dataset.add_asset(asset_mnist_train_input)
dataset.add_asset(asset_mnist_train_labels)

In [None]:
# Upload the dataset, with the two assets added above, through the root client.
root_client.upload_dataset(dataset)

### The DO inspects the uploaded data

In [None]:
# Fetch every dataset through the dataset service, confirm exactly one is present
# (the notebook has uploaded a single dataset so far), then display the listing.
datasets = root_client.api.services.dataset.get_all()
assert len(datasets) == 1
datasets

#### The first asset of the dataset contains the training and mock images

In [None]:
# Display the first asset of the first dataset (the images asset was added first).
datasets[0].assets[0]

#### The second asset contains the training and mock labels

In [None]:
# Display the second asset of the first dataset (the labels asset was added second).
datasets[0].assets[1]

### The DO creates an account for the Data Scientist (DS)

In [None]:
# Register the data scientist's account via the root client.
# NOTE(review): placeholder password -- acceptable for a local demo only.
root_client.register(
    name="Sheldon Cooper",
    email="sheldon@caltech.edu",
    password="changethis",
    institution="Caltech",
    website="https://www.caltech.edu/",
)

### 📓 Now switch to the [DS's notebook](./Data%20Scientist%20(DS).ipynb)

## 2. After the DS submits code execution requests, the DO reviews and approves the code

In [None]:
# Display the projects available through the root client.
root_client.projects

In [None]:
# Take the requests attached to the first project and display them.
requests = root_client.projects[0].requests
requests

In [None]:
# Select the first request for review and display it.
request = requests[0]
request

In [None]:
# Look at the first change contained in the selected request.
change = request.changes[0]
change

In [None]:
# Get a reference to the user code object linked from the change
user_code = change.link

# View the actual source code submitted with the request
print(user_code.code)

In [None]:
# Display the assets attached to the user code object.
user_code.assets

In [None]:
# Grab the callable exposed as unsafe_function so it can be invoked directly in this notebook.
# NOTE(review): the attribute name suggests it runs the submitted code without syft's usual
# safeguards -- confirm in the syft docs and review the printed code before calling it.
users_function = user_code.unsafe_function
users_function

In [None]:
# Mock side of the two assets attached to the request: index 0 -> images, index 1 -> labels.
mock_images = user_code.assets[0].mock
mock_labels = user_code.assets[1].mock

# Report both shapes before running anything on them.
print(f"{mock_images.shape = }")
print(f"{mock_labels.shape = }")

#### The DO runs the code on mock data to ensure things are fine

In [None]:
# Dry run: call the submitted function on the mock arrays only.
mock_train_accs, mock_params = users_function(
    mnist_images=mock_images,
    mnist_labels=mock_labels,
)

#### Once the DO has inspected the code and is satisfied with the mock run, they can execute it on the private data and deposit the results to the domain

In [None]:
# Private side of the same two assets: index 0 -> images, index 1 -> labels.
private_images = user_code.assets[0].data
private_labels = user_code.assets[1].data

# Report both shapes before the real run.
print(f"{private_images.shape = }")
print(f"{private_labels.shape = }")

In [None]:
# Real run: call the submitted function on the private arrays.
train_accs, params = users_function(
    mnist_images=private_images,
    mnist_labels=private_labels,
)

In [None]:
# Shape of every leaf in the model's parameter pytree.
# jax.tree_util.tree_map is the stable spelling: the top-level jax.tree_map alias was
# deprecated and later removed from JAX, so the old call fails on current releases.
jax.tree_util.tree_map(lambda leaf: leaf.shape, params)

#### Deposit the results so the DS can get them

In [None]:
# Accept the request by depositing the (train_accs, params) tuple produced by the run on the private data.
res = request.accept_by_depositing_result((train_accs, params))

In [None]:
# Display the value returned by the deposit call.
res

### 📓 Now switch to the DS's notebook at step 4