In [1]:

import syft as sy
from syft.core.node.common.node_service.tff.tff_messages import TFFMessageWithReply
from syft.core.node.common.node_service.ping.ping_messages import PingMessageWithReply
from syft.grid import GridURL
from syft.core.node.common.node_service.simple.simple_messages import NodeRunnableMessageWithReply
from syft.core.node.common.node_service.get_repr.get_repr_messages import GetReprMessage

import os, sys

import collections

import numpy as np
# Seed NumPy's legacy global RNG so that any later np.random draws in this
# notebook are repeatable from one Restart & Run All to the next.
np.random.seed(0)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Log in to the domain node as its administrator.
# Credentials and port are taken from the environment when set, so real secrets
# never have to be committed with the notebook. The fallbacks are the stock
# development values this notebook was written against, which keeps it runnable
# unchanged against a local test node.
domain = sy.login(
    email=os.environ.get("SYFT_ADMIN_EMAIL", "info@openmined.org"),
    password=os.environ.get("SYFT_ADMIN_PASSWORD", "changethis"),
    port=int(os.environ.get("SYFT_DOMAIN_PORT", "8081")),
)


Anyone can login as an admin to your node right now because your password is still the default PySyft username and password!!!

Connecting to localhost... done! 	 Logging into exciting_goodfellow... done!


In [3]:
# Download MedNIST.pkl unless a copy is already in the working directory.
if not os.path.exists("./MedNIST.pkl"):
    import subprocess  # local import: only needed on the download path

    try:
        # --fail makes curl exit non-zero on an HTTP error instead of saving the
        # error page as MedNIST.pkl; --location follows redirects. check=True
        # turns a non-zero exit into CalledProcessError, so a failed download
        # stops the notebook here rather than surfacing later as an unpickling
        # error.
        subprocess.run(
            [
                "curl",
                "--fail",
                "--location",
                "-O",
                "https://media.githubusercontent.com/media/shubham3121/datasets/main/MedNIST/MedNIST.pkl",
            ],
            check=True,
        )
    except BaseException:
        # Drop any partially written file; otherwise the next run would take
        # the "already downloaded" branch and try to load a truncated pickle.
        if os.path.exists("./MedNIST.pkl"):
            os.remove("./MedNIST.pkl")
        raise
else:
    print("MedNIST already downloaded")

MedNIST already downloaded


In [4]:
# Helper Methods

import os
import json
import pandas as pd
from PIL import Image
from enum import Enum
from collections import defaultdict
import numpy as np
from syft.core.adp.data_subject_list import DataSubjectList
from syft.core.adp.data_subject_list import DataSubjectArray


def get_label_mapping():
    """Return the class-name -> integer-label mapping the data uses.

    The names are listed in label order, so each name's position in the tuple
    is the integer it maps to (0 through 5). A fresh dict is built on every call.
    """
    class_names = ("AbdomenCT", "BreastMRI", "CXR", "ChestCT", "Hand", "HeadCT")
    return {class_name: label for label, class_name in enumerate(class_names)}

# Load the label lookup and the pickled MedNIST table, then print a short
# summary of what was loaded.
# NOTE(review): unpickling can execute arbitrary code embedded in the file;
# only load a MedNIST.pkl that came from a source you trust.
mapping = get_label_mapping()
df = pd.read_pickle("./MedNIST.pkl")

total_num = len(df)  # number of rows in the table
for summary_caption, summary_value in (
    ("Columns:", df.columns),
    ("Total Images:", total_num),
    ("Label Mapping", mapping),
):
    print(summary_caption, summary_value)

Columns: Index(['patient_id', 'image', 'label'], dtype='object')
Total Images: 58954
Label Mapping {'AbdomenCT': 0, 'BreastMRI': 1, 'CXR': 2, 'ChestCT': 3, 'Hand': 4, 'HeadCT': 5}


In [5]:
# Select `size` consecutive row labels starting at every multiple of `step`,
# six times over (six is also the number of entries in the label mapping).
# NOTE(review): presumably this is meant to pick `size` images per class, which
# would require the table to be grouped by label in runs of roughly `step`
# rows. Confirm against the dataset.
step = 10000
size = 50
subset_idx = []
for i in range(6):
    block_start = step * i
    subset_idx += range(block_start, block_start + size)

# Stack the selected images depth-wise, cast to int64, then move the stacking
# axis to the front so the first dimension indexes the selected rows.
# NOTE(review): assumes every entry of df['image'] is a 2-D array of the same
# shape. TODO confirm.
image_stack = np.dstack(df['image'][subset_idx].values).astype(np.int64)
images = np.moveaxis(image_stack, -1, 0)

# Data subjects for the image tensor: built from the selected patient ids and
# broadcast to the full image shape.
image_subjects = DataSubjectList.from_series(df['patient_id'][subset_idx])
data_subjects = np.broadcast_to(np.array(image_subjects), images.shape)

# Data subjects for the label tensor, built from the same patient ids.
label_data_subjects = DataSubjectArray.from_objs(df['patient_id'][subset_idx])


In [6]:
# Labels for the selected rows, converted to an int64 NumPy array.
label_column = df['label'][subset_idx]
labels = label_column.to_numpy().astype("int64")


In [7]:
# Sanity check before the labels are wrapped as a private tensor: the data
# subjects are passed alongside the labels, so the two must have the same shape.
assert label_data_subjects.shape == labels.shape, (
    f"data-subject shape {label_data_subjects.shape} != label shape {labels.shape}"
)
label_data_subjects.shape

(300,)

In [8]:
# Wrap the raw arrays as Syft tensors and mark them private. Each call supplies
# the value bounds (min_val / max_val) and the data subjects for that array.
image_tensor = sy.Tensor(images)
train_image_data = image_tensor.private(
    min_val=0,
    max_val=255,
    data_subjects=data_subjects,
)

label_tensor = sy.Tensor(labels)
train_label_data = label_tensor.private(
    min_val=0,
    max_val=5,  # labels run from 0 to 5 in the label mapping
    data_subjects=label_data_subjects,
)

In [9]:
# Upload both private tensors to the domain node as one named dataset.
dataset_assets = {
    "images": train_image_data,
    "labels": train_label_data,
}
domain.load_dataset(
    assets=dataset_assets,
    name="Mixed MedNIST 64 fo real",
    description="Small dataset for TFF testing",
)

Loading dataset... uploading...🚀

Uploading `images`: 100%|█████████████████████████████████████████████| 1/1 [00:00<00:00,  4.23it/s]
Uploading `labels`: 100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 218.10it/s]


Dataset is uploaded successfully !!! 🎉

Run `<your client variable>.datasets` to see your new dataset loaded into your machine!


In [10]:
# Create the data-scientist account on the domain node.
# The password is read from the environment when set, so a real secret does not
# have to be committed with the notebook; the fallback is the development value
# this notebook was written with.
data_scientist_details = domain.create_user(
    name="Sam Carter",
    email="sam@stargate.net",
    password=os.environ.get("SYFT_DS_PASSWORD", "changethis"),
    budget=9999,  # value passed as the new user's budget
)

User created successfully!
