# MNIST Syft Data Scientist

In [None]:
import syft as sy
# Name of the log file for this Data Scientist session (relative path).
sy.LOG_FILE = "syft_ds.log"
# Register the file as a log sink at TRACE level; the value returned by add() is
# deliberately discarded so the notebook does not echo it.
_ = sy.logger.add(sy.LOG_FILE, enqueue=True, colorize=False, diagnose=True, backtrace=True, level="TRACE")

# PART 1: Connect to a Remote Duet Server

As the data scientist, you want to perform data science on data that is sitting in the Data Owner's Duet server (in their Notebook).

In order to do this, we must run the code that the Data Owner sends us, which importantly includes their Duet Session ID. This will create a direct connection from our notebook to the remote Duet server. Once the connection is established, all traffic is sent directly between the two nodes.

Let's run the code below and follow the instructions it gives.

In [None]:
# Join the Data Owner's Duet session via the network node at localhost:5000.
# NOTE(review): loopback=True presumably targets a Duet running on this same
# machine; the commented-out line is the variant without it — confirm.
#duet = sy.join_duet(network_url="http://localhost:5000")
duet = sy.join_duet(network_url="http://localhost:5000", loopback=True)

# PART 2: Train a Model on the Data Owner's Data

In [None]:
# Display the Data Owner's store as the cell output.
# NOTE(review): `.pandas` presumably renders the store as a DataFrame — confirm.
duet.store.pandas

In [None]:
# Smoke test: construct an Int through the Duet reference and print what comes back
# (presumably a pointer to the remote value — confirm).
x = duet.syft.lib.python.Int(1)
print(x)

In [None]:
%%time
# Mark the start of the request in the log.
sy.logger.critical("Start requesting")
# Only send a request when the Data Owner's store holds at least one object.
# NOTE: `t` stays undefined, and "Finished requesting" is not logged, when the store is empty.
if len(duet.store) > 0:
    # Request the first stored object, naming the request and giving a reason.
    # NOTE(review): request_block=True with timeout_secs=15 presumably waits up to
    # 15 seconds for the Data Owner to respond; delete_obj=False presumably keeps
    # the object in the remote store — confirm both.
    t = duet.store[0].get(
        request_block=True,
        timeout_secs=15,
        request_name="age_data",
        reason="I want to see the age data",
        delete_obj=False
    )
    sy.logger.critical("Finished requesting")

In [None]:
# print(t)

In [None]:
# let's get some references to our data owner's Duet torch and torchvision
# NOTE: from here on `torch` / `torchvision` are the Duet references, not the local
# packages (those are imported separately as `th` / `tv`).
torch = duet.torch
torchvision = duet.torchvision

# these aliases mirror the names used in the original MNIST example,
# but every one of them is reached through the Duet references above
transforms = torchvision.transforms
datasets = torchvision.datasets
nn = torch.nn
F = torch.nn.functional
optim = torch.optim
StepLR = torch.optim.lr_scheduler.StepLR

In [None]:
import torch as th
import torchvision as tv

In [None]:
# we need some transforms for our MNIST data set
# these use the local torchvision (`tv`), not the Duet reference
local_transform_1 = tv.transforms.ToTensor()  # this converts PIL images to Tensors
local_transform_2 = tv.transforms.Normalize(0.1307, 0.3081)  # this normalizes the dataset (mean, std)

# compose our transforms so they are applied in order: ToTensor, then Normalize
local_transforms = tv.transforms.Compose([local_transform_1, local_transform_2])

In [None]:
# Training settings from original MNIST example command line args
args = {
    "batch_size": 64,  # batch size of the training DataLoader
    "test_batch_size": 1000,  # batch size of the local test DataLoader
    "epochs": 14,  # upper bound of the epoch loop
    "lr": 1.0,  # learning rate passed to the optimizer
    "gamma": 0.7,  # gamma passed to the StepLR scheduler
    "no_cuda": False,  # True disables CUDA even if the Data Owner has it
    "dry_run": False,  # True makes the train/test loops stop early
    "seed": 42, # the meaning of life
    "log_interval": 10,  # fetch and print the training loss every N batches
    "save_model": False,  # not read anywhere in the visible cells
}

In [None]:
# keyword arguments forwarded to the local test DataLoader
test_kwargs = {
    "batch_size": args["test_batch_size"],
}

# this is our carefully curated test data which represents the goal of our problem domain
# it is loaded with the local torchvision (`tv`) and downloaded into ../data if missing
test_data = tv.datasets.MNIST('../data', train=False, download=True, transform=local_transforms)
test_loader = th.utils.data.DataLoader(test_data,**test_kwargs)

In [None]:
# number of samples in the local test set (not the number of batches)
test_data_length = len(test_loader.dataset)
print(test_data_length)

# test_data_length_ptr = duet.syft.lib.python.Int(test_data_length)
# print(test_data_length_ptr)

In [None]:
# show the local test dataset and its type to confirm it is a local object
print(test_data, type(test_data))

The "duet" variable is now your reference to a whole world of remote operations including supported libraries like torch.

In [None]:
# # TODO replace with local inference so this doesn't need to be on the DO side
# test_data_ptr = torchvision.datasets.MNIST('../data', train=False, download=True, transform=transforms)
# print(test_data_ptr)

# test_loader_ptr = torch.utils.data.DataLoader(test_data_ptr,**test_kwargs)
# print(test_loader_ptr)
# # TODO

In [None]:
# seq_model = nn.Sequential(
#     nn.Conv2d(1, 32, 3, 1),
#     nn.ReLU(),
#     nn.Conv2d(32, 64, 3, 1),
#     nn.ReLU(),
#     nn.MaxPool2d(2),
#     nn.Dropout2d(0.25),
#     nn.Flatten(),
#     nn.Linear(9216, 128),
#     nn.ReLU(),
#     nn.Dropout2d(0.5),
#     nn.Linear(128, 10),
#     nn.LogSoftmax(dim=1),
# )

In [None]:
# class Net:
#     modules = []
#     training = False

#     def __init__(self) -> None:
#         super(Net, self).__init__()
#         self.conv1 = nn.Conv2d(1, 32, 3, 1)
#         self.conv2 = nn.Conv2d(32, 64, 3, 1)
#         self.dropout1 = nn.Dropout2d(0.25)
#         self.dropout2 = nn.Dropout2d(0.5)
#         self.fc1 = nn.Linear(9216, 128)
#         self.fc2 = nn.Linear(128, 10)

#         # add to modules list
#         self.modules.append(self.conv1)
#         self.modules.append(self.conv2)
#         self.modules.append(self.dropout1)
#         self.modules.append(self.dropout2)
#         self.modules.append(self.fc1)
#         self.modules.append(self.fc2)

#     def train(self, mode: bool = True):
#         self.training = mode
#         for module in self.modules:
#             module.train(mode)
#         return self

#     def eval(self):
#         return self.train(False)

#     def forward(self, x):
#         x = self.conv1(x)
#         x = F.relu(x)
#         x = self.conv2(x)
#         x = F.relu(x)
#         x = F.max_pool2d(x, 2)
#         x = self.dropout1(x)
#         x = torch.flatten(x, 1)
#         x = self.fc1(x)
#         x = F.relu(x)
#         x = self.dropout2(x)
#         x = self.fc2(x)
#         output = F.log_softmax(x, dim=1)
#         return output

#     def __call__(self, input):
#         return self.forward(input)

#     # local list of remote ListPointers of TensorPointers
#     def parameters(self, recurse: bool = True):
#         params_list = duet.syft.lib.python.List()
#         for module in self.modules:
#             param_pointers = module.parameters()
#             params_list += param_pointers

#         return params_list

#     def cuda(self, device) -> "Net":
#         for module in self.modules:
#             module.cuda(device)
#         return self

#     def cpu(self) -> "Net":
#         for module in self.modules:
#             module.cpu()
#         return self

In [None]:
# let's define our SOTA model to train on the data owner's data
# note we subclass from sy.Module not nn.Module
fc1_scaling_factor = 0.25  # this can let us scale the fc1 layer down a bit


class SyNet(sy.Module):
    """MNIST CNN built from the Duet ``nn`` / ``F`` / ``torch`` references.

    Two 3x3 convolutions, a 2x2 max-pool, dropout and two linear layers ending
    in ``log_softmax``. ``fc1_scaling_factor`` shrinks the number of conv2
    output channels, and with it the input width of ``fc1``.
    """

    def __init__(self):
        super().__init__()
        # Kept as plain local ints (not attributes on the module).
        conv2_channels = int(64 * fc1_scaling_factor)  # keep fc1 size down
        # Derive fc1's input width from conv2's channel count so the two can
        # never disagree (int(9216 * factor) only matches when 64 * factor is
        # an integer). Assumes 28x28 inputs: 28 -> 26 -> 24 after the two 3x3
        # stride-1 convolutions, then 12x12 after the 2x2 max-pool.
        fc1_inputs = conv2_channels * 12 * 12

        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, conv2_channels, 3, 1)
        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout2d(0.5)
        self.fc1 = nn.Linear(fc1_inputs, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return per-class log-probabilities for the input batch ``x``."""
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        # flatten everything except the batch dimension before the linear layers
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.dropout2(x)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

In [None]:
# local twin of SyNet: same architecture, built from the local torch (`th`)
# note we subclass from sy.Module not nn.Module
# WARNING: be extra careful to use th. not the torch from duet here
class LocalSyNet(sy.Module):
    """Local copy of the MNIST CNN, used for evaluation and inference.

    The layer names and sizes must match the remote model so its state can be
    copied into this one. ``fc1_scaling_factor`` is the notebook-level factor
    that shrinks conv2's output channels and fc1's input width.
    """

    def __init__(self):
        super().__init__()
        # Kept as plain local ints (not attributes on the module).
        conv2_channels = int(64 * fc1_scaling_factor)
        # Derive fc1's input width from conv2's channel count so the two can
        # never disagree (int(9216 * factor) only matches when 64 * factor is
        # an integer). Assumes 28x28 inputs: 28 -> 26 -> 24 after the two 3x3
        # stride-1 convolutions, then 12x12 after the 2x2 max-pool.
        fc1_inputs = conv2_channels * 12 * 12

        self.conv1 = th.nn.Conv2d(1, 32, 3, 1)
        self.conv2 = th.nn.Conv2d(32, conv2_channels, 3, 1)
        self.dropout1 = th.nn.Dropout2d(0.25)
        self.dropout2 = th.nn.Dropout2d(0.5)
        self.fc1 = th.nn.Linear(fc1_inputs, 128)
        self.fc2 = th.nn.Linear(128, 10)

    def forward(self, x):
        """Return per-class log-probabilities for the input batch ``x``."""
        functional = th.nn.functional
        x = functional.relu(self.conv1(x))
        x = functional.relu(self.conv2(x))
        x = functional.max_pool2d(x, 2)
        x = self.dropout1(x)
        # flatten everything except the batch dimension before the linear layers
        x = th.flatten(x, 1)
        x = functional.relu(self.fc1(x))
        x = self.dropout2(x)
        x = self.fc2(x)
        return functional.log_softmax(x, dim=1)

In [None]:
# class SmallSyNet(sy.Module):
#     def __init__(self):
#         super(SmallSyNet, self).__init__()
#         self.fc1 = nn.Linear(784, 392)
#         self.fc2 = nn.Linear(392, 10)

#     def forward(self, x):
#         x = torch.flatten(x, 1)
#         x = self.fc1(x)
#         x = F.relu(x)
#         x = self.fc2(x)
#         return x

In [None]:
# class LocalSmallSyNet(sy.Module):
#     def __init__(self):
#         super(LocalSmallSyNet, self).__init__()
#         self.fc1 = th.nn.Linear(784, 392)
#         self.fc2 = th.nn.Linear(392, 10)

#     def forward(self, x):
#         x = th.flatten(x, 1)
#         x = self.fc1(x)
#         x = th.nn.functional.relu(x)
#         x = self.fc2(x)
#         return x    

In [None]:
# let's see if our Data Owner has CUDA
has_cuda = False  # value that remains if the request below raises before reassigning
# `torch` is the Duet reference here, so this yields something we must .get()
has_cuda_ptr = torch.cuda.is_available()
# bool(...) also turns a None result into False
# NOTE(review): .get() presumably returns None when the request is not granted — confirm
has_cuda = bool(has_cuda_ptr.get(
    request_block=True,
    request_name="cuda_is_available",
    reason="To run test and inference locally",
    timeout_secs=30,  # change to something longer if the Data Owner needs more time
))
print(has_cuda)

In [None]:
# use CUDA only when it is not disabled in args AND the Data Owner reported it available
use_cuda = not args["no_cuda"] and has_cuda
# `torch` is the Duet reference, so the seed is set through Duet rather than locally
torch.manual_seed(args["seed"])

device = torch.device("cuda" if use_cuda else "cpu")
# device.type is fetched with .get() before being formatted into the message
print(f"DO device is {device.type.get()}")

In [None]:
# instantiate our model
# this will construct everything inside init on the DO side
# (SyNet builds its layers from the Duet `nn` reference)
# the commented-out lines are alternative models kept for experimentation
# model = Net()
model = SyNet()
# model = seq_model
# model = SmallSyNet()

In [None]:
# inspect what the model registered: how many modules, then the modules themselves
print(len(model.modules))
print(model.modules)

In [None]:
# build the local twin of the remote model (same layers, local torch)
local_model = LocalSyNet()
# local_model = LocalSmallSyNet()
local_model.zero_layers()  # so we can confirm that the weight download works
# NOTE(review): sum_layers() presumably reports a per-layer sum, which should be
# zero at this point — confirm
local_model.sum_layers()
print(local_model.modules)
# assert local_model.fc1.in_features == int(9216 * fc1_scaling_factor)

In [None]:
# copy the remote model's state into the local model; the request is named
# "model_download" and carries a reason for the Data Owner
# NOTE(review): timeout_secs=30 presumably bounds how long we wait for approval — confirm
local_model.copy_remote_state(
    remote_model=model,
    request_name="model_download",
    reason="test evaluation",
    timeout_secs=30
)

In [None]:
print("done")
# report the layer sums again; compare with the values printed right after
# zero_layers() to check whether the download changed the weights
local_model.sum_layers()
# earlier experiment, kept for reference: timing the copy with some layers skipped
# %%time
# skip_layers = []
# # warning fc1 is 9216x128 and stalls on copy, even a tensor size 9216x96 takes 45 seconds
# # skip_layers.append("fc1") # fc1 is too big???
# # warning fc1 at 4608x128 takes about 25 seconds
# local_model.copy_remote_state(remote_model=model, skip_layers=skip_layers)

In [None]:
# # check the weights were copied
# local_model.sum_layers()

In [None]:
# if the Data Owner has CUDA and we have not disabled it, send our model to the GPU
# use_cuda (not has_cuda) is the right switch: it also honours args["no_cuda"],
# and it is the flag `device` was built from, so model.cuda() is never handed
# a CPU device
if use_cuda:
    model.cuda(device)
else:
    model.cpu()

In [None]:
# let's get our parameters for optimization
# params_list required for remote list concatenation: we hand in an empty List
# created through Duet for the parameters to be collected into
params = model.parameters(params_list=duet.syft.lib.python.List())
print(params, type(params))

In [None]:
# Adadelta optimizer over the model parameters, created through the Duet `optim` reference
optimizer = optim.Adadelta(params, lr=args["lr"])
print(optimizer, type(optimizer))

In [None]:
# StepLR scheduler with step_size=1 and gamma taken from args; it is stepped once
# per epoch in the training cell
scheduler = StepLR(optimizer, step_size=1, gamma=args["gamma"])
print(scheduler, type(scheduler))

In [None]:
# now we can define a simple training loop very similar to the original PyTorch MNIST example
@sy.logger.catch
def train(args, model, device, train_loader, optimizer, epoch, train_data_length):
    """Run one capped training pass of ``model`` over ``train_loader``.

    Args:
        args: settings dict. Reads ``"log_interval"`` and ``"dry_run"``, plus
            the optional ``"max_train_batches"`` (defaults to 100).
        model: model to train; switched to train mode and called on each batch.
        device: not used by this function; kept so existing callers still work.
        train_loader: iterable yielding batches indexable as (data, target).
        optimizer: optimizer whose ``zero_grad()`` / ``step()`` drive the update.
        epoch: epoch number, used only in the progress output.
        train_data_length: not used by this function; kept so existing callers
            still work.

    Returns:
        None. Progress is reported with ``print``.
    """
    # The number of batches is capped rather than derived from
    # train_data_length; the cap is configurable through args.
    train_batches = args.get("max_train_batches", 100)
    print(f"> Running train in {train_batches} batches")
    model.train()

    for batch_idx, data in enumerate(train_loader):
        data_ptr, target_ptr = data[0], data[1]
        optimizer.zero_grad()
        output = model(data_ptr)
        loss = F.nll_loss(output, target_ptr)
        loss.backward()
        optimizer.step()
        if batch_idx % args["log_interval"] == 0:
            # Only fetch the loss on logging iterations: each fetch is a
            # request to the Data Owner.
            local_loss = loss.item().get(
                request_name="loss",
                reason="To evaluate training progress",
                request_block=True,
                timeout_secs=30
            )
            if local_loss is not None:
                print("Train Epoch: {} {} {:.4}".format(epoch, batch_idx, local_loss))
            else:
                # no value came back for this request
                print("Train Epoch: {} {} ?".format(epoch, batch_idx))
            if args["dry_run"]:
                break
        if batch_idx >= train_batches - 1:
            print("batch_idx >= train_batches, breaking")
            break

In [None]:
# # TODO replace with local inference and local test set
# # the same for our test training loop except we will need to send our data over for inference
# def test(model, device, test_loader, test_data_length):
#     # + 0.5 lets us math.ceil without the import
#     test_batches = round((test_data_length / args["test_batch_size"]) + 0.5)
#     print(f"> Running test in {test_batches} batches")
#     model.eval()
#     test_loss = duet.syft.lib.python.Float(0)
#     correct_ptr = duet.syft.lib.python.Float(0)

#     with torch.no_grad():
#         for batch_idx, data in enumerate(test_loader):
#             data_ptr, target_ptr = data[0], data[1]
#             output = model(data_ptr)
#             loss = F.nll_loss(output, target_ptr, reduction='sum').item()
#             test_loss = test_loss + loss

#             pred = output.argmax(dim=1)
#             total = pred.eq(target_ptr).sum().item()
#             correct_ptr += total

#             if args["dry_run"]:
#                 break
                
#             if batch_idx >= test_batches - 1:
#                 print("batch_idx >= test_batches, breaking")
#                 break

#     accuracy = correct_ptr / test_data_length
#     # we need to batch or block these requests so the loop doesnt break
#     result = None
# #     result = accuracy.get(
# #         request_block=True,
# #         timeout_secs=0,
# #         request_name="accuracy",
# #         reason="To see the accuracy on DO's test set"
# #     )
#     if result is not None:
#         print("Test Set Average Loss:", 100 * result)
#     else:
#         print("Test Set Average Loss: ?")

In [None]:
# local evaluation: download the remote weights into the local model and run
# our own local test set through it
@sy.logger.catch
def test_local(model, remote_model, test_loader, test_data_length):
    """Evaluate ``remote_model``'s current weights on a local test set.

    Args:
        model: local model that receives the remote state and runs inference.
        remote_model: model whose state is copied into ``model`` first.
        test_loader: iterable yielding (data, target) batches of local tensors.
        test_data_length: number of samples in the test set; used to work out
            how many batches to run.

    Reads ``"test_batch_size"`` and ``"dry_run"`` from the notebook-level
    ``args`` dict. Prints the average loss and the accuracy over the samples
    actually evaluated. Returns None.
    """
    # download remote model
    model.copy_remote_state(
        remote_model=remote_model,
        request_name="model_download",
        reason="test evaluation",
        timeout_secs=30
    )
    # visually check the weights have changed
    model.sum_layers()
    # Ceiling division. round(x + 0.5) is not a ceiling: Python rounds exact
    # halves to the nearest even integer, so exact multiples can come out one
    # too high.
    test_batches = -(-test_data_length // args["test_batch_size"])
    print(f"> Running test_local in {test_batches} batches")
    model.eval()
    test_loss = 0.0
    correct = 0
    samples_seen = 0

    with th.no_grad():
        for batch_idx, (data, target) in enumerate(test_loader):
            output = model(data)
            # summed (not averaged) per batch so it can be averaged once at the end
            test_loss += th.nn.functional.nll_loss(output, target, reduction='sum').item()
            pred = output.argmax(dim=1)
            correct += pred.eq(target).sum().item()
            samples_seen += len(target)
            if args["dry_run"]:
                break

            if batch_idx >= test_batches - 1:
                print("batch_idx >= test_batches, breaking")
                break

    if samples_seen == 0:
        print("Test Set: no samples evaluated")
        return

    # Divide by what was actually evaluated so an early exit (dry run) still
    # reports meaningful numbers.
    print("Test Set Average Loss:", test_loss / samples_seen)
    print("Test Set Accuracy (%):", 100 * correct / samples_seen)

In [None]:
# we need some transforms for our MNIST data set
# these are built through the Duet `torchvision` reference (unlike the local_transform_* ones)
transform_1 = torchvision.transforms.ToTensor()  # this converts PIL images to Tensors
transform_2 = torchvision.transforms.Normalize(0.1307, 0.3081)  # this normalizes the dataset
print(type(transform_1), type(transform_2))

# Compose is given a List created through Duet rather than a local Python list
remote_list = duet.syft.lib.python.List()
remote_list.append(transform_1)
remote_list.append(transform_2)

# compose our transforms
# NOTE: this rebinds `transforms`, which earlier in the notebook referred to
# torchvision.transforms itself
transforms = torchvision.transforms.Compose(remote_list)

# The DO has kindly let us initialise a DataLoader for their training set
train_kwargs = {
    "batch_size": args["batch_size"],
}
train_data_ptr = torchvision.datasets.MNIST('../data', train=True, download=True, transform=transforms)
print(train_data_ptr)
train_loader_ptr = torch.utils.data.DataLoader(train_data_ptr,**train_kwargs)
print(train_loader_ptr)

In [None]:
def get_train_length(train_data_ptr):
    """Ask for the length of the dataset behind ``train_data_ptr`` and return it.

    ``__len__`` is called explicitly and ``.get()`` is then called on its
    result, with a request named "train_size" and a 30 second timeout.
    """
    return train_data_ptr.__len__().get(
        request_block=True,
        request_name="train_size",
        reason="To write the training loop",
        timeout_secs=30,
    )

# Fetch the training-set size only when we do not have it yet, so re-running
# this cell does not send another request to the Data Owner.
# globals().get() yields None when the name has never been assigned. This keeps
# the fetch outside any try block, so a NameError raised while fetching is no
# longer swallowed and the request silently re-issued.
if globals().get("train_data_length") is None:
    train_data_length = get_train_length(train_data_ptr)

print(f"Training Dataset size is: {train_data_length}")

In [None]:
%%time
import time
# timings recorded for earlier model variants:
# sequential CPU times: user 39.2 s, sys: 390 ms, total: 39.6s per epoch
# sy.module CPU times: user 1min 28s, sys: 645 ms, total: 1min 28s per epoch
# vanilla class CPU times: user 3min 5s, sys: 3.01 s, total: 3min 8s per epoch

# make sure the train/test loops are not cut short by a leftover dry-run flag
args["dry_run"] = False
sy.logger.trace("Start Training")
for epoch in range(1, args["epochs"] + 1):
    epoch_start = time.time()
    print(f"Epoch: {epoch}")
    # train through the Duet references (remote model, remote loader)
    train(args, model, device, train_loader_ptr, optimizer, epoch, train_data_length)
    #test(model, device, test_loader_ptr, test_data_length_ptr)
    test_local(local_model, model, test_loader, test_data_length)  # real local data and model
    scheduler.step()
    epoch_end = time.time()
    print(f"Epoch time: {int(epoch_end - epoch_start)} seconds")
    # NOTE(review): this unconditional break stops after the first epoch no matter
    # what args["epochs"] says — confirm it is intentional
    break
sy.logger.trace("Finish Training")

In [None]:
# visible marker that the cells above have finished running
print("Done")

# PART 3: Inference

In [None]:
import matplotlib.pyplot as plt
def draw_image_and_label(image, label):
    """Show ``image`` in a new grayscale figure titled with its ``label``.

    Args:
        image: 2-D image data accepted by ``plt.imshow``.
        label: ground-truth label, formatted into the figure title.
    """
    plt.figure()  # open a new figure; the handle itself is not needed
    plt.tight_layout()
    plt.imshow(image, cmap="gray", interpolation="none")
    plt.title(f"Ground Truth: {label}")
    
def prep_for_inference(image):
    """Turn a single image tensor into a one-item batch.

    Two leading dimensions of size 1 are added, and the result is multiplied
    by 1.0.
    """
    batched = image.unsqueeze(0).unsqueeze(0)
    return batched * 1.0

In [None]:
def classify_local(image):
    """Classify one image with the local model.

    Args:
        image: single image tensor; it is batched with ``prep_for_inference``.

    Returns:
        ``(class_num, local_y)``: the index of the highest-scoring class as a
        0-dim tensor, and the 1-D tensor of per-class scores
        (``exp`` of the model output).
    """
    image_tensor = th.Tensor(prep_for_inference(image))
    print("1", type(image_tensor))
    # Inference must not run with dropout active or gradients tracked.
    local_model.eval()
    with th.no_grad():
        output = local_model(image_tensor)
    print("2", type(output))
    # the model ends in log_softmax, so exp() turns its output back into probabilities
    preds = th.exp(output)
    print("3", type(preds))
    local_y = preds.squeeze()
    # argmax always yields exactly one index, even if two classes tie
    class_num = local_y.argmax()
    return class_num, local_y

In [None]:
def classify(image):
    """Classify one image with the remote model and request the result.

    Args:
        image: single image tensor; it is batched with ``prep_for_inference``.

    Returns:
        ``(class_num, local_y)``: the index of the highest-scoring class as a
        0-dim tensor, and the 1-D tensor of per-class scores. If no result
        comes back, ``(-1, th.Tensor([-1]))`` is returned after printing a hint.
    """
    # `torch` is the Duet reference, so the tensor is built through Duet
    image_tensor_ptr = torch.Tensor(prep_for_inference(image))
    # Inference must not run with dropout active; training switches the model
    # back to train mode itself.
    model.eval()
    output = model(image_tensor_ptr)
    # the model ends in log_softmax, so exp() turns its output back into probabilities
    preds = torch.exp(output)
    preds_result = preds.get(
        request_block=True,
        request_name="inference",
        reason="To see a real world example of inference",
        timeout_secs=10
    )
    if preds_result is None:
        print("No permission to do inference, request again")
        return -1, th.Tensor([-1])

    local_y = th.Tensor(preds_result).squeeze()
    # argmax always yields exactly one index, even if two classes tie
    class_num = local_y.argmax()
    return class_num, local_y

In [None]:
# let's grab something from the test set
import random

total_images = test_data_length # 10000
# randrange excludes the upper bound, so index is always a valid position
# (randint(0, total_images) could return total_images itself)
index = random.randrange(total_images)
print("Random Test Image:", index)
# Which batch holds the image, and where it sits inside that batch. Using the
# real batch size stays correct even when the last batch is smaller.
batch, batch_index = divmod(index, test_kwargs["batch_size"])
# walk the loader until we reach the batch that holds the chosen image
for count, (data, target) in enumerate(test_loader):
    if count == batch:
        break

print(f"Displaying {index} == {batch_index} in Batch: {batch}/{len(test_loader)}")
image_1 = data[batch_index].reshape((28, 28))
label_1 = target[batch_index]
draw_image_and_label(image_1, label_1)

In [None]:
# # classify remote
# sy.logger.trace("Before running classify")
# class_num, preds = classify(image_1)
# print(f"Prediction: {class_num} Ground Truth: {label_1}")
# print(preds)

In [None]:
# classify local: run the image picked above through the local model
sy.logger.trace("Before running classify")
class_num, preds = classify_local(image_1)
# compare the predicted class with the label taken from the test set
print(f"Prediction: {class_num} Ground Truth: {label_1}")
print(preds)

In [None]:
# error with recv_immediate_msg_with_reply. 'generator' object has no attribute 'serialize'
# remote_model_params_ptr = model.parameters()
# remote_model_params = remote_model_params_ptr.get(
#     request_block=True,
#     request_name="copy_model",
#     reason="To run test and inference locally",
#     timeout_secs=10,
#     delete_obj=False,
# )

In [None]:
# print(remote_model_params)