In [1]:
import os

import syft as sy
from utils import *
from syft import NewDataSubject

In [None]:
# URL of a BreastCancerDataset subset file in the OpenMined datasets repository.
# NOTE(review): the file is a .pkl — if download_dataset unpickles it, only fetch
# from sources you trust (unpickling can execute arbitrary code); confirm.
MY_DATASET_URL = "https://raw.githubusercontent.com/OpenMined/datasets/main/BreastCancerDataset/subsets/BreastCancerDataset-02ec48b840824b1ea3e1f5d11c45314b.pkl"
# download_dataset is not defined in this notebook; presumably it comes from
# `from utils import *` — TODO confirm.
dataset = download_dataset(MY_DATASET_URL)

In [None]:
# Preview the downloaded dataset (assumes it exposes a DataFrame-like head() — TODO confirm).
dataset.head()

In [None]:
# run this cell
# Produces three parts named train, val and test from the downloaded dataset.
# split_and_preprocess_dataset is not defined in this notebook (presumably from
# utils — TODO confirm); later cells index `train` by "images", "labels" and
# "patient_ids".
train, val, test = split_and_preprocess_dataset(data=dataset)

In [None]:
# run this cell
data_subjects_image = np.ones(train["images"].shape).astype(object)
for i,patient in enumerate(train["patient_ids"]):
    data_subjects_image[i] = NewDataSubject([str(patient)])

data_subjects_labels = np.ones(train["labels"].shape).astype(object)
for i,patient in enumerate(train["patient_ids"]):
    data_subjects_labels[i] = NewDataSubject([str(patient)])

    
train_image_data = sy.Tensor(train["images"]).annotated_with_dp_metadata(
    min_val=0, max_val=255, data_subjects=data_subjects_image
)
train_label_data = sy.Tensor(train["labels"]).annotated_with_dp_metadata(
    min_val=0, max_val=1, data_subjects=data_subjects_labels
)

### Model Training

In [None]:
from syft import nn

In [None]:
# Keep only the first two samples of both tensors so the rest of the notebook
# works on a tiny batch. Re-running this cell is harmless: slicing [:2] again
# yields the same two samples.
first_two = slice(None, 2)
train_image_data = train_image_data[first_two]
train_label_data = train_label_data[first_two]

In [None]:
# Read the tensor's public_shape attribute and display it as the cell output.
# NOTE(review): input_shape is reassigned in a later cell before the model uses it.
input_shape = train_image_data.public_shape
input_shape

In [None]:
# Alias for the label tensor.
# NOTE(review): y_train is not referenced again in the cells visible here.
y_train = train_label_data

In [None]:
# Shape of a one-sample slice of the underlying child tensor. This overwrites
# the input_shape taken from public_shape earlier and is the value handed to the
# first Convolution layer when the model is built.
input_shape = train_image_data[:1].child.shape

In [None]:
# Assemble the network: the layers are listed in order and then appended to the
# model one by one.
model = nn.Model()

network_layers = [
    # Layer 1: convolution -> batch norm with leaky ReLU -> max pooling.
    # Only this first layer is given the input shape.
    nn.Convolution(nb_filter=32, filter_size=3, padding=2, input_shape=input_shape),
    nn.BatchNorm(activation=nn.leaky_ReLU()),
    nn.MaxPool(pool_size=2, stride=2),
    # Layers 2-4 are currently disabled; uncomment to deepen the network.
    # # Layer 2
    # nn.Convolution(nb_filter=64, filter_size=3, padding=2),
    # nn.BatchNorm(activation=nn.leaky_ReLU()),
    # nn.MaxPool(pool_size=2, stride=2),
    # # Layer 3
    # nn.Convolution(nb_filter=128, filter_size=3, padding=2),
    # nn.BatchNorm(activation=nn.leaky_ReLU()),
    # nn.MaxPool(pool_size=2, stride=2),
    # # Layer 4
    # nn.Convolution(nb_filter=256, filter_size=3, padding=2),
    # nn.BatchNorm(activation=nn.leaky_ReLU()),
    # nn.MaxPool(pool_size=2, stride=2),
    # Layer 6
    nn.AvgPool(1),
    # Layer 7
    nn.Flatten(),
    # Layer 8
    nn.Linear(2, 512),
]

for network_layer in network_layers:
    model.add(network_layer)

In [None]:
# Log in to the domain node on port 8081.
# Credentials are read from the SYFT_EMAIL / SYFT_PASSWORD environment variables
# when they are set, so real credentials never need to be written into the
# notebook; the previously hardcoded values remain as fallbacks so existing
# local setups keep working unchanged.
domain = sy.login(
    email=os.environ.get("SYFT_EMAIL", "info@openmined.org"),
    password=os.environ.get("SYFT_PASSWORD", "changethis"),
    port=8081,
)

In [None]:
# Initialise the local model's weights; this runs before the model is sent to the domain.
model.initialize_weights()

In [None]:
# Send the model to the logged-in domain and keep the returned handle for the
# remote calls made in later cells (step, download_weights).
# NOTE(review): send_to_blob_storage=False presumably keeps the payload out of
# blob storage — confirm against the syft API.
model_ptr = model.send(domain,send_to_blob_storage=False)

In [None]:
import time
from tqdm import tqdm

# Mini-batch training through the remote model handle.
n_epochs = 1
batch_size = 2
# The number of samples does not change during training, so read it once.
total_images = train_image_data.public_shape[0]
for epoch in tqdm(range(n_epochs)):
    print(f"Epoch: {epoch}")
    for start_idx in range(0, total_images, batch_size):
        end_idx = start_idx + batch_size
        x_batch = train_image_data[start_idx:end_idx]
        y_batch = train_label_data[start_idx:end_idx]
        run_status = model_ptr.step(x_batch, y_batch)
        # Block until the handle returned by step() reports that it exists,
        # checking every 10 seconds (presumably the step runs remotely and
        # asynchronously — confirm).
        while not run_status.exists:
            time.sleep(10)
    # Reported once per epoch, after every batch of that epoch has finished.
    print("Epoch completed.")

In [None]:
# Request the weights from the remote model; the result is polled for in the next cell.
# NOTE(review): sigma=1000 is presumably the differential-privacy noise scale — confirm.
weights = model_ptr.download_weights(sigma=1000)

In [None]:
# Poll every 5 seconds until the weights handle reports that it exists.
# `time` is imported in the training-loop cell, so that cell must have run first.
while not weights.exists:
    time.sleep(5)

In [None]:
# Retrieve a copy of the weights; the value is shown as the cell output.
weights.get_copy()

In [None]:
# Serialize the local model to bytes.
# NOTE(review): `ser` is not used in the cells visible here — looks like a
# serialization smoke test; confirm whether it is still needed.
ser = sy.serialize(model, to_bytes=True)

In [None]:
# Run one step on the local model (not the remote handle), passing the
# underlying child tensors directly.
# NOTE(review): the result is stored as `loss`; presumably step() returns the
# batch loss — confirm.
loss = model.step(train_image_data.child, train_label_data.child)

In [None]:
# Substring test between two string literals; it always evaluates to True.
# NOTE(review): looks like leftover debugging — consider removing this cell.
"download_weights" in "syft.core.tensor.nn.Model.download_weights"

In [None]:
# Fit the local model on the training tensors (already cut down to two samples
# earlier in the notebook): max_iter=1, batch size 2, shuffling disabled.
model.fit(train_image_data, train_label_data, shuffle=False, max_iter=1,batch_size=2)

In [None]:
import time

# Manual forward pass: feed the first two samples through every layer in turn,
# timing each layer and accumulating the total (seconds, from time.time()).
out = train_image_data[:2].child
total_time = 0.0
for layer in model.layers:
    layer_started = time.time()
    print("Layer Name: ", layer)
    out = layer.forward(out)
    layer_elapsed = time.time() - layer_started
    print(f"Time for Layer: {layer}", layer_elapsed)
    total_time += layer_elapsed
    print()

In [None]:
# Total time (in seconds) accumulated over all layers during the forward pass
total_time

## Backward Pass

In [None]:
# Labels for the same first two samples used in the manual forward pass, taken
# as the underlying child tensor.
y_true = train_label_data[:2].child

In [None]:
# Display the children of the labels and of the forward-pass output together.
y_true.child, out.child

In [None]:
# Display the data_subjects attached to the forward-pass output.
out.data_subjects

In [None]:
# Evaluate the model's loss on the forward-pass output against the labels and
# print the child of the result.
curr_loss = model.loss.forward(out, y_true)
print("Loss on the batch", curr_loss.child)

In [None]:
# Starting value for the backward pass: the result of the loss's backward() is
# what the backward loop below feeds into the last layer.
next_grad = model.loss.backward(out, y_true)

In [None]:
# for layer in model.layers[::-1]:
#     print(f"Layer: {layer}")
#     print(f"InputShape: {layer.input_shape}, OutputShape: {layer.out_shape}")

In [None]:
# Manual backward pass: walk the layers from last to first, handing each
# layer's backward() result to the layer before it, and time every call
# (seconds, from time.time()). `time` must already be imported by an earlier cell.
total_time_backward = 0.0
for layer in model.layers[::-1]:
    print(layer)
    backward_started = time.time()
    next_grad = layer.backward(next_grad)
    backward_elapsed = time.time() - backward_started
    print(f"Time to execute backward for {layer}: {backward_elapsed}")
    grad_shape = next_grad.shape
    subjects_shape = next_grad.data_subjects.shape
    print(f"Child Shape: {grad_shape}, DS Shape: {subjects_shape}")

    total_time_backward += backward_elapsed
    print()

In [None]:
# Total time (in seconds) accumulated over all layers during the backward pass
total_time_backward

#### Update Optimizer

In [None]:
# Hand the model's layers to its optimizer for an update step.
# NOTE(review): presumably this applies the gradients produced by the backward
# loop above — confirm against the optimizer implementation.
model.optimizer.update(model.layers)