In [19]:
import torch; print(torch.__version__)

1.3.0


In [2]:
import argparse
import numpy as np
from itertools import count

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.distributions import Categorical
from torchvision import transforms

import time

In [3]:
class Arguments():
    """Run settings shared by the cells of this notebook."""

    def __init__(self):
        # Mini-batch sizes for the training and test loaders.
        self.batch_size = 14
        self.test_batch_size = 14
        # Number of passes of the train/test loop.
        self.epochs = 10
        # Learning rate handed to the SGD optimizer.
        self.lr = 0.02
        # Seed applied to the random number generators below.
        self.seed = 1
        self.log_interval = 1 # Log info at each batch
        # Passed as `precision_fractional` to fix_precision() when data is shared.
        self.precision_fractional = 3

args = Arguments()

# Seed torch AND numpy: the notebook samples mini-batches with np.random,
# so seeding torch alone does not make a run repeatable.
_ = torch.manual_seed(args.seed)
np.random.seed(args.seed)

In [4]:
import syft as sy

# Create the syft hook for torch; it is passed to every VirtualWorker below.
# NOTE(review): presumably this is what adds fix_precision()/share() to torch
# tensors and modules — confirm against the PySyft documentation.
hook = sy.TorchHook(torch)

def connect_to_workers(n_workers):
    """Create `n_workers` virtual workers with ids "worker1", "worker2", ...

    Returns the workers as a list, in id order.
    """
    created = []
    for i in range(n_workers):
        created.append(sy.VirtualWorker(hook, id=f"worker{i+1}"))
    return created
def connect_to_crypto_provider():
    """Create the virtual worker registered under the id "crypto_provider"."""
    provider = sy.VirtualWorker(hook, id="crypto_provider")
    return provider

# Two data-holding workers plus one crypto provider; all three are handed to
# every .share(...) call later in the notebook.
workers = connect_to_workers(n_workers=2)
crypto_provider = connect_to_crypto_provider()

Falling back to insecure randomness since the required custom op could not be found for the installed version of TensorFlow. Fix this by compiling custom ops. Missing file was '/home/souhail/.local/lib/python3.7/site-packages/tf_encrypted/operations/secure_random/secure_random_module_tf_2.0.0.so'


Instructions for updating:
non-resource variables are not supported in the long term


In [5]:
import pandas as pd
import numpy as np

def getSamples(filename):
    """Read a tab-separated table and return it transposed, minus its first column.

    The file is parsed with pandas, the first column of the parsed values is
    dropped, and the rest is transposed, so each returned row corresponds to
    one of the remaining file columns.
    """
    table = pd.read_csv(filename, sep='\t')
    without_first_column = table.values[:, 1:]
    return without_first_column.T

# Load the two sample matrices (one row per sample column of each file).
data1 = getSamples("GSE2034-Normal-train.txt")
data2 = getSamples("GSE2034-Tumor-train.txt")

# Column-vector labels: 0 for every data1 row, 1 for every data2 row.
data1Label = np.zeros(len(data1)).reshape((-1, 1))
data2Label = np.ones(len(data2)).reshape((-1, 1))
# Stack data1 rows above data2 rows; y follows the same order, so x and y are
# sorted by class at this point.
x = np.concatenate((data1, data2))
y = np.concatenate((data1Label, data2Label))
# z is x with the label appended as one extra trailing column.
z = np.concatenate((x, y), axis = 1)

In [6]:
# Split the samples into a training part and a held-out test part.
# Every row not used for training goes to the test set, so the test count is
# derived from the data instead of being hard-coded.
n_train_items = 181
n_test_items = len(x) - n_train_items

# x stacks every data1 row before every data2 row, so slicing it directly
# would hold out only rows from the end of that class-sorted order.
# Shuffle once with a fixed seed so both parts mix the classes and the split
# is identical on every run.
_split_order = np.random.RandomState(args.seed).permutation(len(x))
_train_idx = _split_order[:n_train_items]
_test_idx = _split_order[n_train_items:]

# partition the data into training data and test data (same row order in x, y, z)
x_train = x[_train_idx]
y_train = y[_train_idx]
z_train = z[_train_idx]

x_test = x[_test_idx]
y_test = y[_test_idx]
z_test = z[_test_idx]

In [7]:
# Sanity check: rows = held-out samples, columns = features plus one label column.
z_test.shape

(46, 12635)

In [8]:

def get_private_data_loaders(precision_fractional, workers, crypto_provider):
    """Build secret-shared mini-batches for training and testing.

    Reads the module-level arrays x_train / y_train / x_test / y_test and the
    batch sizes in `args`. Each split is cut into consecutive mini-batches;
    every batch is converted to fixed precision and shared between `workers`.

    Args:
        precision_fractional: forwarded to fix_precision().
        workers: workers that receive the shares.
        crypto_provider: worker passed as `crypto_provider` to share().

    Returns:
        (private_train_loader, private_test_loader): two lists of
        (data, target) tuples. `data` has shape (batch, 1, n_features), the
        (batch, channel, length) layout Net.forward works on, and `target`
        has shape (batch, 1, 1). The last batch of a split may be smaller.
    """

    def secret_share(tensor): #Transforms to fixed precision and secret share a tensor
        return (
            tensor
            .fix_precision(precision_fractional=precision_fractional)
            .share(*workers, crypto_provider=crypto_provider, requires_grad=True)
        )

    def to_private_batches(features, labels, batch_size):
        # Share each mini-batch exactly once, instead of re-sharing the whole
        # array once per sample.
        batches = []
        for start in range(0, len(features), batch_size):
            stop = start + batch_size
            data = np.asarray(features[start:stop], dtype=np.float32)
            target = np.asarray(labels[start:stop], dtype=np.float32)
            # Insert the channel axis the convolutional branch needs, and give
            # the target the same rank so the loss subtraction is element-wise.
            data = torch.from_numpy(data.reshape(len(data), 1, -1))
            target = torch.from_numpy(target.reshape(-1, 1, 1))
            batches.append((secret_share(data), secret_share(target)))
        return batches

    private_train_loader = to_private_batches(x_train, y_train, args.batch_size)
    private_test_loader = to_private_batches(x_test, y_test, args.test_batch_size)
    return private_train_loader, private_test_loader
    
    
# Build the secret-shared train/test batches once, up front; train() and
# test() iterate over these lists.
private_train_loader, private_test_loader = get_private_data_loaders(
    precision_fractional=args.precision_fractional,
    workers=workers,
    crypto_provider=crypto_provider
)

In [9]:
# Number of features per sample; sizes Net's first Linear layer and the
# reshapes to (batch, 1, dim). It must match the column count of x (the
# z_test.shape shown earlier is 12635 = features + 1 label column).
dim = 12634

class Res1d(nn.Module):
    """Residual 1-D convolution block: ``relu(main(x) + shortcut(x))``.

    Main path: Conv1d (no bias) followed by InstanceNorm1d.

    Shortcut path:
      * ``strides > 1``: AvgPool1d(strides), then a kernel-1 Conv1d and
        InstanceNorm1d;
      * ``strides == 1`` and ``inSize != outSize``: kernel-1 Conv1d and
        InstanceNorm1d;
      * otherwise (same channels, stride 1): the input itself.

    Args:
        inSize: number of input channels.
        outSize: number of output channels.
        kernel: kernel size of the main-path convolution.
        strides: stride of the main-path convolution; also the pooling window
            of the shortcut when greater than 1.
    """

    def __init__(self, inSize, outSize, kernel=(3,), strides=1,):
        super(Res1d, self).__init__()
        # Padding is hard-coded: 0 for stride-2 blocks with 16/64/128/512
        # input channels, 1 otherwise.
        # NOTE(review): presumably tuned so both paths yield the same length
        # for this notebook's input size — confirm before changing `dim`.
        # `==` replaces the original identity test (`is 2`) on an int literal.
        if inSize in (16,64,128,512) and strides == 2:
            pding = 0
        else:
            pding = 1
        self.l1 = nn.Conv1d(inSize, outSize, kernel, stride=strides, padding=pding, bias=False)
        self.l2 = nn.InstanceNorm1d(outSize)

        # Always define r1..r4 so forward() can test them; None marks a stage
        # that is skipped.
        if strides > 1:
            self.r1 = nn.Identity()
            self.r2 = nn.AvgPool1d(strides)
        else:
            self.r1 = None
            self.r2 = None

        if strides > 1 or inSize != outSize:
            self.r3 = nn.Conv1d(inSize, outSize, 1, bias=False)
            self.r4 = nn.InstanceNorm1d(outSize)
        else:
            # Shapes already match: plain identity shortcut.
            self.r3 = None
            self.r4 = None

        self.relu = nn.ReLU()

    def forward(self, x):
        """Return relu(main(x) + shortcut(x)) for x of shape (batch, inSize, length)."""
        l = self.l1(x)
        l = self.l2(l)

        r = x
        if self.r1 is not None:
            # Down-sample the shortcut to follow the strided main path.
            r = self.r1(r)
            r = self.r2(r)
        if self.r3 is not None:
            # Project the shortcut to the output channel count.
            r = self.r3(r)
            r = self.r4(r)

        x = l + r
        return self.relu(x)
    
class Net(nn.Module):
    """Two-branch binary classifier.

    A dense branch (Linear + ReLU) and a convolutional branch (seventeen
    Res1d blocks, r1..r17) both read the same input. Their outputs are
    concatenated on the last axis and reduced by two Linear layers to one
    sigmoid output per sample.
    """

    # (inSize, outSize, strides) of r1..r17, in registration and call order.
    _RES_LAYOUT = (
        (1, 4, 1),
        (4, 8, 1), (8, 8, 2),
        (8, 16, 1), (16, 16, 2),
        (16, 32, 1), (32, 32, 2),
        (32, 64, 1), (64, 64, 2),
        (64, 128, 1), (128, 128, 2),
        (128, 256, 1), (256, 256, 2),
        (256, 512, 1), (512, 512, 2),
        (512, 1024, 1), (1024, 1024, 2),
    )

    def __init__(self):
        super(Net, self).__init__()
        # Dense branch.
        self.l1 = nn.Linear(dim, 64)
        self.l2 = nn.ReLU()

        # Convolutional branch: registered as attributes r1 .. r17.
        for position, (n_in, n_out, step) in enumerate(self._RES_LAYOUT, start=1):
            setattr(self, "r%d" % position, Res1d(n_in, n_out, 3, strides=step))

        # Head. The input width was found by experiment and is hard-coded.
        self.lastLinear = nn.Linear(50240,32)
        self.lastRelu = nn.ReLU()
        self.lastAgg = nn.Linear(32,1)
        self.lastSigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map x of shape (batch, channel, time) to one sigmoid score per sample."""
        dense = self.l2(self.l1(x))

        # The residual blocks run in order r1 -> r17 along the time axis.
        conv = x
        for position in range(1, len(self._RES_LAYOUT) + 1):
            conv = getattr(self, "r%d" % position)(conv)

        # Flatten the convolutional features to (batch, 1, features) and join
        # them with the dense branch on the last axis.
        conv = conv.view(x.shape[0],1, -1)
        merged = torch.cat((dense,conv),-1)

        hidden = self.lastRelu(self.lastLinear(merged))
        return self.lastSigmoid(self.lastAgg(hidden))

In [10]:
def train(args, model, private_train_loader, optimizer, epoch):
    """Run one epoch of training over the secret-shared batches.

    For each (data, target) pair: zero the gradients, run the model, compute
    the summed squared error divided by the batch size, back-propagate and
    step the optimizer. Every `args.log_interval` batches the loss is fetched
    and converted back with get().float_precision(), then printed with the
    batch time.
    """
    model.train()
    n_batches = len(private_train_loader)
    step = 0
    for data, target in private_train_loader: # <-- now it is a private dataset
        started = time.time()

        optimizer.zero_grad()
        output = model(data)

        # loss = F.nll_loss(output, target)  <-- not possible here
        n_rows = output.shape[0]
        squared_error = (output - target)**2
        loss = squared_error.sum().refresh()/n_rows

        loss.backward()
        optimizer.step()

        if step % args.log_interval == 0:
            plain_loss = loss.get().float_precision()
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tTime: {:.3f}s'.format(
                epoch,
                step * args.batch_size,
                n_batches * args.batch_size,
                100. * step / n_batches,
                plain_loss.item(),
                time.time() - started))
        step += 1

In [11]:
def test(args, model, private_test_loader):
    """Evaluate the model on the secret-shared test batches and print accuracy.

    Correct predictions are accumulated while still shared; only the final
    count is fetched with get().float_precision(). The accuracy denominator
    is the number of rows actually evaluated, so batches of any size
    (including a short last batch) are counted correctly.

    Args:
        args: run settings (kept for interface compatibility).
        model: the model to evaluate.
        private_test_loader: iterable of (data, target) pairs.
    """
    model.eval()
    correct = 0
    n_samples = 0
    with torch.no_grad():
        for data, target in private_test_loader:
            output = model(data)
            # NOTE(review): Net ends in a single sigmoid unit, so argmax over
            # dim=1 presumably always returns 0 and this counts label-0 rows;
            # a 0.5 threshold is probably what was intended — confirm that
            # the comparison is supported on shared tensors before changing.
            pred = output.argmax(dim=1)
            correct += pred.eq(target.view_as(pred)).sum()
            n_samples += output.shape[0]

    if n_samples == 0:
        # Nothing was evaluated: `correct` is still a plain int with no .get().
        print('\nTest set: Accuracy: 0/0 (no test samples)\n')
        return

    correct = correct.get().float_precision()
    print('\nTest set: Accuracy: {}/{} ({:.0f}%)\n'.format(
        correct.item(), n_samples,
        100. * correct.item() / n_samples))

In [12]:
# Build the network, convert it to fixed precision and share it between the
# workers, mirroring what is done to the data batches.
model = Net()
model = model.fix_precision().share(*workers, crypto_provider=crypto_provider, requires_grad=True)

# The optimizer is converted to fixed precision as well.
optimizer = optim.SGD(model.parameters(), lr=args.lr)
optimizer = optimizer.fix_precision() 

# One training pass followed by one evaluation pass per epoch.
# NOTE(review): the recorded run of this cell stopped with
# "TypeError: __bool__ should return bool, returned Tensor".
for epoch in range(1, args.epochs + 1):
    train(args, model, private_train_loader, optimizer, epoch)
    test(args, model, private_test_loader)

TypeError: __bool__ should return bool, returned Tensor

In [11]:
# Plain (non-encrypted) copy of the network on the GPU.
# NOTE(review): the recorded run of this cell raised a RuntimeError about
# mixing cuda and cpu device types, and the execution counter restarts here,
# so this cell seems to belong to a separate session — confirm the intended
# run order.
device = torch.device("cuda")
net = Net().to(device)


RuntimeError: Expected object of device type cuda but got device type cpu for argument #1 'self' in call to _th_set_

In [12]:
# Smoke test: push a random batch of 80 samples shaped (80, 1, dim) through
# the network and display the output shape.
input_test = torch.randn(80, 1, dim).float().cuda()
out = net(input_test).cuda()
out.shape

NameError: name 'net' is not defined

# Train the model

In [7]:
# Reshape the full dataset for the network: features to
# (batch, channel = 1, length = dim), labels to (batch, 1, 1); both as float32.
data_torch = torch.from_numpy(x).view([-1, 1, dim]).float()
label_torch = torch.from_numpy(y).view([-1,1,1]).float()

In [8]:
import torch.optim as optim
# Loss and optimizer for the plain GPU run: binary cross entropy on the
# sigmoid output, SGD with momentum over the parameters of `net`.
criterion = nn.BCELoss() # Binary Cross Entropy 
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [10]:
# Held-out tensors on the GPU.
# NOTE(review): test_inputs / test_labels are not used in this cell.
test_inputs = torch.from_numpy(x_test).view([-1, 1, dim]).float().cuda()
test_labels = torch.from_numpy(y_test).view([-1, 1]).float().cuda()

# 1000 optimisation steps, each on a random mini-batch of 30 training rows.
for batch in range(1000):  # loop over the dataset multiple times
    # draw 30 row indices from the training set (np.random.choice samples
    # with replacement by default, so a batch may repeat a row)
    indices = np.random.choice(len(x_train), size=(30))
    inputs = x_train[indices]
    labels = y_train[indices]
    
    # features to (batch, 1, dim), labels to (batch, 1), both float32 on the GPU
    inputs = torch.from_numpy(inputs).view([-1, 1, dim]).float().cuda()
    labels = torch.from_numpy(labels).view([-1, 1]).float().cuda()
    
    # zero the parameter gradients
    optimizer.zero_grad()

    # forward + backward + optimize
    # the network output is flattened to (batch, 1) to match `labels`
    outputs = net(inputs).view([-1,1]).cuda()
    loss = criterion(outputs, labels).cuda()
    loss.backward()
    optimizer.step()
    
    # progress line every 10 steps; prints the loss tensor itself
    if batch % 10 == 0:
        print(batch / 10 + 1, "% Trained", "loss = ", loss)

print('Finished Training')

1.0 % Trained loss =  tensor(0.7136, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
2.0 % Trained loss =  tensor(0.5648, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
3.0 % Trained loss =  tensor(0.3351, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
4.0 % Trained loss =  tensor(0.1074, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
5.0 % Trained loss =  tensor(0.0421, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
6.0 % Trained loss =  tensor(0.0147, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
7.0 % Trained loss =  tensor(0.0081, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
8.0 % Trained loss =  tensor(0.0052, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
9.0 % Trained loss =  tensor(0.0051, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
10.0 % Trained loss =  tensor(0.0032, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
11.0 % Trained loss =  tensor(0.0024, device='cuda:0', grad_fn=<BinaryCrossEntr

90.0 % Trained loss =  tensor(0.0003, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
91.0 % Trained loss =  tensor(0.0003, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
92.0 % Trained loss =  tensor(0.0003, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
93.0 % Trained loss =  tensor(0.0002, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
94.0 % Trained loss =  tensor(0.0003, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
95.0 % Trained loss =  tensor(0.0002, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
96.0 % Trained loss =  tensor(0.0002, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
97.0 % Trained loss =  tensor(0.0003, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
98.0 % Trained loss =  tensor(0.0003, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
99.0 % Trained loss =  tensor(0.0002, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
100.0 % Trained loss =  tensor(0.0002, device='cuda:0', grad_fn=<Binar

In [11]:
# MPC learning
# Display the module tree of `net` (layer types and their hyper-parameters).
net

Net(
  (l1): Linear(in_features=12634, out_features=64, bias=True)
  (l2): ReLU()
  (r1): Res1d(
    (l1): Conv1d(1, 4, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
    (l2): InstanceNorm1d(4, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
    (r3): Conv1d(1, 4, kernel_size=(1,), stride=(1,), bias=False)
    (r4): InstanceNorm1d(4, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
    (relu): ReLU()
  )
  (r2): Res1d(
    (l1): Conv1d(4, 8, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
    (l2): InstanceNorm1d(8, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
    (r3): Conv1d(4, 8, kernel_size=(1,), stride=(1,), bias=False)
    (r4): InstanceNorm1d(8, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
    (relu): ReLU()
  )
  (r3): Res1d(
    (l1): Conv1d(8, 8, kernel_size=(3,), stride=(2,), padding=(1,), bias=False)
    (l2): InstanceNorm1d(8, eps=1e-05, momentum=0.1, affine=False, track_runnin

In [12]:
# Forward pass over the whole reshaped dataset on the GPU; the cell displays
# the resulting tensor, one score per sample.
# NOTE(review): runs outside torch.no_grad() and prints every row — consider
# displaying only a slice.
net(data_torch.cuda())

tensor([[[1.9235e-04]],

        [[2.7360e-04]],

        [[1.4392e-04]],

        [[3.3061e-04]],

        [[1.0315e-04]],

        [[3.0822e-04]],

        [[2.3279e-04]],

        [[2.8794e-04]],

        [[2.0224e-04]],

        [[2.3346e-04]],

        [[2.8597e-04]],

        [[2.5769e-04]],

        [[3.1846e-04]],

        [[2.9116e-04]],

        [[2.4118e-04]],

        [[3.2269e-04]],

        [[3.1257e-04]],

        [[2.3454e-04]],

        [[2.3503e-04]],

        [[3.3291e-04]],

        [[1.6589e-04]],

        [[2.6916e-04]],

        [[3.0157e-04]],

        [[1.8384e-04]],

        [[1.3061e-04]],

        [[2.0306e-04]],

        [[2.0023e-04]],

        [[2.0535e-04]],

        [[2.7104e-04]],

        [[2.9693e-04]],

        [[3.4671e-04]],

        [[3.1465e-04]],

        [[1.1507e-04]],

        [[2.5428e-04]],

        [[2.9876e-04]],

        [[3.1614e-04]],

        [[1.5914e-04]],

        [[1.0949e-04]],

        [[3.3419e-04]],

        [[2.8547e-04]],

