In [1]:
import torch; print(torch.__version__)

1.1.0


In [2]:
import argparse
import numpy as np
from itertools import count

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.distributions import Categorical
from torchvision import transforms
from torchvision import datasets, transforms


import time

In [3]:
# Preprocessing pipeline: convert images to tensors, then normalise with
# mean 0.1307 and std 0.3081.
transformation = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

# MNIST training split (stored under ../data) served in batches of 7.
train_loader = torch.utils.data.DataLoader(
    dataset=datasets.MNIST(
        root='../data',
        train=True,
        transform=transformation,
        download=True,
    ),
    batch_size=7,
)

In [4]:
class Arguments():
    """Plain container for the hyper-parameters read by the training cells."""

    def __init__(self):
        settings = {
            "batch_size": 14,
            "test_batch_size": 14,
            "epochs": 10,
            "lr": 0.001,
            "seed": 1,
            "log_interval": 1,  # Log info at each batch
            "precision_fractional": 3,
        }
        # Expose every setting as an ordinary instance attribute.
        for key, value in settings.items():
            setattr(self, key, value)


args = Arguments()

# Seed torch's global RNG; the generator it returns is not needed.
_ = torch.manual_seed(args.seed)

In [5]:
import syft as sy

# PySyft hook around torch; it is handed to every VirtualWorker created below.
hook = sy.TorchHook(torch)


def connect_to_workers(n_workers):
    """Create `n_workers` virtual workers with ids worker1, worker2, ..."""
    created = []
    for i in range(n_workers):
        created.append(sy.VirtualWorker(hook, id=f"worker{i+1}"))
    return created


def connect_to_crypto_provider():
    """Create the virtual worker registered under the id "crypto_provider"."""
    provider = sy.VirtualWorker(hook, id="crypto_provider")
    return provider


# Two data-holding workers plus one crypto provider.
workers = connect_to_workers(n_workers=2)
crypto_provider = connect_to_crypto_provider()

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])








In [6]:
import pandas as pd
import numpy as np

def getSamples(filename):
    """Read a tab-separated file and return its values transposed.

    The file is parsed with pandas; the first column of the resulting value
    matrix is discarded and the rest is transposed, so each remaining column
    of the file becomes one row of the returned array.
    """
    table = pd.read_csv(filename, sep='\t')
    matrix = table.values
    without_first_column = matrix[:, 1:]
    return without_first_column.T

# Number of feature columns per sample; used by later cells when reshaping.
dim = 12634
data1 = getSamples("GSE2034-Normal-train.txt")
data2 = getSamples("GSE2034-Tumor-train.txt")

# Label column vectors: 0 for rows from the "Normal" file, 1 for rows from the "Tumor" file.
data1Label = np.zeros(len(data1)).reshape((-1, 1))
data2Label = np.ones(len(data2)).reshape((-1, 1))
x = np.concatenate((data1, data2))
y = np.concatenate((data1Label, data2Label))
# https://stackoverflow.com/questions/43229034/randomly-shuffle-data-and-labels-from-different-files-in-the-same-order
# Shuffle features and labels with the same permutation. The permutation is
# drawn from a dedicated generator seeded with args.seed, so the train/test
# split made from this ordering is the same on every run (the global numpy
# RNG was previously used unseeded).
idx = np.random.RandomState(args.seed).permutation(len(x))
x,y = x[idx], y[idx]

# Features and label side by side: one row per sample, label in the last column.
z = np.concatenate((x, y), axis = 1)

In [7]:
# Partition sizes: the first n_train_items shuffled rows are used for training,
# every remaining row for testing. n_test_items is only read when the private
# test loader is built.
n_train_items = 181
n_test_items = 46

# Cut features, labels and the combined matrix at the same boundary.
x_train, x_test = x[:n_train_items], x[n_train_items:]
y_train, y_test = y[:n_train_items], y[n_train_items:]
z_train, z_test = z[:n_train_items], z[n_train_items:]

In [8]:
y_test.shape

(46, 1)

In [9]:
# Give the training features an explicit middle axis -> (samples, 1, dim);
# the training labels are kept as a single column.
x_train = np.reshape(x_train, (-1, 1, dim))
y_train = np.reshape(y_train, (-1, 1))
def get_private_data_loaders(precision_fractional, workers, crypto_provider):
    """Build secret-shared (input, target) pairs for training and testing.

    Besides its arguments, this reads the module-level arrays x_train,
    y_train, x_test and y_test, the counts n_train_items / n_test_items and
    args.batch_size / args.test_batch_size.

    Args:
        precision_fractional: forwarded to fix_precision() for every tensor.
        workers: workers passed positionally to share().
        crypto_provider: worker passed to share() as crypto_provider.

    Returns:
        (private_train_loader, private_test_loader), two lists of
        (shared input, shared target) tuples.
    """

    def one_hot_of(index_tensor):
        """Return the one-hot encoding (2 output classes) of index_tensor."""
        blank = torch.zeros(*index_tensor.shape, 2)
        return blank.scatter(1, index_tensor.view(-1, 1), 1)

    def secret_share(tensor):
        """Convert a tensor to fixed precision, then secret share it."""
        fixed = tensor.fix_precision(precision_fractional=precision_fractional)
        return fixed.share(*workers, crypto_provider=crypto_provider, requires_grad=True)

    # NOTE(review): each entry below is built from a single row (x_train[i]),
    # not from a slice of args.batch_size rows, and only the indices below
    # n_train_items / args.batch_size are kept -- confirm this is intended.
    private_train_loader = []
    for i in range(n_train_items):
        if not i < n_train_items / args.batch_size:
            break  # the bound is fixed, so no later index can qualify
        shared_input = secret_share(torch.Tensor(x_train[i]))
        shared_target = secret_share(one_hot_of(torch.LongTensor(y_train[i])))
        private_train_loader.append((shared_input, shared_target))

    # Test targets are shared as raw label values (no one-hot encoding).
    private_test_loader = []
    for i in range(n_test_items):
        if not i < n_test_items / args.test_batch_size:
            break
        shared_input = secret_share(torch.Tensor(x_test[i]))
        shared_target = secret_share(torch.Tensor(y_test[i]))
        private_test_loader.append((shared_input, shared_target))

    return private_train_loader, private_test_loader
    
    
# Build both secret-shared datasets with the configured fixed-point precision.
private_train_loader, private_test_loader = get_private_data_loaders(
    args.precision_fractional, workers, crypto_provider
)

In [10]:

class Res1d(nn.Module):
    """1-D residual block: conv + instance norm, added to a shortcut, then ReLU.

    Main path: Conv1d(inSize -> outSize, kernel, stride=strides) followed by
    InstanceNorm1d.

    Shortcut path:
      * strides > 1: AvgPool1d(strides), then a 1x1 Conv1d and InstanceNorm1d;
      * strides == 1 and inSize != outSize: 1x1 Conv1d and InstanceNorm1d;
      * strides == 1 and inSize == outSize: the input itself (identity).

    Args:
        inSize: number of input channels.
        outSize: number of output channels.
        kernel: kernel size of the main-path convolution.
        strides: stride of the main-path convolution (and pooling window of
            the shortcut when greater than 1).
    """

    def __init__(self, inSize, outSize, kernel=(3,), strides=1,):
        super(Res1d, self).__init__()
        # hard-coded to do the padding correctly: stride-2 blocks with these
        # input channel counts use no padding, everything else pads by 1.
        # (presumably chosen so the conv output length equals the pooled
        # shortcut length for the sequence lengths seen in Net -- verify
        # before reusing the block elsewhere)
        # Compare by value: `is` on an int literal tests object identity.
        if inSize in (16, 64, 128, 512) and strides == 2:
            pding = 0
        else:
            pding = 1
        self.l1 = nn.Conv1d(inSize, outSize, kernel, stride=strides, padding=pding, bias=False)
        self.l2 = nn.InstanceNorm1d(outSize)

        # Always define the shortcut attributes so forward() can test them;
        # None means "this stage is skipped".
        self.r1 = None
        self.r2 = None
        self.r3 = None
        self.r4 = None
        if strides > 1 or inSize != outSize:
            if strides > 1:
                self.r1 = nn.Identity()
                self.r2 = nn.AvgPool1d(strides)
            self.r3 = nn.Conv1d(inSize, outSize, 1, bias=False)
            self.r4 = nn.InstanceNorm1d(outSize)

        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply the block to x, expected as (batch, channel, length)."""
        l = self.l2(self.l1(x))

        r = x
        if self.r1 is not None:
            # down-sample the shortcut to follow the strided convolution
            r = self.r2(self.r1(r))
        if self.r3 is not None:
            # project the shortcut to outSize channels
            r = self.r4(self.r3(r))

        return self.relu(l + r)
    
class Net(nn.Module):
    """Two-branch classifier over inputs shaped (batch, channel, time).

    A dense branch (Linear(dim, 64) + ReLU) and a stack of 17 Res1d blocks
    both read the same input. Their outputs are concatenated along the last
    axis and passed through Linear -> ReLU -> Linear -> Sigmoid.
    Reads the module-level `dim` for the dense branch's input width.
    """

    # (inSize, outSize, strides) of r1 .. r17, in the order they are applied
    _RES_SPECS = (
        (1, 4, 1),
        (4, 8, 1), (8, 8, 2),
        (8, 16, 1), (16, 16, 2),
        (16, 32, 1), (32, 32, 2),
        (32, 64, 1), (64, 64, 2),
        (64, 128, 1), (128, 128, 2),
        (128, 256, 1), (256, 256, 2),
        (256, 512, 1), (512, 512, 2),
        (512, 1024, 1), (1024, 1024, 2),
    )

    def __init__(self):
        super(Net, self).__init__()
        self.l1 = nn.Linear(dim, 64)
        self.l2 = nn.ReLU()

        # Register the residual blocks under the names r1 .. r17.
        for position, (n_in, n_out, stride) in enumerate(self._RES_SPECS, start=1):
            setattr(self, f"r{position}", Res1d(n_in, n_out, 3, strides=stride))

        # size is by experiment and hardcode
        # (presumably the 64 dense features plus the flattened conv output
        # for the current `dim` -- re-derive if `dim` changes)
        self.lastLinear = nn.Linear(50240, 32)
        self.lastRelu = nn.ReLU()
        self.lastAgg = nn.Linear(32, 1)
        self.lastSigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid output for x of shape (batch, channel, time)."""
        # dense branch
        dense = self.l2(self.l1(x))

        # conv layers should operate on time
        conv = x
        for position in range(1, len(self._RES_SPECS) + 1):
            conv = getattr(self, f"r{position}")(conv)

        # flatten the conv branch to (batch, 1, features) and join both branches
        conv = conv.view(x.shape[0], 1, -1)
        merged = torch.cat((dense, conv), -1)

        hidden = self.lastRelu(self.lastLinear(merged))
        return self.lastSigmoid(self.lastAgg(hidden))

In [11]:
def train(args, model, private_train_loader, optimizer, epoch):
    """Run one pass over the private training pairs and update the model.

    Args:
        args: object providing log_interval and batch_size.
        model: the network to optimise; called on inputs viewed as [-1, 1, dim].
        private_train_loader: sized iterable of (data, target) pairs.
        optimizer: optimizer whose zero_grad() / step() are called per pair.
        epoch: epoch number, used only in the progress message.

    Reads the module-level `dim` when reshaping the inputs.
    """
    model.train()
    for batch_idx, pair in enumerate(private_train_loader):  # <-- now it is a private dataset
        data, target = pair
        start_time = time.time()
        optimizer.zero_grad()

        inputs = data.view([-1, 1, dim])
        labels = target.view([-1, 1])
        output = model(inputs)

        # loss = F.nll_loss(output, target)  <-- not possible here
        # Squared error summed over all entries, divided by the leading
        # dimension of the output.
        batch_size = output.shape[0]
        squared_error = (output - labels) ** 2
        loss = squared_error.sum().refresh() / batch_size

        loss.backward()
        optimizer.step()

        if batch_idx % args.log_interval != 0:
            continue

        # Retrieve the loss and convert it back from fixed precision for display.
        loss = loss.get().float_precision()
        seen = batch_idx * args.batch_size
        total = len(private_train_loader) * args.batch_size
        percent = 100. * batch_idx / len(private_train_loader)
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tTime: {:.3f}s'.format(
            epoch, seen, total, percent, loss.item(), time.time() - start_time))

In [12]:
def test(args, model, private_test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in private_test_loader:
            start_time = time.time()
            
            output = model(data)
            pred = output.argmax(dim=1)
            correct += pred.eq(target.view_as(pred)).sum()

    correct = correct.get().float_precision()
    print('\nTest set: Accuracy: {}/{} ({:.0f}%)\n'.format(
        correct.item(), len(private_test_loader)* args.test_batch_size,
        100. * correct.item() / (len(private_test_loader) * args.test_batch_size)))

In [13]:
#device = torch.device("cuda")
#net = Net().to(device)


In [14]:
#input_test = torch.randn(80, 1, dim).float().cuda()
#out = net(input_test).cuda()
#out.shape

# Train the model

In [15]:
# Build the network, convert it to fixed precision and secret share it across
# the workers.
model = Net().fix_precision().share(
    *workers, crypto_provider=crypto_provider, requires_grad=True
)

# SGD over the shared parameters, with the optimizer converted to fixed precision too.
optimizer = optim.SGD(model.parameters(), lr=args.lr).fix_precision()

# One training pass followed by one evaluation per epoch.
# NOTE(review): the recorded output of this cell is a TypeError raised from relu().
for epoch in range(1, args.epochs + 1):
    train(args, model, private_train_loader, optimizer, epoch)
    test(args, model, private_test_loader)

TypeError: relu(): argument 'input' (position 1) must be Tensor, not NoneType

# Encrypted Training Ends Here.

In [30]:
# Whole dataset as float tensors: features viewed as (batch, channel=1, dim),
# labels viewed as (batch, 1, 1).
data_torch = torch.from_numpy(x).view(-1, 1, dim).float()
label_torch = torch.from_numpy(y).view(-1, 1, 1).float()

In [31]:
import torch.optim as optim
# Fresh network for the plain training loop, with binary cross-entropy as the
# loss and SGD with momentum as the optimizer.
net = Net()
criterion = nn.BCELoss()
optimizer = optim.SGD(params=net.parameters(), lr=0.001, momentum=0.9)

In [33]:
# Test split as float tensors (not read inside the loop below).
test_inputs = torch.from_numpy(x_test).view(-1, 1, dim).float()
test_labels = torch.from_numpy(y_test).view(-1, 1).float()

for batch in range(100):  # 100 optimisation steps, one random mini-batch each
    # pick 30 training rows at random for this step
    indices = np.random.choice(len(x_train), size=(30))
    inputs = torch.from_numpy(x_train[indices]).float()
    labels = torch.from_numpy(y_train[indices]).float()

    # clear old gradients, then forward + backward + optimize
    optimizer.zero_grad()
    outputs = net(inputs).view(-1, 1)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    print(batch, "% Trained", "loss = ", loss.item())

print('Finished Training')

0 % Trained loss =  0.7347368001937866
1 % Trained loss =  0.677176296710968
2 % Trained loss =  0.6332848072052002
3 % Trained loss =  0.682956337928772
4 % Trained loss =  0.5419864654541016
5 % Trained loss =  0.6018826365470886
6 % Trained loss =  0.5016396045684814
7 % Trained loss =  0.5087392330169678
8 % Trained loss =  0.47488686442375183
9 % Trained loss =  0.5279948711395264
10 % Trained loss =  0.44033798575401306
11 % Trained loss =  0.4272150695323944
12 % Trained loss =  0.35304442048072815
13 % Trained loss =  0.36744973063468933
14 % Trained loss =  0.3320218622684479
15 % Trained loss =  0.28930553793907166
16 % Trained loss =  0.4309445917606354
17 % Trained loss =  0.43173375725746155
18 % Trained loss =  0.2578074336051941
19 % Trained loss =  0.33714187145233154
20 % Trained loss =  0.2857415974140167
21 % Trained loss =  0.18926239013671875
22 % Trained loss =  0.16352412104606628
23 % Trained loss =  0.3497416377067566
24 % Trained loss =  0.4228338301181793
25 

In [57]:
# Forward pass of the plainly trained `net` over every sample in data_torch.
# Neither `net` nor `data_torch` is secret shared where they are defined, so
# this is ordinary (non-MPC) inference.
net(data_torch)

tensor([[[2.1642e-03]],

        [[9.9946e-01]],

        [[9.9937e-01]],

        [[9.9949e-01]],

        [[9.9772e-01]],

        [[9.9855e-01]],

        [[9.9930e-01]],

        [[9.9884e-01]],

        [[3.6327e-03]],

        [[2.6941e-03]],

        [[2.3771e-03]],

        [[9.9801e-01]],

        [[2.4945e-03]],

        [[9.9907e-01]],

        [[4.9189e-03]],

        [[1.6478e-03]],

        [[9.9898e-01]],

        [[9.9872e-01]],

        [[9.9893e-01]],

        [[4.1863e-03]],

        [[9.9802e-01]],

        [[9.9917e-01]],

        [[9.9854e-01]],

        [[3.4620e-03]],

        [[2.9419e-03]],

        [[3.7487e-03]],

        [[1.1803e-03]],

        [[4.6078e-03]],

        [[9.9912e-01]],

        [[9.9887e-01]],

        [[4.2135e-03]],

        [[9.9907e-01]],

        [[9.9903e-01]],

        [[5.2026e-03]],

        [[4.4798e-03]],

        [[9.9760e-01]],

        [[9.9961e-01]],

        [[9.9619e-01]],

        [[1.4576e-03]],

        [[9.9898e-01]],

