In [2]:
import argparse
import numpy as np
from itertools import count

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.distributions import Categorical

In [3]:
import pandas as pd
import numpy as np

def getSamples(filename):
    """Load a tab-separated table and return it with one row per sample.

    The file is parsed with ``pandas.read_csv`` using a tab separator. The
    first column is dropped and the remaining columns are transposed, so the
    result has shape ``(number of remaining columns, number of data rows)``.

    The first column is removed *before* the frame is converted to NumPy.
    Converting the whole frame first upcasts every value to ``object`` dtype
    whenever that column holds strings, and ``torch.from_numpy`` cannot consume
    object arrays.
    NOTE(review): the first column is presumably a gene/probe identifier --
    confirm against the data files.

    Args:
        filename: Path (or any source accepted by ``pandas.read_csv``) of the
            tab-separated file.

    Returns:
        numpy.ndarray: 2-D array holding the remaining columns, transposed.
    """
    data = pd.read_csv(filename, sep='\t')
    # Slice first, convert second: the dtype is then decided only by the
    # columns that are actually returned.
    return data.iloc[:, 1:].to_numpy().transpose()

# Load both classes; getSamples returns one row per sample.
data1 = getSamples("GSE2034-Normal-train.txt")
data2 = getSamples("GSE2034-Tumor-train.txt")

# Column-vector labels: 0 for every row of data1 (Normal file),
# 1 for every row of data2 (Tumor file).
data1Label = np.zeros((len(data1), 1))
data2Label = np.ones((len(data2), 1))

# Stack along the sample axis, data1 rows first, so x and y stay aligned.
x = np.concatenate([data1, data2], axis=0)
y = np.concatenate([data1Label, data2Label], axis=0)

In [4]:
# Number of features per sample. Net's first Linear layer takes `dim` inputs and
# the notebook reshapes every batch to (-1, 1, dim). Net.lastLinear's hard-coded
# input size (50240) was determined for this value, so changing `dim` requires
# updating that layer as well.
dim = 12634
class Res1d(nn.Module):
    """1-D residual block: ``ReLU(InstanceNorm(Conv1d(x)) + shortcut(x))``.

    Main path: ``Conv1d(inSize, outSize, kernel, stride=strides)`` without bias,
    followed by ``InstanceNorm1d(outSize)``.

    Shortcut path:
      * ``strides > 1``: ``AvgPool1d(strides)`` then a 1x1 ``Conv1d`` and
        ``InstanceNorm1d``;
      * ``strides == 1`` and ``inSize != outSize``: 1x1 ``Conv1d`` and
        ``InstanceNorm1d``;
      * ``strides == 1`` and ``inSize == outSize``: identity (the input is
        added unchanged).

    Args:
        inSize: Number of input channels.
        outSize: Number of output channels.
        kernel: Kernel size of the main convolution.
        strides: Stride of the main convolution; also the pooling window of
            the shortcut when greater than 1.

    Input is ``(batch, inSize, length)``. Both paths must produce the same
    length, which is only guaranteed for the padding cases described in
    ``__init__``.
    """

    def __init__(self, inSize, outSize, kernel=(3,), strides=1,):
        super(Res1d, self).__init__()
        # hard-coded to do the padding correctly: for these channel counts the
        # stride-2 blocks use no padding so that the strided convolution and
        # the AvgPool1d shortcut yield the same length.
        # NOTE(review): this ties the padding to the channel count rather than
        # to the actual input length -- it is tuned to the layer stack in Net.
        # `==` instead of `is`: identity comparison with an int literal is
        # implementation-dependent and raises a SyntaxWarning on Python 3.8+.
        if inSize in (16, 64, 128, 512) and strides == 2:
            pding = 0
        else:
            pding = 1
        self.l1 = nn.Conv1d(inSize, outSize, kernel, stride=strides, padding=pding, bias=False)
        self.l2 = nn.InstanceNorm1d(outSize)

        # Every shortcut attribute is always assigned (None when unused) so
        # that forward() can test them without risking an AttributeError.
        if strides > 1:
            self.r1 = nn.Identity()
            self.r2 = nn.AvgPool1d(strides)
        else:
            self.r1 = None
            self.r2 = None

        if strides > 1 or inSize != outSize:
            # Projection shortcut: match the channel count of the main path.
            self.r3 = nn.Conv1d(inSize, outSize, 1, bias=False)
            self.r4 = nn.InstanceNorm1d(outSize)
        else:
            # Shapes already match: plain identity shortcut.
            self.r3 = None
            self.r4 = None

        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply the block to ``x`` of shape ``(batch, inSize, length)``."""
        l = self.l2(self.l1(x))

        r = x
        if self.r2 is not None:
            # Downsample the shortcut to the length of the strided main path.
            r = self.r2(self.r1(r))
        if self.r3 is not None:
            r = self.r4(self.r3(r))

        return self.relu(l + r)
    
class Net(nn.Module):
    """Two-branch binary classifier over inputs of shape ``(batch, 1, dim)``.

    * Dense branch: ``Linear(dim, 64)`` + ReLU applied to the raw input.
    * Convolutional branch: 17 ``Res1d`` blocks (``r1`` .. ``r17``) applied in
      order, widening the channels 1 -> 4 -> 8 -> ... -> 1024 and using a
      stride of 2 in every second block from ``r3`` onwards.

    Both branches are concatenated along the last axis and reduced by
    ``Linear(50240, 32)`` -> ReLU -> ``Linear(32, 1)`` -> Sigmoid, so the
    output has shape ``(batch, 1, 1)`` with values in (0, 1).
    """

    def __init__(self):
        super(Net, self).__init__()
        self.l1 = nn.Linear(dim, 64)
        self.l2 = nn.ReLU()

        # Residual stack. The blocks are registered under the names r1 .. r17
        # (via setattr) and created in that order, so parameter names and
        # initialisation order match a hand-written list of 17 assignments.
        channels = 4
        self.r1 = Res1d(1, channels, 3)
        blockIndex = 2
        for width in (8, 16, 32, 64, 128, 256, 512, 1024):
            # Even-numbered block widens the channels at stride 1 ...
            setattr(self, "r%d" % blockIndex, Res1d(channels, width, 3))
            # ... odd-numbered block keeps the width and halves the length.
            setattr(self, "r%d" % (blockIndex + 1), Res1d(width, width, 3, strides=2))
            channels = width
            blockIndex += 2

        # size is by experiment and hardcode: 64 dense features plus the
        # flattened output of r17 (50240 - 64 = 50176 values).
        self.lastLinear = nn.Linear(50240, 32)
        self.lastRelu = nn.ReLU()
        self.lastAgg = nn.Linear(32, 1)
        self.lastSigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid score for ``x`` of shape (batch, channel, time)."""
        # Dense branch acts on the last (time) axis.
        dense = self.l2(self.l1(x))

        # Convolutional branch: r1 .. r17 applied one after another.
        conv = x
        for blockIndex in range(1, 18):
            conv = getattr(self, "r%d" % blockIndex)(conv)

        # Flatten the convolutional features (channels x time) of each sample
        # into one axis so they can be joined with the dense features.
        conv = conv.view(x.shape[0], 1, -1)
        merged = torch.cat((dense, conv), -1)

        hidden = self.lastRelu(self.lastLinear(merged))
        return self.lastSigmoid(self.lastAgg(hidden))

In [5]:
# Build the model with freshly initialised weights. No random seed is set in
# the visible cells, so the initial weights differ from run to run.
net = Net()


In [6]:
# Smoke test: push a random batch of 80 samples through the network and check
# that the output shape is (80, 1, 1). No backward pass follows, so autograd is
# disabled to avoid retaining the intermediate activations of the whole stack.
with torch.no_grad():
    input_test = torch.randn(80, 1, dim).float()
    out = net(input_test)
out.shape

torch.Size([80, 1, 1])

# Train the model

In [7]:
# Whole data set as float32 tensors:
#   inputs -> (batch, channel = 1, length = dim)
#   labels -> (batch, 1, 1), the same layout as the network output
data_torch = torch.from_numpy(x).view(-1, 1, dim).to(torch.float32)
label_torch = torch.from_numpy(y).view(-1, 1, 1).to(torch.float32)

In [8]:
import torch.optim as optim
# Loss: binary cross-entropy on probabilities (Net already ends in a Sigmoid).
criterion = nn.BCELoss()
# Optimiser: SGD with momentum over every parameter of `net`.
optimizer = optim.SGD(params=net.parameters(), lr=1e-3, momentum=0.9)

In [9]:
# Mini-batch SGD. Every step draws a random batch from the training arrays
# (sampling with replacement) and applies one optimiser update.
num_batches = 100   # number of optimisation steps
batch_size = 30     # samples drawn per step

for batch in range(num_batches):
    # Pick `batch_size` random rows; x and y share the same row order.
    indices = np.random.choice(len(x), size=batch_size)
    inputs = torch.from_numpy(x[indices]).view([-1, 1, dim]).float()
    labels = torch.from_numpy(y[indices]).view([-1, 1]).float()

    # zero the parameter gradients
    optimizer.zero_grad()

    # forward + backward + optimize
    # The network returns (batch, 1, 1); flatten to (batch, 1) to match labels.
    outputs = net(inputs).view([-1, 1])
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    if batch % 10 == 0:
        # Report progress against the real step count (the message used to
        # claim 1000 steps although the loop runs `num_batches` = 100).
        print(batch, "out of", num_batches)

print('Finished Training')

0 out of 1000
10 out of 1000
20 out of 1000
30 out of 1000
40 out of 1000
50 out of 1000
60 out of 1000
70 out of 1000
80 out of 1000
90 out of 1000
Finished Training


In [11]:
# TODO: Run code on CUDA (the model and tensors above are never moved to a GPU)
# TODO: MPC learning
# NOTE(review): "MPC" is not defined anywhere in the visible cells -- expand it.
# Display the module tree of the network.
net

Net(
  (l1): Linear(in_features=12634, out_features=64, bias=True)
  (l2): ReLU()
  (r1): Res1d(
    (l1): Conv1d(1, 4, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
    (l2): InstanceNorm1d(4, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
    (r3): Conv1d(1, 4, kernel_size=(1,), stride=(1,), bias=False)
    (r4): InstanceNorm1d(4, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
    (relu): ReLU()
  )
  (r2): Res1d(
    (l1): Conv1d(4, 8, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
    (l2): InstanceNorm1d(8, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
    (r3): Conv1d(4, 8, kernel_size=(1,), stride=(1,), bias=False)
    (r4): InstanceNorm1d(8, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
    (relu): ReLU()
  )
  (r3): Res1d(
    (l1): Conv1d(8, 8, kernel_size=(3,), stride=(2,), padding=(1,), bias=False)
    (l2): InstanceNorm1d(8, eps=1e-05, momentum=0.1, affine=False, track_runnin

In [13]:
# Score the whole data set with the trained network. No backward pass follows,
# so autograd is disabled; otherwise every intermediate activation of every
# sample would be kept alive for a gradient that is never computed.
with torch.no_grad():
    train_predictions = net(data_torch)
train_predictions

tensor([[[0.0038]],

        [[0.0028]],

        [[0.0015]],

        [[0.0016]],

        [[0.0059]],

        [[0.0053]],

        [[0.0035]],

        [[0.0044]],

        [[0.0045]],

        [[0.0059]],

        [[0.0029]],

        [[0.0030]],

        [[0.0021]],

        [[0.0025]],

        [[0.0029]],

        [[0.0039]],

        [[0.0019]],

        [[0.0020]],

        [[0.0037]],

        [[0.0014]],

        [[0.0022]],

        [[0.0054]],

        [[0.0024]],

        [[0.0031]],

        [[0.0024]],

        [[0.0036]],

        [[0.0026]],

        [[0.0028]],

        [[0.0011]],

        [[0.0054]],

        [[0.0021]],

        [[0.0030]],

        [[0.0057]],

        [[0.0053]],

        [[0.0102]],

        [[0.0044]],

        [[0.0044]],

        [[0.0011]],

        [[0.0044]],

        [[0.0028]],

        [[0.0020]],

        [[0.0030]],

        [[0.0048]],

        [[0.0021]],

        [[0.0026]],

        [[0.0025]],

        [[0.0053]],

        [[0.0