In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils import data

%matplotlib inline

# Import data

In [2]:
# a = np.loadtxt('./GSE107011_Processed_data_TPM.txt', skiprows = 1,dtype=str, unpack = True)
# a = np.genfromtxt('./GSE107011_Processed_data_TPM.txt', names = True, dtype = None, unpack = False, encoding='ascii')
# TODO: read the header row (sample names) directly and use it as the index.
# Load the TPM table: skip the first line, parse columns 1..127 as floats and
# transpose the result (unpack=True) so each row of `a` is one file column.
a = np.loadtxt(
    './GSE107011_Processed_data_TPM.txt',
    dtype=float,
    skiprows=1,
    usecols=range(1, 128),
    unpack=True,
)
# data = torch.from_numpy(a)
# data.size()

## Data preparation

In [3]:
class Dataset(data.Dataset):
    """Map-style dataset over an indexable collection of NumPy arrays."""

    def __init__(self, data):
        # Keep a reference to the wrapped array; nothing is copied here.
        self.data = data

    def __len__(self):
        """Return the total number of samples."""
        n_samples = len(self.data)
        return n_samples

    def __getitem__(self, index):
        """Return the sample at ``index`` converted to a torch tensor."""
        sample = self.data[index]
        return torch.from_numpy(sample)

# Wrap the loaded array and serve it in fixed order, ten samples per batch.
dataset = Dataset(a)
train_loader = data.DataLoader(dataset, batch_size=10, shuffle=False)

# Simple Linear Autoencoder

In [4]:
# Use the first CUDA device when one is available, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print('Using gpu: %s ' % torch.cuda.is_available())

Using gpu: True 


## Stacked Autoencoder

In [5]:
def train_model(model, loss_fn, data_loader=None, epochs=1, optimizer=None):
    """Train ``model`` to reconstruct its own input and print per-batch progress.

    Args:
        model: Module called as ``model(batch)``; its output is compared with
            the batch itself.
        loss_fn: Callable ``loss_fn(output, target)`` returning a scalar tensor.
        data_loader: Iterable of input batches (tensors) that also supports
            ``len(data_loader)`` and ``len(data_loader.dataset)``. Required.
        epochs: Number of full passes over ``data_loader``.
        optimizer: Optimizer updating the parameters of ``model``. Required.

    Raises:
        ValueError: If ``data_loader`` or ``optimizer`` is not supplied.
    """
    if data_loader is None or optimizer is None:
        raise ValueError("train_model requires both data_loader and optimizer")

    # Send batches to wherever the model's parameters live instead of relying
    # on a notebook-level `device` global being defined.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    total_samples = len(data_loader.dataset)
    total_batches = len(data_loader)

    model.train()
    for epoch in range(epochs):
        seen = 0
        # Iterate the loader that was passed in, not a global `train_loader`.
        for i, batch_data in enumerate(data_loader, start=1):
            data = batch_data.to(device=target_device, dtype=torch.float32)
            # Accumulate the true sample count so a short final batch does not
            # distort the progress figure.
            seen += len(data)
            optimizer.zero_grad()
            output = model(data)
            loss = loss_fn(output, data)
            loss.backward()
            optimizer.step()
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, seen, total_samples,
                100. * i / total_batches, loss.item()))

In [6]:
class DeepAutoEncoder(nn.Module):
    """Symmetric stacked autoencoder built only from ``nn.Linear`` layers.

    The encoder maps ``input_dim -> 2048 -> 1024 -> encoding_dim`` and the
    decoder mirrors it back to ``input_dim``. No activation is placed between
    the layers (the ReLU layers are disabled), so both halves are affine maps.
    """

    def __init__(self, input_dim, encoding_dim):
        super().__init__()
        # Layer widths from input to bottleneck; the decoder walks them in reverse.
        widths = [input_dim, 2048, 1024, encoding_dim]
        mirrored = widths[::-1]
        self.encoder = nn.Sequential(
            *[nn.Linear(n_in, n_out) for n_in, n_out in zip(widths, widths[1:])]
        )
        self.decoder = nn.Sequential(
            *[nn.Linear(n_in, n_out) for n_in, n_out in zip(mirrored, mirrored[1:])]
        )

    def forward(self, x):
        """Encode ``x`` to the bottleneck and decode it back to input size."""
        code = self.encoder(x)
        return self.decoder(code)

In [7]:
# Feature count is the length of one row of `a`; the bottleneck size is fixed.
encoding_dim = 512
input_dim = len(a[1])

loss_fn = torch.nn.MSELoss()

# Build the autoencoder, replicate it across GPUs when more than one is
# present, then move it to the selected device before creating the optimizer.
model = DeepAutoEncoder(input_dim, encoding_dim)
multi_gpu = torch.cuda.device_count() > 1
if multi_gpu:
    model = nn.DataParallel(model)
model = model.to(device)
optimizer = optim.Adam(model.parameters())

In [8]:
# Run 100 epochs of reconstruction training over the full loader.
train_model(
    model,
    loss_fn,
    data_loader=train_loader,
    epochs=100,
    optimizer=optimizer,
)

















In [9]:
# Encode row 0 of `a`. `model` is wrapped in nn.DataParallel only when more
# than one GPU is present, so unwrap `.module` when it exists instead of
# assuming it does (a bare model has no `.module` and raised AttributeError).
test_data0 = torch.from_numpy(a[0]).to(device=device, dtype=torch.float32)
encoded_data0 = getattr(model, 'module', model).encoder(test_data0)

In [10]:
# Encode row 1 of `a`. `model` is wrapped in nn.DataParallel only when more
# than one GPU is present, so unwrap `.module` when it exists instead of
# assuming it does (a bare model has no `.module` and raised AttributeError).
test_data1 = torch.from_numpy(a[1]).to(device=device, dtype=torch.float32)
encoded_data1 = getattr(model, 'module', model).encoder(test_data1)

In [11]:
# Encode the element-wise sum of rows 0 and 1 of `a`. `model` is wrapped in
# nn.DataParallel only when more than one GPU is present, so unwrap `.module`
# when it exists instead of assuming it does.
test_data3 = torch.from_numpy(a[0] + a[1]).to(device=device, dtype=torch.float32)
encoded_data3 = getattr(model, 'module', model).encoder(test_data3)

In [12]:
# Additivity residual of the encoder: enc(a[0] + a[1]) - enc(a[0]) - enc(a[1]).
# NOTE(review): the encoder has no activations, so it is an affine map and this
# residual should be roughly minus its effective bias (plus float32 rounding),
# not exactly zero - confirm that this is the intended check.
encoded_data3 - encoded_data0 -encoded_data1

tensor([ 6.3477e-03, -2.8503e-02, -3.5980e-02, -4.2717e-02, -1.0071e-03,
         2.7954e-02, -3.7109e-02, -5.6534e-03, -1.9974e-02, -3.1555e-02,
         4.3396e-02,  4.6177e-02, -2.6779e-02,  7.5684e-03, -1.4038e-02,
        -7.6294e-04, -2.4780e-02,  1.8066e-02, -2.4246e-02, -4.3335e-03,
         2.0473e-02, -2.2644e-02,  4.2419e-03, -2.8778e-02, -1.4475e-02,
        -2.7100e-02, -5.7068e-03, -3.1738e-03, -8.2397e-04, -5.6152e-03,
        -1.7845e-02, -8.6060e-03,  2.5696e-02,  8.9188e-03, -1.7813e-02,
         2.3499e-02, -1.3672e-02, -3.2166e-02, -4.0161e-02, -5.9814e-03,
        -2.6245e-03, -7.0229e-03, -2.8076e-03, -2.7618e-02,  2.6947e-02,
        -8.1177e-03,  3.2013e-02, -9.7046e-03,  4.6326e-02, -5.8594e-03,
         2.5757e-02,  1.1658e-02,  6.9275e-03, -1.8311e-04, -3.2883e-02,
         2.5513e-02,  5.8136e-03,  7.9346e-04,  7.9117e-03,  1.6907e-02,
         1.4038e-02,  8.3618e-03,  4.3945e-03,  3.4790e-03,  9.2163e-03,
        -2.9816e-02, -6.3477e-02,  2.5513e-02, -6.4

In [13]:
# Display the float32 input tensor built from row 0 of `a`.
test_data0

tensor([ 0.2143,  4.7596,  0.0000,  ...,  0.0000,  1.6853, 53.6174],
       device='cuda:0')

In [14]:
# Display the encoder output for row 0 of `a`.
encoded_data0

tensor([-7.0996e+02,  1.3838e+03, -2.7657e+02,  1.7381e+02, -8.0764e+02,
        -4.4667e+02,  1.6538e+03, -3.3959e+02,  3.0095e+01, -1.0127e+03,
        -8.0965e+02, -2.2723e+02, -1.1859e+02,  1.9088e+03,  7.7771e+02,
         1.3478e+00, -9.8462e+02, -4.6163e+02,  3.8760e+02,  3.4913e+02,
        -5.3808e+02,  1.0432e+03, -7.9333e+02, -5.8211e+01, -5.1734e+02,
        -1.0182e+02,  3.0921e+02, -8.5020e+02,  7.4049e+02, -4.1386e+02,
        -4.8993e+02,  6.4466e+02,  7.2970e+02, -4.6636e+02,  1.0676e+02,
         8.7692e+02,  2.7649e+03, -5.6593e+02,  9.3169e+02, -4.4889e+02,
        -1.3800e+02, -2.5234e+02,  1.2548e+03, -9.1330e+02,  7.3846e+02,
        -1.1178e+03, -2.1298e+02,  3.7805e+02,  6.8760e+01,  1.6672e+03,
        -1.3964e+03,  1.7100e+02, -3.6106e+01, -1.1435e+02,  8.4892e+01,
         7.0213e+02, -7.0040e+00, -1.0834e+03,  3.9108e+02,  9.3170e+02,
        -1.6041e+03, -7.5843e+02,  4.2016e+02,  1.1246e+03,  4.6138e+02,
        -2.6019e+02,  3.0228e+03,  2.2010e+02,  1.7