In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.distributions
import torch.nn as nn
import torch.utils
import torch.utils.data as data_utils

In [2]:
# Run on the first CUDA device when one is available, otherwise on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Seed torch's random number generators so that the train/test split, the
# batch shuffling and the weight initialisation further down are reproducible
# under "Restart Kernel -> Run All".
SEED = 0
torch.manual_seed(SEED)

# Import data
# Both files are tab-separated and use their first column as the row index.
data_load_exp = pd.read_csv('data/VAE_transformed_50epochs_fixed.tsv', sep = '\t', index_col=[0])
data_load_iso = pd.read_csv('data/isoform_data_reduced.tsv', sep = '\t', index_col=[0])

In [3]:
## Transform data
# Model inputs: the values of data_load_exp as a float32 tensor.
x_values = torch.from_numpy(np.float32(data_load_exp.values))

# log-transform isoform data: log10(y + 1), computed in float32
y = np.float32(data_load_iso.values)
y = np.log10(y + 1)
y_values = torch.from_numpy(y)

# create training and testing datasets
data = data_utils.TensorDataset(x_values, y_values)

# 80/20 split. The test size is the remainder rather than int(n * .2):
# truncating both parts independently can leave their sum one short of
# len(data), in which case random_split raises a ValueError.
n_train = int(len(data) * .8)
n_test = len(data) - n_train
train_dataset, test_dataset = torch.utils.data.random_split(data, [n_train, n_test])

# dataloader
train_loader = data_utils.DataLoader(train_dataset, batch_size = 5000, shuffle=True)
# Evaluation does not benefit from shuffling; a fixed order keeps the
# collected validation predictions in the same order on every pass.
test_loader = data_utils.DataLoader(test_dataset, batch_size = 5000, shuffle=False)

In [4]:
# Network dimensions, all derived from the second dimension of the data tensors.
input_layer_size = x_values.size(1)  # number of input features
num_output = y_values.size(1)  # number of output features

# the hidden layer is ten times as wide as the input layer
hidden_layer_size = 10 * input_layer_size

# define network
class Net(nn.Module):
    """Feed-forward network with a single hidden layer.

    The forward pass is ``Linear -> ReLU -> Linear``; no activation is
    applied to the output layer.

    Args:
        input_layer_size: number of input features.
        hidden_layer_size: number of units in the hidden layer.
        num_output: number of output features.
    """

    def __init__(self, input_layer_size, hidden_layer_size, num_output):
        super().__init__()
        # input -> hidden projection
        self.inputl = nn.Linear(input_layer_size, hidden_layer_size)
        # hidden -> output projection
        self.outputl = nn.Linear(hidden_layer_size, num_output)
        # non-linearity applied to the hidden layer
        self.activation = nn.ReLU()

    def forward(self, x):
        """Return the network output for the input batch ``x``."""
        hidden = self.activation(self.inputl(x))
        return self.outputl(hidden)

# Build the network with the sizes computed above, then move it to the
# selected device.
net = Net(
    input_layer_size=input_layer_size,
    hidden_layer_size=hidden_layer_size,
    num_output=num_output,
)
net = net.to(device)

In [5]:
# set parameters for running FNN
network = net
data = train_loader
data_eval = test_loader
epochs = 2000

# define optimizer
opt = torch.optim.SGD(network.parameters(), lr=0.001, momentum = 0.05,weight_decay=0.0001)

# Per-epoch histories. NOTE: despite the "acc" names, train_acc and valid_acc
# hold mean-squared-error values (lower is better), not accuracies.
loss=[]
lossfunc=nn.MSELoss()
train_acc , valid_acc = [] , []


def _evaluate(model, loader, criterion, collect=False):
    """Return the mean per-batch loss of ``model`` over ``loader``.

    Runs under ``torch.no_grad()`` so that no autograd graph is built during
    evaluation. When ``collect`` is true, the per-batch predictions and
    targets are additionally returned as lists of NumPy arrays.

    Returns:
        (mean_loss, predictions, targets); the two lists are empty when
        ``collect`` is false.
    """
    total = 0
    n_batches = 0
    predictions, targets = [], []
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device)
            y = y.to(device)

            out = model(x)
            total += criterion(out, y).item()
            n_batches += 1

            if collect:
                predictions.append(out.cpu().numpy())
                targets.append(y.cpu().numpy())
    return total / n_batches, predictions, targets


# Start FNN
for epoch in range(epochs):

    ### Training pass: one optimizer step per batch
    k = 0
    cur_loss = 0
    network.train()
    for x,y in data:
        x = x.to(device)
        y = y.to(device) # GPU

        opt.zero_grad()
        out = network(x)
        batch_loss = lossfunc(out,y)
        batch_loss.backward()
        opt.step()

        k += 1
        cur_loss += batch_loss.item()

    # mean of the batch losses recorded while the weights were being updated
    loss.append(cur_loss / k)

    network.eval()
    ### Evaluate training (loss over the training set with the end-of-epoch weights)
    train_acc_cur, _, _ = _evaluate(network, data, lossfunc)
    train_acc.append(train_acc_cur)

    ### Evaluate validation
    # predicted_validate / actual_validate are rebuilt every epoch, so after
    # the loop they hold the per-batch arrays of the final epoch.
    valid_acc_cur, predicted_validate, actual_validate = _evaluate(
        network, data_eval, lossfunc, collect=True)
    valid_acc.append(valid_acc_cur)

    if epoch % 10 == 0:
        print("Epoch %2i : Train Loss %f , Train acc %f, Valid acc %f" % (
            epoch, loss[-1], train_acc[-1], valid_acc[-1]))

Epoch  0 : Train Loss 3.000348 , Train acc 1.333586, Valid acc 1.329988
Epoch 10 : Train Loss 0.163823 , Train acc 0.161878, Valid acc 0.165285
Epoch 20 : Train Loss 0.146762 , Train acc 0.145565, Valid acc 0.148343
Epoch 30 : Train Loss 0.132668 , Train acc 0.131076, Valid acc 0.134318
Epoch 40 : Train Loss 0.121115 , Train acc 0.119485, Valid acc 0.122573
Epoch 50 : Train Loss 0.111864 , Train acc 0.110627, Valid acc 0.112724
Epoch 60 : Train Loss 0.102346 , Train acc 0.102693, Valid acc 0.104402
Epoch 70 : Train Loss 0.095967 , Train acc 0.095989, Valid acc 0.097296
Epoch 80 : Train Loss 0.090039 , Train acc 0.089485, Valid acc 0.091233
Epoch 90 : Train Loss 0.084928 , Train acc 0.084534, Valid acc 0.085995
Epoch 100 : Train Loss 0.079838 , Train acc 0.080086, Valid acc 0.081465
Epoch 110 : Train Loss 0.076661 , Train acc 0.076369, Valid acc 0.077504
Epoch 120 : Train Loss 0.073049 , Train acc 0.072669, Valid acc 0.074048
Epoch 130 : Train Loss 0.069819 , Train acc 0.069765, Valid a

In [6]:
# Flatten the validation results collected by the training cell.
# Every batch is concatenated, not just the first one: indexing with [0]
# silently drops all samples beyond the first batch when the validation set
# is larger than one batch.
# The isinstance guards make the cell safe to re-run: once the lists have
# been replaced by flat arrays, running it again is a no-op.
if isinstance(predicted_validate, list):
    predicted_validate = np.concatenate(predicted_validate).ravel()
if isinstance(actual_validate, list):
    actual_validate = np.concatenate(actual_validate).ravel()

In [37]:
# Write the flattened validation predictions and targets as text files.
# np.savetxt does not create missing directories, so make sure the output
# folder exists first.
os.makedirs('output', exist_ok=True)
np.savetxt('output/predicted_data.tsv', predicted_validate)
np.savetxt('output/actual_data.tsv', actual_validate)