In [1]:
import os 
import pandas as pd 
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from Code_manual import Coxnnet, PartialNLL

In [2]:
# Report the kernel's current working directory; the relative "./KIRC_*.tsv"
# paths read later in this notebook are resolved against it.
os.getcwd()

'd:\\Survival Analysis\\Cox-nnet'

Data Loading

In [15]:
# Read the three tab-separated KIRC tables. header=None makes pandas treat the
# first line of each file as data and number the columns 0..k-1.
expr_data = pd.read_table("./KIRC_expr.tsv", header=None)
print(expr_data.shape)   # (540, 19962) on the run recorded in this notebook

time_data = pd.read_table("./KIRC_time.tsv", header=None)
observed_data = pd.read_table("./KIRC_event.tsv", header=None)

# The tables are paired purely by row position in the cells that follow, so a
# row-count mismatch would silently pair a sample with the wrong time/event.
# Fail loudly here instead.
if not (len(expr_data) == len(time_data) == len(observed_data)):
    raise ValueError(
        "Row counts differ between inputs: "
        f"expr={len(expr_data)}, time={len(time_data)}, event={len(observed_data)}"
    )

(540, 19962)


In [23]:
# Numeric encoding of the vital-status labels: DECEASED -> 1, LIVING -> 0
# (presumably 1 = event observed, 0 = censored -- confirm against PartialNLL).
map_dict = {'DECEASED' : 1,
            'LIVING' : 0}

# Encode only while `observed_data` is still the raw single-column DataFrame.
# The cell overwrites that name with the encoded Series, so without this guard
# a second run would fail on `observed_data[0].apply`.
if isinstance(observed_data, pd.DataFrame):
    status_labels = observed_data[0]

    # Keep the strictness of a plain dict lookup: any label missing from
    # map_dict (including NaN) is an error rather than a silent NaN.
    unknown_labels = set(status_labels.unique()) - set(map_dict)
    if unknown_labels:
        raise KeyError(
            f"Unexpected event labels: {sorted(map(str, unknown_labels))}"
        )

    observed_data = status_labels.map(map_dict)

In [24]:
# Convert the pandas objects into tensors: features as float32, times as
# int64, event indicators as float32.
X = torch.tensor(np.array(expr_data), dtype=torch.float)
time = torch.tensor(time_data.to_numpy(), dtype=torch.long)
observed = torch.tensor(observed_data.to_numpy(), dtype=torch.float)

# Split row *indices* 70/30 with a fixed seed; the same index arrays are then
# used to slice every tensor so the rows stay aligned across the three.
train_idx, test_idx, _, _ = train_test_split(
    np.arange(X.shape[0]),
    observed,
    test_size=0.3,
    random_state=42,
)

train_X, test_X = X[train_idx, :], X[test_idx, :]
train_time, test_time = time[train_idx], time[test_idx]
train_observed, test_observed = observed[train_idx], observed[test_idx]

Training

In [26]:
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda

class TrainDataset(Dataset):
    """Map-style dataset over the training split.

    Each item is an ``(x, time, observed)`` triple taken from the same row
    index of the three underlying containers.

    Args:
        x_data: Feature data indexed along its first dimension. Defaults to
            the notebook-level ``train_X``.
        time_data: Per-sample times. Defaults to the notebook-level
            ``train_time``.
        observed_data: Per-sample event indicators. Defaults to the
            notebook-level ``train_observed``.
    """

    def __init__(self, x_data=None, time_data=None, observed_data=None):
        # Fall back to the notebook globals (looked up at call time) so the
        # existing zero-argument ``TrainDataset()`` call keeps working.
        self.x_data = train_X if x_data is None else x_data
        self.time_data = train_time if time_data is None else time_data
        self.observed_data = train_observed if observed_data is None else observed_data

    def __len__(self):
        """Return the number of samples (length of the feature container)."""
        return len(self.x_data)

    def __getitem__(self, index):
        """Return detached copies of ``(x, time, observed)`` for ``index``."""
        # torch.tensor(<Tensor>) emits a UserWarning and copies twice.
        # as_tensor passes an existing tensor through unchanged (and still
        # converts array-likes), after which clone().detach() makes the one
        # independent, graph-free copy.
        x = torch.as_tensor(self.x_data[index]).clone().detach()
        time = torch.as_tensor(self.time_data[index]).clone().detach()
        observed = torch.as_tensor(self.observed_data[index]).clone().detach()
        return x, time, observed
    
class TestDataset(Dataset):
    """Map-style dataset over the held-out test split.

    Each item is an ``(x, time, observed)`` triple taken from the same row
    index of the three underlying containers.

    Args:
        x_data: Feature data indexed along its first dimension. Defaults to
            the notebook-level ``test_X``.
        time_data: Per-sample times. Defaults to the notebook-level
            ``test_time``.
        observed_data: Per-sample event indicators. Defaults to the
            notebook-level ``test_observed``.
    """

    def __init__(self, x_data=None, time_data=None, observed_data=None):
        # Fall back to the notebook globals (looked up at call time) so the
        # existing zero-argument ``TestDataset()`` call keeps working.
        self.x_data = test_X if x_data is None else x_data
        self.time_data = test_time if time_data is None else time_data
        self.observed_data = test_observed if observed_data is None else observed_data

    def __len__(self):
        """Return the number of samples (length of the feature container)."""
        return len(self.x_data)

    def __getitem__(self, index):
        """Return detached copies of ``(x, time, observed)`` for ``index``."""
        # torch.tensor(<Tensor>) emits a UserWarning and copies twice.
        # as_tensor passes an existing tensor through unchanged (and still
        # converts array-likes), after which clone().detach() makes the one
        # independent, graph-free copy.
        x = torch.as_tensor(self.x_data[index]).clone().detach()
        time = torch.as_tensor(self.time_data[index]).clone().detach()
        observed = torch.as_tensor(self.observed_data[index]).clone().detach()
        return x, time, observed
    
# Instantiate one Dataset per split, then serve each in mini-batches of 10.
# No `shuffle` argument is given, so DataLoader iterates in stored order.
training_data, test_data = TrainDataset(), TestDataset()

train_dataloader = DataLoader(dataset=training_data, batch_size=10)
test_dataloader = DataLoader(dataset=test_data, batch_size=10)

In [27]:
# Seed torch's RNG before the model is built so weight initialisation (and any
# other torch randomness that follows) is reproducible across kernel restarts.
# 42 matches the random_state used for the train/test split.
torch.manual_seed(42)

# Network: one hidden layer of `hidden_dim` units on top of all input features.
hidden_dim = 143
model = Coxnnet(train_X.shape[1], hidden_dim)

# Optimisation settings.
learning_rate = 3e-5
# NOTE(review): the DataLoaders are created with a literal batch_size=10 and
# do not read this variable; keep the two in sync if either changes.
batch_size = 10
epochs = 20

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# The loss is the project's PartialNLL. The earlier `nn.CrossEntropyLoss()`
# assignment was dead code (overwritten immediately) and has been removed.
loss_fn = PartialNLL()

def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass (one epoch) over ``dataloader``.

    Args:
        dataloader: Iterable yielding ``(X, time, observed)`` batches; must
            expose ``.dataset`` so the total sample count can be reported.
        model: ``torch.nn.Module`` called as ``model(X)``.
        loss_fn: Callable ``loss_fn(pred, time, observed)`` returning a
            one-element tensor loss.
        optimizer: Optimizer bound to ``model``'s parameters.

    Batches whose loss is NaN/inf are reported and skipped. Back-propagating
    such a loss would write non-finite values into every parameter, after
    which all later predictions are NaN as well (the run recorded in this
    notebook shows exactly that during epoch 1).
    """
    size = len(dataloader.dataset)
    model.train()  # make sure train-mode layers (e.g. dropout) are active

    for batch, (X, time, observed) in enumerate(dataloader):
        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, time, observed)

        # Guard: never step the optimizer on a non-finite loss.
        if not torch.isfinite(loss).all():
            print(f"batch {batch}: non-finite loss, skipping update")
            continue

        # Backpropagation
        optimizer.zero_grad()   # gradients accumulate by default; reset first
        loss.backward()         # deposit gradients on the parameters
        optimizer.step()        # adjust parameters using those gradients

        if batch % 100 == 0:
            loss_value, current = loss.item(), batch * len(X)
            print(f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]")

# iterate over test dataset to check model performance
def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, time, observed in dataloader:
            pred = model(X)
    #         test_loss += loss_fn(pred, time, observed).item()
    #         correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    # test_loss /= num_batches
    # correct /= size
    # print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [28]:
# One train_loop() pass per epoch, with a banner so the per-epoch loss lines
# can be told apart in the output.
for t in range(epochs):
    banner = f"Epoch {t+1}\n-------------------------------"
    print(banner)
    train_loop(train_dataloader, model, loss_fn, optimizer)
    # Evaluation on the held-out split is currently disabled:
    #test_loop(test_dataloader, model, loss_fn)

Epoch 1
-------------------------------
loss: 2.141417  [    0/  378]
tensor([-1.5419], grad_fn=<UnbindBackward0>) tensor([0.2140], grad_fn=<UnbindBackward0>)
tensor([-0.7205], grad_fn=<UnbindBackward0>) tensor([0.4865], grad_fn=<UnbindBackward0>)
tensor([-0.9988], grad_fn=<UnbindBackward0>) tensor([0.3683], grad_fn=<UnbindBackward0>)
tensor([-0.7590], grad_fn=<UnbindBackward0>) tensor([0.4681], grad_fn=<UnbindBackward0>)
tensor([-1.2904], grad_fn=<UnbindBackward0>) tensor([0.2752], grad_fn=<UnbindBackward0>)
tensor([-0.7449], grad_fn=<UnbindBackward0>) tensor([0.4748], grad_fn=<UnbindBackward0>)
tensor([-0.6776], grad_fn=<UnbindBackward0>) tensor([0.5078], grad_fn=<UnbindBackward0>)
tensor([-1.0179], grad_fn=<UnbindBackward0>) tensor([0.3614], grad_fn=<UnbindBackward0>)
tensor([-1.5363], grad_fn=<UnbindBackward0>) tensor([0.2152], grad_fn=<UnbindBackward0>)
tensor([-0.7996], grad_fn=<UnbindBackward0>) tensor([0.4495], grad_fn=<UnbindBackward0>)
tensor([nan], grad_fn=<UnbindBackward0>)

KeyboardInterrupt: 

In [11]:
import warnings
warnings.filterwarnings('ignore')