In [78]:
import os 
import pandas as pd 
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from Code_manual import Coxnnet, PartialNLL
from lifelines.utils import concordance_index

In [79]:
# Show the current working directory: the relative "./KIRC_*.tsv" paths read
# later in this notebook are resolved against it.
os.getcwd()

'd:\\Survival Analysis\\Cox-nnet'

Data Loading

In [80]:
# Expression matrix. The file has no header row, so pandas assigns integer
# column labels; the shape is printed as a quick sanity check.
expr_data = pd.read_table("./KIRC_expr.tsv", sep="\t", header=None)
print(expr_data.shape)

# Follow-up times, read the same way (header-less, tab-separated).
time_data = pd.read_table("./KIRC_time.tsv", sep="\t", header=None)

# Event labels; these are mapped to 0/1 in a later cell.
observed_data = pd.read_table("./KIRC_event.tsv", sep="\t", header=None)

(540, 19962)


In [81]:
# Numeric event indicator for each vital-status label.
map_dict = {"DECEASED": 1, "LIVING": 0}

# Column 0 holds the labels. The lookup is a plain dict index, so a label that
# is missing from map_dict raises KeyError instead of being silently dropped.
# NOTE: this rebinds observed_data from a DataFrame to a Series, so the cell is
# not meant to be run twice without reloading the data first.
observed_data = observed_data[0].apply(lambda label: map_dict[label])

In [82]:
# Move everything into tensors: float features, integer times, float event flags.
X = torch.tensor(expr_data.to_numpy(), dtype=torch.float32)
time = torch.tensor(time_data.to_numpy(), dtype=torch.int64)
observed = torch.tensor(observed_data.to_numpy(), dtype=torch.float32)

# Split row positions (70 % / 30 %, fixed seed) rather than the tensors, so the
# same partition can be applied to features, times and event flags below.
# The split copies of `observed` returned by train_test_split are discarded.
train_idx, test_idx, _, _ = train_test_split(
    np.arange(len(X)),
    observed,
    test_size=0.3,
    random_state=42,
)

train_X, test_X = X[train_idx], X[test_idx]
train_time, test_time = time[train_idx], time[test_idx]
train_observed, test_observed = observed[train_idx], observed[test_idx]

Training

In [83]:
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda

class TrainDataset(Dataset):
    """Map-style dataset over the training split.

    Item ``i`` is the triple ``(x, time, observed)`` taken from position ``i``
    of the three underlying tensors.

    Args:
        x_data: Feature tensor, indexed along its first dimension. Defaults to
            the notebook-level ``train_X``.
        time_data: Time tensor aligned with ``x_data``. Defaults to the
            notebook-level ``train_time``.
        observed_data: Event-indicator tensor aligned with ``x_data``.
            Defaults to the notebook-level ``train_observed``.
    """

    def __init__(self, x_data=None, time_data=None, observed_data=None):
        # Calling TrainDataset() with no arguments keeps the original
        # behaviour of reading the notebook globals.
        self.x_data = train_X if x_data is None else x_data
        self.time_data = train_time if time_data is None else time_data
        self.observed_data = train_observed if observed_data is None else observed_data

    def __len__(self):
        """Return the number of samples (length of the feature tensor)."""
        return len(self.x_data)

    def __getitem__(self, index):
        """Return detached copies of ``(x, time, observed)`` at ``index``."""
        # The stored objects are already tensors. Wrapping them in
        # torch.tensor(...) copy-constructs them, which PyTorch flags with a
        # UserWarning on every access; clone().detach() is the recommended
        # way to obtain an independent, graph-free copy.
        x = self.x_data[index].clone().detach()
        time = self.time_data[index].clone().detach()
        observed = self.observed_data[index].clone().detach()
        return x, time, observed
    
class TestDataset(Dataset):
    """Map-style dataset over the held-out test split.

    Item ``i`` is the triple ``(x, time, observed)`` taken from position ``i``
    of the three underlying tensors.

    Args:
        x_data: Feature tensor, indexed along its first dimension. Defaults to
            the notebook-level ``test_X``.
        time_data: Time tensor aligned with ``x_data``. Defaults to the
            notebook-level ``test_time``.
        observed_data: Event-indicator tensor aligned with ``x_data``.
            Defaults to the notebook-level ``test_observed``.
    """

    def __init__(self, x_data=None, time_data=None, observed_data=None):
        # Calling TestDataset() with no arguments keeps the original
        # behaviour of reading the notebook globals.
        self.x_data = test_X if x_data is None else x_data
        self.time_data = test_time if time_data is None else time_data
        self.observed_data = test_observed if observed_data is None else observed_data

    def __len__(self):
        """Return the number of samples (length of the feature tensor)."""
        return len(self.x_data)

    def __getitem__(self, index):
        """Return detached copies of ``(x, time, observed)`` at ``index``."""
        # The stored objects are already tensors. Wrapping them in
        # torch.tensor(...) copy-constructs them, which PyTorch flags with a
        # UserWarning on every access; clone().detach() is the recommended
        # way to obtain an independent, graph-free copy.
        x = self.x_data[index].clone().detach()
        time = self.time_data[index].clone().detach()
        observed = self.observed_data[index].clone().detach()
        return x, time, observed
    
training_data, test_data = TrainDataset(), TestDataset()

# batch_size equals the dataset length, so each loader yields its whole split
# as one batch (full-batch training and evaluation).
train_dataloader = DataLoader(training_data, batch_size=len(training_data))
test_dataloader = DataLoader(test_data, batch_size=len(test_data))

In [84]:
# Hyperparameters.
hidden_dim = 500
learning_rate = 3e-5
epochs = 100

# Seed PyTorch's global RNG before the model is built so that any random
# weight initialisation is repeatable across kernel restarts. 42 matches the
# random_state already used for the train/test split.
torch.manual_seed(42)

# Coxnnet is constructed from the number of feature columns and hidden_dim
# (presumably input width and hidden-layer width; confirm in Code_manual).
model = Coxnnet(train_X.shape[1], hidden_dim)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
loss_fn = PartialNLL()

def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over ``dataloader``.

    Args:
        dataloader: Iterable yielding ``(X, time, observed)`` batches.
        model: ``torch.nn.Module`` called as ``model(X)`` to get predictions.
        loss_fn: Callable invoked as ``loss_fn(pred, time, observed)``; it must
            return a scalar tensor so that ``backward()`` can be called on it.
        optimizer: Optimiser that is stepped once per batch.

    Returns:
        float: Mean of the per-batch loss values, or ``nan`` when the
        dataloader yields no batches.
    """
    # Make sure training-only behaviour is active even if the caller left the
    # model in eval mode.
    model.train()

    total_loss, num_batches = 0.0, 0
    for X, time, observed in dataloader:
        # Forward pass and loss.
        pred = model(X)
        loss = loss_fn(pred, time, observed)

        # Backward pass: clear stale gradients (they accumulate by default),
        # compute new ones, then update the parameters.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        num_batches += 1

    return total_loss / num_batches if num_batches else float("nan")

def test_loop(dataloader, model):
    """Evaluate ``model`` on ``dataloader`` and print its concordance index.

    Predictions from every batch are gathered first and the concordance index
    is computed once over all of them, so the reported score covers the whole
    dataset even when the loader yields more than one batch.

    Args:
        dataloader: Iterable yielding ``(X, time, observed)`` tensor batches.
        model: ``torch.nn.Module`` called as ``model(X)``.

    Returns:
        The concordance index that was printed (``0`` when the dataloader
        yields no batches, matching the value printed in that case before).
    """
    # Evaluate in eval mode so train-only behaviour (e.g. dropout, if Coxnnet
    # uses it -- not visible here) does not add noise to the score. The
    # previous mode is restored afterwards so training continues unaffected.
    was_training = model.training
    model.eval()

    preds, times, events = [], [], []
    try:
        with torch.no_grad():
            for X, time, observed in dataloader:
                preds.append(model(X))
                times.append(time)
                events.append(observed)
    finally:
        model.train(was_training)

    if preds:
        # Predictions are negated before scoring, as in the original code;
        # presumably the model outputs a risk score (higher = earlier event)
        # while concordance_index expects higher = longer survival. Confirm
        # against Coxnnet / lifelines.
        test_perf = concordance_index(event_times=torch.cat(times).numpy(),
                                      event_observed=torch.cat(events).numpy(),
                                      predicted_scores=-torch.cat(preds).numpy())
    else:
        test_perf = 0

    # NOTE: the label says "Accuracy" but the value is a concordance index.
    print(f"Accuracy: {test_perf}\n")
    return test_perf

In [85]:
# One pass over the training loader per epoch, followed by a test-set score.
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(
        dataloader=train_dataloader,
        model=model,
        loss_fn=loss_fn,
        optimizer=optimizer,
    )
    test_loop(dataloader=test_dataloader, model=model)

Epoch 1
-------------------------------
Accuracy: 0.5528521774688203

Epoch 2
-------------------------------
Accuracy: 0.5737068084236352

Epoch 3
-------------------------------
Accuracy: 0.5651196074422409

Epoch 4
-------------------------------
Accuracy: 0.5657329789409119

Epoch 5
-------------------------------
Accuracy: 0.5737068084236352

Epoch 6
-------------------------------
Accuracy: 0.575751380085872

Epoch 7
-------------------------------
Accuracy: 0.5759558372520956

Epoch 8
-------------------------------
Accuracy: 0.5802494377427929

Epoch 9
-------------------------------
Accuracy: 0.5857697812308321

Epoch 10
-------------------------------
Accuracy: 0.5912901247188714

Epoch 11
-------------------------------
Accuracy: 0.5910856675526477

Epoch 12
-------------------------------
Accuracy: 0.5961970967082396

Epoch 13
-------------------------------
Accuracy: 0.6013085258638315

Epoch 14
-------------------------------
Accuracy: 0.6043753833571867

Epoch 15
-------

In [11]:
import warnings

# Silence only PyTorch's "To copy construct from a tensor ..." UserWarning
# instead of every warning, so unrelated warnings stay visible.
# A filter only affects warnings raised after it is installed, so this cell
# has to run before the cells whose output it is meant to quieten.
warnings.filterwarnings(
    "ignore",
    message="To copy construct from a tensor",
    category=UserWarning,
)