# Outcome Prediction with RGCN Model

## 1. Load data from the "processed_data" folder

In [2]:
import time
import joblib
import pandas as pd

import torch
import torch.nn.functional as F

from torch_geometric.data import Data
from torch_geometric.nn import RGCNConv

In [3]:
# Build the entity -> node-id lookup: column 0 of the TSV becomes the key and
# column 1 the value.
entity = pd.read_csv('processed_data/sphn_entities_noOutcome.tsv', sep='\t', index_col=0, header=None)
entity = entity.to_dict()[1]

num_patients = 10000
# Node ids of the patient entities, ordered by patient number so that they
# line up positionally with the outcome labels in `y`.
indices = [entity[f'<http://nvasc.org/synth_patient_{i}>'] for i in range(num_patients)]

# Each row of the events file is read as (source node, relation id, target node).
events = pd.read_csv('processed_data/sphn_events_noOutcome.tsv', sep='\t', header=None)
y = joblib.load('../Data Generation/outcomes_1000_0.joblib')
if len(y) != num_patients:
    # Fail early: the index/label slices below are paired position by position.
    raise ValueError(
        f'expected {num_patients} outcome labels, found {len(y)}'
    )

# Build integer tensors directly as int64. The legacy `torch.Tensor(...)`
# constructor produces float32 by default, which cannot represent every
# integer above 2**24, so a float round-trip could silently corrupt large
# node ids before `.long()` is applied.
edge_index = torch.stack((
    torch.as_tensor(events[0].to_numpy(), dtype=torch.long),
    torch.as_tensor(events[2].to_numpy(), dtype=torch.long),
))
edge_type = torch.as_tensor(events[1].to_numpy(), dtype=torch.long)

# First 80% of the patients are used for training, the remainder for testing.
split = int(num_patients * 0.8)
train_idx = torch.tensor(indices[:split], dtype=torch.long)
train_y = torch.as_tensor(y[:split], dtype=torch.long)
test_idx = torch.tensor(indices[split:], dtype=torch.long)
test_y = torch.as_tensor(y[split:], dtype=torch.long)

num_nodes = len(entity)
data = Data(
    edge_index=edge_index,
    edge_type=edge_type,
    train_idx=train_idx,
    train_y=train_y,
    test_idx=test_idx,
    test_y=test_y,
    num_nodes=num_nodes,
)
# A 16-dimensional embedding row per node, stored as the node feature matrix.
embedding = torch.nn.Embedding(data.num_nodes, 16)
data.x = embedding.weight
data.num_relations = data.num_edge_types
data.num_classes = 3

## 2. Training the model

In [4]:
class Net(torch.nn.Module):
    """Two-layer relational GCN producing per-node log-probabilities.

    Layer sizes are read from the module-level ``data`` object
    (``num_nodes``, ``num_relations``, ``num_classes``), so ``data`` must
    exist before the network is instantiated.
    """

    def __init__(self):
        super().__init__()
        hidden_dim = 16
        n_bases = 10
        # The first layer's input size is the node count; forward() hands it
        # None instead of a feature matrix.
        self.conv1 = RGCNConv(
            data.num_nodes, hidden_dim, data.num_relations, num_bases=n_bases,
        )
        self.conv2 = RGCNConv(
            hidden_dim, data.num_classes, data.num_relations, num_bases=n_bases,
        )

    def forward(self, edge_index, edge_type):
        """Return log-softmax class scores for every node in the graph."""
        hidden = self.conv1(None, edge_index, edge_type)
        hidden = F.relu(hidden)
        logits = self.conv2(hidden, edge_index, edge_type)
        return F.log_softmax(logits, dim=1)


# Choose the compute device: CUDA when available, otherwise Apple's MPS
# backend (only probed when this torch build exposes it), otherwise the CPU.
has_cuda = torch.cuda.is_available()
has_mps = (
    not has_cuda
    and hasattr(torch.backends, 'mps')
    and torch.backends.mps.is_available()
)
device = torch.device('cuda' if has_cuda else 'mps' if has_mps else 'cpu')

# Move the network and the graph onto the chosen device, then set up Adam
# with weight decay.
model = Net().to(device)
data = data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

In [5]:
def train():
    """Run one full-graph optimisation step and return the training loss.

    Uses the module-level ``model``, ``optimizer`` and ``data``. The loss is
    the negative log-likelihood over the training nodes only.
    """
    model.train()
    optimizer.zero_grad()
    log_probs = model(data.edge_index, data.edge_type)
    train_loss = F.nll_loss(log_probs[data.train_idx], data.train_y)
    train_loss.backward()
    optimizer.step()
    return train_loss.item()


@torch.no_grad()
def test():
    """Evaluate the model and return ``(train_accuracy, test_accuracy)``.

    Uses the module-level ``model`` and ``data``; gradients are disabled.
    """
    model.eval()
    log_probs = model(data.edge_index, data.edge_type)
    predicted = log_probs.argmax(dim=-1)

    def _accuracy(node_idx, labels):
        # Fraction of the selected nodes whose predicted class matches.
        hits = predicted[node_idx] == labels
        return float(hits.float().mean())

    return (
        _accuracy(data.train_idx, data.train_y),
        _accuracy(data.test_idx, data.test_y),
    )

In [6]:
# Train for 50 epochs, reporting loss and accuracies after every epoch and
# the median per-epoch duration at the end.
times = []
for epoch in range(1, 51):
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring intervals; time.time() follows the wall clock and can jump
    # when the system time is adjusted.
    start = time.perf_counter()
    loss = train()
    train_acc, test_acc = test()
    print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}, Train: {train_acc:.4f} '
          f'Test: {test_acc:.4f}')
    # The measured span covers the training step, the evaluation pass and
    # the progress print.
    times.append(time.perf_counter() - start)
print(f"Median time per epoch: {torch.tensor(times).median():.4f}s")

Epoch: 01, Loss: 1.0990, Train: 0.5459 Test: 0.4445
Epoch: 02, Loss: 1.0716, Train: 0.6028 Test: 0.4440
Epoch: 03, Loss: 1.0479, Train: 0.4781 Test: 0.4440
Epoch: 04, Loss: 1.0246, Train: 0.5688 Test: 0.4440
Epoch: 05, Loss: 0.9950, Train: 0.7073 Test: 0.4440
Epoch: 06, Loss: 0.9614, Train: 0.8628 Test: 0.4440
Epoch: 07, Loss: 0.9224, Train: 0.8753 Test: 0.4440
Epoch: 08, Loss: 0.8828, Train: 0.8753 Test: 0.4440
Epoch: 09, Loss: 0.8451, Train: 0.8753 Test: 0.4440
Epoch: 10, Loss: 0.8114, Train: 0.8753 Test: 0.4440
Epoch: 11, Loss: 0.7818, Train: 0.8753 Test: 0.4440
Epoch: 12, Loss: 0.7552, Train: 0.8753 Test: 0.4440
Epoch: 13, Loss: 0.7288, Train: 0.8753 Test: 0.4440
Epoch: 14, Loss: 0.7026, Train: 0.8753 Test: 0.4440
Epoch: 15, Loss: 0.6775, Train: 0.8753 Test: 0.4440
Epoch: 16, Loss: 0.6531, Train: 0.8753 Test: 0.4440
Epoch: 17, Loss: 0.6311, Train: 0.8753 Test: 0.4285
Epoch: 18, Loss: 0.6123, Train: 0.8753 Test: 0.4285
Epoch: 19, Loss: 0.5962, Train: 0.8753 Test: 0.4285
Epoch: 20, L