In [1]:
import networkx as nx
import numpy as np
import pandas as pd
import torch
from sklearn.preprocessing import StandardScaler

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the tab-separated design matrix; each row describes one sample.
design = pd.read_table('../data/design_matrix_group_de.tsv')
# Sample identifiers, in file order; reused below to select edgelist columns.
samples = design['sample'].tolist()
# Shift the group codes down by one to get the label vector.
# NOTE(review): presumably groups are coded 1..K so labels become 0..K-1 — confirm.
y = design['group'].to_numpy() - 1

In [3]:
# Read the edge table, replace every missing value with 0 (this also affects
# the 'parent'/'child' label columns), keep only the label columns plus the
# per-sample value columns, and index by (parent, child) so that only the
# numeric columns are passed to the scaler.
edgelist = (
    pd.read_csv('../data/processed/sepsis_edgelist_w_values.csv')
    .fillna(0)
    .loc[:, ['parent', 'child'] + samples]
    .set_index(['parent', 'child'])
)

# StandardScaler standardises each column, i.e. each sample across all rows.
scaled_features = StandardScaler().fit_transform(edgelist.to_numpy())
edgelist = pd.DataFrame(
    scaled_features,
    index=edgelist.index,
    columns=edgelist.columns,
).reset_index()

In [4]:
# Collect every distinct node label that occurs as a child or a parent.
# drop_duplicates keeps first-appearance order, so the node order (and hence
# the row order of the feature matrices built from G.nodes()) is reproducible,
# unlike iterating over a set.
all_nodes = [
    node
    for node in pd.concat(
        [edgelist['child'], edgelist['parent']], ignore_index=True
    ).drop_duplicates().tolist()
    # 0 is the placeholder written by fillna(0) for missing labels, not a node.
    if node != 0
]

# Per-node feature vector: the scaled sample values of the row where the node
# is the child.  A child that appears on several rows contributes its first row.
# NOTE(review): assumes rows sharing a child carry identical sample values — confirm.
child_values = edgelist.drop_duplicates(subset='child').set_index('child')[samples]

G = nx.DiGraph()
for node in all_nodes:
    # Membership must be tested against the child labels themselves:
    # `node in edgelist['child']` would test the Series *index* instead.
    if node in child_values.index:
        # 1-D float array of length len(samples), so weight[i] is a scalar.
        node_weights = child_values.loc[node].to_numpy(dtype=float)
    else:
        # Nodes that only ever appear as a parent have no values of their own.
        node_weights = np.zeros(len(samples))
    G.add_node(node, weight=node_weights)

# Edges point from child to parent.  Both endpoints must already be nodes;
# otherwise add_edge would silently create a node without a 'weight' attribute.
for source, target in zip(edgelist['child'].tolist(), edgelist['parent'].tolist()):
    if source in G and target in G:
        G.add_edge(source, target)

print(G.number_of_nodes())
print(G.number_of_edges())

2219
6960


In [5]:
# Dense adjacency matrix; rows and columns follow the G.nodes() order.
pd_adj = nx.to_pandas_adjacency(G)
adj = pd_adj.values
# Positions of the non-zero entries, i.e. (source position, target position)
# for every edge, in row-major order.
edge_index = (adj > 0).nonzero()
row, col = edge_index
# Stack into a (2, num_edges) integer array.  np.vstack keeps that shape even
# when the graph has no edges, where zip + transpose would give shape (0,).
coo = np.vstack((row, col))


In [6]:
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader


# Build one graph object per sample.  All graphs share the connectivity in
# `coo`; they differ only in the node features and in the label y[i].
data_list = []
for i, sample in enumerate(samples):
    # One scalar feature per node: entry i of that node's stored 'weight',
    # visited in G.nodes() order.
    x = [[G.nodes()[node]['weight'][i]] for node in G.nodes()]
    data_i = Data(
        x=torch.tensor(x, dtype=torch.float),
        edge_index=torch.tensor(coo, dtype=torch.long),
        y=torch.tensor(y[i], dtype=torch.long),
    )
    data_list.append(data_i)

# Shuffled mini-batches of 16 graphs; an incomplete final batch is dropped.
dataloader = DataLoader(data_list, shuffle=True, batch_size=16, drop_last=True)
dataset = dataloader.dataset
# First graph, used below for a quick structural summary.
data = dataset[0]

print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {data.num_edges}')
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
print(f'Has isolated nodes: {data.has_isolated_nodes()}')
print(f'Has self-loops: {data.has_self_loops()}')
print(f'Is undirected: {data.is_undirected()}')

Number of nodes: 2219
Number of edges: 6960
Average node degree: 3.14
Has isolated nodes: False
Has self-loops: False
Is undirected: False


In [7]:

from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool

class GCN(torch.nn.Module):
    """Graph-level classifier: five GCNConv layers, mean pooling, linear head.

    Args:
        out_channels: Size of the output of the final linear layer.
        hidden_channels: Width of every graph-convolution layer.
    """

    def __init__(self,  out_channels, hidden_channels):
        super(GCN, self).__init__()
        # -1 leaves the input feature size to be inferred on the first forward
        # pass (lazy initialisation in torch_geometric).
        self.conv1 = GCNConv(-1, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)
        self.conv4 = GCNConv(hidden_channels, hidden_channels)
        self.conv5 = GCNConv(hidden_channels, hidden_channels)
        self.lin = Linear(hidden_channels, out_channels)

    def forward(self, x, edge_index, batch):
        """Compute one row of unnormalised scores per graph in the batch.

        Args:
            x: Node feature matrix.
            edge_index: Edge connectivity passed to every GCNConv layer.
            batch: Assignment of each node to its graph, used for pooling.

        Returns:
            Output of the final linear layer applied to the pooled embeddings.
        """
        # 1. Obtain node embeddings: conv1..conv5 applied in order, with a
        #    ReLU between layers (none after the last one).  conv4 is used
        #    here; previously conv5 was applied twice and conv4 never ran.
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = self.conv2(x, edge_index)
        x = x.relu()
        x = self.conv3(x, edge_index)
        x = x.relu()
        x = self.conv4(x, edge_index)
        x = x.relu()
        x = self.conv5(x, edge_index)
        # 2. Readout layer
        x = global_mean_pool(x, batch)  # [batch_size, hidden_channels]
        # 3. Apply a final classifier (dropout is active only in training mode)
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin(x)
        return x




In [10]:

model = GCN(2, 10)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()

# Evaluation loader over the same graphs as `dataloader`, but without
# shuffling and without dropping the last incomplete batch, so every graph is
# scored exactly once.
# NOTE(review): this is still the training data — there is no held-out split,
# so the reported accuracy is training accuracy.
eval_loader = DataLoader(dataloader.dataset, batch_size=16, shuffle=False)


def train():
    """Run one optimisation pass over `dataloader`.

    Returns:
        Mean per-graph loss over the epoch as a float, or NaN if the loader
        produced no batches.
    """
    model.train()
    total_loss = 0.0
    num_graphs = 0
    for data in dataloader:  # Iterate in batches over the training dataset.
        optimizer.zero_grad()  # Clear gradients from the previous step.
        # Call the module rather than .forward() so that hooks are honoured.
        out = model(data.x, data.edge_index, data.batch)
        loss = criterion(out, data.y)  # Compute the loss.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
        batch_graphs = data.y.size(0)
        # .item() detaches the value so no autograd graph is kept alive.
        total_loss += loss.item() * batch_graphs
        num_graphs += batch_graphs
    return total_loss / num_graphs if num_graphs else float('nan')


def test(loader):
    """Return the fraction of graphs yielded by `loader` that are classified correctly.

    The denominator is the number of graphs actually evaluated, so the result
    stays correct for loaders that drop their last batch.  Returns NaN when
    the loader yields nothing.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():  # No gradients are needed for evaluation.
        for data in loader:  # Iterate in batches over the given dataset.
            out = model(data.x, data.edge_index, data.batch)
            pred = out.argmax(dim=1)  # Use the class with the highest score.
            correct += int((pred == data.y).sum())  # Check against ground-truth labels.
            total += data.y.size(0)
    return correct / total if total else float('nan')


# Runs epochs 1..199 (range's upper bound is exclusive).
for epoch in range(1, 200):
    loss = train()
    acc = test(eval_loader)
    print(f'Epoch {epoch:>3} | Loss: {loss:.2f} | Acc: {acc*100:.2f}%')