In [1]:
from pgmpy.readwrite import BIFReader
from pathlib import Path
from src.utils import adj_df_from_BIF, get_train_test_splits, encode_data, get_terminal_connection_nodes
from src.data import BNDataset, reconstruct_adj_mats
from src.models.BNNet import BNNet
from src.train import train
from src.constants import HEPAR_TARGET

import pandas as pd
from scipy.stats import bernoulli

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import yaml

from torch_geometric.nn import GCNConv, FAConv, GATv2Conv
from torch_geometric.utils import dense_to_sparse, to_torch_coo_tensor

In [2]:
fpath_bif = Path("/home/gaurang/bayesian_network/data/hepar/hepar2.bif")
fpath_data = Path("/home/gaurang/bayesian_network/data/hepar/HEPARTWO10k.csv")
fpath_config = Path("/home/gaurang/bayesian_network/code/src/config.yaml")

with open(fpath_config, 'r') as f:
    config = yaml.load(f, Loader=yaml.Loader)
config

{'embedding_dim': 16,
 'gnn_hidden_dim': 64,
 'gnn_out_dim': 16,
 'fc1_out_dim': 16,
 'gat_heads': 4,
 'batch_size_train': 64,
 'batch_size_val': 64,
 'batch_size_test': 64,
 'num_epochs': 50,
 'patience': 10,
 'min_delta': 1e-05,
 'lr': 0.01}

In [3]:
df_data = pd.read_csv(fpath_data, dtype=str)
bn = BIFReader(fpath_bif)
df_data, encoder = encode_data(df_data, bn)
df_train, df_valid, df_test = get_train_test_splits(df_data, 123, False)

In [4]:
# create datasets
perturbation_factor = 0.0
adj_df = adj_df_from_BIF(bn, HEPAR_TARGET, perturbation_factor)

train_set = BNDataset(df_data=df_train, target_node=HEPAR_TARGET, bn=bn, adj_df=adj_df, perturbation_factor=perturbation_factor)
val_set = BNDataset(df_data=df_valid, target_node=HEPAR_TARGET, bn=bn, adj_df=adj_df, perturbation_factor=perturbation_factor)
test_set = BNDataset(df_data=df_test, target_node=HEPAR_TARGET, bn=bn, adj_df=adj_df, perturbation_factor=perturbation_factor)

In [5]:
adj_df['pain']

alcoholism     0.0
vh_amn         0.0
hepatotoxic    0.0
THepatitis     0.0
hospital       0.0
              ... 
hcv_anti       0.0
palms          0.0
hbeag          0.0
carcinoma      0.0
pain           0.0
Name: pain, Length: 70, dtype: float64

In [6]:
get_terminal_connection_nodes(adj_df, target=HEPAR_TARGET)

(['PBC', 'joints'], [13, 33])

In [7]:
dataloader_train = DataLoader(train_set, batch_size=config["batch_size_train"])
dataloader_valid = DataLoader(val_set, batch_size=config["batch_size_val"])
dataloader_test = DataLoader(test_set, batch_size=config["batch_size_test"])

In [8]:
it = iter(dataloader_train)
batch = next(it)
X, y = batch

In [9]:
model = BNNet(
        config=config,
        num_nodes= len(train_set.input_nodes),
        node_states=train_set.input_states,
        edge_index=train_set.edge_index,
        terminal_node_ids=train_set.terminal_node_ids,
        target_node_states=train_set.target_states,
        )

In [10]:
len(train_set.input_nodes)

69

In [11]:
model(X).shape

torch.Size([64, 2])

In [12]:
with torch.no_grad():
    model.inference = True
    model.gnn.inference = True
    model(X)
    model(X)

In [13]:
model.gnn.edge_weights.shape

torch.Size([128, 121])

In [14]:
adj_mats = reconstruct_adj_mats(input_edge_weights=model.gnn.edge_weights, 
                              terminal_edge_weights=model.terminal_edge_weights,
                              input_edge_index=train_set.edge_index,
                              terminal_node_ids=model.terminal_node_ids,
                              node_list=adj_df.columns)

In [23]:
adj_mats.numpy().max()

0.8246825

In [18]:
adj_mat = (torch.mean(adj_mats, dim=0) > 0.4).float().numpy()

In [19]:
adj_mat.sum()

27.0

In [21]:
torch.mean(adj_mats, dim=0).shape

torch.Size([70, 70])

In [17]:
adj_mat = adj_mats[0].unsqueeze(0)
adj_mat.shape

torch.Size([1, 70, 70])

In [19]:
adj_mat.repeat(128, 1, 1).shape

torch.Size([128, 70, 70])

In [27]:
from torch_geometric.utils import to_dense_adj

dense_adj = to_dense_adj(edge_index=train_set.edge_index, edge_attr=model.gnn.edge_weights[0:2])

RuntimeError: The expanded size of the tensor (2) must match the existing size (121) at non-singleton dimension 0.  Target sizes: [2, 121].  Tensor sizes: [121, 1]

In [26]:
dense_adj.shape

torch.Size([1, 69, 69])

In [47]:
(edge_weights > 0.1).sum()

tensor(117)

In [23]:
torch.as_tensor([[1,2,3], [3,4,3]]).shape

torch.Size([2, 3])

In [27]:
torch.equal(train_set.edge_index, model.gnn.batch_edge_index[:, 121:(121+121)])

False

In [24]:
def get_edge_index(edge_index):
    num_nodes = 69*2
    return edge_index - num_nodes

torch.equal(train_set.edge_index, get_edge_index(model.gnn.batch_edge_index[:, 242:363]))

True

In [64]:
model.MLP[1].weight.sum(dim=1)

tensor([ 0.4880, -0.3466], grad_fn=<SumBackward1>)

In [65]:
from torch.nn import MultiheadAttention

In [72]:
attn = MultiheadAttention(32, 4, dropout=True, batch_first=True)

x = torch.rand(10, 3, 32)

out, weights = attn(x, x, x)

weights.shape

torch.Size([10, 3, 3])

In [83]:
out.shape

torch.Size([10, 3, 32])

In [84]:
torch.bmm(weights, out).shape

torch.Size([10, 3, 32])

In [77]:
attn_weights_diag = torch.diagonal(weights, dim1=-2, dim2=-1)
attn_weights_scalar = attn_weights_diag.sum(dim=-1, keepdim=True)

In [81]:
torch.cat((attn_weights_diag, attn_weights_diag)).shape

torch.Size([20, 3])

In [79]:
attn_weights_diag.shape

torch.Size([10, 3])

In [30]:
gnn_input = []

for i, node_embedding_layer in enumerate(model.node_embedding_layers):
    gnn_input.append(node_embedding_layer(X[:, i]))

x = torch.stack(gnn_input, dim=1)
x = x[:4, :, :]
x.shape

torch.Size([4, 69, 64])

In [35]:
x.shape
edge_index = model.gnn.edge_index
edge_index.shape

torch.Size([2, 121])

In [33]:
batch_edge_index = edge_index
for i in range(1, x.shape[0]):
    batch_edge_index = torch.cat((batch_edge_index, edge_index + i*model.num_nodes), 1)

In [38]:
print(x.shape)
print(batch_edge_index.shape)

torch.Size([4, 69, 64])
torch.Size([2, 484])


In [19]:
gnn = FAConv(channels=-1)


In [41]:
gnn(x.view(x.shape[0]*x.shape[1], -1), x.view(x.shape[0]*x.shape[1], -1), batch_edge_index).view(-1, model.num_nodes, 64).shape

torch.Size([4, 69, 64])

In [None]:
model(train)

In [11]:
num_embeddings_list = [len(state) for state in dataset.input_states]
node_embedding_layers = [
    nn.Embedding(num_emdeddings, 7)
    for num_emdeddings in num_embeddings_list
]

In [13]:
gnn_input = []

for i, node_embedding_layer in enumerate(node_embedding_layers):
    gnn_input.append(node_embedding_layer(X[:, i]))

len(gnn_input)


36

In [30]:
gnn_input = torch.stack(gnn_input, dim=1)
gnn_input.shape

torch.Size([4, 36, 7])

In [24]:
gnn = GCNConv(7, 10)

In [33]:
x = gnn(gnn_input, edge_index)
x.shape

torch.Size([4, 36, 10])

In [37]:
x = x.view(4, -1)
x.shape

torch.Size([4, 360])

In [48]:
pmf = bernoulli(0.0)
pmf.rvs(size=1)[0]

0

In [18]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device == torch.device("cuda")

True

In [3]:
from sklearn.metrics import accuracy_score

fpath_inference = Path("/home/gaurang/bayesian_network/experiments/alarm/20230213_163627_training_record/inference.csv")
df_inference = pd.read_csv(fpath_inference)

y = df_inference['HRSAT']
pred = df_inference['predicted_values']

accuracy_score(y, pred)

0.955