In [2]:
from pgmpy.readwrite import BIFReader
from pathlib import Path
from src.utils import adj_df_from_BIF, get_train_test_splits, encode_data, get_terminal_connection_nodes
from src.data import BNDataset
from src.models.BNNet import BNNet
from src.train import train
from src.constants import HEPAR_TARGET

import pandas as pd
from scipy.stats import bernoulli

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import yaml

from torch_geometric.nn import GCNConv
from torch_geometric.utils import dense_to_sparse, to_torch_coo_tensor

In [3]:
# Input locations for the hepar2 network, its sampled data and the model config.
# NOTE(review): these are absolute, machine-specific paths; the notebook only
# runs where this directory layout exists.
fpath_bif = Path("/home/gaurang/bayesian_network/data/hepar/hepar2.bif")
fpath_data = Path("/home/gaurang/bayesian_network/data/hepar/HEPARTWO10k.csv")
fpath_config = Path("/home/gaurang/bayesian_network/code/src/config.yaml")

# safe_load builds only plain YAML types (dict/list/str/int/float/...), which
# is what the displayed config consists of; the full Loader can instantiate
# arbitrary Python objects and should not be used for plain config files.
with open(fpath_config, 'r') as f:
    config = yaml.safe_load(f)
config

{'embedding_dim': 64,
 'gnn_hidden_dim': 64,
 'gnn_out_dim': 3,
 'fc1_out_dim': 16,
 'batch_size_train': 64,
 'batch_size_val': 64,
 'batch_size_test': 64,
 'num_epochs': 50,
 'patience': 10,
 'min_delta': 1e-05,
 'lr': 0.01}

In [4]:
# Parse the network definition from the hepar2 BIF file.
bn = BIFReader(fpath_bif)
# Adjacency table produced by the project helper from the parsed network.
adj_df = adj_df_from_BIF(bn)
# Same helper with a second positional argument of 0.1 — presumably a
# perturbation factor (a later cell passes `perturbation_factor` in the same
# position); TODO confirm against src.utils.
# NOTE(review): `adj_df_perturb` is not referenced by any other visible cell.
adj_df_perturb = adj_df_from_BIF(bn, 0.1)

In [5]:
# Inspect the helper's result for the target node; the displayed output is a
# pair of (node names, integer ids).
get_terminal_connection_nodes(adj_df, target=HEPAR_TARGET)

(['PBC', 'joints'], [13, 33])

In [6]:
# Load the sampled records with every column read as a string.
df_data = pd.read_csv(fpath_data, dtype=str)
# Replace the raw frame with its encoded version; the helper also returns the
# encoder it used.
df_data, encoder = encode_data(df_data, bn)
# NOTE(review): 123 and False are positional — presumably a random seed and a
# boolean option; confirm against get_train_test_splits in src.utils.
df_train, df_valid, df_test = get_train_test_splits(df_data, 123, False)
# Second reader over the same BIF file that `bn` was built from; the dataset
# cell below uses `reader` rather than `bn`.
reader = BIFReader(fpath_bif)

In [7]:
# Build the train / validation / test datasets from one shared configuration.
perturbation_factor = 0.5
# NOTE(review): this rebinds `adj_df` (first assigned in an earlier cell) to
# the version generated with `perturbation_factor`.
adj_df = adj_df_from_BIF(reader, perturbation_factor)

# Keyword arguments common to all three splits; only `df_data` differs.
dataset_kwargs = dict(
    target_node=HEPAR_TARGET,
    bn=reader,
    adj_df=adj_df,
    perturbation_factor=perturbation_factor,
)

train_set = BNDataset(df_data=df_train, **dataset_kwargs)
val_set = BNDataset(df_data=df_valid, **dataset_kwargs)
test_set = BNDataset(df_data=df_test, **dataset_kwargs)

In [8]:
# One loader per split, each with the batch size configured for that split.
# No shuffle argument is passed, so DataLoader's default ordering applies.
dataloader_train, dataloader_valid, dataloader_test = (
    DataLoader(split, batch_size=config[batch_key])
    for split, batch_key in (
        (train_set, "batch_size_train"),
        (val_set, "batch_size_val"),
        (test_set, "batch_size_test"),
    )
)

In [9]:
# Pull the first training batch and split it into features and labels.
it = iter(dataloader_train)
X, y = batch = next(it)

In [10]:
# Instantiate the network from the loaded config and the graph / state
# metadata exposed by the training dataset.
model = BNNet(
    config=config,
    num_nodes=len(train_set.input_nodes),
    node_states=train_set.input_states,
    edge_index=train_set.edge_index,
    terminal_node_ids=train_set.terminal_node_ids,
    target_node_states=train_set.target_states,
)

In [11]:
# Display the module structure (one embedding layer per input node).
model

BNNet(
  (node_embedding_layers): ModuleList(
    (0): Embedding(2, 64)
    (1): Embedding(2, 64)
    (2): Embedding(2, 64)
    (3): Embedding(2, 64)
    (4): Embedding(2, 64)
    (5): Embedding(2, 64)
    (6): Embedding(2, 64)
    (7): Embedding(2, 64)
    (8): Embedding(2, 64)
    (9): Embedding(2, 64)
    (10): Embedding(3, 64)
    (11): Embedding(2, 64)
    (12): Embedding(4, 64)
    (13): Embedding(2, 64)
    (14): Embedding(2, 64)
    (15): Embedding(2, 64)
    (16): Embedding(2, 64)
    (17): Embedding(2, 64)
    (18): Embedding(3, 64)
    (19): Embedding(2, 64)
    (20): Embedding(3, 64)
    (21): Embedding(2, 64)
    (22): Embedding(2, 64)
    (23): Embedding(4, 64)
    (24): Embedding(2, 64)
    (25): Embedding(2, 64)
    (26): Embedding(2, 64)
    (27): Embedding(2, 64)
    (28): Embedding(2, 64)
    (29): Embedding(3, 64)
    (30): Embedding(2, 64)
    (31): Embedding(2, 64)
    (32): Embedding(2, 64)
    (33): Embedding(2, 64)
    (34): Embedding(2, 64)
    (35): Embedding

In [21]:
# Scratch check: averaging over dim 1 of a (4, 10, 3) tensor removes that axis.
t = torch.rand(4, 10, 3)
t.mean(dim=1).shape

torch.Size([4, 3])

In [12]:
# Forward pass on the sampled batch features; the displayed result has two
# values per row. The call is not wrapped in torch.no_grad().
model(X)

tensor([[ 1.5105e-02,  2.7054e-01],
        [ 8.8343e-03,  2.6897e-01],
        [-6.1352e-03,  2.8246e-01],
        [-2.1062e-02,  2.7114e-01],
        [-1.8376e-02,  2.7396e-01],
        [-1.1731e-02,  2.7468e-01],
        [-2.3099e-02,  2.7158e-01],
        [ 2.4742e-02,  2.5692e-01],
        [-1.0178e-03,  2.7386e-01],
        [ 2.1685e-02,  2.6322e-01],
        [ 1.7473e-02,  2.4702e-01],
        [-6.9723e-03,  2.6868e-01],
        [-4.5472e-03,  2.7752e-01],
        [ 8.0732e-03,  2.6227e-01],
        [ 2.3318e-02,  2.4276e-01],
        [-2.0328e-02,  2.7826e-01],
        [ 5.4301e-03,  2.5697e-01],
        [ 1.6076e-02,  2.7205e-01],
        [ 2.0962e-02,  2.3470e-01],
        [-1.4158e-02,  2.7347e-01],
        [ 2.3200e-02,  2.6288e-01],
        [-3.6817e-04,  2.7776e-01],
        [ 1.7817e-02,  2.7586e-01],
        [ 1.1436e-02,  2.5865e-01],
        [ 1.8086e-02,  2.6617e-01],
        [ 1.2256e-02,  2.7670e-01],
        [ 2.4006e-02,  2.7639e-01],
        [-5.4381e-04,  2.666

In [None]:
# NOTE(review): `train` is the function imported from src.train, not a batch
# of inputs, and this cell has never been executed. Possibly meant as
# `model.train()` or `model(X)` — confirm intent or delete the cell.
model(train)

In [11]:
# Scratch re-creation of the per-node embedding step: one embedding table per
# input node, sized by that node's number of states.
# Reads from `train_set` (built in the dataset cell); no variable named
# `dataset` is defined in the visible cells, so that name fails on a fresh kernel.
num_embeddings_list = [len(state) for state in train_set.input_states]
node_embedding_layers = [
    # 7 is the scratch embedding width; the GCNConv cell below uses the same
    # value as its input size.
    nn.Embedding(num_embeddings, 7)
    for num_embeddings in num_embeddings_list
]

In [13]:
# Embed each input column of X with the layer belonging to that node,
# collecting one tensor per node.
gnn_input = [
    layer(X[:, node_idx])
    for node_idx, layer in enumerate(node_embedding_layers)
]

len(gnn_input)


36

In [30]:
# Stack the per-node embedding tensors along a new dim 1.
# NOTE(review): this rebinds `gnn_input` from the list built in the previous
# cell to a single tensor, so this cell expects that cell to have been run
# immediately before it.
gnn_input = torch.stack(gnn_input, dim=1)
gnn_input.shape

torch.Size([4, 36, 7])

In [24]:
# Single graph-convolution layer: 7 input channels (the embedding width used
# for the scratch embedding layers) mapped to 10 output channels.
gnn = GCNConv(7, 10)

In [33]:
# Apply the graph convolution to the stacked node embeddings.
# NOTE(review): `edge_index` is not assigned in any visible cell — presumably
# `train_set.edge_index` (the value passed to BNNet); confirm before re-running.
x = gnn(gnn_input, edge_index)
x.shape

torch.Size([4, 36, 10])

In [37]:
# Flatten everything after the batch dimension into one feature vector per
# sample. The batch size is read from the tensor instead of being hard-coded
# as 4, so the result keeps one row per sample for any batch size (for a
# batch of 4 the outcome is unchanged).
x = x.view(x.size(0), -1)
x.shape

torch.Size([4, 360])

In [48]:
# Scratch check: a Bernoulli variable with success probability 0.0; take the
# single sampled value.
pmf = bernoulli(p=0.0)
draws = pmf.rvs(size=1)
draws[0]

0

In [18]:
# Choose the GPU when one is available, otherwise fall back to the CPU, and
# show whether the GPU was picked.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
device.type == "cuda"

True

In [3]:
from sklearn.metrics import accuracy_score

# Score a saved inference run: compare the recorded 'HRSAT' column with the
# model's 'predicted_values' column.
# NOTE(review): this rebinds `y`, which an earlier cell uses for the batch labels.
fpath_inference = Path("/home/gaurang/bayesian_network/experiments/alarm/20230213_163627_training_record/inference.csv")
df_inference = pd.read_csv(fpath_inference)

y, pred = df_inference['HRSAT'], df_inference['predicted_values']

accuracy_score(y_true=y, y_pred=pred)

0.955