In [1]:
from pgmpy.readwrite import BIFReader
from pathlib import Path
from src.utils import adj_df_from_BIF, get_train_test_splits, encode_data
from src.data import AlarmDataset
from src.models.BNNet import BNNet
from src.train import train

import pandas as pd
from scipy.stats import bernoulli

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import yaml

from torch_geometric.nn import GCNConv
from torch_geometric.utils import dense_to_sparse, to_torch_coo_tensor

In [2]:
# Locations of the ALARM network definition (BIF), the sampled data (CSV) and
# the model/training configuration (YAML).
# NOTE(review): PROJECT_ROOT is an absolute path on one machine; point it at
# your own checkout to run this notebook elsewhere.
PROJECT_ROOT = Path("/home/gaurang/bayesian_network")
fpath_bif = PROJECT_ROOT / "data" / "alarm" / "alarm.bif"
fpath_data = PROJECT_ROOT / "data" / "alarm" / "ALARM10k.csv"
fpath_config = PROJECT_ROOT / "code" / "src" / "config.yaml"

# The config only holds plain numbers, so the safe loader is sufficient;
# unlike yaml.Loader it cannot construct arbitrary Python objects from tags.
with open(fpath_config, "r") as f:
    config = yaml.safe_load(f)
config

{'embedding_dim': 1,
 'gnn_hidden_dim': 32,
 'gnn_out_dim': 16,
 'fc1_out_dim': 8,
 'batch_size_train': 32,
 'batch_size_val': 32,
 'batch_size_test': 32,
 'num_epochs': 50,
 'patience': 10,
 'min_delta': 1e-05,
 'lr': 0.01}

In [3]:
# Parse the network definition from the BIF file and build its adjacency
# matrix as a DataFrame whose rows and columns are the network's variables.
# NOTE(review): a 1.0 at (row, col) presumably marks an edge row -> col;
# confirm against adj_df_from_BIF.
bn = BIFReader(fpath_bif)
adj_df = adj_df_from_BIF(bn)
adj_df

Unnamed: 0,HISTORY,CVP,PCWP,HYPOVOLEMIA,LVEDVOLUME,LVFAILURE,STROKEVOLUME,ERRLOWOUTPUT,HRBP,HREKG,...,MINVOLSET,VENTMACH,VENTTUBE,VENTLUNG,VENTALV,ARTCO2,CATECHOL,HR,CO,BP
HISTORY,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CVP,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PCWP,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
HYPOVOLEMIA,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
LVEDVOLUME,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
LVFAILURE,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
STROKEVOLUME,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
ERRLOWOUTPUT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
HRBP,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
HREKG,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
# Display the probability table parsed from the BIF file for every variable
# (a dict mapping variable name -> array).
# NOTE(review): this dumps every table in full; the output is long.
bn.get_values()

{'HISTORY': array([[0.9 , 0.01],
        [0.1 , 0.99]]),
 'CVP': array([[0.95, 0.04, 0.01],
        [0.04, 0.95, 0.29],
        [0.01, 0.01, 0.7 ]]),
 'PCWP': array([[0.95, 0.04, 0.01],
        [0.04, 0.95, 0.04],
        [0.01, 0.01, 0.95]]),
 'HYPOVOLEMIA': array([[0.2],
        [0.8]]),
 'LVEDVOLUME': array([[0.95, 0.01, 0.98, 0.05],
        [0.04, 0.09, 0.01, 0.9 ],
        [0.01, 0.9 , 0.01, 0.05]]),
 'LVFAILURE': array([[0.05],
        [0.95]]),
 'STROKEVOLUME': array([[0.98, 0.5 , 0.95, 0.05],
        [0.01, 0.49, 0.04, 0.9 ],
        [0.01, 0.01, 0.01, 0.05]]),
 'ERRLOWOUTPUT': array([[0.05],
        [0.95]]),
 'HRBP': array([[0.98, 0.3 , 0.01, 0.4 , 0.98, 0.01],
        [0.01, 0.4 , 0.98, 0.59, 0.01, 0.01],
        [0.01, 0.3 , 0.01, 0.01, 0.01, 0.98]]),
 'HREKG': array([[0.3333333, 0.3333333, 0.01     , 0.3333333, 0.98     , 0.01     ],
        [0.3333333, 0.3333333, 0.98     , 0.3333333, 0.01     , 0.01     ],
        [0.3333333, 0.3333333, 0.01     , 0.3333333, 0.01     , 0

In [4]:
# A second reader over the same BIF file, used by the dataset cells below.
reader = BIFReader(fpath_bif)

# Read every column as text, then encode it using the network parsed earlier.
df_raw = pd.read_csv(fpath_data, dtype=str)
df_data, encoder = encode_data(df_raw, bn)

# NOTE(review): 123 is presumably a random seed and False a flag of
# get_train_test_splits; confirm their meaning against src.utils.
splits = get_train_test_splits(df_data, 123, False)
df_train, df_valid, df_test = splits

In [5]:
# Build the train / validation / test datasets. All three share the same
# reader, adjacency matrix and perturbation factor; only the data split differs.
perturbation_factor = 0.0
adj_df = adj_df_from_BIF(reader, perturbation_factor)

train_set, val_set, test_set = (
    AlarmDataset(
        df_data=split,
        bn=reader,
        adj_df=adj_df,
        perturbation_factor=perturbation_factor,
    )
    for split in (df_train, df_valid, df_test)
)

In [6]:
# One DataLoader per split, each with its own batch size from the config.
# NOTE(review): the training loader is created without shuffle=True; confirm
# the split is already shuffled or that ordered batches are intended.
dataloader_train, dataloader_valid, dataloader_test = (
    DataLoader(split_set, batch_size=config[size_key])
    for split_set, size_key in (
        (train_set, "batch_size_train"),
        (val_set, "batch_size_val"),
        (test_set, "batch_size_test"),
    )
)

In [7]:
# Pull the first batch from the training loader for interactive inspection.
# `batch` keeps the whole item; X and y are its two components
# (presumably inputs and targets; confirm against AlarmDataset).
it = iter(dataloader_train)
X, y = batch = next(it)

In [8]:
# Collect everything the network needs from the config and the training
# dataset, then instantiate the model in a single call.
model_kwargs = {
    "config": config,
    "num_nodes": len(train_set.input_nodes),
    "node_states": train_set.input_states,
    "edge_index": train_set.edge_index,
    "terminal_node_ids": train_set.terminal_node_ids,
    "target_node_states": train_set.target_states,
}
model = BNNet(**model_kwargs)

GNN(
  (layer1): GraphConv(1, 32)
  (layer2): GraphConv(32, 16)
)

In [10]:
# Forward pass of the model on the batch inputs X.
# NOTE(review): the recorded run of this cell failed with
# "AttributeError: 'GraphConv' object has no attribute 'dtype'". The cause is
# presumably inside BNNet's forward (not visible in this notebook); fix it
# there before relying on this cell.
model(X)

torch.float32


AttributeError: 'GraphConv' object has no attribute 'dtype'

In [None]:
# NOTE(review): `train` is the function imported from src.train in the import
# cell, not a batch of data, so calling the model on it looks unintended.
# This cell was never executed; confirm the intended call or delete the cell.
model(train)

In [11]:
# One embedding table per input node: table i has as many rows as node i has
# states, and every table maps a state to a vector of the same width.
# The states come from `train_set` (the dataset the model above is built from)
# so the cell runs on a fresh kernel; the previous `dataset` name was never
# defined in this notebook.
embedding_dim = 7  # width of each node embedding in this experiment
num_embeddings_list = [len(states) for states in train_set.input_states]
node_embedding_layers = [
    nn.Embedding(num_embeddings, embedding_dim)
    for num_embeddings in num_embeddings_list
]

In [13]:
# Embed column i of X with the i-th embedding layer, giving one tensor per node.
gnn_input = [
    embed(X[:, node_idx])
    for node_idx, embed in enumerate(node_embedding_layers)
]

len(gnn_input)


36

In [30]:
# Join the per-node tensors along a new axis 1 (the node axis).
# Note: this rebinds `gnn_input` from a list of tensors to a single tensor,
# so the cell is meant to be run once after the list is built.
gnn_input = torch.stack(gnn_input, 1)
gnn_input.shape

torch.Size([4, 36, 7])

In [24]:
# Single graph-convolution layer mapping 7 input features per node to 10.
gnn = GCNConv(in_channels=7, out_channels=10)

In [33]:
# Run the graph convolution over the stacked node embeddings.
# The graph connectivity is taken from the training dataset (the same
# `edge_index` handed to BNNet); the bare `edge_index` name used before was
# never defined in this notebook and fails on a fresh kernel.
# NOTE(review): assumes train_set.edge_index is in the format GCNConv expects;
# confirm against AlarmDataset.
x = gnn(gnn_input, train_set.edge_index)
x.shape

torch.Size([4, 36, 10])

In [37]:
# Flatten everything after the batch axis into one feature vector per sample.
# The batch size is read from the tensor instead of being hard-coded to 4, so
# the reshape stays correct for any batch size (e.g. the 32 used in the config).
x = x.view(x.shape[0], -1)
x.shape

torch.Size([4, 360])

In [48]:
# Frozen Bernoulli distribution with success probability 0: every draw is 0.
pmf = bernoulli(p=0.0)
# Draw one sample and show it as a scalar.
pmf.rvs(size=1)[0]

0

In [18]:
# Prefer the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
# True exactly when the GPU was selected.
device.type == "cuda"

True