In [1]:
import os
from utils import logging, get_trajectory_from_neo4j, Neo4jConnection

logging.getLogger().setLevel(logging.INFO)

# load_dotenv()
# neo4j_url = os.environ.get("NEO4J_URI")
# neo4j_username = os.environ.get("NEO4J_USERNAME")
# neo4j_password = os.environ.get("NEO4J_PASSWORD")

# Initialize the Neo4j driver
neo4j_connection = Neo4jConnection()
driver = neo4j_connection.get_driver()

In [6]:
import networkx as nx

In [7]:
from torch_geometric.data import HeteroData
import torch

In [4]:

# Example from docs, do not execute. 
# https://pytorch-geometric.readthedocs.io/en/2.5.0/generated/torch_geometric.data.HeteroData.html
# data = HeteroData()

# data['paper'].x = ... # [num_papers, num_features_paper]
# data['author'].x = ... # [num_authors, num_features_author]
# data['institution'].x = ... # [num_institutions, num_features_institution]
# data['field_of_study'].x = ... # [num_field, num_features_field]

# data['paper', 'cites', 'paper'].edge_index = ... # [2, num_edges_cites]
# data['author', 'writes', 'paper'].edge_index = ... # [2, num_edges_writes]
# data['author', 'affiliated_with', 'institution'].edge_index = ... # [2, num_edges_affiliated]
# data['paper', 'has_topic', 'field_of_study'].edge_index = ... # [2, num_edges_topic]

# data['paper', 'cites', 'paper'].edge_attr = ... # [num_edges_cites, num_features_cites]
# data['author', 'writes', 'paper'].edge_attr = ... # [num_edges_writes, num_features_writes]
# data['author', 'affiliated_with', 'institution'].edge_attr = ... # [num_edges_affiliated, num_features_affiliated]
# data['paper', 'has_topic', 'field_of_study'].edge_attr = ... # [num_edges_topic, num_features_topic]



In [5]:
# Explain in the text why the first dimension in edges is 2. That is not intuitive. Source nodes - target nodes

In [10]:
patient, time_steps, actions = get_trajectory_from_neo4j(driver=driver, traj=6)

## IMPORTANT: Timestep nodes start from 0!!! in the data and in the heterograph

In [11]:
graph = HeteroData()
# Add patient nodes
patient_data = torch.tensor([list(patient.values())]) # shape should be (num patients, num features)
graph['patient'].x = patient_data


In [12]:
# Add TimeStep nodes for this patient
timestepdata = []
for timestep in time_steps:
    timestepdata.append(list(timestep.values()))

graph['timestep'].x = torch.FloatTensor(timestepdata)

In [13]:
# Add edges between patient and timestep
num_timesteps = graph['timestep'].x.size()[0]
patient_to_timestep_edge_index = torch.tensor([
    [0] * num_timesteps,  # Patient node repeated `num_timesteps` times (source nodes)
    list(range(num_timesteps))  # Timestep node indices (target nodes) - start from 0
])

graph["patient", "to", "timestep"].edge_index = patient_to_timestep_edge_index

print(graph)

HeteroData(
  patient={ x=[1, 6] },
  timestep={ x=[20, 48] },
  (patient, to, timestep)={ edge_index=[2, 20] }
)


In [14]:
# Add edges between timesteps. Last timestep node is not connected with any node.
timestep_to_timestep_edge_index = torch.tensor([
    list(range(num_timesteps-1)),  #source timestep nodes
    list(range(1, num_timesteps))  #target timestep nodes
])

graph["timestep", "to", "timestep"].edge_index = timestep_to_timestep_edge_index

print(graph)

HeteroData(
  patient={ x=[1, 6] },
  timestep={ x=[20, 48] },
  (patient, to, timestep)={ edge_index=[2, 20] },
  (timestep, to, timestep)={ edge_index=[2, 19] }
)


In [15]:
patient_to_timestep_edge_index

tensor([[ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0],
        [ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
         18, 19]])

In [16]:
timestep_to_timestep_edge_index

tensor([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
         18],
        [ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
         19]])

In [17]:
# to do visualise graph

In [18]:
# Add property(action) to the edges between timesteps
actions_values = [action['action'] for action in actions]
graph["timestep", "to", "timestep"].edge_attr = actions_values

NameError: name 'actions' is not defined

In [37]:
# Add dummy weight to patient-to-timestep nodes. Without doing that it is not possible to use .to_hetero()

graph["patient", "to", "timestep"].edge_attr = [1] * num_timesteps

In [38]:
# Try to fix an error that patient is not a destination node. Creating 2 directed edges from patient to timestep and back
# That hack worked
timestep_to_patient_edge_index = torch.tensor([
    list(range(num_timesteps)), # Timestep node indices (target nodes) - start from 0
    [0] * num_timesteps  # Patient node repeated `num_timesteps` times (source nodes)
      
])

graph["timestep", "to", "patient"].edge_index = timestep_to_patient_edge_index
graph["timestep", "to", "patient"].edge_attr = [1] * num_timesteps

In [39]:
graph

HeteroData(
  patient={ x=[1, 6] },
  timestep={ x=[20, 48] },
  (patient, to, timestep)={
    edge_index=[2, 20],
    edge_attr=[20],
  },
  (timestep, to, timestep)={
    edge_index=[2, 19],
    edge_attr=[19],
  },
  (timestep, to, patient)={
    edge_index=[2, 20],
    edge_attr=[20],
  }
)

In [17]:

# import torch.nn as nn
# from torch_geometric.nn import GCNConv

# class GNN_DQN_Agent(nn.Module):
#     def __init__(self, input_dim, hidden_dim, action_dim):
#         super(DQNGNN, self).__init__()
#         self.conv1 = GCNConv(input_dim, hidden_dim)
#         self.conv2 = GCNConv(hidden_dim, hidden_dim)
#         self.fc = nn.Linear(hidden_dim, action_dim)

#     def forward(self, x, edge_index):
#         x = self.conv1(x, edge_index).relu()
#         x = self.conv2(x, edge_index).relu()
#         q_values = self.fc(x)
#         return q_values

In [18]:
# Taken from the documentation https://pytorch-geometric.readthedocs.io/en/stable/notes/heterogeneous.html
# from torch_geometric.nn import GATConv, Linear, to_hetero

# num_actions = 25

# class GAT(torch.nn.Module):
#     def __init__(self, hidden_channels, out_channels):
#         super().__init__()
#         self.conv1 = GATConv((-1, -1), hidden_channels, add_self_loops=False)
#         self.lin1 = Linear(-1, hidden_channels)
#         self.conv2 = GATConv((-1, -1), out_channels, add_self_loops=False)
#         self.lin2 = Linear(-1, out_channels)

#     def forward(self, x, edge_index):
#         x = self.conv1(x, edge_index) + self.lin1(x)
#         x = x.relu()
#         x = self.conv2(x, edge_index) + self.lin2(x)
#         return x


# model = GAT(hidden_channels=64, out_channels=num_actions)
# model = to_hetero(model, graph.metadata(), aggr='sum')

In [23]:
# Taken from the documentation, too https://pytorch-geometric.readthedocs.io/en/stable/notes/heterogeneous.html
import torch_geometric.transforms as T
# from torch_geometric.datasets import OGB_MAG
from torch_geometric.nn import SAGEConv, to_hetero

num_actions = 25

# dataset = OGB_MAG(root='./data', preprocess='metapath2vec', transform=T.ToUndirected())
data = graph

class GNN(torch.nn.Module):
    def __init__(self, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = SAGEConv((-1, -1), hidden_channels)
        self.conv2 = SAGEConv((-1, -1), out_channels)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index).relu()
        x = self.conv2(x, edge_index)
        return x


model = GNN(hidden_channels=64, out_channels=num_actions)
model = to_hetero(model, data.metadata(), aggr='sum')

# Note
# If we want to let messages be passed in both directions then we can convert the graph into undirected
# import torch_geometric.transforms as T
# data = T.ToUndirected()(data)


In [20]:

# data.metadata()

(['patient', 'timestep'],
 [('patient', 'to', 'timestep'), ('timestep', 'to', 'timestep')])

In [40]:
data = graph

In [41]:
with torch.no_grad():  # Initialize lazy modules.
    out = model(data.x_dict, data.edge_index_dict)

In [26]:
out

{'patient': tensor([[-1.8355e+08,  2.6512e+08,  1.0451e+09, -3.1577e+08,  8.7460e+08,
           3.8660e+08, -6.3425e+07, -5.6955e+08, -1.1633e+09, -1.0965e+09,
          -9.9315e+08,  1.6576e+09,  7.7405e+08,  1.5872e+08, -3.0918e+08,
           1.0827e+08, -8.7755e+08,  3.3335e+08, -3.9115e+08, -4.2389e+08,
          -5.2957e+08, -8.4399e+08,  9.4244e+08, -1.5308e+09,  9.4130e+08]]),
 'timestep': tensor([[ 1.1748e+09,  1.3496e+09, -2.3367e+08, -1.4399e+09,  4.7409e+07,
          -3.6693e+08, -1.1341e+09, -2.5111e+09, -6.6362e+08, -9.0351e+08,
          -1.1194e+09,  8.0793e+07, -4.4441e+08,  1.2101e+08,  8.4049e+08,
          -4.2966e+07, -1.2024e+09, -1.1215e+08, -9.5991e+08,  1.1301e+09,
           1.1114e+09,  2.5230e+08,  4.2663e+08,  6.7458e+08, -7.0377e+08],
         [ 2.0304e+09,  1.4012e+09, -2.1393e+08, -7.6534e+08, -1.9393e+08,
          -1.1762e+09, -1.6628e+09, -2.0092e+09, -9.7625e+08, -1.2797e+09,
          -1.0285e+09, -7.0516e+08, -2.0949e+08,  1.1533e+09,  1.6803e+09

In [None]:
# i get my inspiration from here
# https://github.com/corl-team/CORL/blob/main/algorithms/offline/cql.py

In [11]:
# Im am here
# was runing the import in the next cell to figure out which libraries are missing
# no more libs are missing -> let's dig into the code
# is it only for continious actions???? make sure it will work 

In [12]:
# I can actually import stuff from python files easily
from cql_adapted import print_hello

In [None]:
# from my internship

In [None]:
# Define the environment
class DynamicGraphEnvironment:
    def __init__(self):
        self.graph = nx.Graph()
        G = nx.Graph()
        G.add_node(1)
        G.add_node(2)
        self.step_count = 0
    
    def step(self):
        # Simulate a new node being added to the graph
        new_node = self.graph.number_of_nodes()
        self.graph.add_node(new_node)
        return new_node

# Define the Graph Neural Network (GNN) model
class GNN(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, output_num):
        super(GNN, self).__init__()
        self.gcn_layer = nn.GraphConv(input_dim, hidden_dim)
        self.linear_layer = nn.Linear(hidden_dim, output_dim)
        
    
    def forward(self, node_features, edge_index):
        h = self.gcn_layer(x=node_features, edge_index=edge_index)
        h = self.linear_layer(h)
        probabilities = F.softmax(h, dim=-1)
        return probabilities


def adjacency_matrix_to_edge_index(adjacency_matrix):
    # Convert the scipy csr_matrix to a COO (Coordinate) format, which is compatible with torch
    coo_matrix = adjacency_matrix.tocoo()
    # Create a torch tensor for the non-zero elements' indices
    edge_index = torch.tensor([coo_matrix.row, coo_matrix.col], dtype=torch.long)
    # edge_index = torch.nonzero(adjacency_matrix)
    return edge_index.t().contiguous()


# Define the reinforcement learning agent
class RLAgent:
    def __init__(self, state_dim, action_dim, action_num):
        self.state_dim = state_dim
        self.model = GNN(state_dim, HIDDEN_DIM, action_dim, action_num)
        self.action_num = action_num
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)
    
    def select_action(self, state):
        
        number_of_nodes = env.graph.number_of_nodes()
        # mock_features = torch.randn(number_of_nodes, self.state_dim)
        mock_features = torch.ones(number_of_nodes, self.state_dim)
        self.x = mock_features
        
        pyg_graph = from_networkx(state)
        
        
        if random.uniform(0, 1) < EPSILON:
            action = np.random.randint(self.action_num) # Explore action space
        
        action_probs = self.model(node_features=self.x, edge_index=pyg_graph.edge_index)
        # action_probs = self.model(node_features=pyg_graph.x, edge_index=pyg_graph.edge_index)
        action = torch.argmax(action_probs)
        
        return action.item()
        # return 0
    
    def update(self, state, action, reward):
        self.optimizer.zero_grad()
        pyg_graph = from_networkx(state)
        action_probs = self.model(node_features=self.x, edge_index=pyg_graph.edge_index)
        loss = -action_probs[action] * reward
        loss.backward()
        self.optimizer.step()
        pass

# Define a reward function
def calculate_reward(node_created, link_created, target_node):
    if link_created:
        return 1.0
    return -1.0  # Penalize if no link created

# Training loop
env = DynamicGraphEnvironment()
# agent = RLAgent(state_dim=env.graph.number_of_nodes(), action_dim=1)
feature_dim = 10
agent = RLAgent(state_dim=feature_dim, action_dim=1, action_num=2)

for episode in range(1000):
    #state is a graph
    state = env.graph
    new_node = env.step()
    
    action = agent.select_action(state)
    print(f"Action {action}")
    link_created = action == new_node
    reward = calculate_reward(new_node, link_created, target_node=new_node)
    
    agent.update(state, action, reward)
    
    print(f"Episode {episode + 1}: Reward {reward}")

In [None]:
time_variant_properties = ['step', 'm:charttime',
        'o:mechvent', 'o:max_dose_vaso', 'o:re_admission',
       'o:Weight_kg', 'o:GCS', 'o:HR', 'o:SysBP', 'o:MeanBP', 'o:DiaBP',
       'o:RR', 'o:Temp_C', 'o:FiO2_1', 'o:Potassium', 'o:Sodium', 'o:Chloride',
       'o:Glucose', 'o:Magnesium', 'o:Calcium', 'o:Hb', 'o:WBC_count',
       'o:Platelets_count', 'o:PTT', 'o:PT', 'o:Arterial_pH', 'o:paO2',
       'o:paCO2', 'o:Arterial_BE', 'o:HCO3', 'o:Arterial_lactate', 'o:SOFA',
       'o:SIRS', 'o:Shock_Index', 'o:PaO2_FiO2', 'o:cumulated_balance',
       'o:SpO2', 'o:BUN', 'o:Creatinine', 'o:SGOT', 'o:SGPT', 'o:Total_bili',
       'o:INR', 'o:input_total', 'o:input_4hourly', 'o:output_total',
       'o:output_4hourly', 'a:action', 'r:reward']

In [None]:
def clean_name_of_column(old_name: str):
    if ':' in old_name:
        # Use regex to capture the part after the colon
        match = re.search(r':\s*(.+)', old_name)
        return match.group(1).strip()
    else:
        # If no colon, return the whole text stripped
        return old_name.strip()

In [None]:
from neo4j import GraphDatabase
import networkx as nx

class Neo4jToNetworkX:
    def __init__(self, uri, user, password):
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def fetch_graph(self):
        with self.driver.session() as session:
            # Fetch nodes
            nodes_query = """
            MATCH (n)
            RETURN id(n) AS node_id, n.name AS name
            """
            nodes = session.run(nodes_query)

            # Fetch edges
            edges_query = """
            MATCH (n)-[r]->(m)
            RETURN id(n) AS source, id(m) AS target, r.weight AS weight
            """
            edges = session.run(edges_query)

            # Create NetworkX graph
            G = nx.DiGraph()  # Use `` for undirected graphs

            # Add nodes
            for record in nodes:
                G.add_node(record["node_id"], name=record["name"])

            # Add edges
            for record in edges:
                G.add_edge(
                    record["source"], 
                    record["target"], 
                    weight=record.get("weight", 1.0)  # Default weight if none
                )

            return G

# Example usage
converter = Neo4jToNetworkX("bolt://localhost:7687", "neo4j", "password")
graph = converter.fetch_graph()

In [35]:
G = nx.Graph()

G.add_node(patient)

TypeError: unhashable type: 'dict'