There are several ways to represent graphs in Python. We list those that can be relevant for use in molecular generation. 

In [None]:
# graph represented as dictionary
# keys are nodes and values for a key are nodes connected to the node in that key

g = { "A" : {"B"},
      "B" : {"A", "C"},
      "C" : {"B"},
      "D" : {"B", "E"},
      "E" : {"D"},
      "F" : {} # empty set of values: F is not connected to any nodes
     }

We remark that molecules are defined to be conected graphs, so an isolated note like F wouldn't be allowed. It is still important to have functions to recognize and deal with nodes like F to take care of isolated nodes that may be formed after an action.  

In [None]:
# given a graph, can get a list of all edges (i.e. a list of bonds in a molecule)

def get_edges(graph):
    edges = []

    for node in graph:
        for neighbour in graph[node]:
            edges.append({node, neighbour})

    return edges

print(get_edges(g))

[{'B', 'A'}, {'B', 'A'}, {'B', 'C'}, {'B', 'C'}, {'B', 'D'}, {'D', 'E'}, {'D', 'E'}]


In [None]:
# it can be useful to be able to find all isolated nodes, 
# for example to check that there isn't any after we perform an action

def get_isolated_nodes(graph):
    isolated_nodes = set()

    for node in graph:
        if not graph[node]:
            isolated_nodes.add(node)
    return isolated_nodes

print(get_isolated_nodes(g))

{'F'}


Below is a more comprehensive graph class. 

In [None]:
class Graph(object):

    def __init__(self, graph_dict=None):
        """ Initializes graph object
            If no dictionary or None is given, uses empty dictionary
        """
        if graph_dict == None:
            graph_dict = {}
        self._graph_dict = graph_dict

    def get_node_edges(self, node):
        """ returns a list of all the edges connected to a node"""
        return self._graph_dict[node]
        
    def all_nodes(self):
        """ returns the set of all nodes of a graph """
        return set(self._graph_dict.keys())

    def all_edges(self):
        """ returns all edges of a graph """
        return get_edges(self)

    def add_node(self, node):
        """ If the node "node" is not in 
            self._graph_dict, a key "node" with an empty
            list as a value is added to the dictionary. 
            Otherwise nothing is done. 
        """
        if node not in self._graph_dict:
            self._graph_dict[node] = []

    def add_edge(self, edge):
        """ assumes that edge is of type set, tuple or list; 
            between two nodes there can be multiple edges
        """
        edge = set(edge)
        node1, node2 = tuple(edge)
        for x, y in [(node1, node2), (node2, node1)]:
            if x in self._graph_dict:
                self._graph_dict[x].add(y)
            else:
                self._graph_dict[x] = [y]
    
    def remove_node(self, node):
        """ If the node "node" is in 
            self._graph_dict, remove the key.  
            Otherwise nothing is done. 
        """
        if node in self._graph_dict:
            self._graph_dict.pop(node)
  

Example: applying these functions to the example above. 

In [None]:
print(g)

{'A': {'B'}, 'B': {'A', 'C'}, 'C': {'B'}, 'D': {'B', 'E'}, 'E': {'D'}, 'F': {}}


In [None]:
graph = Graph(g)
graph.get_node_edges("A")

{'B'}

In [None]:
graph.all_nodes()

{'A', 'B', 'C', 'D', 'E', 'F'}

In [None]:
get_edges(g)

[{'A', 'B'},
 {'A', 'B'},
 {'B', 'C'},
 {'B', 'C'},
 {'B', 'D'},
 {'D', 'E'},
 {'D', 'E'}]

In [None]:
graph.add_node("H")

In [None]:
graph.all_nodes()

{'A', 'B', 'C', 'D', 'E', 'F', 'H'}

In [None]:
print(graph)

<__main__.Graph object at 0x7f02f4412e50>


In [None]:
graph.add_edge({"M", "L"})

In [None]:
get_edges(g)

[{'A', 'B'},
 {'A', 'B'},
 {'B', 'C'},
 {'B', 'C'},
 {'B', 'D'},
 {'D', 'E'},
 {'D', 'E'},
 {'L', 'M'},
 {'L', 'M'}]

In [None]:
graph.remove_node("A")

In [None]:
graph.all_nodes()

{'B', 'C', 'D', 'E', 'F', 'L', 'M'}

MDP operations can be carried out this way, but this of course doens't ensure chemical validity. For that, the MolDQN uses functions built into RDKit. Our next step is to see if we can rephrase any of them or reuse them. 

In [None]:
## anything specific to molecules that has been done?