In [1]:
from typing import Tuple, List, Iterable
from pydot import Dot, graph_from_dot_data, Edge
from graphviz.graphs import BaseGraph
from graphviz import Source
import amrlib
from amrlib.graph_processing.amr_plot import AMRPlot
import numpy as np
import pandas as pd
from tqdm import tqdm 

In [2]:
def edge_to_node_ids(edge: Edge) -> Tuple[str, str]:
    """Return the ``(source, destination)`` node ids of ``edge`` as a tuple."""
    source_id = edge.get_source()
    destination_id = edge.get_destination()
    return source_id, destination_id


def get_graph_dot_obj(graph_spec) -> List[Dot]:
    """Normalize ``graph_spec`` into a list of ``pydot.Dot`` graphs.

    In the spirit of Postel's law (be liberal in what you accept), the input may be:

    - a ``graphviz`` object (``BaseGraph`` or ``Source``): its ``.source`` DOT
      string is parsed,
    - a DOT source string: parsed with ``pydot.graph_from_dot_data``,
    - a single ``pydot.Dot`` object: wrapped in a one-element list,
    - a list of ``pydot.Dot`` objects: returned unchanged.

    Raises:
        AssertionError: if the input could not be turned into a list of
            ``Dot`` objects.
    """
    _original_graph_spec = graph_spec
    if isinstance(graph_spec, (BaseGraph, Source)):
        # get the source (str) from a graph object
        graph_spec = graph_spec.source
    if isinstance(graph_spec, str):
        # get a dot-graph list from dot string data
        graph_spec = graph_from_dot_data(graph_spec)
    elif isinstance(graph_spec, Dot):
        # a bare Dot object is a valid graph too: normalize it to a one-element list
        graph_spec = [graph_spec]
    # make sure we have a list of Dot objects now
    assert isinstance(graph_spec, list) and all(
        isinstance(x, Dot) for x in graph_spec
    ), (
        f"Couldn't get a proper dot object list from: {_original_graph_spec}. "
        f"At this point, we should have a list of Dot objects, but was: {graph_spec}"
    )
    return graph_spec


def get_edges(graph_spec, postprocess_edges=edge_to_node_ids):
    """Get the edges of the graph(s) described by ``graph_spec``.

    Args:
        graph_spec: Anything ``get_graph_dot_obj`` accepts.
        postprocess_edges: Function applied to every ``pydot.Edge``. By default it
            extracts the ``(source, destination)`` node-id pair. If it is not
            callable (e.g. ``None``), the raw ``pydot.Edge`` objects are returned so
            you can extract any information you want.

    Returns:
        A list of (post-processed) edges when the input holds exactly one graph,
        or a list of such lists (one per graph) when it holds several.

    Raises:
        ValueError: if the input contains no graph at all.
    """
    graphs = get_graph_dot_obj(graph_spec)
    if len(graphs) == 0:
        raise ValueError("Your input had no graphs")

    # Work on the already-normalized Dot objects directly instead of recursing
    # through get_graph_dot_obj again for each graph.
    per_graph_edges = []
    for graph in graphs:
        edges = graph.get_edges()
        if callable(postprocess_edges):
            edges = list(map(postprocess_edges, edges))
        per_graph_edges.append(edges)

    # A single graph yields a flat edge list; several graphs yield one list each.
    if len(per_graph_edges) == 1:
        return per_graph_edges[0]
    return per_graph_edges
    
#cite: https://stackoverflow.com/questions/47426249/finding-list-of-edges-in-graphviz-in-python    

In [3]:
def get_adj_matrix(graph, return_node_names=False):
    """Build a dense, directed adjacency matrix for an AMR graph.

    The graph is handed to ``AMRPlot.build_from_graph``, the edges of the resulting
    ``AMRPlot.graph`` are read back as ``(source, destination)`` node-id pairs via
    ``get_edges``, and every pair sets one entry of the matrix.

    Args:
        graph: The AMR graph entry passed to ``AMRPlot.build_from_graph`` (in this
            notebook: one element of the list returned by ``stog.parse_sents``).
        return_node_names: If true, also return the node ids that label the
            rows/columns of the matrix. Defaults to ``False``, which returns the
            matrix only.

    Returns:
        A square float array ``A`` where ``A[i, j]`` is 1 if there is an edge from
        node ``i`` to node ``j`` and 0 otherwise. Rows and columns are ordered by
        the sorted unique node ids. With ``return_node_names=True`` the result is
        the tuple ``(A, node_names)``.

    Note:
        The matrix is derived from the edge list only, so a node that is not an
        endpoint of any edge gets no row/column.
    """
    amr_plot = AMRPlot()
    amr_plot.build_from_graph(entry=graph)

    edges = tuple(get_edges(amr_plot.graph))

    # np.unique returns the sorted unique node ids plus, for every id in the edge
    # pairs, its index into that sorted array; reshaping restores (source, dest) rows.
    node_names, adj_idx = np.unique(edges, return_inverse=True)
    adj_idx = adj_idx.reshape(-1, 2)

    n_nodes = len(node_names)
    adj_matrix = np.zeros((n_nodes, n_nodes))
    adj_matrix[adj_idx[:, 0], adj_idx[:, 1]] = 1
    #cite: https://stackoverflow.com/questions/21324774/convert-non-numeric-tuples-to-numpy-matrix

    if return_node_names:
        return adj_matrix, node_names
    return adj_matrix

In [13]:
# Sample input texts; each list entry is later parsed into one AMR graph.
# NOTE(review): the first entry contains two sentences but is passed to the parser
# as a single item - confirm that this is intended.
X = [
    (
        'Text classification is an important and classical problem in natural language processing. '
        'There have been a number of studies that applied convolutional neural networks '
        '(convolution on regular grid, e.g., sequence) to classification'
    ),
    'In this work, we propose to use graph convolutional networks for text classification',
]

In [14]:
# Display the sample texts.
X

['Text classification is an important and classical problem in natural language processing. There have been a number of studies that applied convolutional neural networks (convolution on regular grid, e.g., sequence) to classification',
 'In this work, we propose to use graph convolutional networks for text classification']

In [15]:
# Load amrlib's sentence-to-graph model once, up front.
stog = amrlib.load_stog_model()

# One adjacency matrix per entry of X, in the same order.
X_matrices = []
for txt in tqdm(X):
    # parse_sents takes a list of texts; a single-item list is passed and its
    # first result is used.
    graph = stog.parse_sents([txt], add_metadata=False)[0]
    X_matrices.append(get_adj_matrix(graph))
    


100%|███████████████████████████████████████████████████████████████████████████| 2/2 [00:21<00:00, 10.67s/it]


In [16]:
# `txt` still holds the value from the last iteration of the parsing loop,
# i.e. the last entry of X.
txt

'In this work, we propose to use graph convolutional networks for text classification'

In [17]:
# `graph` still holds the value assigned in the last iteration of the parsing
# loop, i.e. the parse of the last entry of X.
print(graph)

(p / propose-01
      :ARG0 (w / we)
      :ARG1 (u / use-01
            :ARG0 w
            :ARG1 (n / network
                  :mod (c / convolutional
                        :mod (g / graph)))
            :ARG2 (c2 / classify-01
                  :ARG1 (t / text)))
      :subevent-of (w2 / work-01
            :mod (t2 / this)))


In [18]:
# Adjacency matrix of the last entry of X. Rows/columns follow the sorted unique
# node ids computed by np.unique inside get_adj_matrix; entry [i, j] is 1 for an
# edge from node i to node j.
print(X_matrices[-1])

[[0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 1. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 1. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]]


In [12]:
# TODO: Do the adjacency matrices need to be padded to a unified size?
# TODO: What inputs does a GCN expect?