In [None]:
import torch
import os
import json
import pandas as pd
import torch.nn.functional as F
print(torch.__version__)
from platform import python_version
print(python_version())
from collections import defaultdict
from typing import Any, Iterable, List, Optional, Tuple, Union
from torch import Tensor
from torch_geometric.utils import to_dense_adj
from torch_geometric.utils import negative_sampling
from torch_geometric.loader import DataLoader
from sklearn.metrics import roc_auc_score

# The PyG built-in GCNConv
from torch_geometric.nn import GCNConv
from torch_geometric.nn import NNConv
from torch_geometric.nn import global_mean_pool
import torch_geometric.transforms as T
from ogb.nodeproppred import PygNodePropPredDataset, Evaluator

In [None]:
# Select the compute device once: CUDA when PyTorch reports it as available,
# otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

In [None]:
# Modified copy of PyG's from_networkx: attributes that cannot be converted to tensors are kept as plain Python values instead of raising.

def from_networkx(
    G: Any,
    group_node_attrs: Optional[Union[List[str], all]] = None,
    group_edge_attrs: Optional[Union[List[str], all]] = None,
) -> 'torch_geometric.data.Data':
    r"""Converts a :obj:`networkx.Graph` or :obj:`networkx.DiGraph` to a
    :class:`torch_geometric.data.Data` instance.

    Unlike a strict conversion, node/edge/graph attributes that cannot be
    turned into a tensor (for example lists of strings) are left on the
    result as plain Python values instead of raising.

    Args:
        G (networkx.Graph or networkx.DiGraph): A networkx graph.
        group_node_attrs (List[str] or all, optional): The node attributes to
            be concatenated and added to :obj:`data.x`. (default: :obj:`None`)
        group_edge_attrs (List[str] or all, optional): The edge attributes to
            be concatenated and added to :obj:`data.edge_attr`.
            (default: :obj:`None`)

    Raises:
        ValueError: If the nodes (or the edges) do not all carry the same
            set of attribute keys.

    .. note::

        All :attr:`group_node_attrs` and :attr:`group_edge_attrs` values must
        be numeric.

    Examples:

        >>> edge_index = torch.tensor([
        ...     [0, 1, 1, 2, 2, 3],
        ...     [1, 0, 2, 1, 3, 2],
        ... ])
        >>> data = Data(edge_index=edge_index, num_nodes=4)
        >>> g = to_networkx(data)
        >>> # A `Data` object is returned
        >>> from_networkx(g)
        Data(edge_index=[2, 6], num_nodes=4)
    """
    import networkx as nx

    from torch_geometric.data import Data

    G = nx.convert_node_labels_to_integers(G)
    G = G.to_directed() if not nx.is_directed(G) else G

    if isinstance(G, (nx.MultiGraph, nx.MultiDiGraph)):
        edges = list(G.edges(keys=False))
    else:
        edges = list(G.edges)

    edge_index = torch.tensor(edges, dtype=torch.long).t().contiguous()

    data = defaultdict(list)

    # The first node / edge defines the attribute keys every other node /
    # edge is required to have.
    if G.number_of_nodes() > 0:
        node_attrs = list(next(iter(G.nodes(data=True)))[-1].keys())
    else:
        node_attrs = []

    if G.number_of_edges() > 0:
        edge_attrs = list(next(iter(G.edges(data=True)))[-1].keys())
    else:
        edge_attrs = []

    for _, feat_dict in G.nodes(data=True):
        if set(feat_dict.keys()) != set(node_attrs):
            raise ValueError('Not all nodes contain the same attributes')
        for key, value in feat_dict.items():
            data[str(key)].append(value)

    for _, _, feat_dict in G.edges(data=True):
        if set(feat_dict.keys()) != set(edge_attrs):
            raise ValueError('Not all edges contain the same attributes')
        for key, value in feat_dict.items():
            # Edge attributes that share a name with a node attribute are
            # stored under an `edge_` prefix so they do not collide.
            key = f'edge_{key}' if key in node_attrs else key
            data[str(key)].append(value)

    for key, value in G.graph.items():
        key = f'graph_{key}' if key in node_attrs else key
        data[str(key)] = value

    for key, value in data.items():
        is_sequence = isinstance(value, (tuple, list))
        # The length check keeps an empty list/tuple (e.g. an empty
        # graph-level attribute) from raising IndexError on `value[0]`.
        if is_sequence and len(value) > 0 and isinstance(value[0], Tensor):
            data[key] = torch.stack(value, dim=0)
        else:
            try:
                data[key] = torch.tensor(value)
            except Exception:
                # Best effort: values torch cannot represent (strings, ragged
                # lists, ...) stay as they are. `Exception` rather than a
                # bare `except` so KeyboardInterrupt/SystemExit propagate.
                pass

    data['edge_index'] = edge_index.view(2, -1)
    data = Data.from_dict(data)

    if group_node_attrs is all:
        group_node_attrs = list(node_attrs)
    if group_node_attrs is not None:
        xs = []
        for key in group_node_attrs:
            x = data[key]
            x = x.view(-1, 1) if x.dim() <= 1 else x
            xs.append(x)
            del data[key]
        data.x = torch.cat(xs, dim=-1)

    if group_edge_attrs is all:
        group_edge_attrs = list(edge_attrs)
    if group_edge_attrs is not None:
        xs = []
        for key in group_edge_attrs:
            key = f'edge_{key}' if key in node_attrs else key
            x = data[key]
            x = x.view(-1, 1) if x.dim() <= 1 else x
            xs.append(x)
            del data[key]
        data.edge_attr = torch.cat(xs, dim=-1)

    if data.x is None and data.pos is None:
        data.num_nodes = G.number_of_nodes()

    return data

In [None]:
# adding this function that was missing in our version 
from torch_geometric.typing import SparseTensor

def to_edge_index(adj: Union[Tensor, SparseTensor]) -> Tuple[Tensor, Tensor]:
    r"""Splits a sparse adjacency matrix into edge indices and edge values.

    Accepts either a :class:`torch.sparse.Tensor` or a
    :class:`torch_sparse.SparseTensor`.

    Args:
        adj (torch.sparse.Tensor or SparseTensor): The adjacency matrix.

    :rtype: (:class:`LongTensor`, :class:`Tensor`)

    Example:

        >>> edge_index = torch.tensor([[0, 1, 1, 2, 2, 3],
        ...                            [1, 0, 2, 1, 3, 2]])
        >>> adj = to_torch_coo_tensor(edge_index)
        >>> to_edge_index(adj)
        (tensor([[0, 1, 1, 2, 2, 3],
                [1, 0, 2, 1, 3, 2]]),
        tensor([1., 1., 1., 1., 1., 1.]))
    """
    if not isinstance(adj, SparseTensor):
        if not adj.requires_grad:
            return adj._indices(), adj._values()
        # `adj._values()` would return a detached tensor; going through
        # `coalesce()` and the public accessors keeps gradients tracked.
        coalesced = adj.coalesce()
        return coalesced.indices(), coalesced.values()

    row, col, stored_value = adj.coo()
    if stored_value is not None:
        edge_value = stored_value
    else:
        # No explicit values stored: every entry counts as 1.
        edge_value = torch.ones(row.size(0), device=row.device)
    return torch.stack([row, col], dim=0), edge_value

In [None]:
# Class indices for node labels. The indices are not contiguous: no label
# maps to 10 ("Unknown" is 9 and "SN_Start" is 11).
label_dict = dict(zip(
    (
        "Point", "Line", "Circle", "Ellipse", "Spline",
        "Conic", "Arc", "External", "Stop", "Unknown",
        "SN_Start", "SN_End", "SN_Center",
    ),
    (*range(10), 11, 12, 13),
))

# Class indices for edge (constraint) labels: the position of each name in
# the tuple below is its index, 0 through 30.
edge_dict = {
    name: index
    for index, name in enumerate((
        "Coincident",
        "Projected",
        "Mirror",
        "Distance",
        "Horizontal",
        "Parallel",
        "Vertical",
        "Tangent",
        "Length",
        "Perpendicular",
        "Midpoint",
        "Equal",
        "Diameter",
        "Offset",
        "Radius",
        "Concentric",
        "Fix",
        "Angle",
        "Circular_Pattern",
        "Pierce",
        "Linear_Pattern",
        "Centerline_Dimension",
        "Intersected",
        "Silhoutted",
        "Quadrant",
        "Normal",
        "Minor_Diameter",
        "Major_Diameter",
        "Rho",
        "Unknown",
        "Subnode",
    ))
}

In [None]:
# extracts features from nodes (positions, labels, etc.) 
from torch_geometric.utils import degree
def get_sketch_features(graph, feature_dim):
    """Build the dense node-feature matrix for ``graph``.

    Each row describes one node:

    * column ``label_dict[label]`` is set to 1 (one-hot of the node label),
    * columns ``15 .. feature_dim - 2`` hold the node's parameter values, in
      the key order of its JSON parameter string, converted to ``float``;
      parameters that do not fit are dropped,
    * the last column holds the node degree, counted from
      ``graph.edge_index[0]``.

    Args:
        graph: Object providing ``num_nodes``, ``parameters`` (one JSON
            string per node), ``label`` (one ``label_dict`` key per node) and
            ``edge_index``.
        feature_dim (int): Width of the feature matrix.

    Returns:
        Tensor of shape ``[graph.num_nodes, feature_dim]``.
    """
    param_offset = 15
    # The last column is reserved for the degree, so parameters may only use
    # columns [param_offset, feature_dim - 1). The previous guard
    # (`i + 2 == feature_dim`) did not match the write offset and let nodes
    # with many parameters index past the end of the row.
    max_params = max(feature_dim - 1 - param_offset, 0)

    x = torch.zeros([graph.num_nodes, feature_dim])

    # Degrees do not change while filling the rows; compute them once instead
    # of once per node.
    node_degree = degree(graph.edge_index[0], graph.num_nodes)

    for idx, p in enumerate(graph.parameters):
        # One-hot encoding of the node label.
        x[idx, label_dict[graph.label[idx]]] = 1

        param_dict = json.loads(p)
        for i, k in enumerate(param_dict.keys()):
            if i >= max_params:
                break
            # Each parameter value becomes one feature value.
            x[idx, i + param_offset] = float(param_dict[k])

        x[idx, -1] = node_degree[idx]
    return x

In [None]:
# Prepares ground truth for node label prediction (called by assign_attributes_to_graph)
def get_sketch_attr_y(graph):
    """Return a ``[graph.num_nodes, 1]`` int64 tensor of node-label class indices.

    Row ``i`` holds ``label_dict[graph.label[i]]``; rows without a matching
    entry in ``graph.label`` stay 0.
    """
    targets = torch.zeros([graph.num_nodes, 1], dtype=torch.int64)

    for row, label_name in enumerate(graph.label):
        targets[row, 0] = label_dict[label_name]

    return targets

In [None]:
# generates adjacency matrix
def get_sketch_adj(graph):
    """Apply the ``ToSparseTensor`` transform to ``graph`` and return its ``adj_t``.

    NOTE(review): the transform is called on ``graph`` itself, not on a copy;
    confirm whether it modifies ``graph`` in place.
    """
    return T.ToSparseTensor()(graph).adj_t

In [None]:
# extracts edge labels as features vectors for edges
def get_sketch_edge_attr(graph):
    """One-hot encode ``graph.edge_label`` into a ``[num_labels, 31]`` float tensor.

    Row ``i`` has a single 1 in column ``edge_dict[graph.edge_label[i]]``.
    """
    num_classes = 31
    num_labels = len(graph.edge_label)
    one_hot = torch.zeros([num_labels, num_classes])
    for row, label_name in enumerate(graph.edge_label):
        one_hot[row, edge_dict[label_name]] = 1
    return one_hot

In [None]:

import numpy as np
# converts the node/edge op sequence from sketchgraphs into a 1-D tensor of class indices (one per op, last op excluded)
def get_sketchgraph_node_constraint_sequence(sg, graph):
    """Return the per-op target classes for the sequence ``sg``.

    The result is a long tensor with ``len(sg) - 1`` entries:

    * a node op that is not a Stop gets ``label_dict`` index of the next
      unused entry of ``graph.label``,
    * an edge op gets ``15 +`` the ``edge_dict`` index of the next unused
      entry of ``graph.edge_label``,
    * every other op (including a Stop node op) leaves its entry at 0.
    """
    targets = torch.zeros((len(sg) - 1), dtype=torch.long)
    next_node = 0
    next_edge = 0
    for position, op in enumerate(sg):
        op_type = type(op)
        if op_type == sketchgraphs.data.sequence.NodeOp:
            if op.label == sketchgraphs.data._entity.EntityType.Stop:
                # Stop ops carry no class of their own.
                continue
            targets[position] = label_dict[graph.label[next_node]]
            next_node += 1
        elif op_type == sketchgraphs.data.sequence.EdgeOp:
            # Edge classes are shifted by 15 so they do not overlap the node classes.
            targets[position] = 15 + edge_dict[graph.edge_label[next_edge]]
            next_edge += 1
    return targets


# sketchgraphs sequence -> pyGraphViz graph -> networkx graph -> PyG graph
def convert_sketchgraph_to_pytorch(sg):
    """Convert a sketchgraphs op sequence into a PyG graph.

    The sequence is first rendered with sketchgraphs'
    ``pgvgraph_from_sequence``, the result is wrapped in ``nx.Graph`` and
    finally handed to ``from_networkx``.
    """
    return from_networkx(
        nx.Graph(sketchgraphs.data.sequence.pgvgraph_from_sequence(sg))
    )

# prepare all data our graphs need for learning
def assign_attributes_to_graph(graph):
    """Attach ``x``, ``y``, ``adj_t``, ``edge_index`` and ``edge_attr`` to ``graph``.

    ``graph`` is modified in place and also returned. It must already carry
    the attributes the helper functions read: ``parameters``, ``label``,
    ``edge_label``, ``edge_index`` and ``num_nodes``.

    The statement order matters: the node features read ``graph.edge_index``
    (for the degree column) before ``edge_index`` is rebuilt from ``adj_t``.
    """
    # 30 is the node-feature width; the first NNConv layer of `newrnnmodel`
    # is built with the same input size.
    graph.x = get_sketch_features(graph, 30)
    graph.y = get_sketch_attr_y(graph)
    graph.adj_t = get_sketch_adj(graph)
    # Rebuild edge_index from the sparse adjacency; the edge values returned
    # alongside it are discarded.
    graph.edge_index = to_edge_index(graph.adj_t)[0]
    # NOTE(review): rows follow the order of `graph.edge_label`; confirm this
    # still matches the column order of the rebuilt `edge_index`.
    graph.edge_attr = get_sketch_edge_attr(graph)
    return graph

# builds one graph per construction step (each prefix of the op sequence is converted from scratch: simple, not efficient)
def get_sketchgraph_graph_sequence(sg):
    """Return the list of graphs obtained after each op of ``sg``.

    Entry ``k`` is the fully attributed PyG graph built from the first
    ``k + 1`` ops. Graphs that come back without an ``edge_label`` attribute
    get an empty list so the attribute helpers can run on them.
    """
    ops_so_far = []
    snapshots = []
    for op in sg:
        ops_so_far.append(op)
        snapshot = convert_sketchgraph_to_pytorch(ops_so_far)
        if not hasattr(snapshot, 'edge_label'):
            snapshot.edge_label = []
        snapshots.append(assign_attributes_to_graph(snapshot))
    return snapshots

In [None]:
# custom dataset class of custom attributes
from torch_geometric.data import Dataset
class SketchgraphDataset(Dataset):
    """In-memory dataset of sketchgraphs sequences converted to PyG graphs.

    The sequences ``[start_idx:end_idx]`` of the flat-array file at ``path``
    are converted eagerly in the constructor. Sequences whose graph has no
    ``edge_label`` attribute are skipped, so ``len(dataset)`` can be smaller
    than ``end_idx - start_idx``.

    Every stored graph carries:

    * ``x``, ``adj_t``, ``edge_index``, ``edge_attr`` - features of the
      complete graph,
    * ``seq`` - the list of graphs after each construction step,
    * ``seq_y`` - the target class index for each step.

    Args:
        start_idx (int): First sequence index to load (inclusive).
        end_idx (int): Last sequence index to load (exclusive).
        transform, pre_transform, pre_filter: Forwarded to
            ``torch_geometric.data.Dataset``.
        path (str): Location of the sketchgraphs flat-array file.
    """

    def __init__(self, start_idx, end_idx, transform=None, pre_transform=None,
                 pre_filter=None, path='datasets/sg_t16_validation.npy'):
        # `Dataset.__init__` takes `root` as its first positional parameter,
        # so the callbacks are passed by keyword; passing them positionally
        # shifted each one into the wrong slot.
        super().__init__(root=None, transform=transform,
                         pre_transform=pre_transform, pre_filter=pre_filter)

        self.data = []
        seq_data = flat_array.load_dictionary_flat(path)
        print(len(seq_data['sequences']))

        for sg in seq_data['sequences'][start_idx:end_idx]:
            graph = convert_sketchgraph_to_pytorch(sg)

            if not hasattr(graph, 'edge_label'):
                continue

            # Features are computed first: they read the original edge_index,
            # which is rebuilt from adj_t two lines further down.
            graph.x = get_sketch_features(graph, 30)
            graph.adj_t = get_sketch_adj(graph)
            graph.edge_index = to_edge_index(graph.adj_t)[0]
            graph.edge_attr = get_sketch_edge_attr(graph)
            graph.seq = get_sketchgraph_graph_sequence(sg)
            graph.seq_y = get_sketchgraph_node_constraint_sequence(sg, graph)

            self.data.append(graph)

    def len(self):
        """Number of graphs that were successfully converted."""
        return len(self.data)

    def get(self, idx):
        """Return the stored graph at position ``idx``."""
        return self.data[idx]

In [None]:
import sketchgraphs
import networkx as nx
from sketchgraphs.data import flat_array

# Training split: sequences [0, 300) of the file that SketchgraphDataset
# loads ('datasets/sg_t16_validation.npy').
train_dataset = SketchgraphDataset(0, 300)
#test_dataset = SketchgraphDataset(101, 111)
    
# batch_size=1: the training loop below reads `batch.seq[0]`, i.e. exactly
# one graph sequence per step.
data_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
#test_loader = DataLoader(test_dataset, batch_size=64, shuffle=True)

#next(iter(data_loader)).seq_y

# Validation split: sequences [500, 700) of the same file, so it does not
# overlap the training slice.
val_dataset = SketchgraphDataset(500, 700)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=True)


In [None]:
# simple MLP for NNConv layer for including edge features in predictions
class nnconvnn(torch.nn.Module):
    """Single linear layer used as the edge network of an ``NNConv`` layer.

    Maps each edge feature vector of width ``edge_dim`` to
    ``input_dim * output_dim`` values.

    Args:
        input_dim (int): Node feature width going into the ``NNConv`` layer.
        output_dim (int): Node feature width coming out of the ``NNConv`` layer.
        edge_dim (int): Width of the edge feature vectors. Defaults to 31,
            the width produced by ``get_sketch_edge_attr``.
    """

    def __init__(self, input_dim, output_dim, edge_dim=31):
        super().__init__()

        self.simpleLin = torch.nn.Linear(edge_dim, input_dim * output_dim)

        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise the linear layer with its default initialisation."""
        self.simpleLin.reset_parameters()

    def forward(self, x):
        """Return ``simpleLin(x)``; the last dimension of ``x`` must be ``edge_dim``."""
        return self.simpleLin(x)

In [None]:
from torch_geometric.graphgym import init
class newrnnmodel(torch.nn.Module):
    """Predicts the class of the next construction step from a graph sequence.

    Every graph of the sequence is encoded with a stack of ``NNConv`` layers
    (each followed by ReLU), mean-pooled into one vector, passed through a
    linear layer and fed as a single step into a two-layer ``torch.nn.RNN``
    whose hidden state is carried from graph to graph. A final linear layer
    plus ``LogSoftmax`` turns each RNN output into log-probabilities.

    Args:
        input_dim: Accepted for interface compatibility but not used.
            NOTE(review): the first ``NNConv`` layer is hard-wired to 30 input
            features (the width produced by ``get_sketch_features(graph, 30)``);
            confirm before wiring this argument through.
        hidden_size (int): Width of the graph embedding and of the RNN state.
        output_dim_second (int): Number of output classes.
        num_nnconv_layers (int): Number of ``NNConv`` layers in the encoder.
    """

    def __init__(self, input_dim, hidden_size, output_dim_second, num_nnconv_layers):
        super().__init__()
        self.layer_count = 2
        self.hidden_size = hidden_size
        self.softmax = torch.nn.LogSoftmax(dim=-1)
        self.input_gcn = torch.nn.ModuleList()
        self.input_gcn.append(NNConv(30, hidden_size, nnconvnn(30, hidden_size)))
        for _ in range(num_nnconv_layers - 1):
            self.input_gcn.append(NNConv(hidden_size, hidden_size, nnconvnn(hidden_size, hidden_size)))

        # `batch_first` is left at its default; `forward` feeds the RNN a
        # 2-D (1, hidden_size) tensor per graph.
        self.rnn_class = torch.nn.RNN(hidden_size, hidden_size, self.layer_count)
        self.mlp_pre_rnn = torch.nn.Linear(hidden_size, hidden_size)
        self.mlp_class = torch.nn.Linear(hidden_size, output_dim_second)

        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise every sub-module, then Xavier-initialise the RNN and linear weights."""
        self.rnn_class.reset_parameters()
        # Reset as well, so that its bias does not survive a re-initialisation.
        self.mlp_pre_rnn.reset_parameters()
        self.mlp_class.reset_parameters()
        for conv in self.input_gcn:
            conv.reset_parameters()

        # Entries 0 and 1 of each layer's weight list are re-drawn with
        # Xavier-uniform; the remaining entries keep their default init.
        for layer_weights in self.rnn_class.all_weights:
            torch.nn.init.xavier_uniform_(layer_weights[0])
            torch.nn.init.xavier_uniform_(layer_weights[1])
        torch.nn.init.xavier_uniform_(self.mlp_pre_rnn.weight)
        torch.nn.init.xavier_uniform_(self.mlp_class.weight)

    def forward(self, graph_sequence):
        """Run the model over ``graph_sequence``.

        Args:
            graph_sequence: Iterable of graphs providing ``x``, ``edge_index``
                and ``edge_attr``, located on the same device as the model.

        Returns:
            Tuple ``(output, hidden)``: ``output`` stacks one row of
            log-probabilities per graph (an empty tensor for an empty
            sequence); ``hidden`` is the detached final RNN state.
        """
        # Work on whatever device the model lives on instead of relying on a
        # notebook-level global.
        model_device = self.mlp_class.weight.device
        hidden = torch.zeros((self.layer_count, self.hidden_size),
                             dtype=torch.float, device=model_device)

        step_outputs = []
        for g in graph_sequence:
            x = g.x
            for conv in self.input_gcn:
                x = F.relu(conv(x, g.edge_index, g.edge_attr))
            # All nodes belong to graph 0, so pooling yields a single row.
            batch_index = torch.zeros(g.x.size(0), dtype=torch.long, device=model_device)
            x = global_mean_pool(x, batch_index)
            x = self.mlp_pre_rnn(x)
            x, hidden = self.rnn_class(x, hidden)
            step_outputs.append(self.softmax(self.mlp_class(x)))

        if not step_outputs:
            return torch.tensor([]).to(model_device), hidden.detach()
        # One concatenation at the end instead of growing a tensor per step.
        return torch.cat(step_outputs), hidden.detach()

    def predict_next(self, sequence):
        """Return the arg-max class index of the prediction for the last graph of ``sequence``."""
        pred, _ = self(sequence)
        return torch.argmax(pred[-1], dim=0)
        

In [None]:
# training the model
# Output size 31+15: edge classes are encoded as 15 + edge_dict index (31
# edge labels), node classes use the indices below 15.
# NOTE(review): the first argument (15) is `input_dim`, which `newrnnmodel`
# does not use; its first layer is fixed to 30 input features.
rnn = newrnnmodel(15, 16, 31+15, 2).to(device)

optimizer_class = torch.optim.Adam(rnn.parameters(), lr=0.01)
loss_fn = F.nll_loss
epochs = 20
rnn.train()

# (training loop)
for e in range(epochs):
    loss_avg = 0
    count = 0
    acc = 0
    for batch in data_loader:
        count += 1
        batch.to(device)

        optimizer_class.zero_grad()
        # Call the module itself rather than `.forward` so registered hooks run.
        pred_class, _ = rnn(batch.seq[0])
        # The model emits one prediction per graph in `seq` (one per op),
        # while `seq_y` has one entry fewer, so the last prediction is dropped.
        loss_class = loss_fn(pred_class[:-1], batch.seq_y)
        pred_index = torch.argmax(pred_class[:-1], dim=1)
        acc += torch.sum(pred_index == batch.seq_y).item() / len(batch.seq_y)
        loss_class.backward()

        loss_avg += loss_class.item()

        optimizer_class.step()

    if count:
        print(loss_avg/count, "Acc: " + str(acc/count * 100) + "%")
    else:
        # An empty loader would otherwise end in a ZeroDivisionError.
        print("data_loader yielded no batches; nothing to train on")

In [None]:
# testing model performance on unseen graph sequences
rnn.eval()
acc = 0
count = 0
loss_avg = 0
# Evaluation never calls backward(), so gradient tracking is switched off to
# avoid building an autograd graph for every sequence.
with torch.no_grad():
    for batch in val_loader:
        count += 1
        batch.to(device)
        # Call the module itself rather than `.forward` so registered hooks run.
        pred_class, _ = rnn(batch.seq[0])
        # The last prediction has no target in `seq_y` and is dropped.
        loss_class = loss_fn(pred_class[:-1], batch.seq_y)
        pred_index = torch.argmax(pred_class[:-1], dim=1)
        acc += torch.sum(pred_index == batch.seq_y).item() / len(batch.seq_y)
        loss_avg += loss_class.item()
if count:
    print(loss_avg/count, "Acc: " + str(acc/count * 100) + "%")
else:
    # An empty loader would otherwise end in a ZeroDivisionError.
    print("val_loader yielded no batches; nothing to evaluate")