# Data: export the DGL MUTAG dataset to GLI format

In [265]:
import json 
import numpy as np
import torch
import scipy.sparse as sparse
import dgl
from gli.io import save_heterograph, Attribute

In [266]:
# Instantiate DGL's MUTAG dataset and take its first element as the graph
# that the remaining cells read from.
dataset = dgl.data.MUTAGDataset()
graph = dataset[0]

Done loading data from cached files.


In [269]:
# For every node type in the graph, wrap each stored node feature in a GLI
# `Attribute` (name + NumPy array) and record how many nodes the type has.
# The per-attribute prints log the attribute name and array shape.
node_groups = graph.ntypes
node_attrs = {ntype: [] for ntype in node_groups}
num_nodes_dict = {}
for ntype, ntype_attributes in node_attrs.items():
    ntype_features = graph.nodes[ntype].data
    for scheme_key in graph.node_attr_schemes(ntype):
        attr_name = str(scheme_key)
        attr_array = ntype_features[attr_name].numpy()
        print(f'Adding attribute {attr_name} to {ntype}')
        print(attr_array.shape)
        ntype_attributes.append(Attribute(attr_name, attr_array))
    num_nodes_dict[ntype] = graph.num_nodes(ntype)

Adding attribute _ID to SCHEMA
(3687,)
Adding attribute _ID to _Literal
(1104,)
Adding attribute _ID to bond
(9317,)
Adding attribute label to d
(9529,)
Adding attribute labels to d
(9529,)
Adding attribute test_mask to d
(9529,)
Adding attribute train_mask to d
(9529,)
Adding attribute _ID to d
(9529,)
Adding attribute _ID to hasStructure
(3526,)


In [287]:
# Sanity check: pull the 'label' feature of node type 'd' as a NumPy array
# and display its length.
arr = graph.nodes['d'].data['label'].numpy()
len(arr)

9529

In [270]:
# Same collection pass as for nodes, but keyed by canonical edge type
# (src_type, relation, dst_type): every stored edge feature becomes a GLI
# `Attribute`, and its name and shape are printed as it is added.
edge_groups = graph.canonical_etypes
edge_attrs = {}
for canonical_etype in edge_groups:
    etype_features = graph.edges[canonical_etype].data
    etype_attributes = []
    for scheme_key in graph.edge_attr_schemes(canonical_etype):
        attr_name = str(scheme_key)
        attr_array = etype_features[attr_name].numpy()
        print(f'Adding attribute {attr_name} to {canonical_etype}')
        print(attr_array.shape)
        etype_attributes.append(Attribute(attr_name, attr_array))
    edge_attrs[canonical_etype] = etype_attributes

Adding attribute _ID to ('SCHEMA', '22-rdf-syntax-ns#type', 'SCHEMA')
(3526,)
Adding attribute _ID to ('SCHEMA', 'owl#disjointWith', 'SCHEMA')
(6,)
Adding attribute _ID to ('SCHEMA', 'rdf-schema#domain', 'SCHEMA')
(19,)
Adding attribute _ID to ('SCHEMA', 'rdf-schema#range', 'SCHEMA')
(4,)
Adding attribute _ID to ('SCHEMA', 'rdf-schema#subClassOf', 'SCHEMA')
(138,)
Adding attribute _ID to ('SCHEMA', 'rev-22-rdf-syntax-ns#type', 'SCHEMA')
(3526,)
Adding attribute _ID to ('SCHEMA', 'rev-22-rdf-syntax-ns#type', 'bond')
(9317,)
Adding attribute _ID to ('SCHEMA', 'rev-22-rdf-syntax-ns#type', 'd')
(9529,)
Adding attribute _ID to ('SCHEMA', 'rev-owl#disjointWith', 'SCHEMA')
(6,)
Adding attribute _ID to ('SCHEMA', 'rev-rdf-schema#domain', 'SCHEMA')
(19,)
Adding attribute _ID to ('SCHEMA', 'rev-rdf-schema#range', 'SCHEMA')
(4,)
Adding attribute _ID to ('SCHEMA', 'rev-rdf-schema#subClassOf', 'SCHEMA')
(138,)
Adding attribute _ID to ('_Literal', 'rev-amesTestPositive', 'd')
(340,)
Adding attribute

In [271]:
# Build, for every canonical edge type, a (num_edges, 2) int32 array whose
# i-th row is [source node ID, destination node ID] of the i-th edge.
#
# Edges are requested in edge-ID order ('eid'). The edge attributes collected
# from `graph.edges[etype].data` are indexed by edge ID, so the edge list must
# use the same ordering for row i of an attribute to describe row i of the
# edge array. Sorting by ('srcdst') would silently break that correspondence.
#
# A single `graph.edges` call returns both endpoints, and `np.stack` pairs
# them column-wise without a per-edge Python loop.
edge = {}
for edge_group in edge_groups:
    src_tensor, dst_tensor = graph.edges(etype=edge_group, order='eid')
    edge_matrix = np.stack(
        [src_tensor.numpy(), dst_tensor.numpy()], axis=1
    ).astype(np.int32)
    edge[edge_group] = edge_matrix

In [272]:
# BibTeX entry stored with the dataset; adjacent literals are concatenated by
# the parser, so this is one string with embedded "\n" separators.
mutag_citation = (
    "@article{Debnath1991StructureactivityRO,\n"
    "title={Structure-activity relationship of mutagenic aromatic and "
    "heteroaromatic nitro compounds. Correlation with molecular orbital "
    "energies and hydrophobicity.},\n"
    "author={Asim Kumar Debnath and R L Compadre and Gargi Debnath and "
    "Alan J. Shusterman and Corwin Hansch},\n"
    "journal={Journal of medicinal chemistry},\n"
    "year={1991},\n"
    "volume={34 2},\n"
    "pages={786-97},\n"
    "url={https://api.semanticscholar.org/CorpusID:19990980}}"
)

# Hand the collected edges, attributes and node counts to GLI. The call is the
# cell's last expression, so its return value is displayed as the output.
save_heterograph(
    name="MUTAG",
    edge=edge,
    node_attrs=node_attrs,
    num_nodes_dict=num_nodes_dict,
    edge_attrs=edge_attrs,
    description="MUTAG Dataset.",
    citation=mutag_citation,
)

{'description': 'MUTAG Dataset.',
 'citation': '@article{Debnath1991StructureactivityRO,\ntitle={Structure-activity relationship of mutagenic aromatic and heteroaromatic nitro compounds. Correlation with molecular orbital energies and hydrophobicity.},\nauthor={Asim Kumar Debnath and R L Compadre and Gargi Debnath and Alan J. Shusterman and Corwin Hansch},\njournal={Journal of medicinal chemistry},\nyear={1991},\nvolume={34 2},\npages={786-97},\nurl={https://api.semanticscholar.org/CorpusID:19990980}}',
 'data': {'Node': {'SCHEMA': {'_ID': {'description': '',
     'type': 'int',
     'format': 'Tensor',
     'file': 'MUTAG__heterograph__9cea5bb4360603ef4117ff6b4c6c0103.npz',
     'key': 'Node_SCHEMA__ID'}},
   '_Literal': {'_ID': {'description': '',
     'type': 'int',
     'format': 'Tensor',
     'file': 'MUTAG__heterograph__9cea5bb4360603ef4117ff6b4c6c0103.npz',
     'key': 'Node__Literal__ID'}},
   'bond': {'_ID': {'description': '',
     'type': 'int',
     'format': 'Tensor',
   

In [288]:
# GLI loading helpers; only read_gli_graph is used in this cell.
from gli.dataloading import read_gli_graph, read_gli_task, combine_graph_and_task

# Load the graph back through GLI from ./metadata.json — presumably the
# metadata written by the save_heterograph cell; confirm the working directory.
g = read_gli_graph("./metadata.json")


MUTAG Dataset.
