# Importing some utilities

In [None]:
import sys
sys.path.append('../python/')
from json2graph import jsonFile2graph
import networkx as nx
from vocabulary import Vocabulary
from graphUtils import plot_graph, graph2data, data2graph
from neuralmodel import GNN_MoRec

# Allowed configuration values. The chosen pair is embedded in the file names
# of the vocabularies, data splits and checkpoint loaded further down.
VALID_GENERATORS = ('VIATRA', 'RANDOMEMF', 'ALLOY', 'RAND')
VALID_MODEL_TYPES = ('Ecore', 'RDS', 'Yakindu')

# generator must belong to {VIATRA,RANDOMEMF,ALLOY,RAND}
generator = 'RAND'
# modelType must belong to {Ecore,RDS,Yakindu}
modelType = 'Ecore'

# Fail fast on a typo instead of hitting a missing-file error in a later cell.
if generator not in VALID_GENERATORS:
    raise ValueError('generator must be one of %s, got %r' % (VALID_GENERATORS, generator))
if modelType not in VALID_MODEL_TYPES:
    raise ValueError('modelType must be one of %s, got %r' % (VALID_MODEL_TYPES, modelType))

# Loading vocabularies and train/test/val used in the paper

Loading the vocabularies. Each of these objects contains a dictionary that maps every node type or edge type to an integer in $\{0, \dots, n-1\}$, where $n$ is the size of the vocabulary.

In [None]:
import json

# Start from empty vocabularies and fill both lookup tables from the JSON files.
vocab_edges = Vocabulary()
vocab_nodes = Vocabulary()

# The edge and node vocabularies are stored in files that differ only by the
# 'edges' / 'nodes' part of the name, so both are handled by the same loop.
for kind, vocab in (('edges', vocab_edges), ('nodes', vocab_nodes)):
    vocab_path = '../Vocabularies/vocab_' + kind + '-' + modelType + '-' + generator + '.json'
    with open(vocab_path) as json_file:
        data = json.load(json_file)
    # The JSON content is used as the word -> id table as-is ...
    vocab.word2id_names = data
    # ... and inverted to obtain the id -> word table.
    vocab.id2word_names = {idx: word for word, idx in data.items()}

Loading the train, validation, and test splits used in the paper.

In [None]:
import torch

# All three split files share the same '-<modelType>-<generator>' suffix.
split_suffix = '-' + modelType + '-' + generator

# Load the splits in the order Train, Val, Test.
train, val, test = (
    torch.load('../TrainTestValSplits/' + split_name + split_suffix)
    for split_name in ('Train', 'Val', 'Test')
)

# Loading the trained model

Loading the model that was used to report the results in the paper:

In [None]:
# Build the network on the CPU. The positional arguments (64, 64, 0.0) are
# defined by neuralmodel.GNN_MoRec; presumably they have to match the values
# used when the checkpoint was trained -- TODO confirm.
model2 = GNN_MoRec(64, 64, 0.0, vocab_nodes, vocab_edges).cpu()

# The checkpoint lives under ../models/<modelType>/ and is mapped onto the CPU
# so that loading does not depend on the device it was saved from.
path_to_model = f'../models/{modelType}/{modelType}-{generator}-GNN'
cpu_device = torch.device('cpu')
checkpoint = torch.load(path_to_model, map_location=cpu_device)
model2.load_state_dict(checkpoint['model_state_dict'])

# Training metadata stored alongside the weights.
epoch, loss = checkpoint['epoch'], checkpoint['loss']

# Switch to evaluation mode; as the last expression this also displays the model.
model2.eval()

# Performing C2ST

Evaluating the model over the test set and reporting the accuracy.

In [None]:
# PyTorch Geometric 2.0 moved DataLoader to torch_geometric.loader and
# deprecated the old location; fall back to it for older installations.
try:
    from torch_geometric.loader import DataLoader
except ImportError:
    from torch_geometric.data import DataLoader

# One graph per batch, so len(test_loader) is the number of test graphs.
# The accuracy does not depend on the visiting order, hence no shuffling.
test_loader = DataLoader(test, batch_size=1, num_workers=5, shuffle=False)

model2.eval()
count = 0  # number of correctly classified test graphs

# Pure inference: disable autograd so no computation graph is built.
with torch.no_grad():
    for data in test_loader:
        score = model2(data.x.cpu(), data.edge_index.cpu(),
                       torch.squeeze(data.edge_attr, dim=1).cpu(), data.batch.cpu())
        # Threshold the model output at 0.5 to obtain a hard 0/1 label.
        pred = 1 if score[0].item() > 0.5 else 0
        if pred == data.y.long().item():
            count = count + 1

print('Acc', count/len(test_loader))

Performing the C2ST (classifier two-sample test) using the accuracy and the size of the test set.

In [None]:
from C2ST import C2ST_pvalue

# len(test_loader) counts batches; the loader above uses batch_size=1, so this
# is also the number of test samples.
n_test = len(test_loader)
# Fraction of test samples the classifier labelled correctly.
acc = count / n_test

print('p-value:', C2ST_pvalue(acc, n_test))
print('samples', n_test)

# Interpreting the test

For every graph in the test set that is synthetic and that the model confidently classifies as synthetic, the attention map is plotted over the graph.

In [None]:
from interpretation import heatMap, plot_graph_attention, importantSubgraph, getMapAttention
# A graph is plotted only when its label is 0 and the model output is below
# this value, i.e. the model is confident about class 0 (the class this
# notebook's text refers to as synthetic).
CONFIDENCE_THRESHOLD = 0.1

i = 0  # number of graphs plotted so far

# Pure inference: disable autograd so no computation graph is built.
with torch.no_grad():
    for data in test:
        G = data2graph(data, vocab_nodes, vocab_edges)
        # Batch vector for a single graph: one zero entry per element of G.
        batch = torch.zeros(len(G), dtype=torch.long)

        x = data.x.cpu()
        edge_index = data.edge_index.cpu()
        edge_attr = torch.squeeze(data.edge_attr.cpu(), dim=1)

        pred = model2(x, edge_index, edge_attr, batch.cpu())
        if pred[0].item() < CONFIDENCE_THRESHOLD and data.y.item() == 0:
            # Attention values are only needed for graphs that get plotted,
            # so they are computed after the filter rather than for every graph.
            atts = model2.getAttentions(x, edge_index, edge_attr, batch.cpu())
            map_colors = getMapAttention(G, atts)

            plot_graph_attention(G, map_colors)
            # Alternative visualisations (kept for reference):
            #plot_graph_attention(importantSubgraph(G, atts.detach().cpu().numpy(), 0.2, 2),map_colors)
            #heatMap(G,atts,str(i),'./interpretation/'+modelType+'/'+generator+'/')
            #heatMap(importantSubgraph(G, atts.detach().cpu().numpy(), 0.2, 2),atts,str(i),
            #        './interpretation/'+modelType+'/'+generator+'/subgraph/')
            i = i + 1
            print('--'*50)