# Compute Subgraph Motif

## Imports

In [1]:
from typing import Dict
import signal
from contextlib import contextmanager
from pathlib import Path
import os
import sys
from joblib import Parallel, delayed
from collections import ChainMap
import pickle

In [2]:
import pymfinder
import pymfinder.mfinder.mfinder as cmfinder

In [3]:
sys.path.append(str(Path(os.path.abspath("")).parent))

import config
from dataset import load_ids
from dataset import _process_problem
from config import NODE_TYPE

## Constants

In [4]:
# Forwarded to pymfinder as its ``motifsize`` argument.
MOTIFSIZE = 3

# Forwarded to pymfinder as its ``nrandomizations`` argument.
NRADOMGRAPHS = 64

# Per-problem budget handed to ``time_limit`` (seconds, via ``signal.alarm``).
TIME_LIMIT = 100

In [5]:
# File listing the problem ids to analyse. The deepmath list is kept as a
# commented-out alternative; enable exactly one of the two assignments.
#ID_FILE = '../id_files/deepmath.txt'
ID_FILE = '../id_files/train.txt'

# load_ids is imported from the project's dataset module; presumably it
# returns a list of problem-id strings (it is sized with len() and iterated
# over further down) -- TODO confirm.
ids = load_ids(ID_FILE)

# Debug switch: uncomment to restrict the run to the first five problems.
#ids = ids[:5] # FIXME
print('Number of problems', len(ids))

Number of problems 5


## Helper

In [6]:
class TimeoutException(Exception):
    """Raised from the SIGALRM handler when a time-limited block overruns."""

@contextmanager
def time_limit(seconds):
    """Abort the enclosed ``with`` body once ``seconds`` have elapsed.

    A SIGALRM handler that raises ``TimeoutException`` is installed and
    ``signal.alarm(seconds)`` is armed for the duration of the block. On
    exit, whether normal or via an exception, the alarm is cancelled and the
    SIGALRM handler that was active before entry is put back, so the
    process-wide signal configuration does not leak out of the block.

    Args:
        seconds: Whole number of seconds passed to ``signal.alarm``.

    Raises:
        TimeoutException: Inside the ``with`` body if the alarm fires.

    Note:
        Relies on ``signal.signal``, which Python only allows from the main
        thread of the process.
    """
    def signal_handler(signum, frame):
        raise TimeoutException("Timed out!")

    # signal.signal returns the handler it replaces; keep it for restoration.
    previous_handler = signal.signal(signal.SIGALRM, signal_handler)
    try:
        signal.alarm(seconds)
        yield
    finally:
        # Cancel any pending alarm first so it cannot fire during cleanup.
        signal.alarm(0)
        # None means the old handler was not installed from Python and
        # cannot be reinstalled through signal.signal.
        if previous_handler is not None:
            signal.signal(signal.SIGALRM, previous_handler)

In [7]:
def compute_motifs(network, motifsize, nrandom_graphs) -> Dict[int, Dict[str, float]]:
    """Run pymfinder on ``network`` and summarise every motif it reports.

    Args:
        network: Graph handed to ``pymfinder.pymfinder`` unchanged. The caller
            in this file passes the transposed ``edge_index`` as nested lists,
            presumably one ``[source, target]`` pair per edge -- TODO confirm.
        motifsize: Forwarded as pymfinder's ``motifsize`` argument.
        nrandom_graphs: Forwarded as pymfinder's ``nrandomizations`` argument.

    Returns:
        A dict keyed by motif id. Each value is a dict with the motif's
        ``real`` attribute under ``'count'`` and its ``real_z`` attribute
        under ``'z-score'``.
    """
    stats = pymfinder.pymfinder(
        network,
        motifsize=motifsize,
        nrandomizations=nrandom_graphs,
        links=False,
    )

    return {
        motif_id: {'count': motif.real, 'z-score': motif.real_z}
        for motif_id, motif in stats.motifs.items()
    }

In [8]:
def analyse_graph(prob):
    """Compute motif statistics for one problem under the global time limit.

    The problem is loaded with ``_process_problem``, its ``edge_index`` is
    transposed and converted to nested Python lists, and the result is passed
    to ``compute_motifs`` inside a ``time_limit(TIME_LIMIT)`` block.

    Args:
        prob: Problem identifier; printed for progress and used as the key of
            the returned dict.

    Returns:
        A single-entry dict ``{prob: stats}``, where ``stats`` is the value
        returned by ``compute_motifs``, or ``-1`` if the computation raised
        ``TimeoutException`` or ``SystemError``.
    """
    print(prob)

    graph = _process_problem(prob, config.PROBLEM_DIR, remove_argument_node=False)
    edge_list = graph.edge_index.T.numpy().tolist()

    try:
        with time_limit(TIME_LIMIT):
            return {prob: compute_motifs(edge_list, MOTIFSIZE, NRADOMGRAPHS)}
    except (TimeoutException, SystemError):
        # mfinder reports being interrupted by the timeout as a SystemError,
        # so both exception types mean "no result within the time limit".
        return {prob: -1}

In [9]:
# The network motifs
#pymfinder.print_motifs(3, links=True)

In [10]:
'''
res = []
for i in ids:
    print(i)
    r = analyse_graph(i)
    res.append(r)
    
res
#'''

l16_msuhom_1
l16_msuhom_1
t17_pdiff_3
t17_pdiff_3
t46_intpro_1
t46_intpro_1
t12_circled1
t12_circled1
t20_zf_model
t20_zf_model


[{'l16_msuhom_1': {6: {'count': 567, 'z-score': -49.18978331532573},
   12: {'count': 1374, 'z-score': -28.14761509069853},
   36: {'count': 1441, 'z-score': -49.18978331532565},
   38: {'count': 76, 'z-score': 49.189783315325705},
   98: {'count': 0, 'z-score': -0.7302967433402214}}},
 {'t17_pdiff_3': -1},
 {'t46_intpro_1': {6: {'count': 431, 'z-score': -41.27522797568107},
   12: {'count': 973, 'z-score': -21.79757491787719},
   36: {'count': 2705, 'z-score': -41.27522797568103},
   38: {'count': 113, 'z-score': 41.275227975681055},
   98: {'count': 0, 'z-score': -0.8164965809277261}}},
 {'t12_circled1': {6: {'count': 333, 'z-score': -45.17964143284009},
   12: {'count': 792, 'z-score': -11.384199576606164},
   36: {'count': 1092, 'z-score': -45.17964143284002},
   38: {'count': 41, 'z-score': 45.17964143284008},
   98: {'count': 0, 'z-score': -0.6708203932499369}}},
 {'t20_zf_model': {6: {'count': 1155, 'z-score': -95.7172306081518},
   12: {'count': 2365, 'z-score': -50.37251316585

In [11]:
# Leave two cores free, but never request fewer than one worker: joblib
# rejects n_jobs=0 (which a 2-core machine would produce) and os.cpu_count()
# may return None when the core count cannot be determined.
result = Parallel(n_jobs=max(1, (os.cpu_count() or 1) - 2))(
    delayed(analyse_graph)(i) for i in ids
)
# Each worker returns a single-entry {problem: stats} dict; merge them into
# one dict keyed by problem id.
data = dict(ChainMap(*result)) # Convert list to dict of dict

In [12]:
# The output name encodes the run parameters so results from different
# settings do not overwrite each other.
output_name = f'graph_motif_{MOTIFSIZE}_nrandom_{NRADOMGRAPHS}_timelimit_{TIME_LIMIT}.pickle'

with open(output_name, 'wb') as handle:
    pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [13]:
#data

{'t20_zf_model': {6: {'count': 1155, 'z-score': -92.30708131164141},
  12: {'count': 2365, 'z-score': -65.8654411510935},
  36: {'count': 6112, 'z-score': -92.30708131164124},
  38: {'count': 258, 'z-score': 92.30708131164138},
  98: {'count': 0, 'z-score': -0.7302967433402214}},
 't12_circled1': {6: {'count': 333, 'z-score': -29.75824156727798},
  12: {'count': 792, 'z-score': -21.35444918512302},
  36: {'count': 1092, 'z-score': -29.75824156727794},
  38: {'count': 41, 'z-score': 29.75824156727797},
  98: {'count': 0, 'z-score': -0.44721359549995787}},
 't46_intpro_1': {6: {'count': 431, 'z-score': -33.64925207115981},
  12: {'count': 973, 'z-score': -36.388218409595034},
  36: {'count': 2705, 'z-score': -33.64925207115988},
  38: {'count': 113, 'z-score': 33.64925207115982},
  98: {'count': 0, 'z-score': -0.4472135954999579}},
 't17_pdiff_3': -1,
 'l16_msuhom_1': {6: {'count': 567, 'z-score': -25.03879343080629},
  12: {'count': 1374, 'z-score': -12.713096349510876},
  36: {'count':