# Testing the code

This notebook is used to flush out simple programming errors when writing code. It therefore focuses on the graph-building part of the code, and not on the testable part. Note that we assume in the code that the graph is connected, i.e., that there is a path between any two nodes in the graph. The case of non-connected graphs is more complex but, fortunately, it does not concern us.


## Creating a toy dataset

In [1]:
# Standard-library and third-party imports used throughout the notebook.
from importlib import reload
import sys
import os
import numpy as np
import pandas as pd
import random
# NOTE(review): hard-coded absolute path to one developer's checkout; the
# project-local imports below only resolve on a machine with this layout.
sys.path.append(os.path.abspath("/Users/hector/Documents/BGWAS2/COIN/Scripts"))
# Star imports: names used later without a visible definition (structure,
# graph, testables, check_itemtable, ProgressBar, chi2, ...) presumably come
# from these two modules -- TODO confirm.
from COIN import *
import graphExplo
# Reload graphExplo so that edits made on disk are picked up without
# restarting the kernel, then re-export its names.
reload(graphExplo)
from graphExplo import *

## Explore the graph
### One node

In [2]:
# Build the toy dataset and the graph structure explored by the next cells.
# NOTE(review): hard-coded absolute path to one developer's checkout; the
# toyDataset module only resolves on a machine with this layout.
sys.path.append(os.path.abspath("/Users/hector/Documents/BGWAS2/COIN/Tests"))
import toyDataset

Pop, neighbours, pattern, Pheno, edges = toyDataset.generateToyDataset()
# Significance level shared by the exploration cells below.
alpha = .05
# One length entry per row of `pattern`, all equal to 1.
Lengths = np.ones(pattern.shape[0])
G = structure(Pop, neighbours, pattern, Pheno, Lengths)
n1s, n2s = G.ns()
# Threshold computed from `alpha` rather than a second hard-coded .05, so the
# two cannot drift apart. `chi2` is presumably scipy.stats.chi2 re-exported by
# a star import -- TODO confirm.
TH = chi2.isf(alpha / 1, 1)

In [3]:
# Enumerate subgraphs starting from a single node (node 0) of the toy graph.
random.seed(24)
# `n` is the sum of both arrays returned by G.ns() in the setup cell.
n = int(n1s.sum() + n2s.sum())
nNodes = G.pattern.shape[0]
R = testables(alpha, nNodes, 10 ** 7)
S = graph(n = n)
explored = check_itemtable(0, G.pattern[0,:], R.itemtable)
# Only enumerate when the start node has not been explored yet. A bare `next`
# is merely a reference to the builtin and skips nothing, and there is no loop
# to `continue` here, so the skip is expressed as a guard.
if not explored:
    S.add_node(0, G.pattern[0,:], neighbours = G.neighbours[0],
               length = G.lengths[0])
    # Then we go on enumerating all pairs.
    # Note: a single node has no Vertex separator.
    R.enum(S, G, n1s, n2s, verbose = True, Lmax = 10)

### All nodes

In [4]:
# Enumerate subgraphs using every node of the toy graph as a start node.
# Relies on `alpha`, `n`, `nNodes`, `G`, `n1s` and `n2s` from the cells above.
pbar = ProgressBar()
R = testables(alpha, nNodes, 10 ** 7)
S = graph(n = n)
for s in pbar(np.arange(np.shape(G.pattern)[0])):
    if R.stop():
        break
    # Start each exploration from a fresh, empty subgraph.
    S = graph(n = n)
    explored = check_itemtable(s, G.pattern[s,:], R.itemtable)
    if explored:
        # `continue` (not the builtin `next`, which as a bare expression does
        # nothing) is what actually skips an already-explored start node.
        continue
    S.add_node(s, G.pattern[s,:], neighbours = G.neighbours[s],
               length = G.lengths[s])
    # Then we go on enumerating all pairs.
    # Note: a single node has no Vertex separator.
    R.enum(S, G, n1s, n2s, verbose = True, Lmax = 10)

  3% |##                                                                      |

k = 1000


  9% |######                                                                  |

k = 2000


 15% |##########                                                              |

k = 3000


 25% |##################                                                      |

k = 4000


 37% |##########################                                              |

k = 5000


 55% |#######################################                                 |

k = 6000


100% |########################################################################|


## Running an example where we know the truth

In [5]:
# Ground-truth dataset 1: enumerate without pruning and compare the number of
# subgraphs found against the known count.
import Ground_truth as ground_truth

alpha = 1
Pop, neighbours, pattern, Pheno, edges = ground_truth.generateData1()
Lengths = np.ones(pattern.shape[0])
G = structure(Pop, neighbours, pattern, Pheno, Lengths)
n1s, n2s = G.ns()
n = int(n1s.sum() + n2s.sum())
pbar = ProgressBar()
nNodes = G.pattern.shape[0]
R = testables(alpha, nNodes, 10 ** 7)
S = graph(n = n)
for s in pbar(np.arange(np.shape(G.pattern)[0])):
    if R.stop():
        break
    # Start each exploration from a fresh, empty subgraph.
    S = graph(n = n)
    explored = check_itemtable(s, G.pattern[s,:], R.itemtable)
    if explored:
        # `continue` (not the builtin `next`, which as a bare expression does
        # nothing) is what actually skips an already-explored start node.
        continue
    S.add_node(s, G.pattern[s,:], neighbours = G.neighbours[s],
               length = G.lengths[s])
    # Then we go on enumerating all pairs.
    # Note: a single node has no Vertex separator.
    R.enum(S, G, n1s, n2s, verbose = True, Lmax = 500, prune = False)
assert R.subgraphs.shape[0] == 11, R.subgraphs.shape[0]

100% |########################################################################|


In [6]:
# Ground-truth dataset 2: enumerate without pruning and compare the number of
# subgraphs found against the known count.
# Relies on `ground_truth` and `alpha` from the previous cell.
Pop, neighbours, pattern, Pheno, edges = ground_truth.generateData2()
Lengths = np.ones(pattern.shape[0])
G = structure(Pop, neighbours, pattern, Pheno, Lengths)
# The node count must be read after `G` is rebuilt; reading it earlier picks
# up the node count of the previous dataset's graph.
nNodes = G.pattern.shape[0]
n1s, n2s = G.ns()
n = int(n1s.sum() + n2s.sum())
pbar = ProgressBar()
R = testables(alpha, nNodes, 10 ** 7)
S = graph(n = n)
for s in pbar(np.arange(np.shape(G.pattern)[0])):
    if R.stop():
        break
    # Start each exploration from a fresh, empty subgraph.
    S = graph(n = n)
    explored = check_itemtable(s, G.pattern[s,:], R.itemtable)
    if explored:
        # `continue` (not the builtin `next`, which as a bare expression does
        # nothing) is what actually skips an already-explored start node.
        continue
    S.add_node(s, G.pattern[s,:], neighbours = G.neighbours[s],
               length = G.lengths[s])
    # Then we go on enumerating all pairs.
    # Note: a single node has no Vertex separator.
    R.enum(S, G, n1s, n2s, verbose = True, Lmax = 500, prune = False)
assert R.subgraphs.shape[0] == 7, R.subgraphs.shape[0]

100% |########################################################################|


In [7]:
# Ground-truth dataset 3: enumerate without pruning and compare the number of
# subgraphs found against the known count.
# Relies on `ground_truth` and `alpha` from an earlier cell.
Pop, neighbours, pattern, Pheno, edges = ground_truth.generateData3()
Lengths = np.ones(pattern.shape[0])
G = structure(Pop, neighbours, pattern, Pheno, Lengths)
nNodes = G.pattern.shape[0]
n1s, n2s = G.ns()
n = int(n1s.sum() + n2s.sum())
pbar = ProgressBar()
R = testables(alpha, nNodes, 10 ** 7)
S = graph(n = n)
for s in pbar(np.arange(np.shape(G.pattern)[0])):
    if R.stop():
        break
    # Start each exploration from a fresh, empty subgraph.
    S = graph(n = n)
    explored = check_itemtable(s, G.pattern[s,:], R.itemtable)
    if explored:
        # `continue` (not the builtin `next`, which as a bare expression does
        # nothing) is what actually skips an already-explored start node.
        continue
    S.add_node(s, G.pattern[s,:], neighbours = G.neighbours[s],
               length = G.lengths[s])
    # Then we go on enumerating all pairs.
    # Note: a single node has no Vertex separator.
    R.enum(S, G, n1s, n2s, verbose = True, Lmax = 500, prune = False)
assert R.subgraphs.shape[0] == 12, R.subgraphs.shape[0]

100% |########################################################################|


In [8]:
# Ground-truth dataset 4: enumerate without pruning and compare the number of
# subgraphs found against the known count.
# Relies on `ground_truth` from an earlier cell.
Pop, neighbours, pattern, Pheno, edges = ground_truth.generateData4()
alpha = 1
Lengths = np.ones(pattern.shape[0])
G = structure(Pop, neighbours, pattern, Pheno, Lengths)
nNodes = G.pattern.shape[0]
n1s, n2s = G.ns()
n = int(n1s.sum() + n2s.sum())
pbar = ProgressBar()
R = testables(alpha, nNodes, 10 ** 7)
S = graph(n = n)
for s in pbar(np.arange(np.shape(G.pattern)[0])):
    if R.stop():
        break
    # Start each exploration from a fresh, empty subgraph.
    S = graph(n = n)
    explored = check_itemtable(s, G.pattern[s,:], R.itemtable)
    if explored:
        # `continue` (not the builtin `next`, which as a bare expression does
        # nothing) is what actually skips an already-explored start node.
        continue
    S.add_node(s, G.pattern[s,:], neighbours = G.neighbours[s],
               length = G.lengths[s])
    # Then we go on enumerating all pairs.
    # Note: a single node has no Vertex separator.
    R.enum(S, G, n1s, n2s, verbose = True, Lmax = 500, prune = False)
assert R.subgraphs.shape[0] == 14, R.subgraphs.shape[0]

100% |########################################################################|


### Checking that we do find closed subgraphs

In [9]:
# Enumerate with pruning on a small toy dataset, then check that no neighbour
# of a reported subgraph could be absorbed without changing its pattern.
# NOTE(review): `alpha` is not set in this cell; it is whatever an earlier
# cell left in the kernel namespace.
Pop, neighbours, pattern, Pheno, edges = toyDataset.generateToyDataset(n=2, N = 10)
Lengths = np.ones(pattern.shape[0])
# Replace the generated Pop with four zeros. The builtin `int` is used as the
# dtype because the `np.int` alias no longer exists in NumPy >= 1.24.
Pop = np.zeros((4,), dtype = int)
G = structure(Pop, neighbours, pattern, Pheno, Lengths)
n1s, n2s = G.ns()
n = int(n1s.sum() + n2s.sum())
pbar = ProgressBar()
nNodes = G.pattern.shape[0]
R = testables(alpha, nNodes, 10 ** 7)
S = graph(n = n)
for s in pbar(np.arange(np.shape(G.pattern)[0])):
    if R.stop():
        break
    # Start each exploration from a fresh, empty subgraph.
    S = graph(n = n)
    explored = check_itemtable(s, G.pattern[s,:], R.itemtable)
    if explored:
        # `continue` (not the builtin `next`, which as a bare expression does
        # nothing) is what actually skips an already-explored start node.
        continue
    S.add_node(s, G.pattern[s,:], neighbours = G.neighbours[s],
               length = G.lengths[s])
    # Then we go on enumerating all pairs.
    # Note: a single node has no Vertex separator.
    R.enum(S, G, n1s, n2s, verbose = True, Lmax = 500, prune = True)

for subgraph in R.subgraphs:
    # Identity test: `== None` would go through any custom __eq__.
    if subgraph is None:
        continue
    for ne in subgraph.neighbours:
        # A neighbour is "in the closure" when OR-ing its pattern into the
        # subgraph's `ys` leaves every entry of `ys` unchanged.
        in_closure = all((G.pattern[int(ne)] | subgraph.ys) == subgraph.ys)
        assert not in_closure

100% |########################################################################|


In [12]:
# Enumerate without pruning on a small toy dataset, then check that no
# neighbour of a reported subgraph could be absorbed without changing its
# pattern.
# NOTE(review): `alpha` is not set in this cell; it is whatever an earlier
# cell left in the kernel namespace.
# The first returned value is bound to `Pop` (it was bound to `neighbours`
# twice by mistake); `Pop` is replaced just below in any case.
Pop, neighbours, pattern, Pheno, edges = toyDataset.generateToyDataset(n = 2, N = 20)
Lengths = np.ones(pattern.shape[0])
# Replace the generated Pop with four zeros. The builtin `int` is used as the
# dtype because the `np.int` alias no longer exists in NumPy >= 1.24.
Pop = np.zeros((4,), dtype = int)
G = structure(Pop, neighbours, pattern, Pheno, Lengths)
n1s, n2s = G.ns()
n = int(n1s.sum() + n2s.sum())
pbar = ProgressBar()
nNodes = G.pattern.shape[0]
R = testables(alpha, nNodes, 10 ** 7)
S = graph(n = n)
for s in pbar(np.arange(np.shape(G.pattern)[0])):
    if R.stop():
        break
    # Start each exploration from a fresh, empty subgraph.
    S = graph(n = n)
    explored = check_itemtable(s, G.pattern[s,:], R.itemtable)
    if explored:
        # `continue` (not the builtin `next`, which as a bare expression does
        # nothing) is what actually skips an already-explored start node.
        continue
    S.add_node(s, G.pattern[s,:], neighbours = G.neighbours[s],
               length = G.lengths[s])
    # Then we go on enumerating all pairs.
    # Note: a single node has no Vertex separator.
    R.enum(S, G, n1s, n2s, verbose = True, Lmax = 500, prune = False)

for subgraph in R.subgraphs:
    # Identity test: `== None` would go through any custom __eq__.
    if subgraph is None:
        continue
    for ne in subgraph.neighbours:
        # A neighbour is "in the closure" when OR-ing its pattern into the
        # subgraph's `ys` leaves every entry of `ys` unchanged.
        in_closure = all((G.pattern[int(ne)] | subgraph.ys) == subgraph.ys)
        assert not in_closure

100% |########################################################################|
