# Testing the code

This notebook is used to flush out simple programming errors when writing code. It therefore focuses on the graph-building part of the code, and not on the testable part. Note that we assume in the code that the graph is connected, i.e., that there is a path between any two nodes in the graph. The case of non-connected graphs is more complex but, fortunately, it does not concern us.


# Creating a toy dataset

In [1]:
import sys, os, random
import numpy as np
import pandas as pd
from caldera.Exploration._Subgraphs import *
from caldera.Exploration._ExploreDFS import *

In [2]:
import caldera.data._toyData as toyDataset
# Generate the toy dataset and wrap it in the `structure` object used by
# the rest of the notebook.
Pop, neighbours, pattern, Pheno, edges = toyDataset.generateToyDataset()
Lengths = np.full(pattern.shape[0], 1.0)  # a length of 1 for every row of `pattern`
G = structure(Pop, neighbours, pattern, Pheno, Lengths)
n1s, n2s = G.ns()
nNodes = G.lengths.shape[0]

# Threshold TH: inverse survival function at alpha with 1 as the second
# argument (presumably scipy's chi2 with one degree of freedom -- confirm
# where `chi2` is imported from). The `/ 1` divisor leaves alpha unchanged.
alpha = 10 ** -5
TH = chi2.isf(alpha / 1, 1)

## Helper functions
### Start

In [3]:
# Reproducibly sample 20 node indices and keep only those that have at
# least one entry in G.neighbours.
random.seed(209)
nodes = [
    idx
    for idx in random.sample(range(nNodes), 20)
    if len(G.neighbours[idx]) > 0
]

# Build one starting subgraph per retained node.
C = [
    create_Graph(idx, G.Pop, G.pattern, G.neighbours, G.lengths, TH, n1s, n2s)
    for idx in nodes
]
assert len(C) == len(nodes)

# Keep a subgraph when its Env reaches TH or when it is not flagged prunable.
C = np.array(
    [sub for sub in C if sub.Env >= TH or not sub.prunable],
    dtype=graph,
)
assert C.shape[0] <= len(nodes)

### Childrens

In [4]:
# Expand every retained starting subgraph and check that each child it
# yields has an Env of at least TH.
for S in C:
    # Fix: the last two arguments used to be `n1s, n1s`. Every other call in
    # this notebook (create_Graph, new_graph, start, enum) passes the pair as
    # `n1s, n2s`, so the duplicated `n1s` was a typo.
    childrens = S.Childrens(G.Pop, G.pattern, G.neighbours, G.lengths, TH, n1s, n2s)
    for child in childrens:
        assert child.Env >= TH, "child.Env fell below TH"

## Exploring

(Note that adding an `Lmax` forces the exploration to behave somewhat like a BFS.)

In [5]:
# Start from node 0 and run a bounded exploration (Lmax=10, kmax=1000).
S = graph()
S.new_graph(
    0,
    G.Pop, G.pattern, G.neighbours, G.lengths,
    TH, n1s, n2s,
)
sols = solutions(0.05, nNodes, Lmax=10, kmax=1000)
sols.enum(S, G, True, n1s, n2s)  # meaning of the `True` flag is not visible here
print("Done")

Done


## Check

In [6]:
# Same exploration as before, but started from node 2 and with a much
# larger Lmax (500).
S = graph()
S.new_graph(
    2,
    G.Pop, G.pattern, G.neighbours, G.lengths,
    TH, n1s, n2s,
)
sols = solutions(0.05, nNodes, Lmax=500, kmax=1000)
sols.enum(S, G, True, n1s, n2s)  # meaning of the `True` flag is not visible here
print("Done")

Done


In [7]:
# Full run: build the start candidates over every node index, using the
# threshold held by `sols`, then enumerate from each candidate in turn.
sols = solutions(0.05, nNodes, Lmax=500, kmax=100_000)
candidates = start(
    range(nNodes),
    G.Pop, G.pattern, G.neighbours, G.lengths,
    sols.TH, n1s, n2s,
)
for S in candidates:
    sols.enum(S, G, True, n1s, n2s)

k = 1000
k = 2000
k = 3000
k = 4000
k = 5000
k = 6000
