##### Imports

In [1]:
import sys, os
from digraph import *
from equivalenceClass import *
from topoSorts import *
from algorithmTime import *
from recursiveFormula import *
from naiveFormula import *
import pandas as pd
#TODO: Keep naming consistent between camelCase and snake_case; check which convention is used in Python (PEP 8 recommends snake_case for functions and variables).

# Variable to store the number of hits in the memoization table

### Testing functions

In [2]:
def assertEquivalenceClassesForNode(dag: nx.DiGraph, feature_node, all_topo_sorts: List[List[Any]], timing_dict: Dict[str, Dict[str, float]]):
    """Check that the naive and recursive formulas agree for ``feature_node``.

    Both approaches are run and timed, and their results (mappings from an
    equivalence-class hash to an indexable value whose item 0 is a
    representative topological sort and item 1 is the class size) are compared.

    Args:
        dag: Graph under test.
        feature_node: Node whose equivalence classes are computed.
        all_topo_sorts: Topological sorts of ``dag`` fed to the naive approach.
        timing_dict: Results dictionary, updated in place.
            ``timing_dict[feature_node]`` must already exist; the keys
            'Naive Formula', 'Recursive Formula' and
            'Number of equivalence classes' are written into it.

    Raises:
        AssertionError: If the number of classes differs between the two
            approaches or from ``numberOfEquivalenceClasses``, if a naive
            class is missing from the recursive result, or if the sizes of a
            class differ.
    """
    nodes_classification = {}
    unr_roots = classifyNodes(dag, feature_node, nodes_classification)
    hasher = TopoSortHasher(nodes_classification)

    # Naive approach
    start_time = time.time()
    naiveClassesSizes = naiveEquivalenceClassesSizes(all_topo_sorts, feature_node, hasher)
    end_time = time.time()
    timing_dict[feature_node]['Naive Formula'] = end_time - start_time

    # Recursive approach
    start_time = time.time()
    recursiveClassesSizes = recursiveEquivalenceClassesSizes(dag, unr_roots, hasher, feature_node, nodes_classification)
    end_time = time.time()
    timing_dict[feature_node]['Recursive Formula'] = end_time - start_time

    naiveEqClasses = len(naiveClassesSizes.keys())
    recursiveEqClasses = len(recursiveClassesSizes.keys())
    timing_dict[feature_node]['Number of equivalence classes'] = naiveEqClasses

    # The two checks are independent: previously they were joined with `and`,
    # so a naive/recursive mismatch was silently accepted whenever the
    # closed-form count happened to match the naive one.
    if naiveEqClasses != recursiveEqClasses:
        raise AssertionError(f"The number of equivalence classes is different. \n Naive Approach: {naiveEqClasses}, Recursive Approach: {recursiveEqClasses} \n Feature Node: {feature_node}")

    expectedEqClasses = numberOfEquivalenceClasses(dag, feature_node)
    if expectedEqClasses != naiveEqClasses:
        raise AssertionError(f"The number of equivalence classes does not match the expected count. \n Expected: {expectedEqClasses}, Naive Approach: {naiveEqClasses} \n Feature Node: {feature_node}")

    assertTopoSortsAndEquivalenceClasses(dag, feature_node, recursiveClassesSizes)

    # Assert that each equivalence class has the same number of elements.
    for eqClassHash in naiveClassesSizes.keys():
        clSize1 = naiveClassesSizes[eqClassHash][1]
        clTopo1 = naiveClassesSizes[eqClassHash][0]
        try:
            recursiveEntry = recursiveClassesSizes[eqClassHash]
        except KeyError:
            # `from None`: the KeyError adds nothing beyond the message below.
            raise AssertionError(f"The equivalence class {eqClassHash} is not present in the recursive approach. \n Naive Approach: Topo {clTopo1}, Size {clSize1} \n Feature Node: {feature_node}") from None
        clSize2 = recursiveEntry[1]
        clTopo2 = recursiveEntry[0]
        if clSize1 != clSize2:
            raise AssertionError(f"The sizes of the equivalence classes are not equal. \n Naive Approach: Topo {clTopo1}, Size {clSize1} \n Recursive Approach: Topo {clTopo2}, Size {clSize2} \n Feature Node: {feature_node}")

#TODO: Find a better algorithm than all_topological_sorts; enumerating every topological sort takes too much time. The paper mentions a dynamic programming approach, maybe implement that.

def assertEquivClassesForDag(dag: nx.DiGraph, nodesToEvaluate = None, allSorts = None) -> Dict[Any, Any]:
    """Run the naive-vs-recursive equivalence-class check on nodes of ``dag``.

    Args:
        dag: Graph under test.
        nodesToEvaluate: Nodes to check; defaults to every node of ``dag``.
        allSorts: Precomputed topological sorts of ``dag``; when omitted they
            are enumerated with ``nx.all_topological_sorts``.

    Returns:
        A dictionary with the keys 'Time Of Topological Sorts' and
        'Number of Topological Sorts', plus one entry per evaluated node
        holding the per-node results written by
        ``assertEquivalenceClassesForNode``.

    Raises:
        AssertionError: If the number of topological sorts differs from
            ``allTopoSorts(dag)``, or if any per-node check fails.
    """
    timing_dict = {}

    # Measure time for all topological sorts
    start_time = time.time()
    all_topo_sorts = allSorts if allSorts is not None else list(nx.all_topological_sorts(dag))
    end_time = time.time()
    timing_dict['Time Of Topological Sorts'] = end_time - start_time
    timing_dict['Number of Topological Sorts'] = len(all_topo_sorts)

    # Validate the count outside the timed window so the reported time only
    # covers obtaining the topological sorts.
    expectedCount = allTopoSorts(dag)
    if len(all_topo_sorts) != expectedCount:
        raise AssertionError(f"The number of topological sorts is different. \n Enumerated: {len(all_topo_sorts)}, allTopoSorts: {expectedCount}")

    nodesToEvaluate = nodesToEvaluate if nodesToEvaluate is not None else list(dag.nodes)
    for node in nodesToEvaluate:
        # The per-node checker expects its entry to exist already.
        timing_dict[node] = {}
        assertEquivalenceClassesForNode(dag, node, all_topo_sorts, timing_dict)

    return timing_dict

## Examples

In [3]:
# Run the naive-vs-recursive consistency check on one small instance of each
# graph family; any disagreement raises AssertionError.
numNodes = 7

# Graph built by emptyGraph(numNodes).
emptyTestGraph = emptyGraph(numNodes)
resEmptyGraph = assertEquivClassesForDag(emptyTestGraph)

# Graph built by naiveBayes(numNodes).
naiveBayesTest = naiveBayes(numNodes)
resNaiveBayes = assertEquivClassesForDag(naiveBayesTest)

# Graph built by naiveBayesWithPath(numNodes, lengthOfPath).
lengthOfPath = 4
naiveBayesWithPathTest = naiveBayesWithPath(numNodes, lengthOfPath)
resNaiveBayesWithPath = assertEquivClassesForDag(naiveBayesWithPathTest)

# NOTE(review): numNodes is rebound to 3 here and passed as the FIRST argument
# of multiplePaths, while the timing code in this file calls
# multiplePaths(<number of paths>, <path length>). Both values are 3, so the
# result is the same either way, but confirm the intended argument order.
numberOfPaths = 3
numNodes = 3
multiplePathsTest = multiplePaths(numNodes, numberOfPaths)
resMultiplePaths = assertEquivClassesForDag(multiplePathsTest)

# Graph built by balancedTree(numLevels, branchingFactor).
numLevels = 2
branchingFactor = 3
treeTest = balancedTree(numLevels, branchingFactor)
resTree = assertEquivClassesForDag(treeTest)

def test_allTopos(graph):
    """Assert that allTopoSorts(graph) equals the number of topological sorts networkx enumerates."""
    countedByFormula = allTopoSorts(graph)
    # Count the enumerated orderings one by one instead of storing them.
    countedByEnumeration = sum(1 for _ in nx.all_topological_sorts(graph))
    assert countedByFormula == countedByEnumeration, "allTopos and all_topological_sorts have different lengths"

# Check the topological-sort count on every example graph built above.
for graph in [emptyTestGraph, naiveBayesTest, naiveBayesWithPathTest, multiplePathsTest, treeTest]:
    test_allTopos(graph)


## Experimentation

### Auxiliary Functions

In [2]:
# True while sys.stdout is the real stream; False while it is redirected.
printEnabled = True


def disablePrint():
    """Silence print() by redirecting sys.stdout to os.devnull.

    The current stream is saved in ``sys._jupyter_stdout`` so that
    enablePrint() can restore it. Calling this while printing is already
    disabled does nothing.
    """
    global printEnabled
    if printEnabled:
        sys._jupyter_stdout = sys.stdout
        sys.stdout = open(os.devnull, 'w')
        printEnabled = False


def enablePrint():
    """Restore the stream saved by disablePrint() and close the devnull handle.

    Calling this while printing is already enabled does nothing; previously
    it closed the live ``sys.stdout``.
    """
    global printEnabled
    if printEnabled:
        return
    devnull = sys.stdout
    # Restore first so stdout is usable even if closing the handle fails.
    sys.stdout = sys._jupyter_stdout
    printEnabled = True
    devnull.close()

def convertDictToCsv(dict, filename):
    """Write a dictionary to ``filename`` as CSV, one row per top-level key.

    The dictionary is loaded with ``orient='index'``, so its keys become the
    row index of the written table.
    """
    pd.DataFrame.from_dict(dict, orient='index').to_csv(filename)

### Time Naive Bayes

In [5]:
def timeMultipleNaiveBayes(numNodes, pathLength, startFrom = 1):
    """Time naiveBayesWithPath graphs over a grid of sizes.

    For every ``i`` in ``startFrom..numNodes`` and ``j`` in
    ``startFrom..pathLength`` the graph ``naiveBayesWithPath(i, j)`` is built
    and ``measureGraphTime`` is run on the integer node labels
    ``i+j-8 .. i+j-1``, clamped so that no negative label is produced for
    small graphs.

    Args:
        numNodes: Largest value of the first graph parameter (inclusive).
        pathLength: Largest value of the second graph parameter (inclusive).
        startFrom: First value used for both parameters.

    Returns:
        Dictionary mapping '<i> Children, <j> PathLength' to the value
        returned by ``measureGraphTime``. The partial results are also
        written to 'naiveBayes.csv' after each graph.
    """
    graphsResults = {}
    for i in range(startFrom,numNodes+1):
        for j in range(startFrom,pathLength+1):
            graphToEvaluate = naiveBayesWithPath(i, j)
            # Clamp the lower bound at 0: when i + j < 8 the unclamped range
            # would contain negative labels.
            nodesToEvaluate = list(range(max(0, i+j-8), i+j))
            label = f'{i} Children, {j} PathLength'
            print(label + str(nodesToEvaluate))
            graphsResults[label] = measureGraphTime(graphToEvaluate, nodesToEvaluate)
            # Checkpoint after every graph so an interrupted run keeps its results.
            convertDictToCsv(graphsResults, 'naiveBayes.csv')
    return graphsResults
    

# Upper bounds (inclusive) for the two parameters of the timing grid.
numNodes = 20
pathLenght = 20
        
# Start both parameters at 10, then write the final results to CSV.
resNaiveBayes = timeMultipleNaiveBayes(numNodes, pathLenght,10)
convertDictToCsv(resNaiveBayes, 'naiveBayes.csv')

10 Children, 10 PathLength[12, 13, 14, 15, 16, 17, 18, 19]
Running for node 12 which has 256 equivalence classes
Node 12 took 0.03303718566894531 seconds to run
Running for node 13 which has 256 equivalence classes
Node 13 took 0.024445772171020508 seconds to run
Running for node 14 which has 256 equivalence classes
Node 14 took 0.02159738540649414 seconds to run
Running for node 15 which has 256 equivalence classes
Node 15 took 0.024441003799438477 seconds to run
Running for node 16 which has 256 equivalence classes
Node 16 took 0.025638103485107422 seconds to run
Running for node 17 which has 256 equivalence classes
Node 17 took 0.025960922241210938 seconds to run
Running for node 18 which has 256 equivalence classes
Node 18 took 0.02630329132080078 seconds to run
Running for node 19 which has 256 equivalence classes
Node 19 took 0.020450353622436523 seconds to run
10 Children, 11 PathLength[13, 14, 15, 16, 17, 18, 19, 20]
Running for node 13 which has 256 equivalence classes
Node 13

Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x7f16085d69e0>>
Traceback (most recent call last):
  File "/home/echu/.local/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 775, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(
KeyboardInterrupt: 


Node 29 took 0.03188967704772949 seconds to run
11 Children, 10 PathLength[13, 14, 15, 16, 17, 18, 19, 20]
Running for node 13 which has 512 equivalence classes
Node 13 took 0.09970974922180176 seconds to run
Running for node 14 which has 512 equivalence classes
Node 14 took 0.04614067077636719 seconds to run
Running for node 15 which has 512 equivalence classes
Node 15 took 0.05052042007446289 seconds to run
Running for node 16 which has 512 equivalence classes
Node 16 took 0.05453991889953613 seconds to run
Running for node 17 which has 512 equivalence classes
Node 17 took 0.054347991943359375 seconds to run
Running for node 18 which has 512 equivalence classes
Node 18 took 0.05581474304199219 seconds to run
Running for node 19 which has 512 equivalence classes
Node 19 took 0.08735346794128418 seconds to run
Running for node 20 which has 512 equivalence classes
Node 20 took 0.057389259338378906 seconds to run
11 Children, 11 PathLength[14, 15, 16, 17, 18, 19, 20, 21]
Running for node

### Time multiple Paths

In [None]:
def timeMultiplePathsGraphs(numPaths, pathLength, startFrom = 1):
    """Time multiplePaths graphs over a grid of sizes.

    For every pair (paths, length) with both values running from
    ``startFrom`` up to ``numPaths`` / ``pathLength`` inclusive, the graph
    ``multiplePaths(paths, length)`` is built and ``measureGraphTime`` is run
    on the nodes ``0 .. length-1``. The accumulated results are written to
    'multiplePaths.csv' after each graph and returned as a dictionary keyed
    by '<paths> Paths, <length> Length'.
    """
    results = {}
    for pathCount in range(startFrom, numPaths + 1):
        for length in range(startFrom, pathLength + 1):
            label = f'{pathCount} Paths, {length} Length'
            currentGraph = multiplePaths(pathCount, length)
            evaluatedNodes = list(range(0, length))
            print(label + str(evaluatedNodes))
            results[label] = measureGraphTime(currentGraph, evaluatedNodes)
            # Checkpoint the partial results after every graph.
            convertDictToCsv(results, 'multiplePaths.csv')
            print()

    return results
    

# Upper bounds (inclusive) for the two parameters of the timing grid.
numberOfPaths = 8
pathLenght = 8

# Start both parameters at 4, then write the final results to CSV.
resMultiplePaths = timeMultiplePathsGraphs(numberOfPaths, pathLenght,4)
convertDictToCsv(resMultiplePaths, 'multiplePaths.csv')

### Time balanced trees

In [4]:
def timeMultipleBalancedTrees(numLevels, branchingFactor = 2, startLevels = 1, starBranching = 2):
    """Time balancedTree graphs over a grid of levels and branching factors.

    For every (levels, branching) pair in ``startLevels..numLevels`` x
    ``starBranching..branchingFactor`` (inclusive), the graph
    ``balancedTree(levels, branching)`` is built and ``measureGraphTime`` is
    run on the first leaf found plus its ancestors as ordered by
    ``orderedNodes``. The accumulated results are written to
    'balancedTrees.csv' after each graph and returned as a dictionary keyed
    by '<levels> Levels, <branching> Branching'.
    """
    results = {}
    for levels in range(startLevels, numLevels + 1):
        for branching in range(starBranching, branchingFactor + 1):
            label = f'{levels} Levels, {branching} Branching'
            tree = balancedTree(levels, branching)
            leaves = [candidate for candidate in tree.nodes if isLeaf(candidate, tree)]
            firstLeaf = leaves[0]
            # Ancestors of the chosen leaf, followed by the leaf itself.
            nodesOnPath = orderedNodes(tree, nx.ancestors(tree, firstLeaf)) + [firstLeaf]
            print(label + str(nodesOnPath))
            results[label] = measureGraphTime(tree, nodesOnPath)
            # Checkpoint the partial results after every graph.
            convertDictToCsv(results, 'balancedTrees.csv')

    return results

# Upper bounds (inclusive) for the number of levels and the branching factor.
numLevels = 6
branchingFactor = 2

# Start at 2 levels and branching factor 2, then write the final results to CSV.
resBalancedTrees = timeMultipleBalancedTrees(numLevels, branchingFactor, 2, 2)
convertDictToCsv(resBalancedTrees, 'balancedTrees.csv')


2 Levels, 2 Branching[0, 1]
3 Levels, 2 Branching[0, 1, 3]
4 Levels, 2 Branching[0, 1, 3, 7]
5 Levels, 2 Branching[0, 1, 3, 7, 15]
6 Levels, 2 Branching[0, 1, 3, 7, 15, 31]
