In [1]:
from code_analysis import CFG, CFGReader
import json
import os

In [2]:
def get_teinted_gen_node(cfg : CFG, def_node : int, refs : set, in_teinted :dict[int,set], def_ref_pairs, sources, filters,safe):
    """Compute the GEN set of one definition node for the taint analysis.

    The result is ``{def_node}`` when the definition is considered tainted
    and an empty set otherwise. A definition is tainted when it is itself a
    source, or when one of the nodes reached from the operands of its
    expression is a source, or is a referenced variable for which at least
    one paired definition is present in ``in_teinted`` for that reference.

    Args:
        cfg: Graph object queried through ``get_children``, ``get_op_hands``,
            ``get_type``, ``get_parents`` and ``get_call_begin``.
        def_node: Id of the definition node being evaluated.
        refs: Ids of the variable reference nodes.
        in_teinted: Mapping from node id to a set of definition ids; it is
            only looked up for ``Variable`` nodes that belong to ``refs``
            (the caller in this file passes the IN sets of the analysis).
        def_ref_pairs: Iterable of ``(definition id, reference id)`` pairs.
        sources: Ids of the nodes that introduce taint.
        filters: Ids of nodes at which the exploration stops.
        safe: Ids of nodes at which the exploration stops.

    Returns:
        ``{def_node}`` if the definition is tainted, otherwise ``set()``.
    """
    if def_node in sources:
        return {def_node}

    # The first child of the definition is used as the root of its expression;
    # its operands (minus the definition itself) seed the exploration.
    bin_op_node = cfg.get_children(def_node)[0]
    nodes_to_check = set(cfg.get_op_hands(bin_op_node)) - {def_node}

    # Expanding a node always yields the same successors, so each node only
    # needs to be handled once. Without this guard a node that (directly or
    # indirectly) re-adds itself would keep the loop running forever.
    visited = set()

    while nodes_to_check:
        node = nodes_to_check.pop()
        if node in visited:
            continue
        visited.add(node)

        if node in sources:
            return {def_node}
        if node in safe or node in filters:
            # Safe and filter nodes stop the exploration on this path.
            continue

        node_type = cfg.get_type(node)
        if node_type == "RetValue":
            # Continue with the first parent of the return value.
            nodes_to_check.add(cfg.get_parents(node)[0])
        elif node_type == "CallEnd":
            # Continue with the children of the matching call-begin node.
            call_begin = cfg.get_call_begin(node)
            nodes_to_check.update(cfg.get_children(call_begin))
        elif node_type == "BinOP":
            nodes_to_check.update(cfg.get_op_hands(node))
        elif node_type == "Variable" and node in refs:
            # Definitions paired with this reference.
            paired_defs = {pair[0] for pair in def_ref_pairs if pair[1] == node}
            if not in_teinted[node].isdisjoint(paired_defs):
                return {def_node}
    return set()
                    

            
                    
def get_teinted_kill(cfg : CFG,defs : set):
    """Build the KILL set of every node.

    Every node id reported by ``cfg.get_node_ids()`` starts with an empty
    set. Each definition in ``defs`` is then mapped to the set of all
    definitions (itself included) whose ``cfg.get_image`` value is equal to
    its own -- presumably the name of the defined variable.

    Returns:
        dict mapping node id -> set of definition ids.
    """
    kill = {}
    for node_id in cfg.get_node_ids():
        kill[node_id] = set()

    # Look every image up once, then compare the definitions pairwise.
    image_of = {definition: cfg.get_image(definition) for definition in defs}
    for definition, image in image_of.items():
        kill[definition] = {
            other for other, other_image in image_of.items() if other_image == image
        }
    return kill
def get_predecessors(cfg : CFG, node : int):
    """Return the set of predecessor ids used by the analysis for ``node``.

    A ``CallEnd`` node has exactly one predecessor here: its matching
    call-begin node. Any other node uses its CFG parents.
    """
    is_call_end = cfg.get_type(node) == "CallEnd"
    if not is_call_end:
        return set(cfg.get_parents(node))
    return {cfg.get_call_begin(node)}

def Possibly_Tainted_Definitions(cfg : CFG, 
                                    defs : set , 
                                    refs : set, 
                                    pairs : list, 
                                    sources : set, 
                                    filter : set, 
                                    safe : set
                                    ) -> tuple[dict[int, set], dict[int, set]]:
    """Iterate the tainted-definitions data-flow equations to a fixed point.

    For every node the following is recomputed until no OUT set changes:

        IN[node]  = union of OUT[p] for every p in get_predecessors(cfg, node)
        OUT[node] = GEN[node] | (IN[node] - KILL[node])

    GEN is re-evaluated on every pass for definition nodes because it depends
    on the current IN sets; it is empty for every other node.

    Args:
        cfg: Graph object providing ``get_node_ids`` and the queries used by
            the helper functions.
        defs: Ids of the definition nodes.
        refs: Ids of the variable reference nodes.
        pairs: ``(definition id, reference id)`` pairs.
        sources: Ids of the nodes that introduce taint.
        filter: Ids of filter nodes (name kept for compatibility although it
            shadows the builtin).
        safe: Ids of safe nodes.

    Returns:
        The ``(IN, OUT)`` tuple, each a dict mapping node id -> set of
        definition ids.
    """
    node_ids = list(cfg.get_node_ids())
    IN = {node: set() for node in node_ids}
    OUT = {node: set() for node in node_ids}
    kill = get_teinted_kill(cfg, defs)

    changes = True
    while changes:
        changes = False
        for node in node_ids:
            incoming = set()
            for pred in get_predecessors(cfg, node):
                incoming |= OUT[pred]
            IN[node] = incoming

            # GEN is only meaningful for definitions and must see the IN sets
            # of the current pass, so it is computed here rather than up front.
            if node in defs:
                gen = get_teinted_gen_node(cfg, node, refs, IN, pairs, sources, filter, safe)
            else:
                gen = set()

            new_out = gen | (IN[node] - kill[node])
            if new_out != OUT[node]:
                OUT[node] = new_out
                changes = True
    return IN, OUT

In [3]:
def analyse_file(file_path):
    """Run the taint analysis on one CFG file and print the IN/OUT sets.

    The analysis parameters are read from the JSON file whose path is
    ``file_path`` with ``".cfg.json"`` replaced by ``".taint.json"``; it must
    provide the keys ``defs``, ``refs``, ``pairs``, ``sources``, ``filters``
    and ``safes``.

    Args:
        file_path: Path of the ``.cfg.json`` file to analyse.
    """
    cfg = CFGReader().read_cfg(file_path)
    parameters_path = file_path.replace(".cfg.json",".taint.json")
    # Use a context manager so the parameter file is closed deterministically.
    with open(parameters_path) as parameters_file:
        parameters = json.load(parameters_file)
    defs = set(parameters["defs"])
    refs = set(parameters["refs"])
    pairs = parameters["pairs"]
    sources = set(parameters["sources"])
    filters = set(parameters["filters"])
    safe = set(parameters["safes"])
    IN, OUT = Possibly_Tainted_Definitions(cfg, defs, refs, pairs, sources, filters, safe)
    print(f"File : {file_path}")
    for node in cfg.get_node_ids():
        print(f"Node {node} : {IN[node]} -> {OUT[node]}")
    print("\n\n")
    

    

In [4]:
## Analyse every ".cfg.json" file of the input directory, in sorted name order.
# NOTE(review): `dir` shadows the builtin of the same name; the name is kept
# because other cells may rely on it.
dir = "../part_1/"
files = os.listdir(dir)
files.sort()
cfgs = []
for file in files:
    if not file.endswith(".cfg.json"):
        continue
    analyse_file(dir + file)

File : ../part_1/file_1.php.cfg.json
Node 99 : set() -> set()
Node 100 : {109} -> {109}
Node 101 : set() -> set()
Node 102 : set() -> set()
Node 103 : set() -> set()
Node 104 : set() -> set()
Node 105 : {109} -> {109}
Node 106 : set() -> set()
Node 107 : set() -> set()
Node 108 : set() -> set()
Node 109 : set() -> {109}
Node 110 : {109} -> {109}
Node 111 : {109} -> {109}
Node 112 : {109} -> {109}
Node 113 : {109} -> {109}
Node 114 : {109} -> {109}
Node 115 : {109} -> {109}
Node 116 : {109} -> {109}
Node 117 : {109} -> {109}
Node 118 : {109} -> {109}
Node 119 : {109} -> {109}
Node 120 : {109} -> {109}
Node 121 : {109} -> {109}
Node 122 : {109} -> {109}
Node 123 : {109} -> {109}
Node 124 : {109} -> {109}
Node 125 : {109} -> {109}



File : ../part_1/file_2.php.cfg.json
Node 64 : {40, 47} -> {40, 47}
Node 65 : {40, 47} -> {40, 47}
Node 66 : {40, 47} -> {40, 47}
Node 67 : {40, 47} -> {40, 47}
Node 68 : {40, 47} -> {40, 47}
Node 30 : set() -> set()
Node 31 : {40, 47} -> {40, 47}
Node 32 : s