# LAB5 : Analyse de teinte 

## Partie 1 :

### Implementation de l'algorithme possibly tainted definitions

In [20]:
from code_analysis import CFG 
from code_analysis import CFGReader
from code_analysis import AST
from code_analysis import ASTReader
import json

# definition of a recursiv function to retrieve the node ids of all references in an expression
def referencesExpression (cfg : CFG, node, ref):
    
    if cfg.get_type(node) == 'BinOP':
        referencesExpression(cfg, cfg.get_op_hands(node)[0], ref)
        referencesExpression(cfg, cfg.get_op_hands(node)[1], ref)
    else : 
        ref.append(node)
    return ref

# definition of the function possibly tainted definitions, take as arguments the cfg 
# and the path of the taint file
def poss_tainted_defs(cfg : CFG , taintFilePath) :

    # load the JSON
    taint_file = open(taintFilePath)
    data = json.load(taint_file)

    # extract the file content in variables  
    defs = data['defs']
    refs = data['refs']
    pairs = data['pairs']
    sinks = data['sinks']
    filters = data['filters']
    safes = data['safes']
    sources = data['sources']

    # retrieve the cfg node ids
    nodeSet = cfg.get_node_ids()

    # initialize IN, OUT, tainted_KILL and GEN with empty dict
    IN = {}
    OUT = {}
    tainted_KILL = {}
    tainted_GEN = {}

    # initialize the value of IN, OUT, tainted_KILL and GEN for all nodes 
    # to an empty set 
    for node in nodeSet :
        
        IN[node] = set()
        OUT[node] = set()
        tainted_GEN[node] = set()
        tainted_KILL[node] = set()
       
    # initialize variables old_OUT and changes
    old_OUT = {}
    changes = True
    
    while changes :

        changes = False

        # for each node in the cfg 
        for node in nodeSet :
            
            # if node is a definition then check the right side if it is a source then the definition 
            # is tainted, if it correspond to a filter or safe then nothing happen, otherwise the right 
            # side is an expression then check for all references if one is tainted the definition will be 
            # tainted as well     
            if cfg.get_type(node) == 'BinOP' and cfg.get_image(node) == '=' :
                
                left, right = cfg.get_op_hands(node)
                
                if right in sources :
                    tainted_GEN[node].add(left)
                elif right in safes :
                    tainted_GEN[node] = set()
                elif right in filters :
                    tainted_GEN[node] = set()
                else : 
                    refExpr = referencesExpression (cfg , right, [])
                    for ref in refExpr :
                        for defRef in pairs :
                            definition , reference = defRef
                            if reference == ref and definition in IN[node]:    
                                tainted_GEN[node].add(left)
                                break
                
                # a definition kills all other definitions with the same left side
                for definition in defs : 
                    if definition == left :
                        tainted_KILL[node].add(definition) 
            
            # set the predecessor nodes
            predNodes = []
            if cfg.get_type(node) == 'CallEnd':
                predNodes.append(cfg.get_call_begin(node))
            else :
                predNodes = cfg.get_parents(node)
            
            # update IN, IN = IN U OUT
            for predNode in predNodes :
                IN[node] = IN[node].union(OUT[predNode])

            old_OUT[node] = OUT [node]

            # update OUT, OUT = GEN U ( IN \ KILL ) 
            OUT[node] = tainted_GEN[node].union(IN[node].difference(tainted_KILL[node]))

            if OUT[node] !=  old_OUT[node]:
                changes = True
        
    return IN

# function definition  for printing the pairs (def,ref) tainted
def print_poss_tainted_defrefs(cfg : CFG, ast : AST, taintFilePath) :

    # load the JSON
    taint_file = open(taintFilePath)
    data = json.load(taint_file)

    # extract the file content in variables  
    defs = data['defs']
    refs = data['refs']
    pairs = data['pairs']
    sinks = data['sinks']
    filters = data['filters']
    safes = data['safes']
    sources = data['sources']

    IN = poss_tainted_defs(cfg, taintFilePath)

    for sink in sinks : 
        for pair in pairs :
            definition, reference = pair
            if sink == reference and definition in IN[sink]:
                print(f" La paire  ( definition : '{cfg.get_image(definition)}' ligne {ast.get_position(cfg.get_node_ast_ptr(definition))[0]}, reference : '{cfg.get_image(reference)}' ligne {ast.get_position(cfg.get_node_ast_ptr(reference))[0]} ) est teintee \n" )


### Test partie 1

In [21]:
# File 1
cfg_file1 = CFGReader().read_cfg('/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_1/file_1.php.cfg.json')
ast_file1 = ASTReader().read_ast('/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_1/file_1.php.ast.json')
taint_file1 =  '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_1/file_1.php.taint.json'

print_poss_tainted_defrefs(cfg_file1, ast_file1, taint_file1)

 La paire  ( definition : 'tainted' ligne 3, reference : 'tainted' ligne 6 ) est teintee 



In [22]:
# File 2
cfg_file2 = CFGReader().read_cfg('/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_1/file_2.php.cfg.json')
ast_file2 = ASTReader().read_ast('/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_1/file_2.php.ast.json')
taint_file2 =  '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_1/file_2.php.taint.json'

print_poss_tainted_defrefs(cfg_file2, ast_file2, taint_file2)

 La paire  ( definition : 'tainted2' ligne 5, reference : 'tainted2' ligne 9 ) est teintee 



In [23]:
# File 3
cfg_file3 = CFGReader().read_cfg('/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_1/file_3.php.cfg.json')
ast_file3 = ASTReader().read_ast('/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_1/file_3.php.ast.json')
taint_file3 =  '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_1/file_3.php.taint.json'

print_poss_tainted_defrefs(cfg_file3, ast_file3, taint_file3)

In [24]:
# File 4
cfg_file4 = CFGReader().read_cfg('/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_1/file_4.php.cfg.json')
ast_file4 = ASTReader().read_ast('/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_1/file_4.php.ast.json')
taint_file4 =  '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_1/file_4.php.taint.json'

print_poss_tainted_defrefs(cfg_file4, ast_file4, taint_file4)

 La paire  ( definition : 'tainted' ligne 3, reference : 'tainted' ligne 7 ) est teintee 



In [25]:
# File 5
cfg_file5 = CFGReader().read_cfg('/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_1/file_5.php.cfg.json')
ast_file5 = ASTReader().read_ast('/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_1/file_5.php.ast.json')
taint_file5 =  '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_1/file_5.php.taint.json'

print_poss_tainted_defrefs(cfg_file4, ast_file4, taint_file4)

 La paire  ( definition : 'tainted' ligne 3, reference : 'tainted' ligne 7 ) est teintee 



## Partie 2 :

In [26]:
# Prepare paths of cfg ast and taint of all files

files_paths = [ '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app/about.php' ,
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app/contact.php',
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app/departments.php',
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app/index.php', 
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app/includes/define.php', 
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app/includes/footer.php', 
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app/includes/header.php']

cfg_paths = [ '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/about.php.cfg.json' ,
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/contact.php.cfg.json',
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/departments.php.cfg.json',
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/index.php.cfg.json', 
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/includes/define.php.cfg.json', 
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/includes/footer.php.cfg.json', 
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/includes/header.php.cfg.json']

ast_paths = [ '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/about.php.ast.json' ,
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/contact.php.ast.json',
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/departments.php.ast.json',
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/index.php.ast.json', 
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/includes/define.php.ast.json', 
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/includes/footer.php.ast.json', 
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/includes/header.php.ast.json']

taint_paths =  [ '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/about.php.taint.json' ,
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/contact.php.taint.json',
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/departments.php.taint.json',
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/index.php.taint.json',
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/includes/define.php.taint.json', 
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/includes/footer.php.taint.json', 
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/includes/header.php.taint.json' ]

In [27]:
for i in range(7) :
    print (f"Les pairs def-refs possiblement teintee du fichier '{files_paths[i]}' sont : \n" )
    print_poss_tainted_defrefs(CFGReader().read_cfg(cfg_paths[i]),ASTReader().read_ast(ast_paths[i]),taint_paths[i])

Les pairs def-refs possiblement teintee du fichier '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app/about.php' sont : 

Les pairs def-refs possiblement teintee du fichier '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app/contact.php' sont : 

Les pairs def-refs possiblement teintee du fichier '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app/departments.php' sont : 

Les pairs def-refs possiblement teintee du fichier '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app/index.php' sont : 

Les pairs def-refs possiblement teintee du fichier '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app/includes/define.php' sont : 

Les pairs def-refs possiblement teintee du fichier '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app/includes/footer.php' sont : 

 La paire  ( definition : 'sql' ligne 33, reference : 'sql' ligne 36 ) est teintee 

Les pairs def-refs possiblement teintee du fichier '/mnt/c/Users/Othman