# LAB5 : Analyse de teinte 

## Partie 1 :

### Implémentation de l'algorithme « possibly tainted definitions »

In [17]:
from code_analysis import CFG 
from code_analysis import CFGReader
from code_analysis import AST
from code_analysis import ASTReader
import json

# Collect the node ids of all the references (non-'BinOP' operands) of an expression.
def referencesExpression (cfg : CFG, node, ref):
    """Append to ``ref`` the ids of every non-'BinOP' node under ``node``.

    The expression is walked depth-first, left operand before right operand.
    A node whose type is 'BinOP' is expanded into its first two operands
    (as returned by ``cfg.get_op_hands``); any other node is recorded as is.

    Args:
        cfg: graph object queried through ``get_type`` and ``get_op_hands``.
        node: id of the expression's root node.
        ref: list that receives the collected node ids (mutated in place).

    Returns:
        The same ``ref`` list that was passed in.
    """
    pending = [node]

    while pending:
        current = pending.pop()

        if cfg.get_type(current) != 'BinOP':
            ref.append(current)
            continue

        operands = cfg.get_op_hands(current)
        # The stack is LIFO: push the right operand first so that the left
        # operand (and its whole subtree) is visited before the right one.
        pending.append(operands[1])
        pending.append(operands[0])

    return ref

# definition of the function possibly tainted definitions, take as arguments the cfg 
# and the path of the taint file
def poss_tainted_defs(cfg : CFG , taintFilePath) :
    """Run the "possibly tainted definitions" fixed-point analysis on a CFG.

    For every node the sets are iterated until no OUT set changes:

        IN[node]  = IN[node] U OUT[p]   for every predecessor p
        OUT[node] = GEN[node] U (IN[node] \\ KILL[node])

    Args:
        cfg: control-flow graph, queried through ``get_node_ids``,
            ``get_type``, ``get_image``, ``get_op_hands``, ``get_parents``
            and ``get_call_begin``.
        taintFilePath: path of a JSON file providing the keys 'defs',
            'pairs', 'sources', 'safes' and 'filters'. Every entry of
            'pairs' is unpacked as ``(definition, reference)``.

    Returns:
        A two-element list ``[IN, OUT]``; each element is a dict mapping
        every node id of the CFG to a set of definitions.

    Raises:
        OSError: if the taint file cannot be opened.
        KeyError: if one of the keys listed above is missing from the file.
    """
    # The context manager closes the file handle even if parsing fails;
    # JSON interchange files are UTF-8, so do not rely on the locale default.
    with open(taintFilePath, encoding="utf-8") as taint_file:
        data = json.load(taint_file)

    # Only the sections actually used by the analysis are extracted.
    defs = data['defs']
    pairs = data['pairs']
    filters = data['filters']
    safes = data['safes']
    sources = data['sources']

    # Materialised once because the ids are iterated on every pass.
    nodeSet = list(cfg.get_node_ids())

    # Every node starts with empty IN, OUT, GEN and KILL sets.
    IN = {node: set() for node in nodeSet}
    OUT = {node: set() for node in nodeSet}
    tainted_KILL = {node: set() for node in nodeSet}
    tainted_GEN = {node: set() for node in nodeSet}

    changes = True
    while changes :

        changes = False

        for node in nodeSet :

            # An assignment is a 'BinOP' node whose image is '='.
            if cfg.get_type(node) == 'BinOP' and cfg.get_image(node) == '=' :

                left, right = cfg.get_op_hands(node)

                if right in sources :
                    # Assigned straight from a source: the definition is tainted.
                    tainted_GEN[node].add(left)
                elif right in safes or right in filters :
                    # Safe or filtered value: this node generates nothing.
                    tainted_GEN[node] = set()
                else :
                    # General expression: the definition is tainted as soon as
                    # one of its references is paired with a definition that is
                    # already in IN[node].
                    refExpr = referencesExpression (cfg , right, [])
                    if any(reference == ref and definition in IN[node]
                           for ref in refExpr
                           for definition, reference in pairs) :
                        tainted_GEN[node].add(left)

                # NOTE(review): the comparison is on node ids, so the KILL set
                # can only ever hold this node's own left operand. If killing
                # the *other* definitions of the same variable was intended,
                # this needs a name-based comparison -- confirm.
                if left in defs :
                    tainted_KILL[node].add(left)

            # A 'CallEnd' node takes its matching call-begin node as its only
            # predecessor; every other node uses its CFG parents.
            if cfg.get_type(node) == 'CallEnd':
                predNodes = [cfg.get_call_begin(node)]
            else :
                predNodes = cfg.get_parents(node)

            # IN = IN U OUT[pred] for every predecessor.
            for predNode in predNodes :
                IN[node].update(OUT[predNode])

            # OUT = GEN U ( IN \ KILL )
            new_OUT = tainted_GEN[node] | (IN[node] - tainted_KILL[node])

            # Another pass is needed as long as any OUT set still moves.
            if new_OUT != OUT[node]:
                changes = True
            OUT[node] = new_OUT

    return [IN,OUT]

# function definition  for printing the pairs (def,ref) tainted
def print_poss_tainted_defrefs(cfg : CFG, ast : AST, taintFilePath) :
    """Print the tainted (definition, reference) pairs whose reference is a sink.

    The IN/OUT sets are obtained from ``poss_tainted_defs``. A pair is
    reported when its reference equals a sink and its definition belongs to
    the IN set of that sink. The complete IN and OUT dicts are printed last.

    Args:
        cfg: control-flow graph; ``get_image`` and ``get_node_ast_ptr`` are
            used to describe the reported nodes.
        ast: syntax tree; the first element returned by ``get_position`` is
            printed as the line of a node.
        taintFilePath: path of a JSON file providing at least the keys
            'pairs' and 'sinks', plus the keys required by
            ``poss_tainted_defs``.

    Returns:
        None. The report is written to standard output.
    """
    # The context manager closes the file handle even if parsing fails;
    # JSON interchange files are UTF-8, so do not rely on the locale default.
    with open(taintFilePath, encoding="utf-8") as taint_file:
        data = json.load(taint_file)

    # Only these two sections are needed to build the report.
    pairs = data['pairs']
    sinks = data['sinks']

    IN,OUT = poss_tainted_defs(cfg, taintFilePath)

    for sink in sinks :
        for definition, reference in pairs :
            if sink == reference and definition in IN[sink]:
                def_line = ast.get_position(cfg.get_node_ast_ptr(definition))[0]
                ref_line = ast.get_position(cfg.get_node_ast_ptr(reference))[0]
                print(
                    f" La paire  ( definition : '{cfg.get_image(definition)}' ligne {def_line}, "
                    f"reference : '{cfg.get_image(reference)}' ligne {ref_line} ) est teintee \n"
                )

    print(f" IN : {IN} ")
    print(f" OUT : {OUT} \n")

### Test partie 1

In [18]:
# File 1
# Load the CFG and the AST exported for file_1.php, then print its possibly
# tainted (definition, reference) pairs using the matching taint description.
# NOTE(review): the absolute paths below are tied to one workstation's layout;
# adjust them before running elsewhere.
cfg_file1 = CFGReader().read_cfg('/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_1/file_1.php.cfg.json')
ast_file1 = ASTReader().read_ast('/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_1/file_1.php.ast.json')
taint_file1 =  '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_1/file_1.php.taint.json'

print_poss_tainted_defrefs(cfg_file1, ast_file1, taint_file1)

 La paire  ( definition : 'tainted' ligne 3, reference : 'tainted' ligne 6 ) est teintee 

 IN : {99: set(), 100: {109}, 101: set(), 102: set(), 103: set(), 104: set(), 105: set(), 106: set(), 107: set(), 108: set(), 109: set(), 110: {109}, 111: {109}, 112: {109}, 113: {109}, 114: {109}, 115: {109}, 116: {109}, 117: {109}, 118: {109}, 119: {109}, 120: {109}, 121: {109}, 122: {109}, 123: {109}, 124: {109}, 125: {109}} 
 OUT : {99: set(), 100: {109}, 101: set(), 102: set(), 103: set(), 104: set(), 105: {109}, 106: set(), 107: set(), 108: set(), 109: set(), 110: {109}, 111: {109}, 112: {109}, 113: {109}, 114: {109}, 115: {109}, 116: {109}, 117: {109}, 118: {109}, 119: {109}, 120: {109}, 121: {109}, 122: {109}, 123: {109}, 124: {109}, 125: {109}} 



In [19]:
# File 2
# Load the CFG and the AST exported for file_2.php, then print its possibly
# tainted (definition, reference) pairs using the matching taint description.
# NOTE(review): the absolute paths below are tied to one workstation's layout;
# adjust them before running elsewhere.
cfg_file2 = CFGReader().read_cfg('/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_1/file_2.php.cfg.json')
ast_file2 = ASTReader().read_ast('/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_1/file_2.php.ast.json')
taint_file2 =  '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_1/file_2.php.taint.json'

print_poss_tainted_defrefs(cfg_file2, ast_file2, taint_file2)

 La paire  ( definition : 'tainted2' ligne 5, reference : 'tainted2' ligne 9 ) est teintee 

 IN : {64: {40, 47}, 65: {40, 47}, 66: {40, 47}, 67: {40, 47}, 68: {40, 47}, 30: set(), 31: {40, 47}, 32: set(), 33: set(), 34: set(), 35: set(), 36: set(), 37: set(), 38: set(), 39: set(), 40: set(), 41: {40}, 42: {40}, 43: {40}, 44: {40}, 45: {40}, 46: {40}, 47: {40}, 48: {40, 47}, 49: {40, 47}, 50: {40, 47}, 51: {40, 47}, 52: {40, 47}, 53: {40, 47}, 54: {40, 47}, 55: {40, 47}, 56: {40, 47}, 57: {40, 47}, 58: {40, 47}, 59: {40, 47}, 60: {40, 47}, 61: {40, 47}, 62: {40, 47}, 63: {40, 47}} 
 OUT : {64: {40, 47}, 65: {40, 47}, 66: {40, 47}, 67: {40, 47}, 68: {40, 47}, 30: set(), 31: {40, 47}, 32: set(), 33: set(), 34: set(), 35: set(), 36: {40}, 37: set(), 38: set(), 39: set(), 40: set(), 41: {40, 47}, 42: {40}, 43: {40}, 44: {40}, 45: {40}, 46: {40}, 47: {40}, 48: {40, 47}, 49: {40, 47}, 50: {40, 47}, 51: {40, 47}, 52: {40, 47}, 53: {40, 47}, 54: {40, 47}, 55: {40, 47}, 56: {40, 47}, 57: {40, 4

In [20]:
# File 3
# Load the CFG and the AST exported for file_3.php, then print its possibly
# tainted (definition, reference) pairs using the matching taint description.
# NOTE(review): the absolute paths below are tied to one workstation's layout;
# adjust them before running elsewhere.
cfg_file3 = CFGReader().read_cfg('/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_1/file_3.php.cfg.json')
ast_file3 = ASTReader().read_ast('/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_1/file_3.php.ast.json')
taint_file3 =  '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_1/file_3.php.taint.json'

print_poss_tainted_defrefs(cfg_file3, ast_file3, taint_file3)

 IN : {1: set(), 2: {11}, 3: set(), 4: set(), 5: set(), 6: set(), 7: set(), 8: set(), 9: set(), 10: set(), 11: set(), 12: {11}, 13: {11}, 14: {11}, 15: {11}, 16: {11}, 17: {11}, 18: {11}, 19: {11}, 20: {11}, 21: {11}, 22: {11}, 23: {11}, 24: {11}, 25: {11}, 26: {11}, 27: {11}, 28: {11}, 29: {11}} 
 OUT : {1: set(), 2: {11}, 3: set(), 4: set(), 5: set(), 6: set(), 7: {11}, 8: set(), 9: set(), 10: set(), 11: set(), 12: {11}, 13: {11}, 14: {11}, 15: {11}, 16: {11}, 17: {11}, 18: {11}, 19: {11}, 20: {11}, 21: {11}, 22: {11}, 23: {11}, 24: {11}, 25: {11}, 26: {11}, 27: {11}, 28: {11}, 29: {11}} 



In [21]:
# File 4
# Load the CFG and the AST exported for file_4.php, then print its possibly
# tainted (definition, reference) pairs using the matching taint description.
# NOTE(review): the absolute paths below are tied to one workstation's layout;
# adjust them before running elsewhere.
cfg_file4 = CFGReader().read_cfg('/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_1/file_4.php.cfg.json')
ast_file4 = ASTReader().read_ast('/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_1/file_4.php.ast.json')
taint_file4 =  '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_1/file_4.php.taint.json'

print_poss_tainted_defrefs(cfg_file4, ast_file4, taint_file4)

 La paire  ( definition : 'tainted' ligne 3, reference : 'tainted' ligne 7 ) est teintee 

 IN : {69: set(), 70: {76}, 71: set(), 72: set(), 73: set(), 74: set(), 75: set(), 76: set(), 77: {76}, 78: {76}, 79: {76}, 80: {76}, 81: {76}, 82: {76}, 83: {76}, 84: {76}, 85: {76}, 86: {76}, 87: {76}, 88: {76}, 89: {76}, 90: {76}, 91: {76}, 92: {76}, 93: {76}, 94: {76}, 95: {76}, 96: {76}, 97: {76}, 98: {76}} 
 OUT : {69: set(), 70: {76}, 71: set(), 72: {76}, 73: set(), 74: set(), 75: set(), 76: set(), 77: {76}, 78: {76}, 79: {76}, 80: {76}, 81: {76}, 82: {76}, 83: {76}, 84: {76}, 85: {76}, 86: {76}, 87: {76}, 88: {76}, 89: {76}, 90: {76}, 91: {76}, 92: {76}, 93: {76}, 94: {76}, 95: {76}, 96: {76}, 97: {76}, 98: {76}} 



In [22]:
# File 5
# Load the CFG and the AST exported for file_5.php, then print its possibly
# tainted (definition, reference) pairs using the matching taint description.
# NOTE(review): the absolute paths below are tied to one workstation's layout;
# adjust them before running elsewhere.
cfg_file5 = CFGReader().read_cfg('/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_1/file_5.php.cfg.json')
ast_file5 = ASTReader().read_ast('/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_1/file_5.php.ast.json')
taint_file5 =  '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_1/file_5.php.taint.json'

print_poss_tainted_defrefs(cfg_file5, ast_file5, taint_file5)

 La paire  ( definition : 'line' ligne 9, reference : 'line' ligne 23 ) est teintee 

 La paire  ( definition : 'line' ligne 15, reference : 'line' ligne 23 ) est teintee 

 IN : {1: set(), 2: set(), 3: set(), 4: set(), 5: {32, 66, 40, 21}, 6: set(), 7: set(), 8: set(), 9: set(), 10: set(), 11: set(), 12: set(), 13: set(), 14: set(), 15: set(), 16: set(), 17: set(), 18: set(), 19: set(), 20: set(), 21: set(), 22: {32, 66, 40, 21}, 23: {32, 66, 40, 21}, 24: {32, 66, 40, 21}, 25: {32, 66, 40, 21}, 26: {32, 66, 40, 21}, 27: {32, 66, 40, 21}, 28: {32, 66, 40, 21}, 29: {32, 66, 40, 21}, 30: {32, 66, 40, 21}, 31: {32, 66, 40, 21}, 32: {32, 66, 40, 21}, 33: {32, 66, 40, 21}, 34: {32, 66, 40, 21}, 35: {32, 66, 40, 21}, 36: {32, 66, 40, 21}, 37: {32, 66, 40, 21}, 38: {32, 66, 40, 21}, 39: {32, 66, 40, 21}, 40: {32, 66, 40, 21}, 41: {32, 66, 40, 21}, 42: {32, 66, 40, 21}, 43: {32, 66, 40, 21}, 44: {32, 66, 40, 21}, 45: {32, 66, 40, 21}, 46: {32, 66, 40, 21}, 47: {32, 66, 40, 21}, 48: {32, 66, 40

## Partie 2 :

In [23]:
# Paths of the PHP sources of the part 2 application and of the CFG, AST and
# taint JSON files that go with them.
# The four lists are index-aligned: entry i of each list refers to the same
# PHP file, so they must keep the same length and the same order.
# NOTE(review): the absolute paths are tied to one workstation's layout;
# adjust them before running elsewhere.

# PHP source files (only used to label the printed report).
files_paths = [ '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app/about.php' ,
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app/contact.php',
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app/departments.php',
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app/index.php', 
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app/includes/define.php', 
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app/includes/footer.php', 
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app/includes/header.php']

# Control-flow graph exports, one per PHP file.
cfg_paths = [ '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/about.php.cfg.json' ,
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/contact.php.cfg.json',
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/departments.php.cfg.json',
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/index.php.cfg.json', 
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/includes/define.php.cfg.json', 
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/includes/footer.php.cfg.json', 
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/includes/header.php.cfg.json']

# Syntax tree exports, stored next to the CFG files.
ast_paths = [ '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/about.php.ast.json' ,
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/contact.php.ast.json',
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/departments.php.ast.json',
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/index.php.ast.json', 
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/includes/define.php.ast.json', 
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/includes/footer.php.ast.json', 
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/includes/header.php.ast.json']

# Taint description files, stored next to the CFG files.
taint_paths =  [ '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/about.php.taint.json' ,
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/contact.php.taint.json',
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/departments.php.taint.json',
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/index.php.taint.json',
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/includes/define.php.taint.json', 
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/includes/footer.php.taint.json', 
              '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app.cfg/includes/header.php.taint.json' ]

In [24]:

# Print the possibly tainted def-ref report of every file of the application.
# The four path lists are walked in lockstep instead of through a hard-coded
# index range, so adding or removing a file only requires editing the lists.
# Note that zip() stops at the shortest list: the lists must stay aligned.
for file_path, cfg_path, ast_path, taint_path in zip(files_paths, cfg_paths, ast_paths, taint_paths) :
    print (f"Les pairs def-refs possiblement teintee du fichier '{file_path}' sont : \n" )
    print_poss_tainted_defrefs(CFGReader().read_cfg(cfg_path),ASTReader().read_ast(ast_path),taint_path)

Les pairs def-refs possiblement teintee du fichier '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app/about.php' sont : 

 IN : {1: set(), 2: set(), 3: set(), 4: set(), 5: set(), 6: set(), 7: set(), 8: set(), 9: set(), 10: set(), 11: set(), 12: set(), 13: set()} 
 OUT : {1: set(), 2: set(), 3: set(), 4: set(), 5: set(), 6: set(), 7: set(), 8: set(), 9: set(), 10: set(), 11: set(), 12: set(), 13: set()} 

Les pairs def-refs possiblement teintee du fichier '/mnt/c/Users/Othman/Desktop/TP-LOG6302/log6302a_lab5/part_2/app/contact.php' sont : 

 IN : {14: set(), 15: set(), 16: set(), 17: set(), 18: set(), 19: set(), 20: set(), 21: {59, 76, 93}, 22: set(), 23: set(), 24: set(), 25: set(), 26: set(), 27: set(), 28: set(), 29: set(), 30: set(), 31: set(), 32: set(), 33: set(), 34: set(), 35: set(), 36: set(), 37: set(), 38: set(), 39: set(), 40: set(), 41: set(), 42: set(), 43: set(), 44: set(), 45: set(), 46: set(), 47: set(), 48: set(), 49: set(), 50: set(), 51: set(), 52: set(