In [238]:
import os

from __future__ import print_function

import json
import sys
import re

# This is not required if you've installed pycparser into
# your site-packages/ with setup.py
#
sys.path.extend(['.', '..'])

from pycparser import parse_file, c_ast
from pycparser.plyparser import Coord


RE_CHILD_ARRAY = re.compile(r'(.*)\[(.*)\]')
RE_INTERNAL_ATTR = re.compile('__.*__')


class CJsonError(Exception):
    pass


def memodict(fn):
    """ Fast memoization decorator for a function taking a single argument """
    class memodict(dict):
        def __missing__(self, key):
            ret = self[key] = fn(key)
            return ret
    return memodict().__getitem__


@memodict
def child_attrs_of(klass):
    """
    Given a Node class, get a set of child attrs.
    Memoized to avoid highly repetitive string manipulation
    """
    non_child_attrs = set(klass.attr_names)
    all_attrs = set([i for i in klass.__slots__ if not RE_INTERNAL_ATTR.match(i)])
    return all_attrs - non_child_attrs


def to_dict(node):
    """ Recursively convert an ast into dict representation. """
    klass = node.__class__

    result = {}

    # Metadata
    result['_nodetype'] = klass.__name__

    # Local node attributes
    for attr in klass.attr_names:
        result[attr] = getattr(node, attr)

    # Coord object
    if node.coord:
        result['coord'] = str(node.coord)
    else:
        result['coord'] = None

    # Child attributes
    for child_name, child in node.children():
        # Child strings are either simple (e.g. 'value') or arrays (e.g. 'block_items[1]')
        match = RE_CHILD_ARRAY.match(child_name)
        if match:
            array_name, array_index = match.groups()
            array_index = int(array_index)
            # arrays come in order, so we verify and append.
            result[array_name] = result.get(array_name, [])
            if array_index != len(result[array_name]):
                raise CJsonError('Internal ast error. Array {} out of order. '
                    'Expected index {}, got {}'.format(
                    array_name, len(result[array_name]), array_index))
            result[array_name].append(to_dict(child))
        else:
            result[child_name] = to_dict(child)

    # Any child attributes that were missing need "None" values in the json.
    for child_attr in child_attrs_of(klass):
        if child_attr not in result:
            result[child_attr] = None

    return result


def to_json(node, **kwargs):
    """ Convert ast node to json string """
    return json.dumps(to_dict(node), **kwargs)


def file_to_dict(filename):
    """ Load C file into dict representation of ast """
    ast = parse_file(filename, use_cpp=True,
            cpp_path='clang',
            cpp_args=['-E', r'-Iutils/fake_libc_include'])
    return to_dict(ast)


def file_to_json(filename, **kwargs):
    """ Load C file into json string representation of ast """
    ast = parse_file(filename, use_cpp=True,
            cpp_path='clang',
            cpp_args=['-E', r'-Iutils/fake_libc_include'])
    return to_json(ast, **kwargs)


def _parse_coord(coord_str):
    """ Parse coord string (file:line[:column]) into Coord object. """
    if coord_str is None:
        return None

    vals = coord_str.split(':')
    vals.extend([None] * 3)
    filename, line, column = vals[:3]
    return Coord(filename, line, column)


def _convert_to_obj(value):
    """
    Convert an object in the dict representation into an object.
    Note: Mutually recursive with from_dict.
    """
    value_type = type(value)
    if value_type == dict:
        return from_dict(value)
    elif value_type == list:
        return [_convert_to_obj(item) for item in value]
    else:
        # String
        return value


def from_dict(node_dict):
    """ Recursively build an ast from dict representation """
    class_name = node_dict.pop('_nodetype')

    klass = getattr(c_ast, class_name)

    # Create a new dict containing the key-value pairs which we can pass
    # to node constructors.
    objs = {}
    for key, value in node_dict.items():
        if key == 'coord':
            objs[key] = _parse_coord(value)
        else:
            objs[key] = _convert_to_obj(value)

    # Use keyword parameters, which works thanks to beautifully consistent
    # ast Node initializers.
    return klass(**objs)


def from_json(ast_json):
    """ Build an ast from json string representation """
    return from_dict(json.loads(ast_json))

In [239]:
folderPath = './Testdata/'
fileList = os.listdir(folderPath)

In [240]:
for fileName in fileList:
    ast_dict = file_to_dict(folderPath + fileName)
    ast = from_dict(ast_dict)
    jsonString = to_json(ast,indent=4)
    jsonObject = json.loads(jsonString)
    print(fileName)
    print(json.dumps(jsonObject, indent = 4))
    print("----------------------------------------------------------------------------------------")

test1.c
{
    "_nodetype": "FileAST",
    "coord": null,
    "ext": [
        {
            "_nodetype": "FuncDef",
            "coord": "./Testdata/test1.c:1:5",
            "decl": {
                "_nodetype": "Decl",
                "name": "func",
                "quals": [],
                "storage": [],
                "funcspec": [],
                "coord": "./Testdata/test1.c:1:5",
                "type": {
                    "_nodetype": "FuncDecl",
                    "coord": "./Testdata/test1.c:1:5",
                    "args": {
                        "_nodetype": "ParamList",
                        "coord": "./Testdata/test1.c:1:15",
                        "params": [
                            {
                                "_nodetype": "Decl",
                                "name": "a",
                                "quals": [],
                                "storage": [],
                                "funcspec": [],
                                "

In [241]:
def traverse(json_object):
    if type(json_object) == dict:
        for key in json_object:
            if key == 'ext' or key == 'body' or key == 'block_items':
                traverse(json_object[key])
            #Case for 'int a = b - c'
            elif key == '_nodetype' and json_object[key] == 'Decl':
                if 'init' in json_object and bool(json_object['init']) and 'op' in json_object['init']:
                    operation = json_object['init']['op']
                    if operation == '+' or operation == '-' or operation == '*':
                        if (json_object['init']['left']['_nodetype'] == 'ID' or json_object['init']['right']['_nodetype'] == 'ID') and json_object['type']['type']['names'][0] == 'int':
                            match = re.search('(:[0-9]+:)', json_object['init']['coord'])
                            lineNum = match.string.strip(':')
                            print('Possibible vunerability "CWE-190 Integer Overflow or Wraparound" detected.')
                            print('Revise line ' + str(lineNum))
                            print()
    if type(json_object) == list:
        for item in json_object:
            traverse(item)


In [242]:
"""
AST Start
Laterally find ext
Laterally find body
Go into body
Laterally find block_items
Go into block_items
    Find index with "init": not NULL
        If ("name": is a var int) AND (init['op'] is +,-,*) AND ((init['left']['_nodetype'] is 'ID') OR (init['right']['_nodetype'] is 'ID'))
            Flag the line
    Find index with "_nodetype": Assignment
        If (lvalue['name'] is a var int) AND (rvalue['op'] is +,-,*) AND ((left['_nodetype'] is 'ID') OR (right['_nodetype'] is 'ID'))
            Flag the line

"""

'\nAST Start\nLaterally find ext\nLaterally find body\nGo into body\nLaterally find block_items\nGo into block_items\n    Find index with "init": not NULL\n        If ("name": is a var int) AND (init[\'op\'] is +,-,*) AND ((init[\'left\'][\'_nodetype\'] is \'ID\') OR (init[\'right\'][\'_nodetype\'] is \'ID\'))\n            Flag the line\n    Find index with "_nodetype": Assignment\n        If (lvalue[\'name\'] is a var int) AND (rvalue[\'op\'] is +,-,*) AND ((left[\'_nodetype\'] is \'ID\') OR (right[\'_nodetype\'] is \'ID\'))\n            Flag the line\n\n'

In [243]:
for fileName in fileList:
    ast_dict = file_to_dict(folderPath + fileName)
    ast = from_dict(ast_dict)
    jsonString = to_json(ast,indent=4)
    jsonObject = json.loads(jsonString)
    print(fileName)
    traverse(jsonObject)
    print("----------------------------------------------------------------------------------------")

test1.c
----------------------------------------------------------------------------------------
test2.c
Possibible vunerability "CWE-190 Integer Overflow or Wraparound" detected.
Revise line ./Testdata/test2.c:3:13

Possibible vunerability "CWE-190 Integer Overflow or Wraparound" detected.
Revise line ./Testdata/test2.c:4:13

----------------------------------------------------------------------------------------
test3.c
----------------------------------------------------------------------------------------
test4.c
----------------------------------------------------------------------------------------
