In [8]:
from __future__ import print_function

import json
import sys
import re

# This is not required if you've installed pycparser into
# your site-packages/ with setup.py
#
sys.path.extend(['.', '..'])

from pycparser import parse_file, c_ast
from pycparser.plyparser import Coord


RE_CHILD_ARRAY = re.compile(r'(.*)\[(.*)\]')
RE_INTERNAL_ATTR = re.compile('__.*__')


class CJsonError(Exception):
    pass


def memodict(fn):
    """ Fast memoization decorator for a function taking a single argument """
    class memodict(dict):
        def __missing__(self, key):
            ret = self[key] = fn(key)
            return ret
    return memodict().__getitem__


@memodict
def child_attrs_of(klass):
    """
    Given a Node class, get a set of child attrs.
    Memoized to avoid highly repetitive string manipulation
    """
    non_child_attrs = set(klass.attr_names)
    all_attrs = set([i for i in klass.__slots__ if not RE_INTERNAL_ATTR.match(i)])
    return all_attrs - non_child_attrs


def to_dict(node):
    """ Recursively convert an ast into dict representation. """
    klass = node.__class__

    result = {}

    # Metadata
    result['_nodetype'] = klass.__name__

    # Local node attributes
    for attr in klass.attr_names:
        result[attr] = getattr(node, attr)

    # Coord object
    if node.coord:
        result['coord'] = str(node.coord)
    else:
        result['coord'] = None

    # Child attributes
    for child_name, child in node.children():
        # Child strings are either simple (e.g. 'value') or arrays (e.g. 'block_items[1]')
        match = RE_CHILD_ARRAY.match(child_name)
        if match:
            array_name, array_index = match.groups()
            array_index = int(array_index)
            # arrays come in order, so we verify and append.
            result[array_name] = result.get(array_name, [])
            if array_index != len(result[array_name]):
                raise CJsonError('Internal ast error. Array {} out of order. '
                    'Expected index {}, got {}'.format(
                    array_name, len(result[array_name]), array_index))
            result[array_name].append(to_dict(child))
        else:
            result[child_name] = to_dict(child)

    # Any child attributes that were missing need "None" values in the json.
    for child_attr in child_attrs_of(klass):
        if child_attr not in result:
            result[child_attr] = None

    return result


def to_json(node, **kwargs):
    """ Convert ast node to json string """
    return json.dumps(to_dict(node), **kwargs)


def file_to_dict(filename):
    """ Load C file into dict representation of ast """
    ast = parse_file(filename)
    return to_dict(ast)


def file_to_json(filename, **kwargs):
    """ Load C file into json string representation of ast """
    ast = parse_file(filename, use_cpp=True)
    return to_json(ast, **kwargs)


def _parse_coord(coord_str):
    """ Parse coord string (file:line[:column]) into Coord object. """
    if coord_str is None:
        return None

    vals = coord_str.split(':')
    vals.extend([None] * 3)
    filename, line, column = vals[:3]
    return Coord(filename, line, column)


def _convert_to_obj(value):
    """
    Convert an object in the dict representation into an object.
    Note: Mutually recursive with from_dict.
    """
    value_type = type(value)
    if value_type == dict:
        return from_dict(value)
    elif value_type == list:
        return [_convert_to_obj(item) for item in value]
    else:
        # String
        return value


def from_dict(node_dict):
    """ Recursively build an ast from dict representation """
    class_name = node_dict.pop('_nodetype')

    klass = getattr(c_ast, class_name)

    # Create a new dict containing the key-value pairs which we can pass
    # to node constructors.
    objs = {}
    for key, value in node_dict.items():
        if key == 'coord':
            objs[key] = _parse_coord(value)
        else:
            objs[key] = _convert_to_obj(value)

    # Use keyword parameters, which works thanks to beautifully consistent
    # ast Node initializers.
    return klass(**objs)


def from_json(ast_json):
    """ Build an ast from json string representation """
    return from_dict(json.loads(ast_json))


#------------------------------------------------------------------------------

  

In [341]:

ast_dict = file_to_dict("test.c")
ast = from_dict(ast_dict)
cjson=to_json(ast, sort_keys=True, indent=4)

In [342]:
dic=json.loads(cjson)

In [119]:
dic['ext'][0]['body']['block_items']

[{'_nodetype': 'Decl',
  'bitsize': None,
  'coord': 'test.c:2:7',
  'funcspec': [],
  'init': None,
  'name': 'p',
  'quals': [],
  'storage': [],
  'type': {'_nodetype': 'PtrDecl',
   'coord': 'test.c:2:7',
   'quals': [],
   'type': {'_nodetype': 'TypeDecl',
    'coord': 'test.c:2:8',
    'declname': 'p',
    'quals': [],
    'type': {'_nodetype': 'IdentifierType',
     'coord': 'test.c:2:3',
     'names': ['int']}}}},
 {'_nodetype': 'Decl',
  'bitsize': None,
  'coord': 'test.c:2:10',
  'funcspec': [],
  'init': None,
  'name': 'q',
  'quals': [],
  'storage': [],
  'type': {'_nodetype': 'PtrDecl',
   'coord': 'test.c:2:10',
   'quals': [],
   'type': {'_nodetype': 'TypeDecl',
    'coord': 'test.c:2:11',
    'declname': 'q',
    'quals': [],
    'type': {'_nodetype': 'IdentifierType',
     'coord': 'test.c:2:3',
     'names': ['int']}}}},
 {'_nodetype': 'Decl',
  'bitsize': None,
  'coord': 'test.c:2:13',
  'funcspec': [],
  'init': {'_nodetype': 'Constant',
   'coord': 'test.c:2:1

In [27]:
d=from_json(cjson)

In [16]:

text = ''.join(sys.stdin.readlines()) # 読み込む

parser = c_parser.CParser() # パーサ
ast = parser.parse(text, filename='test.c') # パースする

generator = c_generator.CGenerator() # 生成器
print(generator.visit(ast)) # 単にパースしたものを書き出すだけ






In [10]:
from pycparser import parse_file, c_parser, c_generator, c_ast

In [23]:
ast = parse_file("test.c")
generator = c_generator.CGenerator()
print(generator.visit(ast))

int test(char *data, int size)
{
  int *p;
  int *q;
  int num = 0;
  for (int i = 0; i < size; i++)
  {
    if (!isdigit(*(data + i)))
      return 0;

    num = ((num * 10) + (*(data + i))) - '0';
  }

  p = malloc(4);
  if (num < 5)
    q = p;
  else
    q = malloc(4);

  *q = 1;
  free(q);
  return 0;
}

int LLVMFuzzerTestOneInput(char *data, int size)
{
  test(data, size);
  return 0;
}




In [88]:
def genif(filename,line,col,optype,opcode,operand,obj):
    return {'_nodetype': 'If',
            'cond': {'_nodetype': optype,
                     'coord': f'{filename}:{line}:{4+col}',
                     'left': {'_nodetype': 'ID', 'coord': f'{filename}:{line}:{4+col}', 'name': operand[0]},
                     'op': opcode,
                     'right': {'_nodetype': 'Constant',
                               'coord': f'{filename}:{line}:{3+col+len(operand[0])+len(opcode)}',
                               'type': 'int',
                               'value': f'{operand[1]}'}},
            'coord': f'{filename}:{line}:{col}',
            'iffalse': None,
            'iftrue': {'_nodetype': 'FuncCall',
                       'args': {'_nodetype': 'ExprList',
                                 'coord': f'{filename}:{line}:{10+col+len(operand[0])+len(opcode)+len(str(operand[1]))}',
                                 'exprs': [{'_nodetype': 'ID', 'coord': f'{filename}:{line}:{10+col+len(operand[0])+len(opcode)+len(str(operand[1]))}', 'name': obj}]},
                       'coord': f'{filename}:{line}:{5+col+len(operand[0])+len(opcode)+len(str(operand[1]))}',
                       'name': {'_nodetype': 'ID', 'coord': f'{filename}:{line}:{5+col+len(operand[0])+len(opcode)+len(str(operand[1]))}', 'name': 'free'}}}

In [None]:
def localization(dic,filename,line,col):
    

In [327]:
conif=genif(0,0,0,'BinaryOp','>=',["num",5],"p")

In [328]:
conif

{'_nodetype': 'If',
 'cond': {'_nodetype': 'BinaryOp',
  'coord': '0:0:4',
  'left': {'_nodetype': 'ID', 'coord': '0:0:4', 'name': 'num'},
  'op': '>=',
  'right': {'_nodetype': 'Constant',
   'coord': '0:0:8',
   'type': 'int',
   'value': '5'}},
 'coord': '0:0:0',
 'iffalse': None,
 'iftrue': {'_nodetype': 'FuncCall',
  'args': {'_nodetype': 'ExprList',
   'coord': '0:0:16',
   'exprs': [{'_nodetype': 'ID', 'coord': '0:0:16', 'name': 'p'}]},
  'coord': '0:0:11',
  'name': {'_nodetype': 'ID', 'coord': '0:0:11', 'name': 'free'}}}

In [157]:
dic=json.loads(cjson)

In [158]:
dic['ext'][0]['body']['block_items'].insert(-2,conif)

In [316]:
ast1=from_dict(dic)

In [167]:
dic['ext'][1]["coord"]

'test.c:19:5'

In [168]:
dic

{'_nodetype': 'FileAST',
 'coord': None,
 'ext': [{'_nodetype': 'FuncDef',
   'body': {'_nodetype': 'Compound',
    'block_items': [{'_nodetype': 'Decl',
      'bitsize': None,
      'coord': 'test.c:2:7',
      'funcspec': [],
      'init': None,
      'name': 'p',
      'quals': [],
      'storage': [],
      'type': {'_nodetype': 'PtrDecl',
       'coord': 'test.c:2:7',
       'quals': [],
       'type': {'_nodetype': 'TypeDecl',
        'coord': 'test.c:2:8',
        'declname': 'p',
        'quals': [],
        'type': {'_nodetype': 'IdentifierType',
         'coord': 'test.c:2:3',
         'names': ['int']}}}},
     {'_nodetype': 'Decl',
      'bitsize': None,
      'coord': 'test.c:2:10',
      'funcspec': [],
      'init': None,
      'name': 'q',
      'quals': [],
      'storage': [],
      'type': {'_nodetype': 'PtrDecl',
       'coord': 'test.c:2:10',
       'quals': [],
       'type': {'_nodetype': 'TypeDecl',
        'coord': 'test.c:2:11',
        'declname': 'q',
      

In [172]:
def loc_return(dic,alloc_line):
    func=-1
    for f in dic['ext']:
        if int(f["coord"].split(":")[1]) > alloc_line:
            break
        else:
            func+=1
    
    if func == -1:
        print(f"loc_return: alloc_line = {alloc_line}, func = -1")
        return -1
    
    if dic['ext'][0]['body']['block_items'][-1]["_nodetype"] == 'Return':
        return {"func":func, "loc":-2}
    else:
        return {"func":func, "loc":-1}


In [173]:
loc_return(dic,12)

{'func': 0, 'loc': -2}

In [None]:
def loc_reach(dic,reach_line,reach_col):
    func=-1
    


In [344]:
class Localization:
    def __init__(self,dic,alloc_line,alloc_col,reach_line,reach_col):
        self.dic=dic
        self.alline=alloc_line
        self.alcol=alloc_col
        self.reline=reach_line
        self.recol=reach_col
        self.l=None
    
    def loc(self):
        if self.reline==-1:
            # infer did not detect the memory error
            self.loc_return()
        else:
            # unreachable location given by infer
            self.loc_reach()
        return self.dic,self.l

    def loc_return(self):#localize the return
        func=-1
        for f in self.dic['ext']:
            if int(f["coord"].split(":")[1]) > self.alline:
                break
            else:
                func+=1

        if func == -1:
            print(f"loc_return: alloc_line = {self.alline}, func = -1")
            sys.exit(1)
            
        self.dic=self.dic['ext'][func]['body']['block_items']
        if self.dic[-1]["_nodetype"] == 'Return':
            self.l=-1
        else:
            self.l=len(self.dic)
        
    def loc_reach(self):#localize the unreachable location given by infer
        func=-1
        for f in self.dic['ext']:
            if int(f["coord"].split(":")[1]) > self.reline:
                break
            else:
                func+=1

        if func == -1:
            print(f"loc_reach: reach_line = {self.reline}, func = -1")
            sys.exit(1)
        
        self.dic=self.dic['ext'][func]['body']['block_items']
        i=-1
        for b in self.dic:
            loc=b["coord"].split(":")
            if int(loc[1]) > self.reline or (int(loc[1]) == self.reline and int(loc[2]) > self.recol):
                break
            else:
                i+=1
                
        self.l=i
        self.check_type()
        
    def check_type(self):#block and block index
        if self.dic[self.l]["_nodetype"] == "If":
            self.dic=self.dic[self.l]
            self.if_spec()# {"type":"If","node":{"type": , ...}}
        elif self.dic[self.l]["_nodetype"] == "While":
            self.dic=self.dic[self.l]
            self.while_spec()
        elif self.dic[self.l]["_nodetype"] == "For":
            self.dic=self.dic[self.l]
            self.for_spec()
        else:
            return
        
    
    def if_spec(self):
        if self.dic["iftrue"]==None:
            tf="iffalse"
        elif self.dic["iffalse"]==None:
            tf="iftrue"
        else:
            if self.reline < int(self.dic["iffalse"]["coord"].split(":")[1]):
                tf="iftrue"
            elif self.reline > int(self.dic["iffalse"]["coord"].split(":")[1]):
                tf="iffalse"
            else:
                if self.recol < int(self.dic["iffalse"]["coord"].split(":")[2]):
                    tf="iftrue"
                elif self.reline > int(self.dic["iffalse"]["coord"].split(":")[2]):
                    tf="iffalse"
                else:
                    print(f"if_spec: reach_col={self.recol}, has the same location with \"else\"")
                    sys.exit(1)
        
        self.dic=self.dic[tf]
        if self.dic["_nodetype"] == "If":
            self.if_spec()
            return
        
        self.dic=self.dic["block_items"]
        i=-1
        for b in self.dic:
            loc=b["coord"].split(":")
            if int(loc[1]) > self.reline or (int(loc[1]) == self.reline and int(loc[2]) > self.recol):
                break
            else:
                i+=1
                
        self.l=i
        self.check_type()
    
    def while_spec(self):
        pass
    def for_spec(self):
        pass

In [348]:
class Localization:
    def __init__(self,dic,line,col):
        self.dic=dic
        self.reline=line
        self.recol=col
        self.l=None
    
    def loc(self,flag):
        if flag:
            # infer did not detect the memory error
            self.loc_return()
        else:
            # unreachable location given by infer
            self.loc_reach()
        return self.dic,self.l
    
    def loc_return(self):#localize the return
        func=-1
        for f in self.dic['ext']:
            if int(f["coord"].split(":")[1]) > self.reline:
                break
            else:
                func+=1

        if func == -1:
            print(f"loc_return: alloc_line = {self.reline}, func = -1")
            sys.exit(1)
            
        self.dic=self.dic['ext'][func]['body']['block_items']
        if self.dic[-1]["_nodetype"] == 'Return':
            self.l=-1
        else:
            self.l=len(self.dic)
        
    def loc_reach(self):#localize the unreachable location given by infer
        func=-1
        for f in self.dic['ext']:
            if int(f["coord"].split(":")[1]) > self.reline:
                break
            else:
                func+=1

        if func == -1:
            print(f"loc_reach: reach_line = {self.reline}, func = -1")
            sys.exit(1)
        
        self.dic=self.dic['ext'][func]['body']['block_items']
        i=-1
        for b in self.dic:
            loc=b["coord"].split(":")
            if int(loc[1]) > self.reline or (int(loc[1]) == self.reline and int(loc[2]) > self.recol):
                break
            else:
                i+=1
                
        self.l=i
        self.check_type()
        
    def check_type(self):#block and block index
        if self.dic[self.l]["_nodetype"] == "If":
            self.dic=self.dic[self.l]
            self.if_spec()# {"type":"If","node":{"type": , ...}}
        elif self.dic[self.l]["_nodetype"] == "While":
            self.dic=self.dic[self.l]
            self.while_spec()
        elif self.dic[self.l]["_nodetype"] == "For":
            self.dic=self.dic[self.l]
            self.for_spec()
        else:
            return
        
    
    def if_spec(self):
        if self.dic["iftrue"]==None:
            tf="iffalse"
        elif self.dic["iffalse"]==None:
            tf="iftrue"
        else:
            if self.reline < int(self.dic["iffalse"]["coord"].split(":")[1]):
                tf="iftrue"
            elif self.reline > int(self.dic["iffalse"]["coord"].split(":")[1]):
                tf="iffalse"
            else:
                if self.recol < int(self.dic["iffalse"]["coord"].split(":")[2]):
                    tf="iftrue"
                elif self.reline > int(self.dic["iffalse"]["coord"].split(":")[2]):
                    tf="iffalse"
                else:
                    print(f"if_spec: reach_col={self.recol}, has the same location with \"else\"")
                    sys.exit(1)
        
        self.dic=self.dic[tf]
        if self.dic["_nodetype"] == "If":
            self.if_spec()
            return
        
        self.dic=self.dic["block_items"]
        i=-1
        for b in self.dic:
            loc=b["coord"].split(":")
            if int(loc[1]) > self.reline or (int(loc[1]) == self.reline and int(loc[2]) > self.recol):
                break
            else:
                i+=1
                
        self.l=i
        self.check_type()
    
    def while_spec(self):
        pass
    def for_spec(self):
        pass

In [351]:
cur,loc=Localization(dic,12,5).loc(0)

In [352]:
cur[loc]

{'_nodetype': 'Assignment',
 'coord': 'test.c:12:5',
 'lvalue': {'_nodetype': 'ID', 'coord': 'test.c:12:5', 'name': 'q'},
 'op': '=',
 'rvalue': {'_nodetype': 'FuncCall',
  'args': {'_nodetype': 'ExprList',
   'coord': 'test.c:12:14',
   'exprs': [{'_nodetype': 'Constant',
     'coord': 'test.c:12:14',
     'type': 'int',
     'value': '4'}]},
  'coord': 'test.c:12:7',
  'name': {'_nodetype': 'ID', 'coord': 'test.c:12:7', 'name': 'malloc'}}}

In [353]:
obj=cur[loc]["lvalue"]["name"]

In [354]:
obj

'q'

In [332]:
cur.insert(loc,conif)

In [333]:
ast1=from_dict(dic1)
print(generator.visit(ast1))

void func(void)
{
  if (a < 0)
  {
    z = 3;
  }
  else
    if (b < 0)
  {
    i = 1;
    j = 2;
    k = 3;
  }
  else
  {
    p = 1;
  }


  q = 1;
  r = 3;
  if (num >= 5)
    free(p);

  return 0;
}




In [326]:
dic1['ext'][0]['body']['block_items']

[{'coord': ':4:3',
  'cond': {'op': '<',
   'coord': ':4:6',
   'left': {'name': 'a', 'coord': ':4:6'},
   'right': {'type': 'int', 'value': '0', 'coord': ':4:8'}},
  'iftrue': {'coord': ':4:1',
   'block_items': [{'op': '=',
     'coord': ':5:5',
     'lvalue': {'name': 'z', 'coord': ':5:5'},
     'rvalue': {'type': 'int', 'value': '3', 'coord': ':5:7'}}]},
  'iffalse': {'coord': ':6:9',
   'cond': {'op': '<',
    'coord': ':6:12',
    'left': {'name': 'b', 'coord': ':6:12'},
    'right': {'type': 'int', 'value': '0', 'coord': ':6:14'}},
   'iftrue': {'coord': ':6:1',
    'block_items': [{'op': '=',
      'coord': ':7:5',
      'lvalue': {'name': 'i', 'coord': ':7:5'},
      'rvalue': {'type': 'int', 'value': '1', 'coord': ':7:7'}},
     {'op': '=',
      'coord': ':7:9',
      'lvalue': {'name': 'j', 'coord': ':7:9'},
      'rvalue': {'type': 'int', 'value': '2', 'coord': ':7:11'}},
     {'op': '=',
      'coord': ':7:13',
      'lvalue': {'name': 'k', 'coord': ':7:13'},
      'rvalu

In [318]:
text = r"""
void func(void)
{ 
  if(a<0){
    z=3;
  }else if(b<0){
    i=1;j=2;k=3;
  }else{
    p=1;
  }
  q=1;r=3;return 0;
}
"""

In [319]:
parser = c_parser.CParser() # パーサ
ast = parser.parse(text) # パースする

generator = c_generator.CGenerator() # 生成器
print(generator.visit(ast))

void func(void)
{
  if (a < 0)
  {
    z = 3;
  }
  else
    if (b < 0)
  {
    i = 1;
    j = 2;
    k = 3;
  }
  else
  {
    p = 1;
  }


  q = 1;
  r = 3;
  return 0;
}




In [337]:
dic1=to_dict(ast)

In [210]:
a=[1,2,3]
a.insert(3,12)

In [330]:
dic1

{'_nodetype': 'FileAST',
 'coord': None,
 'ext': [{'_nodetype': 'FuncDef',
   'coord': ':2:6',
   'decl': {'_nodetype': 'Decl',
    'name': 'func',
    'quals': [],
    'storage': [],
    'funcspec': [],
    'coord': ':2:6',
    'type': {'_nodetype': 'FuncDecl',
     'coord': ':2:6',
     'args': {'_nodetype': 'ParamList',
      'coord': ':0:1',
      'params': [{'_nodetype': 'Typename',
        'name': None,
        'quals': [],
        'coord': ':0:1',
        'type': {'_nodetype': 'TypeDecl',
         'declname': None,
         'quals': [],
         'coord': None,
         'type': {'_nodetype': 'IdentifierType',
          'names': ['void'],
          'coord': ':2:11'}}}]},
     'type': {'_nodetype': 'TypeDecl',
      'declname': 'func',
      'quals': [],
      'coord': ':2:6',
      'type': {'_nodetype': 'IdentifierType',
       'names': ['void'],
       'coord': ':2:1'}}},
    'init': None,
    'bitsize': None},
   'body': {'_nodetype': 'Compound',
    'coord': ':3:1',
    'block_

False