In [1]:
import pickle

from labm8 import fs
from pycparser.plyparser import ParseError
from pycparserext.ext_c_parser import OpenCLCParser
from pycparserext.ext_c_generator import OpenCLCGenerator

**Parse OpenCL AST:**

In [2]:
# Minimal OpenCL kernel used as the demonstration input for the parser.
text = r"""
__kernel void A(int a, int b) {
  float c = a + b;
}
"""

# Parse the kernel source into an AST. Both `parser` and `ast` are left as
# notebook-level names; the code-generation cell below reads `ast`.
parser = OpenCLCParser()
ast = parser.parse(text)
# Print the node hierarchy of the parsed tree (see the recorded output).
ast.show()

FileAST: 
  FuncDef: 
    Decl: A, [], [], ['__kernel']
      FuncDecl: 
        ParamList: 
          Decl: a, [], [], []
            TypeDecl: a, []
              IdentifierType: ['int']
          Decl: b, [], [], []
            TypeDecl: b, []
              IdentifierType: ['int']
        TypeDecl: A, []
          IdentifierType: ['void']
    Compound: 
      Decl: c, [], [], []
        TypeDecl: c, []
          IdentifierType: ['float']
        BinaryOp: +
          ID: a
          ID: b


**Generate code from AST:**

In [3]:
# Turn the AST back into OpenCL source text and print it.
# Relies on `ast` having been produced by the parsing cell above.
generator = OpenCLCGenerator()
print(generator.visit(ast))

__kernel void A(int a, int b)
{
  float c = a + b;
}




**Parsing the GitHub corpus:**

In [4]:
# Report how many preprocessed GitHub files exist and how many parsed ASTs
# have been written for them, as a count and a percentage.
#
# NOTE(review): the source listing uses a "~/data/..." path while the AST
# listing uses a relative "data/..." path, so the second one depends on the
# notebook's working directory — confirm both point where intended.
files = fs.ls(fs.path("~/data/github"), abspaths=True)
n1 = len(files)
# Pass the value explicitly instead of unpacking the whole namespace via
# vars(); the rendered text is identical.
print("{n1} GitHub files (preprocessed)".format(n1=n1))

files = fs.ls(fs.path("data/github/ast"), abspaths=True)
n2 = len(files)
# Guard the division: an empty source directory would otherwise raise
# ZeroDivisionError instead of reporting the counts.
ratio = n2 / n1 if n1 else 0.0
print("{n2} parsed ASTs ({ratio:.2%})".format(n2=n2, ratio=ratio))

5390 GitHub files (preprocessed)
4380 parsed ASTs (81.26%)


**Parsing CLgen programs:**

In [5]:
# Report how many preprocessed CLgen (synthetic) files exist and how many
# parsed ASTs have been written for them, as a count and a percentage.
#
# NOTE(review): the source listing uses a "~/data/..." path while the AST
# listing uses a relative "data/..." path, so the second one depends on the
# notebook's working directory — confirm both point where intended.
files = fs.ls(fs.path("~/data/synthetic-2017-02-01"), abspaths=True)
n1 = len(files)
# Pass the value explicitly instead of unpacking the whole namespace via
# vars(); the rendered text is identical.
print("{n1} CLgen files (preprocessed)".format(n1=n1))

files = fs.ls(fs.path("data/synthetic-2017-02-01/ast"), abspaths=True)
n2 = len(files)
# Guard the division: an empty source directory would otherwise raise
# ZeroDivisionError instead of reporting the counts.
ratio = n2 / n1 if n1 else 0.0
# NOTE(review): with two decimal places a small but non-zero ratio is shown
# as 0.00% (the recorded run has 35 ASTs out of 861339 files); consider a
# higher precision if that distinction matters.
print("{n2} parsed ASTs ({ratio:.2%})".format(n2=n2, ratio=ratio))

861339 CLgen files (preprocessed)
35 parsed ASTs (0.00%)
