# Parsing and rewriting OpenCL using pycparser

In [1]:
from pycparser.plyparser import ParseError
from pycparserext.ext_c_parser import OpenCLCParser
from pycparserext.ext_c_generator import OpenCLCGenerator

**Parse OpenCL source into an AST:**

In [2]:
# Minimal single-kernel program used as the parser input.
text = r"""
__kernel void A(int a, int b) {
  float c = a + b;
}
"""

# Parse the OpenCL source and dump the resulting AST as an indented tree.
parser = OpenCLCParser()
ast = parser.parse(text)
ast.show()

FileAST: 
  FuncDef: 
    Decl: A, [], [], ['__kernel']
      FuncDecl: 
        ParamList: 
          Decl: a, [], [], []
            TypeDecl: a, []
              IdentifierType: ['int']
          Decl: b, [], [], []
            TypeDecl: b, []
              IdentifierType: ['int']
        TypeDecl: A, []
          IdentifierType: ['void']
    Compound: 
      Decl: c, [], [], []
        TypeDecl: c, []
          IdentifierType: ['float']
        BinaryOp: +
          ID: a
          ID: b


**Generate code from AST:**

In [3]:
# Turn the AST from the previous cell back into source text and print it.
generator = OpenCLCGenerator()
print(generator.visit(ast))

__kernel void A(int a, int b)
{
  float c = a + b;
}




**Parsing the GitHub corpus:**

In [4]:
# from labm8 import fs

# files = fs.ls(fs.path("~/data/github"), abspaths=True)
# n1 = len(files)
# print("{n1} GitHub files (preprocessed)".format(**vars()))

# files = fs.ls(fs.path("data/github/ast"), abspaths=True)
# n2 = len(files)
# ratio = n2 / n1
# print("{n2} parsed ASTs ({ratio:.2%})".format(**vars()))

**Parsing CLgen programs:**

In [5]:
# files = fs.ls(fs.path("~/data/synthetic-2017-02-01"), abspaths=True)
# n1 = len(files)
# print("{n1} CLgen files (preprocessed)".format(**vars()))

# files = fs.ls(fs.path("data/synthetic-2017-02-01/ast"), abspaths=True)
# n2 = len(files)
# ratio = n2 / n1
# print("{n2} parsed ASTs ({ratio:.2%})".format(**vars()))

## Pycparser rewriter

In [9]:
from clgen import preprocess
from labm8 import fs

# Our test input: read the whole kernel file into memory.
with open(fs.path("~/data/in/amd-app-sdk-3.0-DCT_Kernels.cl")) as infile:
    src = infile.read()

# Pass the text through clgen's compiler_preprocess_cl and then
# clangformat_ocl before printing it.
# NOTE(review): going by the names these preprocess and then reformat the
# source -- confirm against clgen.preprocess.
src = preprocess.clangformat_ocl(preprocess.compiler_preprocess_cl(src))
print(src)

uint getIdx(uint blockIdx, uint blockIdy, uint localIdx, uint localIdy, uint blockWidth, uint globalWidth) {
  double foo;
  double foo2;
  double foo3;
  uint globalIdx = blockIdx * blockWidth + localIdx;
  uint globalIdy = blockIdy * blockWidth + localIdy;

  return (globalIdy * globalWidth + globalIdx);
}
__kernel void DCT(__global float* output, __global float* input, __global float* dct8x8, __global float* dct8x8_trans, __local float* inter, const uint width, const uint blockWidth, const uint inverse)

{
  uint globalIdx = get_global_id(0);
  uint globalIdy = get_global_id(1);

  uint groupIdx = get_group_id(0);
  uint groupIdy = get_group_id(1);

  uint i = get_local_id(0);
  uint j = get_local_id(1);

  uint idx = globalIdy * width + globalIdx;

  float acc = 0.0f;

  for (uint k = 0; k < blockWidth; k++) {
    uint index1 = j * blockWidth + k;
    uint index2 = getIdx(groupIdx, groupIdy, i, k, blockWidth, width);

    if (inverse)
      acc += dct8x8[index1] * input[index2];
  

In [10]:
# with open(fs.path("~/Inbox/functions.txt")) as infile:
#     builtin_functions = set(x for x in infile.read().split('\n') if x)
# print(len(builtin_functions))

In [17]:
from pycparser import c_parser, c_ast, parse_file
from pycparser.c_ast import FuncDecl, TypeDecl

# Module-level map from an original identifier to its replacement name.
# OpenCLRewriter reads and extends this dict, so its contents persist
# between visits until it is reassigned or cleared.
identifier_rewrites = {}

def next_rewrite(rewrites):
    """Return the next unused replacement identifier.

    Names are issued in spreadsheet-column order: ``A`` .. ``Z``, ``AA`` ..
    ``ZZ``, ``AAA`` and so on. The position in that sequence is the number of
    rewrites already recorded, so the caller must store the returned name in
    ``rewrites`` before asking for another one.

    Args:
        rewrites: Sized collection of the rewrites made so far; only its
            length is used.

    Returns:
        str: A name made up solely of upper-case ASCII letters.
    """
    # Bijective base-26: work 1-based so every digit maps onto A-Z and there
    # is no "zero" digit. A single divmod step is only correct for the first
    # 702 names; beyond that it yields non-letter characters such as '['.
    remaining = len(rewrites) + 1

    letters = []
    while remaining > 0:
        remaining, digit = divmod(remaining - 1, 26)
        letters.append(chr(ord('A') + digit))

    # Digits were produced least-significant first.
    return ''.join(reversed(letters))


class OpenCLRewriter(c_ast.NodeVisitor):
    """AST visitor that renames declared identifiers in place.

    Every named ``TypeDecl`` gets a replacement name, taken from the
    module-level ``identifier_rewrites`` table or freshly allocated with
    ``next_rewrite``. Every ``ID`` whose name is already in the table is
    rewritten to match. Identifiers that were never declared in the visited
    code (for example built-in function names) are left untouched.

    The table is shared module state and is not scoped: the same original
    name always maps to the same replacement.
    """

    def visit_TypeDecl(self, node):
        """Rename the identifier introduced by a declaration.

        Args:
            node: The ``TypeDecl`` node; its ``declname`` is modified in place.
        """
        name = node.declname

        # Abstract declarators (casts, sizeof(type), unnamed prototype
        # parameters) carry no name. Renaming them would burn a name, record
        # a None key in the table and attach an identifier to a node that
        # never had one.
        if name is not None:
            if name in identifier_rewrites:
                new_name = identifier_rewrites[name]
            else:
                new_name = next_rewrite(identifier_rewrites)
                identifier_rewrites[name] = new_name

            print(node.coord, name, '->', new_name)
            node.declname = new_name

        # Defining visit_TypeDecl replaces the default traversal, so descend
        # explicitly. Otherwise declarations nested in the type (members of
        # an inline ``struct { ... } x;``) would never be visited.
        self.generic_visit(node)

    def visit_ID(self, node):
        """Rewrite a use of an identifier that has a recorded replacement.

        Args:
            node: The ``ID`` node; its ``name`` is modified in place.
        """
        if node.name in identifier_rewrites:
            node.name = identifier_rewrites[node.name]

# Fresh parser / generator pair for the rewriting demonstration.
parser = OpenCLCParser()
generator = OpenCLCGenerator()
            
# Parse the preprocessed test input and rename its identifiers in place;
# the visitor prints one "coord name -> new_name" line per declaration.
ast = parser.parse(src)
v = OpenCLRewriter()
print("Rewrites\n========")
v.visit(ast)
# Regenerate source from the modified AST and run it through clangformat_ocl
# before printing.
print("\nOutput\n======\n", preprocess.clangformat_ocl(generator.visit(ast)), sep="")

Rewrites
:1 blockIdx -> A
:1 blockIdy -> B
:1 localIdx -> C
:1 localIdy -> D
:1 blockWidth -> E
:1 globalWidth -> F
:1 getIdx -> G
:2 foo -> H
:3 foo2 -> I
:4 foo3 -> J
:5 globalIdx -> K
:6 globalIdy -> L
:10 output -> M
:10 input -> N
:10 dct8x8 -> O
:10 dct8x8_trans -> P
:10 inter -> Q
:10 width -> R
:10 blockWidth -> E
:10 inverse -> S
:10 DCT -> T
:13 globalIdx -> K
:14 globalIdy -> L
:16 groupIdx -> U
:17 groupIdy -> V
:19 i -> W
:20 j -> X
:22 idx -> Y
:24 acc -> Z
:26 k -> AA
:27 index1 -> AB
:28 index2 -> AC
:41 k -> AA
:42 index1 -> AB
:43 index2 -> AC

Output
uint G(uint A, uint B, uint C, uint D, uint E, uint F) {
  double H;
  double I;
  double J;
  uint K = (A * E) + C;
  uint L = (B * E) + D;
  return (L * F) + K;
}

__kernel void T(__global float* M, __global float* N, __global float* O, __global float* P, __local float* Q, const uint R, const uint E, const uint S) {
  uint K = get_global_id(0);
  uint L = get_global_id(1);
  uint U = get_group_id(0);
  uint V = get_gro

In [13]:
from labm8 import fs
from pycparser.c_parser import ParseError

# Directory that is listed for input files, and the directory that rewrite()
# writes its results into; the output directory is created up front.
indir = "~/data/minigh"
outdir = "~/data/minigh-pp"
fs.mkdir(outdir)

# Keep only the entries of the input directory whose name ends in ".cl".
files = [x for x in fs.ls(fs.path(indir), abspaths=True) if x.endswith(".cl")]

FileNotFoundError: [Errno 2] No such file or directory: '/home/cec/data/minigh'

In [221]:
import time

def rewrite(inpath):
    """Rewrite the identifiers of one OpenCL file and save the result.

    The file is preprocessed, parsed, passed through ``OpenCLRewriter`` and
    the regenerated source is written to a file of the same base name inside
    ``outdir``. A file that fails to parse is reported on stdout and skipped;
    no output file is created for it.

    Args:
        inpath: Path of the OpenCL source file to rewrite.

    Returns:
        None.
    """
    name = fs.basename(inpath)
    outpath = fs.path(outdir, name)

    # Read and close the input before doing any of the slower work.
    with open(inpath) as infile:
        src = infile.read()

    # Go through the imported ``preprocess`` module: the bare function name
    # is not bound anywhere in this notebook and raises NameError on a
    # freshly started kernel.
    src = preprocess.compiler_preprocess_cl(src)

    # Start every file from an empty table so its names begin at "A" again.
    # Without this the output depends on which files the same worker process
    # happened to handle earlier.
    identifier_rewrites.clear()

    try:
        ast = OpenCLCParser().parse(src)
        OpenCLRewriter().visit(ast)
        # Generate before opening the output so that a failure cannot leave
        # an empty file behind.
        code = OpenCLCGenerator().visit(ast)
    except ParseError as e:
        # Best effort: report the unparsable file and carry on with the rest.
        print("error", name, e)
        return

    with open(outpath, "w") as outfile:
        print(code, file=outfile)
            
start = time.time()

from multiprocessing import Pool
# Spread the files over five worker processes. rewrite() returns nothing,
# so the list produced by map() is discarded.
with Pool(5) as p:
    p.map(rewrite, files)
end = time.time()

# Fixed-point with two decimals. A bare ".2" precision means two significant
# digits, which prints e.g. 12.3 seconds as "1.2e+01s".
print("elapsed: {:.2f}s".format(end-start))

error https-api-github-com-repos-OpenCL-GEGL-OpenCL-old-git-blobs-ce8aa4014f35f94d5a5840a2c0f16c961b644096.cl :30:27: before: ;
error https-api-github-com-repos-HSAFoundation-HSA-Debugger-GDB-Source-AMD-git-blobs-55b6e103e5488ff315ca8850a28c76434a9c4909.cl :134:32: before: {
error https-api-github-com-repos-PRiME-project-PRiMEStereoMatch-git-blobs-fa57aac0edcb56fb73817e018fc1cbdf215ee604.cl :17:25: before: ]
error https-api-github-com-repos-8l-kalmar-git-blobs-597c41017388a0814853c4bfeb9119b5cffa9982.cl :176:17: before: {
error https-api-github-com-repos-IntelLabs-iHRC-git-blobs-83a854f2a4f431c5a74af692174540dcd5efbffb.cl :1:20: before: read_only
error https-api-github-com-repos-8l-kalmar-git-blobs-71bd40ad5d2203ecabf4c38e5c6ab04b97e0817e.cl :42:1: before: }
error https-api-github-com-repos-RadeonOpenCompute-hcc-git-blobs-a3717a0a2e9a8cc572f0545e5c78421e4a97eb6b.cl :42:1: before: }
error https-api-github-com-repos-SirGargamel-DICOpenCL-git-blobs-141a84e62062d812608eee505d085daa57b08b2c