In [1]:
import pandas as pd
import subprocess
from lxml import etree
from io import StringIO
import sys
import difflib
import os
import ctypes
import stat
from pylibsrcml import srcml

# source= '../data/projects/contiki-2.4/apps/ftp'
# xml = "myprj.xml"

# # Translate from a source-code file to a srcML file
# srcml.srcml(source, xml)

def src2xml(src):
    """Run the ``srcml`` command-line tool on ``src`` and return its output.

    The command executed is ``srcml --xpath=//src:function <src>``, i.e. the
    XPath query ``//src:function`` is applied by srcml itself.

    Args:
        src: path of a source file or directory, passed to ``srcml`` verbatim.

    Returns:
        str: everything ``srcml`` wrote to stdout, decoded as text. The exit
        status is not inspected, so a failed run returns whatever was written
        (possibly an empty string).

    Raises:
        FileNotFoundError: if the ``srcml`` executable cannot be found.
    """
    src2xml_cmd = ["srcml", "--xpath=//src:function", src]
    # subprocess.run waits for the child to exit and closes the pipe; the
    # earlier bare Popen + stdout.read() never reaped the process nor closed
    # its stdout handle.
    completed = subprocess.run(src2xml_cmd, stdout=subprocess.PIPE, text=True)
    return completed.stdout

def xpath_on_tree(the_tree, xpath_query):
    """Evaluate ``xpath_query`` against ``the_tree`` with the srcML namespace.

    The prefix ``src`` is bound to ``http://www.srcML.org/srcML/src`` for the
    query. If evaluation raises ``etree.XPathEvalError`` the error is printed
    and ``None`` is returned instead of the query result.
    """
    srcml_namespaces = {'src': 'http://www.srcML.org/srcML/src'}
    try:
        result = the_tree.xpath(xpath_query, namespaces=srcml_namespaces)
    except etree.XPathEvalError as err:
        print(err)
        result = None
    return result

def function_tree2source(fun_tree):
    """Return the text content of a single srcML function element.

    The element is serialized, wrapped in a pair of nested ``<unit>``
    elements (an extracted function element comes without that envelope),
    re-parsed, and the string value of ``//src:function`` in the rebuilt
    document is returned via ``xpath_on_tree``.
    """
    # Opening part of the envelope: XML declaration plus the two <unit> tags.
    prologue = """<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
    <unit xmlns="http://www.srcML.org/srcML/src" revision="1.0.0" url="prj">
    <unit revision="1.0.0" filename="prj" item="1">
    """
    # Closing tags matching the two <unit> elements opened above.
    epilogue = """</unit>
    </unit>"""

    serialized = etree.tostring(fun_tree, pretty_print=True, encoding='unicode')
    document = ''.join((prologue, serialized, epilogue))
    wrapped_root = etree.fromstring(document.encode('utf-8'))
    return xpath_on_tree(wrapped_root, 'string(//src:function)')

def extract_functions_from_srcML(tree):
    """Return the source text of every function element in a srcML tree.

    Args:
        tree: parsed srcML tree; it is queried with ``//src:function``
            through ``xpath_on_tree``.

    Returns:
        list[str]: one entry per matched function element, in the order the
        query returned them, each produced by ``function_tree2source``. The
        list is empty when nothing matches or when the query itself fails.
    """
    # xpath_on_tree yields None on an XPath evaluation error; treat that the
    # same as "no functions" instead of failing on iteration over None.
    fun_trees = xpath_on_tree(tree, '//src:function') or []
    # The previous fallback returned [head + tail], but neither name exists in
    # this scope, so a tree without functions raised NameError.
    return [function_tree2source(fun_tree) for fun_tree in fun_trees]

def write_functions_file(file, functions):
    """Write every item of ``functions`` to ``file``, each followed by a blank line.

    ``file`` is opened in text write mode, so existing content is replaced.
    """
    with open(file, 'w') as out:
        out.writelines("%s\n\n" % fn_source for fn_source in functions)

def src2src_functions(src):
    """Extract the source text of the functions found in ``src``.

    ``src`` (a source file or directory) is converted with ``src2xml``, the
    resulting XML text is parsed, and the parsed tree is handed to
    ``extract_functions_from_srcML``. Any exception raised along the way is
    printed and ``None`` is returned in place of the list.
    """
    functions = None
    try:
        xml_text = src2xml(src)
        root = etree.fromstring(xml_text.encode('utf-8'))
        functions = extract_functions_from_srcML(root)
    except Exception as err:
        print(err)
    return functions

In [4]:
# Input file; the path is relative to the notebook's working directory.
src_file = "../data/projects/contiki-2.4/apps/ftp/ftpc.c"

# Extract the source text of each function in src_file. src2src_functions
# prints the error and returns None if any step of the extraction fails.
funs = src2src_functions(src=src_file)
# write_functions_file('functions.txt', funs)
# Last expression of the cell: shown as the cell output.
funs

['void\nftpc_init(void)\n{\n  memb_init(&connections);\n  /*  tcp_listen(HTONS(DATAPORT));*/\n}',
 'void *\nftpc_connect(u16_t *ipaddr, u16_t port)\n{\n  struct ftp_connection *c;\n\n  c = (struct ftp_connection *)memb_alloc(&connections);\n  if(c == NULL) {\n    return NULL;\n  }\n  c->type = TYPE_CONTROL;\n  c->state = STATE_INITIAL;\n  c->connected_confirmed = 0;\n  c->codeptr = 0;\n  c->dataconn.type = TYPE_DATA;\n  c->dataconn.port = DATAPORT;\n  tcp_listen(HTONS(DATAPORT));\n\n  if(tcp_connect((uip_ipaddr_t *)ipaddr, port, c) == NULL) {\n    memb_free(&connections, c);\n    return NULL;\n  }\n\n  return c;\n}',
 'static void\nhandle_input(struct ftp_connection *c)\n{\n  int code;\n\n  code = (c->code[0] - \'0\') * 100 +\n    (c->code[1] - \'0\') * 10 +\n    (c->code[2] - \'0\');\n  /*  printf("Handle input code %d state %d\\n", code, c->state);*/\n\n  if(c->state == STATE_INITIAL) {\n    if(code == 220) {\n      c->state = STATE_SEND_USER;\n    }\n  } else if(c->state == STATE_US

In [2]:
# NOTE(review): looks like a scratch cell — `s` is not used by any other cell
# shown here; confirm whether it can be removed.
s = 'a'
len(s)

1