In [1]:
import sys
sys.path.append('..')

import re
import pandas as pd
import seutil as su
from exli.macros import Macros
from exli.util import Util

In [2]:
# list of projects
projects_used_sorted = sorted(
    [p for p in Util.get_project_names_list() if p not in Util.get_excluded_projects()],
    key=lambda p: p.lower(),
)
projects_excluded_sorted = sorted(
    Macros.project_with_timeout + Macros.projects_with_jacoco_exception
)
projects_no_mutant = ["liquibase_liquibase-oracle"]

# tool (names used in experiments) to macro (names used in paper) mapping
tool2macro = {
    "unit": "dev",
    "randoop": "randoop",
    "evosuite": "evosuite",
    "all": "r0",
    "baseline": "r0",
    "reduced": "r1",
    "r2-universalmutator": "r2-um",
    "r2-major": "r2-major",
}

In [3]:
# util to convert file path to fully qualified class name
def path2cname(path: str):
    if "src/main/java/" in path:
        path = path.split("src/main/java/")[1]
    elif "src/" in path:
        path = path.split("src/")[1]
    if path.endswith(".java"):
        path = path[:-5]
    return path.replace("/", ".")

In [7]:
# load target statements
df_stmts = pd.DataFrame(su.io.load(Macros.results_dir / "teco-target-statements.json"))
# statement key: proj:cname:line
df_stmts["stmt_key"] = (
    df_stmts["project"]
    + ":"
    + df_stmts["filename"].apply(lambda p: path2cname(p))
    + ":"
    + df_stmts["line_number"].astype(str)
)


In [5]:
df_stmts.sample(10, random_state=42)


Unnamed: 0,evosuite_inst_count,evosuite_method_count,evosuite_method_covered,evosuite_stmt_covered,filename,line_number,project,randoop_inst_count,randoop_method_count,randoop_method_covered,randoop_stmt_covered,type,unit_inst_count,unit_method_count,unit_method_covered,unit_stmt_covered,stmt_key
309,0,0,False,False,/home/liuyu/projects/inlinegen-research/_downl...,2336,hyperledger_fabric-sdk-java,0,0,False,False,string,0,1,True,False,hyperledger_fabric-sdk-java:org.hyperledger.fa...
1041,0,1,True,False,/home/liuyu/projects/inlinegen-research/_downl...,1260,jkuhnert_ognl,0,0,False,False,bit,7,1,True,True,jkuhnert_ognl:ognl.OgnlParserTokenManager:1260
381,0,1,True,False,/home/liuyu/projects/inlinegen-research/_downl...,145,medcl_elasticsearch-analysis-pinyin,0,0,False,False,bit,11,1,True,True,medcl_elasticsearch-analysis-pinyin:org.elasti...
497,6,1,True,True,/home/liuyu/projects/inlinegen-research/_downl...,155,mpatric_mp3agic,6,1,True,True,bit,6,1,True,True,mpatric_mp3agic:com.mpatric.mp3agic.AbstractID...
551,5,1,True,True,/home/liuyu/projects/inlinegen-research/_downl...,946,ralscha_extclassgenerator,0,1,True,False,string,5,1,True,True,ralscha_extclassgenerator:ch.rasc.extclassgene...
832,3,1,True,True,/home/liuyu/projects/inlinegen-research/_downl...,80,wmixvideo_nfe,3,1,True,True,regex,3,1,True,True,wmixvideo_nfe:com.fincatto.documentofiscal.val...
96,9,1,True,True,/home/liuyu/projects/inlinegen-research/_downl...,200,Asana_java-asana,9,1,True,True,string,0,0,False,False,Asana_java-asana:com.asana.resources.Tasks:200
289,5,1,True,True,/home/liuyu/projects/inlinegen-research/_downl...,55,craftercms_core,0,1,True,False,string,5,1,True,True,craftercms_core:org.craftercms.core.url.impl.E...
949,0,1,True,False,/home/liuyu/projects/inlinegen-research/_downl...,394,jkuhnert_ognl,0,1,True,False,bit,6,1,True,True,jkuhnert_ognl:ognl.OgnlParserTokenManager:394
101,9,1,True,True,/home/liuyu/projects/inlinegen-research/_downl...,262,Asana_java-asana,9,1,True,True,string,0,0,False,False,Asana_java-asana:com.asana.resources.Tasks:262


In [10]:
# load projects' shas
data_projects = su.io.load(
    Macros.data_dir / "teco-projects.json"
)
proj2sha = {p["full_name"]: p["sha"] for p in data_projects}

re_itest = re.compile(
    r"\S*new Here\(\"(?P<source>\w+)\", (?P<lineno>\d+)\)(?P<givens>(\.given\([^)]*?\))*)(?P<checks>(\.check\w+\([^)]*?\)+));"
)

# load source code
data_src_stmts = []
for proj in projects_used_sorted:
    itest_type = "R2"
    dir = Macros.project_dir / f"{itest_type}-tests" / f"{proj}-{proj2sha[proj][:7]}"
    for file in dir.glob("*.java"):
        lines = su.io.load(file, su.io.fmts.txt_list)

        data = None
        for i in range(len(lines)):
            line_lstrip = lines[i].lstrip()
            match_itest = re_itest.fullmatch(line_lstrip)
            if match_itest is not None:
                num_givens = match_itest["givens"].count(".given")
                num_checks = match_itest["checks"].count(".check")
                itest = {
                    "source": match_itest["source"],
                    "num_givens": num_givens,
                    "num_checks": num_checks,
                    "num_vars": num_givens + num_checks,
                    "code": line_lstrip,
                    "lineno": i + 1,
                }
            if data is None:
                if match_itest is not None:
                    # starting a new stmt+itests
                    data = {
                        "project": proj,
                        "filename": file.stem,
                        "itest_type": itest_type,
                        "itests": [itest],
                        "stmt": lines[i - 1],
                        "tgt_lineno": int(match_itest["lineno"]),
                    }
            else:
                if match_itest is not None:
                    # continuing a new itests
                    data["itests"].append(itest)
                else:
                    # ending a stmt+itests
                    data["num_itests"] = len(data["itests"])
                    # using indentation to detect code context (until the next code block)
                    cur_indent = len(lines[i]) - len(line_lstrip)
                    context_lines = []
                    for j in range(i + 1, len(lines)):
                        line_lstrip_j = lines[j].lstrip()
                        if len(lines[j]) - len(line_lstrip_j) < cur_indent:
                            break
                        if line_lstrip_j.startswith("new Here"):
                            continue
                        context_lines.append(line_lstrip_j)
                    data["context_loc"] = len(context_lines)
                    data["context"] = "\n".join(context_lines)
                    data_src_stmts.append(data)
                    data = None
df_src_stmts = pd.DataFrame(data_src_stmts)

# expand the data frame to have one row per itest
data_src_itests = []
for _, row in df_src_stmts.iterrows():
    for i, itest in enumerate(row["itests"]):
        data = {**row, **itest, "itest_idx": i}
        del data["itests"]
        data_src_itests.append(data)
df_src_itests = pd.DataFrame(data_src_itests)


In [11]:
df_src_itests.sample(10, random_state=42)

Unnamed: 0,project,filename,itest_type,stmt,tgt_lineno,num_itests,context_loc,context,source,num_givens,num_checks,num_vars,code,lineno,itest_idx
468,jkuhnert_ognl,OgnlParserTokenManager,R2,} else if ((jjtoSkip[jjmat...,1537,1,3,curLexState = jjnewLexState[jjmatchedKind];\n}...,Unit,2,1,3,"new Here(""Unit"", 1537).given(jjmatchedKind, 70...",1737,0
148,Asana_java-asana,Projects,R2,"String path = String.format(""/workspac...",129,1,0,,Randoop,1,1,2,"new Here(""Randoop"", 129).given(workspace, """")....",130,0
302,hyperledger_fabric-sdk-java,ChaincodeCollectionConfiguration,R2,if (match.matches() && match.group...,289,1,14,int matchNo = Integer.parseInt(matchStingNo);\...,Unit,1,1,2,"new Here(""Unit"", 289).given(match, ""22.xml"").c...",242,0
355,jkuhnert_ognl,OgnlParserTokenManager,R2,if ((active0 & 0x40L) != 0L) {,354,2,0,,Unit,1,1,2,"new Here(""Unit"", 354).given(active0, 256L).che...",403,1
515,medcl_elasticsearch-analysis-pinyin,PinyinTokenFilter,R2,if (config.keepSeparat...,145,2,0,,Unit,2,1,3,"new Here(""Unit"", 145).given(pinyin, ""liu"").giv...",157,0
266,craftercms_core,AbstractInheritFromHierarchyMergeStrategy,R2,"k = mainDescriptorUrl.indexOf('/',...",81,1,2,descriptors.add(new MergeableDescriptor(mainDe...,Unit,2,1,3,"new Here(""Unit"", 81).given(mainDescriptorUrl, ...",73,0
72,Asana_java-asana,Tasks,R2,"String path = String.format(""/tasks/%s...",425,1,0,,Randoop,1,1,2,"new Here(""Randoop"", 425).given(task, ""/tasks/N...",426,0
81,Asana_java-asana,ProjectsBase,R2,"String path = ""/projects/{project_gid}...",160,1,1,return req;,Randoop,1,1,2,"new Here(""Randoop"", 160).given(projectGid, ""hi...",153,0
133,Asana_java-asana,CustomFields,R2,"String path = String.format(""/custom_f...",84,1,0,,Randoop,1,1,2,"new Here(""Randoop"", 84).given(customField, ""hi...",86,0
342,jkuhnert_ognl,OgnlParserTokenManager,R2,if ((active0 & 0x8000L) != 0L) {,318,2,0,,Unit,1,1,2,"new Here(""Unit"", 318).given(active0, 131072L)....",354,0
