In [1]:
# The dataset is located at: RQ2/toga-model-inputs-outputs.
# input.csv is the input file; each row contains three pieces of information:
# 1. the focus method (the method being tested)
# 2. a test case with test assertions (the assertions need to be removed)
# 3. a docstring (not necessary for SEER)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import re
import random

# Modifies data from TOGA* as specified below
Note that this notebook will not run from the current directory unless the original data files are first copied into `../toga_star`.

In [2]:
# Build the base table from the TOGA* inputs: one row per (C, T) pair read
# from each project's `inputs.csv`, every row initially labelled "P".
folder_dir = "../toga_star"

colnames = ["dataset", "project", "bug_id", "C", "T", "docstring"]

# Only sub-folders are projects; skipping stray files keeps `read_csv` from
# failing on entries that are not project folders.
folder_names = [
    name
    for name in os.listdir(folder_dir)
    if os.path.isdir(os.path.join(folder_dir, name))
]

# Collect the per-project frames and concatenate once at the end instead of
# growing `df` inside the loop (which re-copies all previous rows each time).
frames = []
for project in folder_names:
    project_path = os.path.join(folder_dir, project)
    temp_df = pd.read_csv(os.path.join(project_path, "inputs.csv"))
    temp_df.columns = ["C", "T", "docstring"]
    temp_df = temp_df.dropna().assign(dataset="toga*", project=project, bug_id="-1")
    frames.append(temp_df)

if frames:
    # `reindex` restores the documented column order (dataset, project, ...).
    df = pd.concat(frames, ignore_index=True).reindex(columns=colnames)
else:
    df = pd.DataFrame(columns=colnames)

df["label"] = "P"
df = df.drop(columns="docstring")

# Patterns are compiled once and applied column-wise.
assert_re = re.compile(r"\s*assert.*")      # an `assert...` call up to the end of its line
line_comment_re = re.compile(r"\s*\/\/.*\n")  # a `//` comment including its newline
whitespace_re = re.compile(r"\s\s*")        # any whitespace run -> single space

# NOTE(review): `line_comment_re` also matches `//` inside string literals
# (e.g. URLs) and misses a comment on the very last line (no trailing newline).
df["T"] = df["T"].map(lambda src: assert_re.sub("", src))
for col in ["C", "T"]:
    df[col] = df[col].astype(str)
    df[col] = df[col].map(lambda src: line_comment_re.sub("", src.strip()))
    df[col] = df[col].map(lambda src: whitespace_re.sub(" ", src.strip()))

df

Unnamed: 0,dataset,project,bug_id,C,T,label
0,toga*,commons-imaging-1.0-alpha3-src,-1,public boolean isProgressive() { return progre...,public void test21() throws Throwable { Linked...,P
1,toga*,commons-imaging-1.0-alpha3-src,-1,public int getPhysicalWidthDpi() { return phys...,public void test22() throws Throwable { Linked...,P
2,toga*,commons-imaging-1.0-alpha3-src,-1,public float getPhysicalHeightInch() { return ...,public void test23() throws Throwable { Linked...,P
3,toga*,commons-imaging-1.0-alpha3-src,-1,public int getNumberOfImages() { return number...,public void test24() throws Throwable { Linked...,P
4,toga*,commons-imaging-1.0-alpha3-src,-1,public String getMimeType() { return mimeType; },public void test25() throws Throwable { Linked...,P
...,...,...,...,...,...,...
169094,toga*,commons-dbutils-1.7,-1,public float getNullFloat() { return this.null...,public void test324() throws Throwable { Resul...,P
169095,toga*,commons-dbutils-1.7,-1,public double getNullDouble() { return this.nu...,public void test325() throws Throwable { Resul...,P
169096,toga*,commons-dbutils-1.7,-1,public short getNullShort() { return this.null...,public void test326() throws Throwable { Resul...,P
169097,toga*,commons-dbutils-1.7,-1,public int getNullInt() { return this.nullInt; },public void test327() throws Throwable { Resul...,P


In [3]:
# Print the whole test body (column "T") of the row at position 1 among the
# async-http-client rows, so it can be compared with the stripped version later.
async_rows = df[df["project"] == "async-http-client"].reset_index()
print(async_rows.loc[1, "T"])

public void test0() throws Throwable { RateLimitedThrottleRequestFilter rateLimitedThrottleRequestFilter0 = new RateLimitedThrottleRequestFilter(Integer.MAX_VALUE, Integer.MAX_VALUE, (-1)); FilterContext<String> filterContext0 = (FilterContext<String>) mock(FilterContext.class, new ViolatedAssumptionAnswer()); doReturn((AsyncHandler) null, (AsyncHandler) null).when(filterContext0).getAsyncHandler(); doReturn((IOException) null).when(filterContext0).getIOException(); doReturn((Request) null).when(filterContext0).getRequest(); doReturn((HttpResponseStatus) null).when(filterContext0).getResponseStatus(); doReturn(false).when(filterContext0).replayRequest(); try { rateLimitedThrottleRequestFilter0.filter((FilterContext<String>) filterContext0); fail("Expecting exception: NullPointerException"); } catch(NullPointerException e) { verifyException("org.asynchttpclient.filter.ReleasePermitOnComplete", e); } }


# Removing try/catch blocks and labelling the affected tests as failing

In [4]:
# Patterns removed from each test body, in this order:
#   r_try    - the `try {` opener
#   r_fail   - a `fail(...);` call together with the `}` that follows it
#   r_except - `catch(` up to and including the first `}`
r_try = r"try\s*{"
r_fail = r"fail\([^;]*;\s*}"
r_except = r"catch\([^}]*\s*}"

try_except_regex = [r_try, r_fail, r_except]
# Work on a copy: a plain `df_try = df` would only alias `df`, so the in-place
# edits made to `df_try` afterwards would silently rewrite `df` as well.
df_try = df.copy()

In [5]:
# For every pattern: mark tests that contain it as failing ("F"), then delete
# the matched text from the test body. Labels of non-matching rows are kept.
for regex in try_except_regex:
    pattern = re.compile(regex)  # compiled once per pass
    has_match = (
        df_try["T"].map(lambda src: pattern.search(src) is not None).astype(bool)
    )
    df_try.loc[has_match, "label"] = "F"
    df_try["T"] = df_try["T"].map(lambda src: pattern.sub("", src))

In [6]:
# Write the triplets as JSON keyed by row index. The target folder is created
# first because `to_json` does not create missing parent directories.
triplets_path = "./triplets/triplets.json"
os.makedirs(os.path.dirname(triplets_path), exist_ok=True)
df_try.to_json(triplets_path, orient="index", indent=4)

In [7]:
# Print the same sample row as before, now from the processed frame, to check
# the test body after the try/catch patterns were removed.
async_rows_stripped = df_try[df_try["project"] == "async-http-client"].reset_index()
print(async_rows_stripped.loc[1, "T"])

public void test0() throws Throwable { RateLimitedThrottleRequestFilter rateLimitedThrottleRequestFilter0 = new RateLimitedThrottleRequestFilter(Integer.MAX_VALUE, Integer.MAX_VALUE, (-1)); FilterContext<String> filterContext0 = (FilterContext<String>) mock(FilterContext.class, new ViolatedAssumptionAnswer()); doReturn((AsyncHandler) null, (AsyncHandler) null).when(filterContext0).getAsyncHandler(); doReturn((IOException) null).when(filterContext0).getIOException(); doReturn((Request) null).when(filterContext0).getRequest(); doReturn((HttpResponseStatus) null).when(filterContext0).getResponseStatus(); doReturn(false).when(filterContext0).replayRequest();  rateLimitedThrottleRequestFilter0.filter((FilterContext<String>) filterContext0);   }
