In [2]:
import json
import os

def read_json(filename):
  """Load and return the JSON document stored at `filename`.

  The file is decoded as UTF-8 explicitly instead of relying on the platform's
  default text encoding, so non-ASCII content reads the same on every machine.

  Raises:
    OSError: if the file cannot be opened.
    json.JSONDecodeError: if the content is not valid JSON.
  """
  with open(filename, 'r', encoding='utf-8') as f:
    return json.load(f)

def write_json(filename, data):
  """Serialize `data` as JSON with a 2-space indent into `filename`, overwriting it."""
  with open(filename, mode='w') as out_file:
    json.dump(obj=data, fp=out_file, indent=2)

# ============== LogViz Initialize ==============
import json
import requests
def read_json(filename):
  """Load and return the JSON document stored at `filename`, decoded as UTF-8.

  Raises:
    OSError: if the file cannot be opened.
    json.JSONDecodeError: if the content is not valid JSON.
  """
  # NOTE(review): a function with this same name is already defined earlier in
  # this cell; this later definition is the one in effect afterwards.
  # Presumably repeated so the LogViz snippet is self-contained -- confirm.
  with open(filename, 'r', encoding='utf-8') as f:
    return json.load(f)
# Private LogViz settings, read from a JSON file in the current working directory.
LOGVIZ_CONFIG = read_json("./logviz.private.json")
# URL prefix that logviz_getjson / logviz_setjson prepend to their JSON.GET / JSON.SET requests.
WEBDIS_ENDPOINT = LOGVIZ_CONFIG["WEBDIS_API_ENDPOINT"]

# def logviz_putjson(path, data):
#   resp = requests.put(WEBDIS_ENDPOINT + "/JSON.SET/" + path + "/$", json=data)
#   return resp.json()

def logviz_getjson(path):
  """Fetch the JSON document stored under `path` via the endpoint's JSON.GET command.

  Sends GET `{WEBDIS_ENDPOINT}/JSON.GET/{path}/$`, expects a reply object whose
  "JSON.GET" entry is a JSON-encoded string, decodes it and returns its first element.

  Raises:
    requests.HTTPError: if the endpoint answers with an HTTP error status.
    Exception: if the reply has no string "JSON.GET" entry (same style of error
      as logviz_setjson).
  """
  resp = requests.get(f"{WEBDIS_ENDPOINT}/JSON.GET/{path}/$")
  # Fail here with the HTTP status instead of later with an opaque decode/KeyError.
  resp.raise_for_status()
  jsresp = resp.json()
  payload = jsresp.get("JSON.GET") if isinstance(jsresp, dict) else None
  if not isinstance(payload, str):
    # Covers a missing entry, a null entry, and any non-string reply.
    raise Exception(f"logviz_get JSON.GET unexpected resp: {jsresp}")
  # The "$" path yields a list of matches; the original code takes the first one.
  return json.loads(payload)[0]

def logviz_setjson(datakey, data):
  """Store `data` under `datakey` via the endpoint's JSON.SET command.

  Returns the "JSON.SET" entry of the reply; raises Exception when the reply
  does not contain that entry.
  """
  url = f"{WEBDIS_ENDPOINT}/JSON.SET/{datakey}/$"
  reply = requests.put(url, json=data).json()
  if "JSON.SET" not in reply:
    raise Exception(f"logviz_set JSON.SET unexpected resp: {reply}")
  return reply["JSON.SET"]

In [7]:

  
# Keys of the three per-benchmark summary tables, in the order
# leetcode, gfg, humanevalx.
datapaths = [
  "transmex:SUMMARY_PER_M-leetcode-byinj.tabular",
  "transmex:SUMMARY_PER_M-gfg-byinj.tabular",
  "transmex:SUMMARY_PER_M-humanevalx-byinj.tabular",
]

# Fetch each summary table once; get_all_stats reads these three globals.
summ_leetcode = logviz_getjson(datapaths[0])
summ_gfg = logviz_getjson(datapaths[1])
summ_humanevalx = logviz_getjson(datapaths[2])

In [8]:
def tab_filter(tabular, row_filter):
  """Return a new table holding only the rows for which `row_filter(row_id, row)` is truthy.

  Row order follows `tabular["row_ids"]`. The row objects and the `column_ids`
  value are shared with the input table, not copied.
  """
  all_ids = tabular["row_ids"]
  rows = tabular["tabular_data"]
  column_ids = tabular["column_ids"]
  kept_ids = [rid for rid in all_ids if row_filter(rid, rows[rid])]
  return {
    "tabular_data": {rid: rows[rid] for rid in kept_ids},
    "row_ids": kept_ids,
    "column_ids": column_ids,
  }

def tab_column(tabular, column_id):
  """Return the values of column `column_id` as a list, one per row in `row_ids` order."""
  return [
    tabular["tabular_data"][row_id][column_id]
    for row_id in tabular["row_ids"]
  ]

def tab_columns(tabular, *column_ids):
  """Return one tuple per row (in `row_ids` order) with the values of the requested columns."""
  picked = []
  for row_id in tabular["row_ids"]:
    record = tabular["tabular_data"][row_id]
    picked.append(tuple(record[col] for col in column_ids))
  return picked

def tab_intersect(tabular1, tabular2):
  """Return the rows of `tabular1` whose row id also appears in `tabular2`.

  Row order, row data and `column_ids` all come from `tabular1`; `tabular2`
  only contributes its `row_ids`. Row objects are shared, not copied.
  """
  # A set makes each membership test constant-time instead of scanning
  # tabular2's id list once per row. Row ids are used as dict keys in
  # "tabular_data", so they are hashable.
  other_ids = set(tabular2["row_ids"])
  rows = tabular1["tabular_data"]
  shared_ids = [row_id for row_id in tabular1["row_ids"] if row_id in other_ids]
  return {
    "tabular_data": {row_id: rows[row_id] for row_id in shared_ids},
    "row_ids": shared_ids,
    "column_ids": tabular1["column_ids"],
  }

def tab_substract(tabular1, tabular2):
  """Return the rows of `tabular1` whose row id does NOT appear in `tabular2`.

  Row order, row data and `column_ids` all come from `tabular1`; `tabular2`
  only contributes its `row_ids`. Row objects are shared, not copied.
  """
  # A set makes each membership test constant-time instead of scanning
  # tabular2's id list once per row. Row ids are used as dict keys in
  # "tabular_data", so they are hashable.
  excluded_ids = set(tabular2["row_ids"])
  rows = tabular1["tabular_data"]
  kept_ids = [row_id for row_id in tabular1["row_ids"] if row_id not in excluded_ids]
  return {
    "tabular_data": {row_id: rows[row_id] for row_id in kept_ids},
    "row_ids": kept_ids,
    "column_ids": tabular1["column_ids"],
  }

def tab_len(tabular):
  """Return how many row ids the table lists."""
  row_ids = tabular["row_ids"]
  return len(row_ids)

In [9]:
def ff_column_eq(col_id, val):
  """Build a row filter that accepts rows whose `col_id` value equals `val`."""
  def matches(row_id, row):
    return row[col_id] == val
  return matches

def ff_column_includes(col_id, val):
  """Build a row filter that accepts rows whose `col_id` value contains `val`."""
  def contains(row_id, row):
    return val in row[col_id]
  return contains

def ff_column_cond(col_id, cond):
  """Build a row filter that returns `cond` applied to the row's `col_id` value."""
  def check(row_id, row):
    return cond(row[col_id])
  return check


In [58]:

def print_tabular(tabular, keys=None, prefix=None):
  """Print one line per row of `tabular`.

  Without `keys`, each line is the row id followed by the whole row.
  With `keys`, each line is the row id and the JSON-encoded value of every
  listed key, each followed by " | ". A non-None `prefix` is printed first,
  followed by a space.
  """
  for row_id in tabular["row_ids"]:
    if prefix is not None:
      print(prefix, end=" ")
    if keys is None:
      print(row_id, tabular["tabular_data"][row_id])
      continue
    cells = [str(row_id)]
    cells.extend(json.dumps(tabular["tabular_data"][row_id][key]) for key in keys)
    # Every cell, including the last one, is followed by the " | " separator.
    print(" | ".join(cells) + " | ")


def _tar_stmt_indexes_decrease(tar_anno_stmts):
  """Return True if the target statement indexes ever decrease when scanned in order.

  `tar_anno_stmts` is iterated as (stmt, idxes) pairs; pairs whose `idxes` is
  None are skipped.
  """
  last_idx = -1
  for _stmt, idxes in tar_anno_stmts:
    if idxes is None:
      continue
    for idx in idxes:
      if idx < last_idx:
        return True
      last_idx = idx
  return False


def _tally_manual_class(row_id, manual_classification, counts):
  """Apply the manual label of `row_id`, if any, to `counts`.

  Returns False when `row_id` has no manual label. Otherwise increments
  `counts` for "NEQ" / "OOB" / "DISO", leaves it untouched for "CORRECT",
  and returns True. Any other label raises Exception.
  """
  if manual_classification is None or row_id not in manual_classification:
    return False
  manual_cls = manual_classification[row_id]
  if manual_cls in ("NEQ", "OOB", "DISO"):
    counts[manual_cls] += 1
  elif manual_cls != "CORRECT":
    raise Exception("Unknown manual classification:", manual_cls)
  return True


def process_benchmarks(name, summ_bench, expected_extra_syserr=0, manual_classification=None):
  """Count source-map error categories for one benchmark and print a report.

  Steps, in order:
    1. Keep the rows whose "symptom_cls" does not contain "SYNTAX_ERR".
    2. For each kept row, read its `smap_linemap_raw.<injidx>.json` file under
       `../../data/transmap/tests/tempex/dynamic/<name>/_srcmap_py_js_codex0err/<bench_id>`
       and flag the row for manual inspection when its "tar_anno_stmts" indexes
       are not in non-decreasing order.
    3. Select rows by substrings of their "syserr" text, upload two of the
       selections with logviz_setjson, and count the "neq" / "oob" rows.
    4. Fold in `manual_classification` for flagged rows and for error rows that
       matched neither the "neq" nor the "oob" pattern.

  Args:
    name: benchmark name; used in the data folder path and the uploaded keys.
    summ_bench: summary table (dict with "row_ids", "tabular_data", "column_ids").
    expected_extra_syserr: number of rows allowed in the first "syserr"
      selection but not the second; any other count raises.
    manual_classification: optional mapping of row id to one of
      "NEQ", "OOB", "DISO", "CORRECT".

  Returns:
    (neq_count, oob_count, diso_count, nonskip_manual_count, mapped_row_count)

  Raises:
    AssertionError: if a benchmark folder is missing, a map file is neither
      "-" nor a JSON object, or the table consistency checks fail.
    Exception: on an unexpected extra-syserr count or an unknown manual label.
  """
  print("\n-------- Processing", name)
  # get all source-map applicable rows (not syntax err)
  print("total rows:", len(summ_bench["row_ids"]))
  allsrcmap_bench = tab_filter(summ_bench, ff_column_cond("symptom_cls", lambda x : "SYNTAX_ERR" not in x))
  summ_bench = None  # only the filtered tables are used from here on

  print("src mapped rows:", len(allsrcmap_bench["row_ids"]))
  notlastsound_bench = tab_filter(allsrcmap_bench, ff_column_eq("last_sound", False))
  print("transmap fail:", len(notlastsound_bench["row_ids"]))
  syserr_bench = tab_filter(allsrcmap_bench, ff_column_cond("syserr", lambda x : x != ""))
  print("syserr:", len(syserr_bench["row_ids"]))

  # for every row in allsrcmap_bench, check inside folder
  need_manual_inspection = []
  map_folder = f"../../data/transmap/tests/tempex/dynamic/{name}/_srcmap_py_js_codex0err"
  for row_id in allsrcmap_bench["row_ids"]:
    bench_id, injidx = row_id.split(".")
    bench_fdr = os.path.join(map_folder, bench_id)
    assert os.path.exists(bench_fdr), bench_fdr
    raw_stmtmap_filepath = os.path.join(bench_fdr, f"smap_linemap_raw.{injidx}.json")
    if not os.path.exists(raw_stmtmap_filepath):
      print("ERROR file not found: ", raw_stmtmap_filepath)
      continue
    stmtmap_json = read_json(raw_stmtmap_filepath)
    if stmtmap_json == "-": continue
    assert isinstance(stmtmap_json, dict), stmtmap_json
    # Not used below; the lookup is kept so a file without this key still fails loudly.
    src_anno_stmts = stmtmap_json["src_anno_stmts"]
    tar_anno_stmts = stmtmap_json["tar_anno_stmts"]
    # rows whose target statement indexes are not monotonic need a human look
    if _tar_stmt_indexes_decrease(tar_anno_stmts):
      need_manual_inspection.append(row_id)

  # every row with a syserr must also be a row with last_sound == False
  assert tab_len(tab_substract(syserr_bench, notlastsound_bench)) == 0
  # filter all errors that are srcmap errors
  srcmaperr_bench = tab_filter(syserr_bench, ff_column_cond("syserr", lambda x : not (x.find("no trace difference") != -1 or x.find("difference and target DID NOT PASS") != -1)))
  logviz_setjson(f"transmex:transmap_srcmaperr_{name}.tabular", srcmaperr_bench)

  srcmaperr2_bench = tab_filter(syserr_bench, ff_column_cond("syserr", lambda x : x.find("stmt") >= 0 or x.find("from the source map") >= 0 or x.find("no corresponding line mapping") >= 0))
  # rows selected by exclusion above but not by the explicit patterns
  extra_bench = tab_substract(srcmaperr_bench, srcmaperr2_bench)
  extra_syserr_count = tab_len(extra_bench)

  if extra_syserr_count != expected_extra_syserr:
    print("[WARNING] Need manual check:",  f"extra_syserr_count={extra_syserr_count}, expected_extra_syserr={expected_extra_syserr}")
    print_tabular(extra_bench)
    raise Exception("Need manual check")

  assert tab_len(tab_substract(srcmaperr2_bench, srcmaperr_bench)) == 0
  logviz_setjson(f"transmex:transmap_srcmaperr2_{name}.tabular", srcmaperr2_bench)
  srcmaperr_bench = None

  neq_bench = tab_filter(srcmaperr2_bench, ff_column_cond("syserr", lambda x : (x.find("different from the source map") != -1 )))
  oob_bench = tab_filter(srcmaperr2_bench, ff_column_cond("syserr", lambda x : (x.find("Unexpected src_stmt_idx") != -1 )))

  # Running totals per category; manual labels are added on top of the pattern matches.
  counts = {"NEQ": tab_len(neq_bench), "OOB": tab_len(oob_bench), "DISO": 0}

  nonskip_manual_count = 0
  srcmaperr2_ids = set(srcmaperr2_bench["row_ids"])
  for inspect_id in need_manual_inspection:
    if inspect_id in srcmaperr2_ids:
      # already handled through srcmaperr2_bench (pattern match or the loop below)
      continue
    if not _tally_manual_class(inspect_id, manual_classification, counts):
      print("[MANUAL] Please manually check (non-continuous):", inspect_id)
      nonskip_manual_count += 1

  # error rows that matched neither the neq nor the oob pattern
  remaining = tab_substract(tab_substract(srcmaperr2_bench, neq_bench), oob_bench)
  for row_id in remaining["row_ids"]:
    if not _tally_manual_class(row_id, manual_classification, counts):
      print("[MANUAL] Please manually check (other errors):", row_id, json.dumps(remaining["tabular_data"][row_id]["syserr"]))
      nonskip_manual_count += 1

  print("----")
  print("neq:", counts["NEQ"])
  print("oob:", counts["OOB"])
  print("diso:", counts["DISO"])
  if nonskip_manual_count > 0:
    print("!!! MANUAl CHECK TO BE DONE:", nonskip_manual_count)
  print("total:", len(allsrcmap_bench["row_ids"]))
  return counts["NEQ"], counts["OOB"], counts["DISO"], nonskip_manual_count, tab_len(allsrcmap_bench)

def get_all_stats(manual_check_result):
  """Run process_benchmarks on all three benchmarks and print the combined totals.

  Reads the notebook globals `summ_leetcode`, `summ_gfg` and `summ_humanevalx`.

  Args:
    manual_check_result: dict with the keys "leetcode", "gfg" and "humanevalx",
      each holding the manual classification mapping passed to process_benchmarks.

  Prints the found / total / ratio lines per category. The ratio is printed as
  nan when no rows were mapped at all, instead of raising ZeroDivisionError.
  """
  # (benchmark name, summary table, expected_extra_syserr for that benchmark)
  benchmarks = [
    ("leetcode", summ_leetcode, 2),
    ("gfg", summ_gfg, 1),
    ("humanevalx", summ_humanevalx, 0),
  ]

  found_neq = found_oob = found_diso = 0
  total_manual = total_count = 0
  for bench_name, summ_bench, expected_extra in benchmarks:
    neq, oob, diso, manual, total = process_benchmarks(
      bench_name,
      summ_bench,
      expected_extra_syserr=expected_extra,
      manual_classification=manual_check_result[bench_name],
    )
    found_neq += neq
    found_oob += oob
    found_diso += diso
    total_manual += manual
    total_count += total

  def ratio(found):
    # guard the empty case so the report still prints
    return found / total_count if total_count else float("nan")

  print("\n------- TOTAL")
  found_err = found_neq + found_oob + found_diso
  found_invalid = found_neq + found_oob
  print("total map Error (found) / map count / ratio:  ", found_err, total_count, ratio(found_err))
  print("total map Invalid (found) / map count / ratio:  ", found_invalid, total_count, ratio(found_invalid))
  print("total map - neq (found) / map count / ratio:  ", found_neq, total_count, ratio(found_neq))
  print("total map - oob (found) / map count / ratio:  ", found_oob, total_count, ratio(found_oob))
  print("total map - diso (found) / map count / ratio:  ", found_diso, total_count, ratio(found_diso))
  if total_manual > 0:
    print("[!!!!] Result in-complete. total manual check needed:", total_manual)
  

# First pass with no manual labels: every row that needs a human decision is
# printed as a "[MANUAL] Please manually check ..." line.
get_all_stats({"leetcode": {}, "gfg": {}, "humanevalx": {}})



-------- Processing leetcode
total rows: 511
src mapped rows: 425
transmap fail: 90
syserr: 50
[MANUAL] Please manually check (non-continuous): L0008_StringtoInteger_atoi__py.2
[MANUAL] Please manually check (non-continuous): L0311_SparseMatrixMultiplication_py.0
[MANUAL] Please manually check (non-continuous): L0734_SentenceSimilarity_py.0
[MANUAL] Please manually check (non-continuous): L0734_SentenceSimilarity_py.1
[MANUAL] Please manually check (non-continuous): L1181_BeforeandAfterPuzzle_py.0
[MANUAL] Please manually check (non-continuous): L1181_BeforeandAfterPuzzle_py.1
[MANUAL] Please manually check (non-continuous): L1394_FindLuckyIntegerinanArray_py.0
[MANUAL] Please manually check (non-continuous): L1394_FindLuckyIntegerinanArray_py.1
[MANUAL] Please manually check (non-continuous): L1765_MapofHighestPeak_py.0
[MANUAL] Please manually check (non-continuous): L2316_CountUnreachablePairsofNodesinanUndirectedGraph_py.0
[MANUAL] Please manually check (non-continuous): L2318_Num

In [59]:
# Manual labels per benchmark, keyed by row id.
# process_benchmarks accepts "NEQ", "OOB", "DISO" and "CORRECT"; "CORRECT" rows
# are not added to any error count, and any other label raises an exception.
MANUAL_CHECK_DICT = {
  "leetcode": {
    "L0008_StringtoInteger_atoi__py.2": "CORRECT",
    "L0311_SparseMatrixMultiplication_py.0": "DISO",
    "L0734_SentenceSimilarity_py.0": "DISO",
    "L0734_SentenceSimilarity_py.1": "DISO",
    "L1181_BeforeandAfterPuzzle_py.0": "DISO",
    "L1181_BeforeandAfterPuzzle_py.1": "DISO",
    "L1394_FindLuckyIntegerinanArray_py.0": "DISO",
    "L1394_FindLuckyIntegerinanArray_py.1": "DISO",
    "L1765_MapofHighestPeak_py.0": "DISO",
    "L2316_CountUnreachablePairsofNodesinanUndirectedGraph_py.0": "CORRECT",
    "L2318_NumberofDistinctRollSequences_py.0": "CORRECT",
    "L1210_MinimumMovestoReachTargetwithRotations_py.0": "CORRECT",
    "L1883_MinimumSkipstoArriveatMeetingOnTime_py.0": "CORRECT",
    "L2052_MinimumCosttoSeparateSentenceIntoRows_py.0": "CORRECT"
  },
  "gfg": {
    "GFG_CHECK_POSSIBLE_TRANSFORM_ONE_STRING_ANOTHER_py.0": "DISO",
    "GFG_DICE_THROW_PROBLEM_1_py.0": "CORRECT",
  },
  "humanevalx": {
    "H123.0": "CORRECT"
  }
}

# Second pass: same statistics, now with the manual labels applied.
get_all_stats(MANUAL_CHECK_DICT)


-------- Processing leetcode
total rows: 511
src mapped rows: 425
transmap fail: 90
syserr: 50
----
neq: 5
oob: 11
diso: 8
total: 425

-------- Processing gfg
total rows: 257
src mapped rows: 241
transmap fail: 42
syserr: 33
----
neq: 18
oob: 0
diso: 1
total: 241

-------- Processing humanevalx
total rows: 73
src mapped rows: 60
transmap fail: 9
syserr: 4
----
neq: 2
oob: 0
diso: 0
total: 60

------- TOTAL
total map Error (found) / map count / ratio:   45 726 0.06198347107438017
total map Invalid (found) / map count / ratio:   36 726 0.049586776859504134
total map - neq (found) / map count / ratio:   25 726 0.03443526170798898
total map - oob (found) / map count / ratio:   11 726 0.015151515151515152
total map - diso (found) / map count / ratio:   9 726 0.012396694214876033
