### _logviz_db init

In [1]:
# ============== LogViz Initialize ==============
import json
import requests
def read_json(filename):
  """Load and return the parsed contents of a JSON file.

  Args:
    filename: Path of the JSON file to read.

  Returns:
    The decoded JSON document, exactly as produced by ``json.load``.

  Raises:
    OSError: If the file cannot be opened.
    json.JSONDecodeError: If the file does not contain valid JSON.
  """
  # JSON text is UTF-8 by specification; pin the encoding so the result does
  # not depend on the platform's default locale encoding.
  with open(filename, 'r', encoding='utf-8') as f:
    return json.load(f)
# Load the private LogViz settings from the working directory and pull out the
# base URL that logviz_getjson() prefixes to every request.
LOGVIZ_CONFIG = read_json("./logviz.private.json")
WEBDIS_ENDPOINT = LOGVIZ_CONFIG["WEBDIS_API_ENDPOINT"]

# def logviz_putjson(path, data):
#   resp = requests.put(WEBDIS_ENDPOINT + "/JSON.SET/" + path + "/$", json=data)
#   return resp.json()

def logviz_getjson(path, timeout=30):
  """Fetch the JSON document stored under ``path`` via the Webdis endpoint.

  Issues ``GET <WEBDIS_ENDPOINT>/JSON.GET/<path>/$`` and decodes the
  JSON-encoded string found under the ``'JSON.GET'`` key of the response.

  Args:
    path: Key of the document to fetch; inserted into the URL verbatim.
    timeout: Seconds to wait for the server, passed to ``requests.get``.

  Returns:
    The first element of the decoded payload (presumably the single match of
    the ``$`` root query -- confirm against the RedisJSON version in use).

  Raises:
    requests.HTTPError: If the server answers with an error status.
    requests.Timeout: If the server does not answer within ``timeout``.
    KeyError: If the response has no payload for ``path``.
  """
  # Without a timeout a stalled server would block the notebook forever.
  resp = requests.get(WEBDIS_ENDPOINT + "/JSON.GET/" + path + "/$", timeout=timeout)
  # Surface HTTP failures here instead of as a confusing decode error below.
  resp.raise_for_status()
  payload = resp.json()['JSON.GET']
  if payload is None:
    # A null payload would otherwise fail inside json.loads with an opaque
    # TypeError; name the missing key instead.
    raise KeyError(f"LogViz returned no data for key: {path}")
  return json.loads(payload)[0]

# ============== Jupyter Viz Initialize ==============
from IPython.display import HTML, Markdown, display
# Smoke test: a green heading in the cell output confirms rich HTML display works.
display(HTML("<h2 style='color:green;'><i>IPython.display</i> is working</h2>"))
# %pip install json2html
# %pip install pandas
# %pip install matplotlib
import json2html
# import pandas as pd
import matplotlib.pyplot as plt

def display_dict(datadict):
  """Show ``datadict`` in the cell output as the HTML produced by json2html."""
  display(HTML(json2html.json2html.convert(datadict)))

def display_md(md_str):
  """Render the Markdown source ``md_str`` in the cell output."""
  rendered = Markdown(md_str)
  display(rendered)

In [3]:
# Fetch the "scalar" statistics of each benchmark (HumanEvalX, GFG, LeetCode),
# in that order.
stats_h, stats_g, stats_l = (
    logviz_getjson(_key)["scalar"]
    for _key in (
        "transmex:SUMMARY_PER_M-humanevalx-byinj.stats",
        "transmex:SUMMARY_PER_M-gfg-byinj.stats",
        "transmex:SUMMARY_PER_M-leetcode-byinj.stats",
    )
)

# Show every benchmark's statistics as a table under its own heading.
for _heading, _stats in (
    ("## HumanEvalX", stats_h),
    ("## GFG", stats_g),
    ("## LeetCode", stats_l),
):
  display_md(_heading)
  display_dict(_stats)

## HumanEvalX

0,1
total_c,73.0
TOFIX_succ,4.0
TOFIX_unknown,0.0
TOFIX_syserr,4.0
err_total_c,69.0
err_syntax_c,13.0
err_sem_c,56.0
err_syntax_decl_c,0.0
err_syntax_tokif_c,1.0
err_syntax_tokfor_c,3.0


## GFG

0,1
total_c,257.0
TOFIX_succ,21.0
TOFIX_unknown,0.0
TOFIX_syserr,33.0
err_total_c,236.0
err_syntax_c,16.0
err_sem_c,219.0
err_syntax_decl_c,0.0
err_syntax_tokif_c,8.0
err_syntax_tokfor_c,3.0


## LeetCode

0,1
total_c,511.0
TOFIX_succ,51.0
TOFIX_unknown,0.0
TOFIX_syserr,50.0
err_total_c,460.0
err_syntax_c,86.0
err_sem_c,373.0
err_syntax_decl_c,2.0
err_syntax_tokif_c,41.0
err_syntax_tokfor_c,12.0


### Stat

#### Set the threshold for minor syntax errors

In [4]:
# Largest Levenshtein distance between an injection's original content and its
# replacement for the resulting syntax error to be counted as "minor"; larger
# distances are counted as "confused".
threshold = 5

#### Leetcode

In [5]:
import jellyfish

# Per-injection result table of the LeetCode benchmark, fetched from LogViz.
tabular_l = logviz_getjson("transmex:SUMMARY_PER_M-leetcode-byinj.tabular")
tab_l = tabular_l["tabular_data"]

# Keep only the entries of dict `d` whose value satisfies predicate `f`.
filter_from_dict_f = lambda d, f: {k:d[k] for k in d if f(d[k])}
# Row predicates: drop rows whose symptom class contains SUCCESS or -UNKNOWN-,
# then keep the rows whose symptom class contains SYNTAX_ERR.
has_symptom_filter = lambda x: not ("SUCCESS" in x["symptom_cls"] or "-UNKNOWN-" in x["symptom_cls"])
syntax_err_filter = lambda x: "SYNTAX_ERR" in x["symptom_cls"]

hassymp_data = filter_from_dict_f(tab_l, has_symptom_filter)
syntaxerr_data = filter_from_dict_f(hassymp_data, syntax_err_filter)

minor_list = []
confused_list = []

for err in syntaxerr_data.keys():
  # Keys have the form "<index>.<inject_pos>". Split on the LAST dot so that
  # an index which itself contains a dot does not break the unpacking.
  index, inject_pos = err.rsplit('.', 1)
  inject_pos = int(inject_pos)

  # JSON is UTF-8 by specification; do not rely on the locale default.
  with open(f'../../data/transmap/tests/evalex/leetcode/py_js_codex0err/{index}/injects.json', 'r', encoding='utf-8') as f:
    json_list = json.load(f)

  inject = json_list[inject_pos]
  content = inject['content']
  replacement = inject['replacement']
  # Edit distance between the original snippet and what replaced it.
  levenshtein_distance = jellyfish.levenshtein_distance(content, replacement)

  # Small edits count as "minor" syntax errors, everything else as "confused".
  if levenshtein_distance <= threshold:
    minor_list.append(index)
  else:
    confused_list.append(index)

minor_num_l = len(minor_list)
confused_num_l = len(confused_list)

print(f"minor: {minor_num_l}")
print(f"confused: {confused_num_l}")

minor: 16
confused: 70


In [6]:
# Emit the LeetCode LaTeX macros: every upper-case placeholder in the template
# is substituted with the matching statistic. Percent signs are escaped as
# '\\%' (a literal backslash followed by '%') because '%' starts a comment in
# LaTeX; the previous spelling '\%' was an invalid Python escape sequence.
print("""
% ---------------- Leetcode
% ---------------- overview
\\newcommand{\\ltotal}{L_TOTAL_C}
\\newcommand{\\ltotalerr}{L_TOTAL_ERR_C}
\\newcommand{\\lsynerr}{L_SYN_ERR}
\\newcommand{\\lsemerr}{L_SEM_ERR}
% ---------------- baseline
\\newcommand{\\lbaselinesucc}{L_SIMP_YES_C}
\\newcommand{\\lbaselinesuccratio}{L_SIMP_YES_RATIO}
% ---------------- transmap
\\newcommand{\\ltransmapsucc}{L_LAST_SOUND_C}
\\newcommand{\\ltransmapsuccratio}{L_SUCC_RATIO}
\\newcommand{\\ltransmapsuccwhenbaselinefail}{L_SYMPTOM_NO_SOUND_C}
\\newcommand{\\ltransmapsuccwhenbaselinefailratio}{L_SYMPTOM_NO_SUCC_RATIO}
\\newcommand{\\lmismatcherrsucc}{L_MISMATCH_SOUND_C}
\\newcommand{\\lmismatcherrsuccratio}{L_MISMATCH_SUCC_RATIO}
\\newcommand{\\lsuspiciouslinesizeavg}{L_LC_AVG}
\\newcommand{\\lsuspiciousratioavg}{L_LC_RATIO_AVG}
% ---------------- syntax_err_type
\\newcommand{\\lsynminor}{L_SYN_MINOR_C}
\\newcommand{\\lsynconfused}{L_SYN_CONFUSED_C}
% ---------------- sem_err_type
\\newcommand{\\lruntime}{L_RUNTIME_C}
\\newcommand{\\lruntimeref}{L_RUNTIME_REF_C}
\\newcommand{\\lruntimetype}{L_RUNTIME_TYPE_C}
\\newcommand{\\lruntimeother}{L_RUNTIME_OTHER_C}
\\newcommand{\\lmismatch}{L_MISMATCH_C}
"""
# overview
.replace("L_TOTAL_C", str(stats_l["total_c"]))
.replace("L_TOTAL_ERR_C", str(stats_l["err_syntax_c"]+stats_l["err_sem_c"]))
.replace("L_SYN_ERR", str(stats_l["err_syntax_c"]))
.replace("L_SEM_ERR", str(stats_l["err_sem_c"]))

# baseline (ratio is taken over the semantic errors)
.replace("L_SIMP_YES_C", str(stats_l["symptom_yes_c"]))
.replace("L_SIMP_YES_RATIO", "{:.2%}".format(stats_l["symptom_yes_c"]/stats_l["err_sem_c"]).replace('%','\\%'))

# transmap
.replace("L_LAST_SOUND_C", str(stats_l["diag_last_sound_c"]))
.replace("L_SUCC_RATIO", "{:.2%}".format(stats_l["diag_succ_ratio"]).replace('%','\\%'))
.replace("L_SYMPTOM_NO_SOUND_C", str(stats_l["symptom_no_diag_sound_c"]))
.replace("L_SYMPTOM_NO_SUCC_RATIO", "{:.2%}".format(stats_l["diag_symno_ratio"]).replace('%','\\%'))
.replace("L_MISMATCH_SOUND_C", str(stats_l["err_mism_diag_sound_c"]))
.replace("L_MISMATCH_SUCC_RATIO", "{:.2%}".format(stats_l["diag_mism_ratio"]).replace('%','\\%'))
.replace("L_LC_AVG", "{:.3}".format(stats_l["diag_lc_avg"]))
.replace("L_LC_RATIO_AVG", "{:.2%}".format(stats_l["diag_ratio_avg"]).replace('%','\\%'))

# syntax error types
.replace("L_SYN_MINOR_C", str(minor_num_l))
.replace("L_SYN_CONFUSED_C", str(confused_num_l))

# semantic error types ("other" = runtime errors that are neither ref nor type)
.replace("L_RUNTIME_C", str(stats_l["err_runtime_c"]))
.replace("L_RUNTIME_REF_C", str(stats_l["err_runtime_ref_c"]))
.replace("L_RUNTIME_TYPE_C", str(stats_l["err_runtime_type_c"]))
.replace("L_RUNTIME_OTHER_C", str(stats_l["err_runtime_c"]-stats_l["err_runtime_ref_c"]-stats_l["err_runtime_type_c"]))
.replace("L_MISMATCH_C", str(stats_l["err_mismatch_c"]))
)


% ---------------- Leetcode
% ---------------- overview
\newcommand{\ltotal}{511}
\newcommand{\ltotalerr}{459}
\newcommand{\lsynerr}{86}
\newcommand{\lsemerr}{373}
% ---------------- baseline
\newcommand{\lbaselinesucc}{157}
\newcommand{\lbaselinesuccratio}{42.09\%}
% ---------------- transmap
\newcommand{\ltransmapsucc}{327}
\newcommand{\ltransmapsuccratio}{87.67\%}
\newcommand{\ltransmapsuccwhenbaselinefail}{178}
\newcommand{\ltransmapsuccwhenbaselinefailratio}{82.41\%}
\newcommand{\lmismatcherrsucc}{159}
\newcommand{\lmismatcherrsuccratio}{81.96\%}
\newcommand{\lsuspiciouslinesizeavg}{1.31}
\newcommand{\lsuspiciousratioavg}{7.23\%}
% ---------------- syntax_err_type
\newcommand{\lsynminor}{16}
\newcommand{\lsynconfused}{70}
% ---------------- sem_err_type
\newcommand{\lruntime}{179}
\newcommand{\lruntimeref}{118}
\newcommand{\lruntimetype}{58}
\newcommand{\lruntimeother}{3}
\newcommand{\lmismatch}{194}



#### GFG

In [7]:
import jellyfish

# Per-injection result table of the GFG benchmark, fetched from LogViz.
tabular_g = logviz_getjson("transmex:SUMMARY_PER_M-gfg-byinj.tabular")
tab_g = tabular_g["tabular_data"]

# Keep only the entries of dict `d` whose value satisfies predicate `f`.
filter_from_dict_f = lambda d, f: {k:d[k] for k in d if f(d[k])}
# Row predicates: drop rows whose symptom class contains SUCCESS or -UNKNOWN-,
# then keep the rows whose symptom class contains SYNTAX_ERR.
has_symptom_filter = lambda x: not ("SUCCESS" in x["symptom_cls"] or "-UNKNOWN-" in x["symptom_cls"])
syntax_err_filter = lambda x: "SYNTAX_ERR" in x["symptom_cls"]

hassymp_data = filter_from_dict_f(tab_g, has_symptom_filter)
syntaxerr_data = filter_from_dict_f(hassymp_data, syntax_err_filter)

minor_list = []
confused_list = []

for err in syntaxerr_data.keys():
  # Keys have the form "<index>.<inject_pos>". Split on the LAST dot so that
  # an index which itself contains a dot does not break the unpacking.
  index, inject_pos = err.rsplit('.', 1)
  inject_pos = int(inject_pos)

  # JSON is UTF-8 by specification; do not rely on the locale default.
  with open(f'../../data/transmap/tests/evalex/gfg/py_js_codex0err/{index}/injects.json', 'r', encoding='utf-8') as f:
    json_list = json.load(f)

  inject = json_list[inject_pos]
  content = inject['content']
  replacement = inject['replacement']
  # Edit distance between the original snippet and what replaced it.
  levenshtein_distance = jellyfish.levenshtein_distance(content, replacement)

  # Small edits count as "minor" syntax errors, everything else as "confused".
  if levenshtein_distance <= threshold:
    minor_list.append(index)
  else:
    confused_list.append(index)

minor_num_g = len(minor_list)
confused_num_g = len(confused_list)

print(f"minor: {minor_num_g}")
print(f"confused: {confused_num_g}")

minor: 2
confused: 14


In [8]:
# Emit the GFG LaTeX macros: every upper-case placeholder in the template is
# substituted with the matching statistic. Percent signs are escaped as '\\%'
# (a literal backslash followed by '%') because '%' starts a comment in LaTeX;
# the previous spelling '\%' was an invalid Python escape sequence.
print("""
% ---------------- GFG
% ---------------- overview
\\newcommand{\\gtotal}{GFG_TOTAL_C}
\\newcommand{\\gtotalerr}{GFG_TOTAL_ERR_C}
\\newcommand{\\gsynerr}{GFG_SYN_ERR}
\\newcommand{\\gsemerr}{GFG_SEM_ERR}
% ---------------- baseline
\\newcommand{\\gbaselinesucc}{GFG_SIMP_YES_C}
\\newcommand{\\gbaselinesuccratio}{GFG_SIMP_YES_RATIO}
% ---------------- transmap
\\newcommand{\\gtransmapsucc}{GFG_LAST_SOUND_C}
\\newcommand{\\gtransmapsuccratio}{GFG_SUCC_RATIO}
\\newcommand{\\gtransmapsuccwhenbaselinefail}{GFG_SYMPTOM_NO_SOUND_C}
\\newcommand{\\gtransmapsuccwhenbaselinefailratio}{GFG_SYMPTOM_NO_SUCC_RATIO}
\\newcommand{\\gmismatcherrsucc}{GFG_MISMATCH_SOUND_C}
\\newcommand{\\gmismatcherrsuccratio}{GFG_MISMATCH_SUCC_RATIO}
\\newcommand{\\gsuspiciouslinesizeavg}{GFG_LC_AVG}
\\newcommand{\\gsuspiciousratioavg}{GFG_LC_RATIO_AVG}
% ---------------- syntax_err_type
\\newcommand{\\gsynminor}{GFG_SYN_MINOR_C}
\\newcommand{\\gsynconfused}{GFG_SYN_CONFUSED_C}
% ---------------- sem_err_type
\\newcommand{\\gruntime}{GFG_RUNTIME_C}
\\newcommand{\\gruntimeref}{GFG_RUNTIME_REF_C}
\\newcommand{\\gruntimetype}{GFG_RUNTIME_TYPE_C}
\\newcommand{\\gruntimeother}{GFG_RUNTIME_OTHER_C}
\\newcommand{\\gmismatch}{GFG_MISMATCH_C}
"""
# overview
.replace("GFG_TOTAL_C", str(stats_g["total_c"]))
.replace("GFG_TOTAL_ERR_C", str(stats_g["err_syntax_c"]+stats_g["err_sem_c"]))
.replace("GFG_SYN_ERR", str(stats_g["err_syntax_c"]))
.replace("GFG_SEM_ERR", str(stats_g["err_sem_c"]))

# baseline (ratio is taken over the semantic errors)
.replace("GFG_SIMP_YES_C", str(stats_g["symptom_yes_c"]))
.replace("GFG_SIMP_YES_RATIO", "{:.2%}".format(stats_g["symptom_yes_c"]/stats_g["err_sem_c"]).replace('%','\\%'))

# transmap
.replace("GFG_LAST_SOUND_C", str(stats_g["diag_last_sound_c"]))
.replace("GFG_SUCC_RATIO", "{:.2%}".format(stats_g["diag_succ_ratio"]).replace('%','\\%'))
.replace("GFG_SYMPTOM_NO_SOUND_C", str(stats_g["symptom_no_diag_sound_c"]))
.replace("GFG_SYMPTOM_NO_SUCC_RATIO", "{:.2%}".format(stats_g["diag_symno_ratio"]).replace('%','\\%'))
.replace("GFG_MISMATCH_SOUND_C", str(stats_g["err_mism_diag_sound_c"]))
.replace("GFG_MISMATCH_SUCC_RATIO", "{:.2%}".format(stats_g["diag_mism_ratio"]).replace('%','\\%'))
.replace("GFG_LC_AVG", "{:.3}".format(stats_g["diag_lc_avg"]))
.replace("GFG_LC_RATIO_AVG", "{:.2%}".format(stats_g["diag_ratio_avg"]).replace('%','\\%'))

# syntax error types
.replace("GFG_SYN_MINOR_C", str(minor_num_g))
.replace("GFG_SYN_CONFUSED_C", str(confused_num_g))

# semantic error types ("other" = runtime errors that are neither ref nor type)
.replace("GFG_RUNTIME_C", str(stats_g["err_runtime_c"]))
.replace("GFG_RUNTIME_REF_C", str(stats_g["err_runtime_ref_c"]))
.replace("GFG_RUNTIME_TYPE_C", str(stats_g["err_runtime_type_c"]))
.replace("GFG_RUNTIME_OTHER_C", str(stats_g["err_runtime_c"]-stats_g["err_runtime_ref_c"]-stats_g["err_runtime_type_c"]))
.replace("GFG_MISMATCH_C", str(stats_g["err_mismatch_c"]))
)


% ---------------- GFG
% ---------------- overview
\newcommand{\gtotal}{257}
\newcommand{\gtotalerr}{235}
\newcommand{\gsynerr}{16}
\newcommand{\gsemerr}{219}
% ---------------- baseline
\newcommand{\gbaselinesucc}{86}
\newcommand{\gbaselinesuccratio}{39.27\%}
% ---------------- transmap
\newcommand{\gtransmapsucc}{194}
\newcommand{\gtransmapsuccratio}{88.58\%}
\newcommand{\gtransmapsuccwhenbaselinefail}{115}
\newcommand{\gtransmapsuccwhenbaselinefailratio}{86.47\%}
\newcommand{\gmismatcherrsucc}{100}
\newcommand{\gmismatcherrsuccratio}{88.50\%}
\newcommand{\gsuspiciouslinesizeavg}{1.11}
\newcommand{\gsuspiciousratioavg}{7.44\%}
% ---------------- syntax_err_type
\newcommand{\gsynminor}{2}
\newcommand{\gsynconfused}{14}
% ---------------- sem_err_type
\newcommand{\gruntime}{106}
\newcommand{\gruntimeref}{52}
\newcommand{\gruntimetype}{54}
\newcommand{\gruntimeother}{0}
\newcommand{\gmismatch}{113}



#### HumanEvalX

In [9]:
import jellyfish

# Per-injection result table of the HumanEvalX benchmark, fetched from LogViz.
tabular_h = logviz_getjson("transmex:SUMMARY_PER_M-humanevalx-byinj.tabular")
tab_h = tabular_h["tabular_data"]

# Keep only the entries of dict `d` whose value satisfies predicate `f`.
filter_from_dict_f = lambda d, f: {k:d[k] for k in d if f(d[k])}
# Row predicates: drop rows whose symptom class contains SUCCESS or -UNKNOWN-,
# then keep the rows whose symptom class contains SYNTAX_ERR.
has_symptom_filter = lambda x: not ("SUCCESS" in x["symptom_cls"] or "-UNKNOWN-" in x["symptom_cls"])
syntax_err_filter = lambda x: "SYNTAX_ERR" in x["symptom_cls"]

hassymp_data = filter_from_dict_f(tab_h, has_symptom_filter)
syntaxerr_data = filter_from_dict_f(hassymp_data, syntax_err_filter)

minor_list = []
confused_list = []

for err in syntaxerr_data.keys():
  # Keys have the form "<index>.<inject_pos>". Split on the LAST dot so that
  # an index which itself contains a dot does not break the unpacking.
  index, inject_pos = err.rsplit('.', 1)
  inject_pos = int(inject_pos)

  # JSON is UTF-8 by specification; do not rely on the locale default.
  with open(f'../../data/transmap/tests/evalex/humanevalx/py_js_codex0err/{index}/injects.json', 'r', encoding='utf-8') as f:
    json_list = json.load(f)

  inject = json_list[inject_pos]
  content = inject['content']
  replacement = inject['replacement']
  # Edit distance between the original snippet and what replaced it.
  levenshtein_distance = jellyfish.levenshtein_distance(content, replacement)

  # Small edits count as "minor" syntax errors, everything else as "confused".
  if levenshtein_distance <= threshold:
    minor_list.append(index)
  else:
    confused_list.append(index)

minor_num_h = len(minor_list)
confused_num_h = len(confused_list)

print(f"minor: {minor_num_h}")
print(f"confused: {confused_num_h}")

minor: 2
confused: 11


In [10]:
# Emit the HumanEvalX LaTeX macros: every upper-case placeholder in the
# template is substituted with the matching statistic. Percent signs are
# escaped as '\\%' (a literal backslash followed by '%') because '%' starts a
# comment in LaTeX; the previous spelling '\%' was an invalid Python escape
# sequence.
print("""
% ---------------- HumanEvalX
% ---------------- overview
\\newcommand{\\htotal}{HUM_TOTAL_C}
\\newcommand{\\htotalerr}{HUM_TOTAL_ERR_C}
\\newcommand{\\hsynerr}{HUM_SYN_ERR}
\\newcommand{\\hsemerr}{HUM_SEM_ERR}
% ---------------- baseline
\\newcommand{\\hbaselinesucc}{HUM_SIMP_YES_C}
\\newcommand{\\hbaselinesuccratio}{HUM_SIMP_YES_RATIO}
% ---------------- transmap
\\newcommand{\\htransmapsucc}{HUM_LAST_SOUND_C}
\\newcommand{\\htransmapsuccratio}{HUM_SUCC_RATIO}
\\newcommand{\\htransmapsuccwhenbaselinefail}{HUM_SYMPTOM_NO_SOUND_C}
\\newcommand{\\htransmapsuccwhenbaselinefailratio}{HUM_SYMPTOM_NO_SUCC_RATIO}
\\newcommand{\\hmismatcherrsucc}{HUM_MISMATCH_SOUND_C}
\\newcommand{\\hmismatcherrsuccratio}{HUM_MISMATCH_SUCC_RATIO}
\\newcommand{\\hsuspiciouslinesizeavg}{HUM_LC_AVG}
\\newcommand{\\hsuspiciousratioavg}{HUM_LC_RATIO_AVG}
% ---------------- syntax_err_type
\\newcommand{\\hsynminor}{HUM_SYN_MINOR_C}
\\newcommand{\\hsynconfused}{HUM_SYN_CONFUSED_C}
% ---------------- sem_err_type
\\newcommand{\\hruntime}{HUM_RUNTIME_C}
\\newcommand{\\hruntimeref}{HUM_RUNTIME_REF_C}
\\newcommand{\\hruntimetype}{HUM_RUNTIME_TYPE_C}
\\newcommand{\\hruntimeother}{HUM_RUNTIME_OTHER_C}
\\newcommand{\\hmismatch}{HUM_MISMATCH_C}
"""
# overview
.replace("HUM_TOTAL_C", str(stats_h["total_c"]))
.replace("HUM_TOTAL_ERR_C", str(stats_h["err_syntax_c"]+stats_h["err_sem_c"]))
.replace("HUM_SYN_ERR", str(stats_h["err_syntax_c"]))
.replace("HUM_SEM_ERR", str(stats_h["err_sem_c"]))

# baseline (ratio is taken over the semantic errors)
.replace("HUM_SIMP_YES_C", str(stats_h["symptom_yes_c"]))
.replace("HUM_SIMP_YES_RATIO", "{:.2%}".format(stats_h["symptom_yes_c"]/stats_h["err_sem_c"]).replace('%','\\%'))

# transmap
.replace("HUM_LAST_SOUND_C", str(stats_h["diag_last_sound_c"]))
.replace("HUM_SUCC_RATIO", "{:.2%}".format(stats_h["diag_succ_ratio"]).replace('%','\\%'))
.replace("HUM_SYMPTOM_NO_SOUND_C", str(stats_h["symptom_no_diag_sound_c"]))
.replace("HUM_SYMPTOM_NO_SUCC_RATIO", "{:.2%}".format(stats_h["diag_symno_ratio"]).replace('%','\\%'))
.replace("HUM_MISMATCH_SOUND_C", str(stats_h["err_mism_diag_sound_c"]))
.replace("HUM_MISMATCH_SUCC_RATIO", "{:.2%}".format(stats_h["diag_mism_ratio"]).replace('%','\\%'))
.replace("HUM_LC_AVG", "{:.3}".format(stats_h["diag_lc_avg"]))
.replace("HUM_LC_RATIO_AVG", "{:.2%}".format(stats_h["diag_ratio_avg"]).replace('%','\\%'))

# syntax error types
.replace("HUM_SYN_MINOR_C", str(minor_num_h))
.replace("HUM_SYN_CONFUSED_C", str(confused_num_h))

# semantic error types ("other" = runtime errors that are neither ref nor type)
.replace("HUM_RUNTIME_C", str(stats_h["err_runtime_c"]))
.replace("HUM_RUNTIME_REF_C", str(stats_h["err_runtime_ref_c"]))
.replace("HUM_RUNTIME_TYPE_C", str(stats_h["err_runtime_type_c"]))
.replace("HUM_RUNTIME_OTHER_C", str(stats_h["err_runtime_c"]-stats_h["err_runtime_ref_c"]-stats_h["err_runtime_type_c"]))
.replace("HUM_MISMATCH_C", str(stats_h["err_mismatch_c"]))
)


% ---------------- HumanEvalX
% ---------------- overview
\newcommand{\htotal}{73}
\newcommand{\htotalerr}{69}
\newcommand{\hsynerr}{13}
\newcommand{\hsemerr}{56}
% ---------------- baseline
\newcommand{\hbaselinesucc}{13}
\newcommand{\hbaselinesuccratio}{23.21\%}
% ---------------- transmap
\newcommand{\htransmapsucc}{49}
\newcommand{\htransmapsuccratio}{87.50\%}
\newcommand{\htransmapsuccwhenbaselinefail}{38}
\newcommand{\htransmapsuccwhenbaselinefailratio}{88.37\%}
\newcommand{\hmismatcherrsucc}{36}
\newcommand{\hmismatcherrsuccratio}{87.80\%}
\newcommand{\hsuspiciouslinesizeavg}{1.18}
\newcommand{\hsuspiciousratioavg}{10.90\%}
% ---------------- syntax_err_type
\newcommand{\hsynminor}{2}
\newcommand{\hsynconfused}{11}
% ---------------- sem_err_type
\newcommand{\hruntime}{15}
\newcommand{\hruntimeref}{3}
\newcommand{\hruntimetype}{11}
\newcommand{\hruntimeother}{1}
\newcommand{\hmismatch}{41}



#### Overall

In [11]:
def _sum_stat(key):
  """Add up statistic ``key`` over the LeetCode, GFG and HumanEvalX stats."""
  return stats_l[key] + stats_g[key] + stats_h[key]

def _sum_weighted(key, weight_key):
  """Add up ``key * weight_key`` over the LeetCode, GFG and HumanEvalX stats."""
  return (
      stats_l[key]*stats_l[weight_key]
      + stats_g[key]*stats_g[weight_key]
      + stats_h[key]*stats_h[weight_key]
  )

# --- error totals over all three benchmarks
syn_err = _sum_stat("err_syntax_c")
sem_err = _sum_stat("err_sem_c")
all_err = syn_err + sem_err

# --- syntax errors, split by the minor/confused classification
syn_ratio = syn_err / all_err
sem_ratio = sem_err / all_err
synminor = minor_num_l + minor_num_g + minor_num_h
synminor_ratio = synminor / all_err
synconfused = confused_num_l + confused_num_g + confused_num_h
synconfused_ratio = synconfused / all_err
synconfused_insyn_ratio = synconfused / syn_err

# --- semantic errors: runtime (ref / type / other) and mismatch
runtime = _sum_stat("err_runtime_c")
runtime_ratio = runtime / all_err
runtimeref = _sum_stat("err_runtime_ref_c")
runtimeref_ratio = runtimeref / all_err
runtimetype = _sum_stat("err_runtime_type_c")
runtimetype_ratio = runtimetype / all_err
runtimeother = runtime - runtimeref - runtimetype
runtimeother_ratio = runtimeother / all_err
mismatch = _sum_stat("err_mismatch_c")
mismatch_ratio = mismatch / all_err
mismatch_insem_ratio = mismatch / sem_err

# --- pooled success ratios: summed counts divided by summed denominators
avg_baselinesuccratio = _sum_stat("symptom_yes_c") / sem_err
avg_transmapsuccratio = _sum_stat("diag_last_sound_c") / sem_err
avg_transmapsuccwhenbaselinefailratio = (
    _sum_stat("symptom_no_diag_sound_c") / _sum_stat("symptom_no_c") )
avg_mismatcherrsuccratio = _sum_stat("err_mism_diag_sound_c") / mismatch

# --- per-benchmark averages, weighted by each benchmark's diag_last_sound_c
avg_suspiciouslinesizeavg = (
    _sum_weighted("diag_lc_avg", "diag_last_sound_c") / _sum_stat("diag_last_sound_c") )
avg_suspiciousratioavg = (
    _sum_weighted("diag_ratio_avg", "diag_last_sound_c") / _sum_stat("diag_last_sound_c") )

In [12]:
# Emit the cross-benchmark LaTeX macros: every upper-case placeholder in the
# template is substituted with the matching aggregate. Percent signs are
# escaped as '\\%' (a literal backslash followed by '%') because '%' starts a
# comment in LaTeX; the previous spelling '\%' was an invalid Python escape
# sequence.
print("""
% ---------------- Overall
\\newcommand{\\totalerr}{OVERALL_ERR_C}
% ---------------- syntax_err_type
\\newcommand{\\synerr}{OVERALL_SYN_ERR_C}
\\newcommand{\\synerrratio}{OVERALL_SYN_ERR_RATIO}
\\newcommand{\\synminor}{OVERALL_SYN_MINOR_C}
\\newcommand{\\synminorratio}{OVERALL_SYN_MINOR_RATIO}
\\newcommand{\\synconfused}{OVERALL_SYN_CONFUSED_C}
\\newcommand{\\synconfusedratio}{OVERALL_SYN_CONFUSED_RATIO}
\\newcommand{\\synconfusedinsynerrratio}{OVERALL_SYN_CONFUSED_IN_SYN_RATIO}
% ---------------- sem_err_type
\\newcommand{\\semerr}{OVERALL_SEM_ERR_C}
\\newcommand{\\semerrratio}{OVERALL_SEM_ERR_RATIO}

\\newcommand{\\runtime}{OVERALL_RUNTIME_C}
\\newcommand{\\runtimeratio}{OVERALL_RUNTIME_RATIO}
\\newcommand{\\runtimeref}{OVERALL_RUNTIME_REF_C}
\\newcommand{\\runtimerefratio}{OVERALL_RUNTIME_REF_RATIO}
\\newcommand{\\runtimetype}{OVERALL_RUNTIME_TYPE_C}
\\newcommand{\\runtimetyperatio}{OVERALL_RUNTIME_TYPE_RATIO}
\\newcommand{\\runtimeother}{OVERALL_RUNTIME_OTHER_C}
\\newcommand{\\runtimeotherratio}{OVERALL_RUNTIME_OTHER_RATIO}

\\newcommand{\\mismatch}{OVERALL_MISMATCH_C}
\\newcommand{\\mismatchratio}{OVERALL_MISMATCH_RATIO}
\\newcommand{\\mismatchinsemratio}{OVERALL_MISMATCH_SEM_RATIO}
% ---------------- avg_statistics_benchmark(table performance)
\\newcommand{\\avgbaselinesuccratio}{AVG_SIMP_YES_RATIO}
\\newcommand{\\avgtransmapsuccratio}{AVG_SUCC_RATIO}
\\newcommand{\\avgtransmapsuccwhenbaselinefailratio}{AVG_SYMPTOM_NO_SUCC_RATIO}
\\newcommand{\\avgmismatcherrsuccratio}{AVG_MISMATCH_SUCC_RATIO}
\\newcommand{\\avgsuspiciouslinesizeavg}{AVG_LC_AVG}
\\newcommand{\\avgsuspiciousratioavg}{AVG_RATIO_AVG}
"""
.replace("OVERALL_ERR_C", str(all_err))

# syntax error types
.replace("OVERALL_SYN_ERR_C", str(syn_err))
.replace("OVERALL_SYN_ERR_RATIO", "{:.2%}".format(syn_ratio).replace('%','\\%'))
.replace("OVERALL_SYN_MINOR_C", str(synminor))
.replace("OVERALL_SYN_MINOR_RATIO", "{:.2%}".format(synminor_ratio).replace('%','\\%'))
.replace("OVERALL_SYN_CONFUSED_C", str(synconfused))
.replace("OVERALL_SYN_CONFUSED_RATIO", "{:.2%}".format(synconfused_ratio).replace('%','\\%'))
.replace("OVERALL_SYN_CONFUSED_IN_SYN_RATIO", "{:.2%}".format(synconfused_insyn_ratio).replace('%','\\%'))

# semantic error types
.replace("OVERALL_SEM_ERR_C", str(sem_err))
.replace("OVERALL_SEM_ERR_RATIO", "{:.2%}".format(sem_ratio).replace('%','\\%'))

.replace("OVERALL_RUNTIME_C", str(runtime))
.replace("OVERALL_RUNTIME_RATIO", "{:.2%}".format(runtime_ratio).replace('%','\\%'))
.replace("OVERALL_RUNTIME_REF_C", str(runtimeref))
.replace("OVERALL_RUNTIME_REF_RATIO", "{:.2%}".format(runtimeref_ratio).replace('%','\\%'))
.replace("OVERALL_RUNTIME_TYPE_C", str(runtimetype))
.replace("OVERALL_RUNTIME_TYPE_RATIO", "{:.2%}".format(runtimetype_ratio).replace('%','\\%'))
.replace("OVERALL_RUNTIME_OTHER_C", str(runtimeother))
.replace("OVERALL_RUNTIME_OTHER_RATIO", "{:.2%}".format(runtimeother_ratio).replace('%','\\%'))

.replace("OVERALL_MISMATCH_C", str(mismatch))
.replace("OVERALL_MISMATCH_RATIO", "{:.2%}".format(mismatch_ratio).replace('%','\\%'))
.replace("OVERALL_MISMATCH_SEM_RATIO", "{:.2%}".format(mismatch_insem_ratio).replace('%','\\%'))

# averages for the performance table
.replace("AVG_SIMP_YES_RATIO", "{:.2%}".format(avg_baselinesuccratio).replace("%", "\\%"))
.replace("AVG_SUCC_RATIO", "{:.2%}".format(avg_transmapsuccratio).replace("%", "\\%"))
.replace("AVG_SYMPTOM_NO_SUCC_RATIO", "{:.2%}".format(avg_transmapsuccwhenbaselinefailratio).replace("%", "\\%"))
.replace("AVG_MISMATCH_SUCC_RATIO", "{:.2%}".format(avg_mismatcherrsuccratio).replace("%", "\\%"))
.replace("AVG_LC_AVG", "{:.2f}".format(avg_suspiciouslinesizeavg))
.replace("AVG_RATIO_AVG", "{:.2%}".format(avg_suspiciousratioavg).replace("%", "\\%"))
)


% ---------------- Overall
\newcommand{\totalerr}{763}
% ---------------- syntax_err_type
\newcommand{\synerr}{115}
\newcommand{\synerrratio}{15.07\%}
\newcommand{\synminor}{20}
\newcommand{\synminorratio}{2.62\%}
\newcommand{\synconfused}{95}
\newcommand{\synconfusedratio}{12.45\%}
\newcommand{\synconfusedinsynerrratio}{82.61\%}
% ---------------- sem_err_type
\newcommand{\semerr}{648}
\newcommand{\semerrratio}{84.93\%}

\newcommand{\runtime}{300}
\newcommand{\runtimeratio}{39.32\%}
\newcommand{\runtimeref}{173}
\newcommand{\runtimerefratio}{22.67\%}
\newcommand{\runtimetype}{123}
\newcommand{\runtimetyperatio}{16.12\%}
\newcommand{\runtimeother}{4}
\newcommand{\runtimeotherratio}{0.52\%}

\newcommand{\mismatch}{348}
\newcommand{\mismatchratio}{45.61\%}
\newcommand{\mismatchinsemratio}{53.70\%}
% ---------------- avg_statistics_benchmark(table performance)
\newcommand{\avgbaselinesuccratio}{39.51\%}
\newcommand{\avgtransmapsuccratio}{87.96\%}
\newcommand{\avgtransmapsuccwhenbaselinef

#### Others

In [13]:
def runtimeerr_filter(x):
  """True when the row's ``symptom_cls`` contains ``RUNTIME_ERR``."""
  return "RUNTIME_ERR" in x["symptom_cls"]

def semerr_filter(x):
  """True when the row's ``symptom_cls`` contains ``SEMERR``."""
  return "SEMERR" in x["symptom_cls"]

def baseline_fail_filter(x):
  """True when the row's ``symptom_yes`` contains ``NO_E``.

  Going by the variable names this marks rows where the baseline failed --
  TODO confirm against the producer of the tabular data.
  """
  return "NO_E" in x["symptom_yes"]

def _narrow_to_runtime_baseline_fail(tab):
  """Filter ``tab`` in three successive steps and return every stage.

  Returns a tuple ``(rows with a symptom, of those the runtime errors,
  of those the rows accepted by baseline_fail_filter)``.
  """
  with_symptom = filter_from_dict_f(tab, has_symptom_filter)
  runtime_only = filter_from_dict_f(with_symptom, runtimeerr_filter)
  baseline_failed = filter_from_dict_f(runtime_only, baseline_fail_filter)
  return with_symptom, runtime_only, baseline_failed

# LeetCode
(hassymp_data_l,
 runtimeerr_data_l,
 runtimeerr_baseline_fail_data_l) = _narrow_to_runtime_baseline_fail(tab_l)
runtimeerr_baseline_fail_num_l = len(runtimeerr_baseline_fail_data_l)

# GFG
(hassymp_data_g,
 runtimeerr_data_g,
 runtimeerr_baseline_fail_data_g) = _narrow_to_runtime_baseline_fail(tab_g)
runtimeerr_baseline_fail_num_g = len(runtimeerr_baseline_fail_data_g)

# HumanEvalX
(hassymp_data_h,
 runtimeerr_data_h,
 runtimeerr_baseline_fail_data_h) = _narrow_to_runtime_baseline_fail(tab_h)
runtimeerr_baseline_fail_num_h = len(runtimeerr_baseline_fail_data_h)

# Combined count and its share of runtime, semantic and all errors.
runtimeerr_baseline_fail_num = (
    runtimeerr_baseline_fail_num_l
    + runtimeerr_baseline_fail_num_g
    + runtimeerr_baseline_fail_num_h
)
runtimeerr_baseline_fail_inruntime_ratio = runtimeerr_baseline_fail_num / runtime
runtimeerr_baseline_fail_insem_ratio = runtimeerr_baseline_fail_num / sem_err
runtimeerr_baseline_fail_inall_ratio = runtimeerr_baseline_fail_num / all_err

In [14]:
# Emit the LaTeX macros for runtime errors accepted by baseline_fail_filter.
# Percent signs are escaped as '\\%' (a literal backslash followed by '%')
# because '%' starts a comment in LaTeX; the previous spelling '\%' was an
# invalid Python escape sequence. The internal placeholder previously misspelt
# "INRUMTIME" is now "INRUNTIME"; the emitted macro names are unchanged.
print("""
% ---------------- Others
\\newcommand{\\runtimebaselinefail}{RUNTIME_BASELINE_FAIL_C}
\\newcommand{\\runtimebaselinefailinruntimeratio}{RUNTIME_BASELINE_FAIL_INRUNTIME_RATIO}
\\newcommand{\\runtimebaselinefailinsemratio}{RUNTIME_BASELINE_FAIL_INSEM_RATIO}
\\newcommand{\\runtimebaselinefailinallratio}{RUNTIME_BASELINE_FAIL_INALL_RATIO}
"""
.replace("RUNTIME_BASELINE_FAIL_C", str(runtimeerr_baseline_fail_num))
.replace("RUNTIME_BASELINE_FAIL_INRUNTIME_RATIO", "{:.2%}".format(runtimeerr_baseline_fail_inruntime_ratio).replace('%','\\%'))
.replace("RUNTIME_BASELINE_FAIL_INSEM_RATIO", "{:.2%}".format(runtimeerr_baseline_fail_insem_ratio).replace('%','\\%'))
.replace("RUNTIME_BASELINE_FAIL_INALL_RATIO", "{:.2%}".format(runtimeerr_baseline_fail_inall_ratio).replace('%','\\%'))
)


% ---------------- Others
\newcommand{\runtimebaselinefail}{44}
\newcommand{\runtimebaselinefailinruntimeratio}{14.67\%}
\newcommand{\runtimebaselinefailinsemratio}{6.79\%}
\newcommand{\runtimebaselinefailinallratio}{5.77\%}

