In our paper, we design four RQs to evaluate Upbeat:
* RQ1: How effective is Upbeat at detecting boundary bugs in Q# libraries?
* RQ2: How does Upbeat compare with prior methods and baselines on bug detection?
* RQ3: How do individual components of Upbeat contribute to its overall performance?
* RQ4: How effective is Upbeat in extracting constraints from Q# libraries and API documents?

Please run the following cells to view our experiment results.

### Results for RQ1

During our experiment period, Upbeat has uncovered 16 implementation bugs and 4 API document errors. To review all the bugs detected by Upbeat during this period, please refer to the following cell.

In [1]:
import re

def extract_tables_from_md(md_file):
    """Return every Markdown table found in *md_file* as a list of strings.

    A table is taken to start at the first ``|`` of a line that contains at
    least two ``|`` characters, and to run up to the first blank line or the
    end of the file, whichever comes first.

    Args:
        md_file: Path of the Markdown file to scan (read as UTF-8).

    Returns:
        A list with the raw text of each table, in document order. A table
        that is followed by a newline keeps that newline; a table that ends
        the file without a trailing newline is returned without one.
    """
    with open(md_file, 'r', encoding='utf-8') as file:
        md_content = file.read()

    # Match Markdown tables with a regular expression. The trailing
    # alternative `\Z` lets the last table match even when the file does not
    # end with a newline; without it such a table would be silently dropped.
    table_pattern = r'\|.*\|[\s\S]*?(?:\n(?=\n|\Z)|\Z)'
    return re.findall(table_pattern, md_content)

def main():
    """Print every table extracted from the bug-list Markdown file."""
    # Replace with the path to your own Markdown file if needed.
    bug_list_path = '../data/experiment/BugList.md'

    for table_text in extract_tables_from_md(bug_list_path):
        print(table_text)


if __name__ == "__main__":
    main()

| No.  |     API     |     Version      |                             Link                             | Status                         | Contributor                              | Description   |
| :--: | :--------------: | :----------------------------------------------------------: | ------------------------------ | ---------------------------------------- | ------------------------------------------------ | ---- |
| 1 | Binom | V0.21.2111177148 | [Bug Report1](https://github.com/microsoft/QuantumLibraries/issues/498) | First Found & Verified & Fixed | [Xing Qu](https://github.com/QuXing9) | k=0 or n=k will cause an overflow |
| 2 | HalfIntegerBinom | v0.21.2111177148 | [Bug-Report2](/docs/bug/bug-14.png) | Verified & Fixed | [Xing Qu](https://github.com/QuXing9) | k=0 will cause overflow |
| 3 | Sin | v0.25.228311 | [Bug-Report3](https://github.com/microsoft/QuantumLibraries/issues/624) | First Found & Verified | [Tianmin Hu](https://github.com/weucode) | different os has different

### Results for RQ2

Upbeat outperforms the competing baselines by providing better code coverage and identifying more potential bugs with the same test time. Execute the following two cells to observe the coverage and anomaly results.

In [3]:
import os
import matplotlib.pyplot as plt
import numpy as np
from scipy.interpolate import make_interp_spline

from Fuzzing.calculate_code_coverage import calculate_coverage

# Curve colours and tool names; the plotting loop zips the two lists, so the
# i-th colour is used for the i-th tool. Each tool name is also the stem of
# its coverage file ("<tool>.txt").
color_list = ['#9EB3C2', '#AFCAD0', '#C0E0DE', '#8BC3D9', '#6EACC7', '#468FAF', '#297596', '#014F86', '#013A63']
tool_list = ['qsharpfuzz', 'quito', 'qsharpcheck', 'upbeat-m', 'muskit', 'qdiff', 'morphq', 'upbeat-r', 'upbeat']

def draw_one_line(axss, y, label, color):
    """Plot *y* on *axss* as a smoothed curve.

    Args:
        axss: Object exposing a matplotlib-style ``plot`` method.
        y: The 25 samples to draw, positioned at x = 0, 1, ..., 24.
        label: Legend label for the curve.
        color: Colour passed through to ``plot``.
    """
    # Sample positions are fixed at 0..24, so y must hold exactly 25 values.
    sample_x = np.arange(25)
    spline = make_interp_spline(sample_x, y)

    # Evaluate the spline on a denser grid so the curve looks smooth.
    dense_x = np.linspace(0, 24, 50)
    axss.plot(dense_x, spline(dense_x), label=label, color=color)

input_folder = "../data/experiment/cov-result-origin/"
output_folder = "../data/experiment/cov-result-calculated/"
# Optional step, disabled by default: presumably regenerates the calculated
# coverage files from the raw results -- confirm before enabling.
# for input_file in os.listdir(input_folder):
#     print("processing "+input_file)
#     calculate_coverage(input_folder+input_file, output_folder+input_file)

# Left subplot: line coverage; right subplot: branch coverage.
fig, axs = plt.subplots(1, 2, figsize=(10, 4))
for tool, color in zip(tool_list, color_list):
    # Every curve starts from zero coverage at x = 0.
    line_cov, branch_cov = [0.0], [0.0]
    output_file = tool+".txt"
    print("drawing "+output_file)
    with open(output_folder+output_file, "r") as f:
        for line in f:
            # Lines read from a file keep their newline, so a length check
            # never detects blank lines; split on whitespace and test that.
            fields = line.split()
            if not fields:
                continue
            # Second field is the line coverage, third the branch coverage.
            line_cov.append(float(fields[1]))
            branch_cov.append(float(fields[2]))
    # NOTE: draw_one_line expects 25 samples, i.e. 24 data rows per file.
    draw_one_line(axs[0], line_cov, tool, color)
    draw_one_line(axs[1], branch_cov, tool, color)
for ax in axs:
    ax.legend()
    ax.set_xticks(np.arange(0, 25, 1))
    # plt.margins() only affects the current axes (the last subplot); set
    # the margin on each axes so both subplots are laid out the same way.
    ax.margins(x=0)
plt.tight_layout()
plt.show()

ModuleNotFoundError: No module named 'scipy'

In [None]:
from tabulate import tabulate

# The first line of every report file is expected to contain
# "can be detected by <tool>"; the captured text is used as the tool name.
regex = r"can be detected by (.*)"


def _count_reports_by_tool(report_dir):
    """Count the report files in *report_dir* per tool named on their first line.

    Files whose first line does not match ``regex`` are reported and skipped
    instead of aborting the whole cell with an AttributeError.

    Args:
        report_dir: Directory path ending with a path separator.

    Returns:
        A dict mapping tool name to the number of report files naming it.
    """
    counts = {}
    # Sort the listing so the order of the table rows is reproducible.
    for name in sorted(os.listdir(report_dir)):
        with open(report_dir+name) as fi:
            first_line = fi.readline()
        match = re.search(regex, first_line)
        if match is None:
            print("skipping "+name+": no tool name found on the first line")
            continue
        tool = match.group(1)
        counts[tool] = counts.get(tool, 0) + 1
    return counts


lang_dir = "../data/experiment/anomalies-lang/"
lang_results = _count_reports_by_tool(lang_dir)
print(tabulate(lang_results.items(), headers=["Tool", "#Anomalies via language-level test"]))
print("\n")
abl_dir = "../data/experiment/anomalies-diff/"
diff_results = _count_reports_by_tool(abl_dir)
print(tabulate(diff_results.items(), headers=["Tool", "#Anomalies via differential testing"]))

### Results for RQ3

The Upbeat components all contribute positively to the bug-exposing capability of the framework. Run the following cell to observe the ablation results.

In [None]:
# Count the ablation-study report files per tool; the tool name is taken
# from the first line of each file via `regex` (defined in an earlier cell).
abl_results = {}
abl_dir = "../data/experiment/ablation-study/"
# Sort the listing so the order of the table rows is reproducible.
for f in sorted(os.listdir(abl_dir)):
    with open(abl_dir+f) as fi:
        first_line = fi.readline()
    match = re.search(regex, first_line)
    if match is None:
        # Report and skip unexpected files instead of failing with an
        # AttributeError on `match.group`.
        print("skipping "+f+": no tool name found on the first line")
        continue
    tool = match.group(1)
    abl_results[tool] = abl_results.get(tool, 0) + 1
print(tabulate(abl_results.items(), headers=["Tool", "#Bugs"]))

### Results for RQ4

Upbeat is capable of extracting the majority of constraints from both source code and API documents with high accuracy. Run the following two cells to observe the extraction results.

In [None]:
import json
from tabulate import tabulate


def get_rate(num1: int, num2: int):
    """Return ``num1 / num2``, or 0.0 when the denominator ``num2`` is zero."""
    return 0.0 if num2 == 0 else num1 / num2

def convert_to_percent(n):
    """Format the ratio *n* as a whole-number percentage string such as "88%".

    The ratio is rounded to two decimals before being scaled by 100.
    """
    percent = round(n, 2) * 100
    return f"{percent:.0f}%"

def calculate(d: dict):
    """Average the per-namespace extraction rates for each of four metrics.

    For every metric, the rate of each namespace is its hit count divided by
    its total. These rates are averaged over the namespaces whose total for
    that metric is non-zero; namespaces without data for a metric do not
    affect its average.

    Args:
        d: Mapping of namespace to a dict of counters. Each counter dict must
            provide the keys "classical-identified", "classical-id-total",
            "classical-extracted", "classical-ex-total",
            "quantum-identified", "quantum-id-total", "quantum-extracted"
            and "quantum-ex-total".

    Returns:
        A 4-tuple of percentage strings, in the order: classical identified,
        classical extracted, quantum identified, quantum extracted. A metric
        for which no namespace has data yields "0%" rather than raising
        ZeroDivisionError.
    """
    # (hit-count key, total key) for each reported metric, in output order.
    metrics = (
        ("classical-identified", "classical-id-total"),
        ("classical-extracted", "classical-ex-total"),
        ("quantum-identified", "quantum-id-total"),
        ("quantum-extracted", "quantum-ex-total"),
    )
    percentages = []
    for hit_key, total_key in metrics:
        rate_sum, contributing = 0.0, 0
        for properties in d.values():
            total = properties[total_key]
            if total != 0:
                rate_sum += get_rate(properties[hit_key], total)
                contributing += 1
        # get_rate guards the zero denominator that occurs when d is empty or
        # no namespace has a non-zero total for this metric.
        percentages.append(convert_to_percent(get_rate(rate_sum, contributing)))
    return tuple(percentages)

# Build and print the constraint-extraction table: two rows (classical,
# quantum) per source. calculate() returns, in order, the classical
# identified/extracted rates followed by the quantum identified/extracted
# rates; they fill the "Recall" and "Precision" columns respectively.
with open("../data/experiment/constraint-extraction/source-code.json") as f1:
    code_dict = json.load(f1)
code_result = calculate(code_dict)
code_cls_id, code_cls_ex, code_qnt_id, code_qnt_ex = code_result
tab = [
    ("Source Code", "classical", code_cls_id, code_cls_ex),
    ("", "quantum", code_qnt_id, code_qnt_ex),
]

with open("../data/experiment/constraint-extraction/api-document.json") as f2:
    doc_dict = json.load(f2)
doc_result = calculate(doc_dict)
doc_cls_id, doc_cls_ex, doc_qnt_id, doc_qnt_ex = doc_result
tab.extend([
    ("API Document", "classical", doc_cls_id, doc_cls_ex),
    ("", "quantum", doc_qnt_id, doc_qnt_ex),
])

print(tabulate(tab, headers=["Source", "Type", "Recall", "Precision"]))