In [12]:
import pandas as pd
import re
import os
# Variants to process; each name is used as a sub-directory under ./fold0
# (input CSVs) and ./latex (generated tables).
comment_types = ["no_comments", "comments", "added_comments"]

# Overall Stats

In [13]:
# Generate the per-project LaTeX tables for every entry of `comment_types`
# and every threshold suffix.
#
# Reads  ./fold0/<comment_type>/project_stats_<val>.csv
# Writes ./latex/<comment_type>/toga_results_<val>.tex        (every <val>)
#        ./latex/<comment_type>/toga_dataset_stats_all.tex    (only for "all")

# Short column headers used in the generated tables. The backslash in
# "acc_\\Delta" is escaped explicitly so the value is literally `acc_\Delta`
# without relying on an invalid escape sequence.
stat_column_labels = {
    "accuracy": "acc.",
    "pass_accuracy": "pass_acc.",
    "fail_accuracy": "fail_acc.",
    "pass_rate": "dataset_pass_%",
    "fail_rate": "dataset_fail_%",
    "accuracy_improvement": "acc_\\Delta",
    "coin_accuracy": "coin_acc.",
    "out_vocab_C_ratio": "missing_C_%",
    "out_vocab_T_ratio": "missing_T_%",
    "out_vocab_combined_ratio": "missing_token_%",
}

# Columns of the dataset-statistics table.
dataset_stat_columns = [
    "project",
    "N",
    "dataset_pass_%",
    "dataset_fail_%",
    "missing_C_%",
    "missing_T_%",
    "missing_token_%",
]

# Metric columns shared by every results table (placed after "project", and
# after "N" for the restricted runs).
result_metric_columns = [
    "acc_\\Delta",
    "acc.",
    "pass_acc.",
    "fail_acc.",
    "f1",
    "coin_acc.",
    "coin_f1",
    "tp",
    "fn",
    "tn",
    "fp",
]

for comment_type in comment_types:
    # Make sure the output directory exists before writing into it.
    os.makedirs(f"./latex/{comment_type}", exist_ok=True)

    for val in ["all", "50", "25", "10", "05"]:
        df = pd.read_csv(f"./fold0/{comment_type}/project_stats_{val}.csv")
        # Simplify project names: keep only the text before the first "-<digit>".
        df["project"] = df["project"].map(lambda name: re.split(r"-\d", name)[0])
        df = df.rename(columns=stat_column_labels)

        table1 = df[dataset_stat_columns]

        if val == "all":
            # Unrestricted run: "N" is reported in the dataset-statistics
            # table, so the results table omits it.
            table2 = df[["project", *result_metric_columns]]
            results_caption = "SEER Results on TOGA*"
            table1.to_latex(
                f"./latex/{comment_type}/toga_dataset_stats_{val}.tex",
                index=False,
                caption="TOGA* Dataset Statistics",
                label=f"tab:toga_stats_{val}",
            )
        else:
            # Restricted runs: no dataset-statistics table is written, so "N"
            # is carried in the results table instead.
            table2 = df[["project", "N", *result_metric_columns]]
            results_caption = (
                "SEER Results on TOGA*, restricted to minimum "
                f"{100 - int(val)}\\% of tokens present"
            )

        table2.to_latex(
            f"./latex/{comment_type}/toga_results_{val}.tex",
            index=False,
            caption=results_caption,
            label=f"tab:toga_results_{val}",
        )

In [14]:
# Fixing some LaTeX issues in the generated tables, in place, for every
# entry of `comment_types` (not just the last value left over from a loop).
for comment_type in comment_types:
    latex_dir = f"./latex/{comment_type}"
    for filename in os.listdir(latex_dir):
        # Only patch LaTeX files; skip any other directory entry.
        if not filename.endswith(".tex"):
            continue
        with open(f"{latex_dir}/{filename}", "r+") as f:
            text = f.read()
            # Turn the escaped "\Delta" header back into math mode. A plain
            # string replacement is used because "\D" is rejected as a bad
            # escape inside an re.sub replacement template.
            text = text.replace("\\textbackslash Delta", "$\\Delta$")
            # Switch the float environment to its starred form. Matching only
            # \begin{table} / \end{table} keeps the step idempotent and leaves
            # any other occurrence of the word "table" untouched.
            text = re.sub(r"\\(begin|end)\{table\}", r"\\\1{table*}", text)
            # Overwrite the file with the patched text and drop any leftover
            # bytes from the old content.
            f.seek(0)
            f.write(text)
            f.truncate()

# Similarity

In [15]:
# Export the table of unique methods under test read from
# ./similarity_analysis/similarity_unique_mut.csv as LaTeX.
df_common_unique = pd.read_csv("./similarity_analysis/similarity_unique_mut.csv")

# Simplify project names: keep only the text before the first "-<digit>".
df_common_unique["triplets"] = df_common_unique.apply(
    lambda record: re.split(r"-\d", record["triplets"])[0],
    axis=1,
)

# Relabel the columns with the names shown in the table header.
df_common_unique = df_common_unique.rename(
    columns={
        "phase2": "SEER",
        "triplets": "TOGA*",
        "triplets_unique_count": "TOGA*_count",
        "phase2_unique_count": "SEER_count",
    }
)

# Column order of the exported table.
latex_columns = ["SEER", "TOGA*", "SEER_count", "TOGA*_count"]
df_common_unique[latex_columns].to_latex(
    "./latex/common_projects_unique.tex",
    index=False,
    caption="Unique Methods Under Test",
    label="tab:common_unique_MUT",
)