In [10]:
import pandas as pd

In [11]:
def tpr(TP, FN):
    if TP + FN == 0:
        return 1
    return TP / (TP + FN)


def fpr(FP, TN):
    if FP + TN == 0:
        return 0
    return FP / (FP + TN)

In [12]:
from typing import List


def getAUC(x: List[float], y:List[float]) -> float:
    """
    Calculates the area under the curve (AUC) using the trapezoidal rule.
    """
    if len(x) != len(y):
        raise ValueError("The number of x and y values must be equal")

    n = len(x)
    area = 0.0

    for i in range(1, n):
        # Trapezoidal rule: area of trapezoid = (1/2) * (sum of parallel sides) * height
        area += 0.5 * (x[i] - x[i-1]) * (y[i] + y[i-1])

    return area // 0.001 / 1000

In [13]:
def getFPRAndTPR(dataset, model_name, criteria):
    eval_res_df = (
        pd.read_csv(
            f"./results_llm/{dataset}/{model_name}/criteria{criteria}.csv",
            header=0,
        )
        .sort_values("threshold")
        .reset_index(drop=True)
    )

    if criteria == 2:
        eval_res_df["FN"] = eval_res_df["FN"] + eval_res_df["TP_LATE"]

    eval_res_df["TPR"] = eval_res_df.apply(
        lambda row: tpr(row["TP"], row["FN"]), axis=1
    )
    eval_res_df["FPR"] = eval_res_df.apply(
        lambda row: fpr(row["FP"], row["TN"]), axis=1
    )
    
    eval_res_df = eval_res_df.sort_values("FPR").reset_index(drop=True)

    fin_tpr = eval_res_df["TPR"].tolist()
    fin_tpr.insert(0, 0)
    fin_tpr.append(1)
    
    fin_fpr = eval_res_df["FPR"].tolist()
    fin_fpr.insert(0, 0)
    fin_fpr.append(1)

    return fin_fpr, fin_tpr

In [14]:
def getAUCForDataset(dataset: str, models = ["LLM-Falcon7B", "LLM-Cyrax7B", "LLM-EmertonMonarch7B", "LLM-BartLarge",  "LLM-GPT3"]):
        # print(f"{model_name}\t", end="")
    for criteria in [1, 2]:
        # print(f"Criteria {criteria}")
        for model_name in models:
            # print(f"{model_name}")
            fin_fpr, fin_tpr = getFPRAndTPR(dataset, model_name, criteria)
            print(f"{getAUC(fin_fpr, fin_tpr)}")
        # print()


In [15]:
getAUCForDataset("hdd")

0.517
0.506
0.518
0.513
0.999
0.517
0.506
0.518
0.513
0.999


In [16]:
getAUCForDataset("buffer-io")

0.505
0.501
0.502
0.501
0.999
0.487
0.501
0.502
0.501
0.999


In [17]:
getAUCForDataset("oom")

0.499
0.5
0.499
0.499
0.87
0.499
0.5
0.499
0.499
0.87


In [18]:
getAUCForDataset("cpu")

0.534
0.515
0.515
0.524
0.999
0.534
0.515
0.515
0.524
0.999
