In [180]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
from tabulate import tabulate


In [181]:
def extract_info_from_path(path):
    """
    Parse the file name of a responses csv into its descriptive components.

    The file name (the text after the last '/') is split on '_' and its first five
    fields are read as: dataset, author word 1, author word 2, context policy, model.
    Dashes are turned into spaces, '.csv' is stripped from the model field and every
    component is capitalized (first character upper-case, the rest lower-case).

    :param path: path to a responses csv file
    :return: tuple (dataset_name, author, model, context_policy) -- note that the model
             is returned before the context policy
    """
    file_name = path.split('/')[-1]
    fields = file_name.split('_')

    def prettify(token):
        # Dashes become spaces; capitalize() lower-cases everything but the first character
        return token.replace('-', ' ').capitalize()

    dataset_name = prettify(fields[0])
    author = " ".join([fields[1].capitalize(), fields[2].capitalize()])
    context_policy = prettify(fields[3])
    model = prettify(fields[4].replace('.csv', ''))
    return dataset_name, author, model, context_policy

def compute_roc_values(auth1_df, auth2_df):
    """
    Build the ROC curve that separates two authors by their response values over the same LM.

    Author1 rows are labelled 0 and author2 rows are labelled 1; the 'response' value of
    each row is used as the score handed to roc_curve. Rows whose response is NaN are
    removed (together with their labels) before the curve is computed.

    :param auth1_df: dataframe with a 'response' column holding author1 responses
    :param auth2_df: dataframe with a 'response' column holding author2 responses
    :return: (fpr, tpr, roc_auc) where fpr/tpr come from roc_curve and roc_auc from auc
    """
    scores = np.concatenate([auth1_df['response'], auth2_df['response']])
    # 0.0 once per author1 row followed by 1.0 once per author2 row
    labels = np.repeat([0.0, 1.0], [len(auth1_df), len(auth2_df)])

    # Keep only the entries that have a valid (non-NaN) response
    valid = ~np.isnan(scores)
    fpr, tpr, _ = roc_curve(labels[valid], scores[valid])

    return fpr, tpr, auc(fpr, tpr)

def calc_diff(auth1_path, auth2_path):
    """
    Compute the absolute standardized mean difference between two authors' responses.

    The value is |mean1 - mean2| / pooled_std with
    pooled_std = sqrt(((n1 - 1) * std1**2 + (n2 - 1) * std2**2) / (n1 + n2 - 2)).

    NaN responses are dropped before anything is computed, so the sample sizes n1/n2
    used to weight the pooled standard deviation count exactly the rows that the means
    and standard deviations were computed from.

    :param auth1_path: path to author1 csv with a 'response' column (same LM as author2)
    :param auth2_path: path to author2 csv with a 'response' column
    :return: the non-negative standardized mean difference; the sign is discarded, so the
             result does not depend on the order of the two authors
    """
    auth1_responses = pd.read_csv(auth1_path)["response"].dropna()
    auth2_responses = pd.read_csv(auth2_path)["response"].dropna()

    auth1_mean, auth2_mean = auth1_responses.mean(), auth2_responses.mean()
    auth1_std, auth2_std = auth1_responses.std(), auth2_responses.std()
    # Sample sizes after dropping NaN, consistent with the mean/std above
    len_auth1, len_auth2 = len(auth1_responses), len(auth2_responses)

    pooled_variance = (
        (len_auth1 - 1) * auth1_std**2 + (len_auth2 - 1) * auth2_std**2
    ) / (len_auth1 + len_auth2 - 2)
    pooled_std = np.sqrt(pooled_variance)

    return abs((auth1_mean - auth2_mean) / pooled_std)

def lm_aggregate_metrics(lm_paths):
    """
    Compute AUC and standardized mean difference (calc_diff) for two use-cases:
    1. LM separator is one of the authors.
    2. LM separator is not one of the authors.

    Every unordered pair of paths in lm_paths is evaluated once. A pair belongs to the
    first group when any whitespace-separated word of the LM name (taken from the first
    path) occurs, case-insensitively, as a substring of either author name.

    :param lm_paths: list of csv paths holding the responses of a single LM over all authors
    :return: (group1_aucs, group1_diffs, group2_aucs, group2_diffs) where group1 holds the
             pairs in which the LM is one of the authors and group2 the remaining pairs
    """
    _, _, lm_name, _ = extract_info_from_path(lm_paths[0])
    # The LM name does not change inside the loops, so split it into keywords once
    lm_keywords = lm_name.lower().split()

    group1_aucs, group1_diffs = [], []  # LM in authors
    group2_aucs, group2_diffs = [], []  # LM not in authors

    loaded_frames = {}

    def load_responses(csv_path):
        # Read each csv at most once, and only when a pair actually needs it
        if csv_path not in loaded_frames:
            loaded_frames[csv_path] = pd.read_csv(csv_path)
        return loaded_frames[csv_path]

    for i, auth1_path in enumerate(lm_paths):
        # Only paths after position i: avoids self-comparisons and duplicate pairs
        for auth2_path in lm_paths[i + 1:]:
            _, author1, _, _ = extract_info_from_path(auth1_path)
            _, author2, _, _ = extract_info_from_path(auth2_path)

            _, _, roc_auc = compute_roc_values(load_responses(auth1_path), load_responses(auth2_path))
            # calc_diff takes paths (not dataframes) and reads the files itself
            diff = calc_diff(auth1_path, auth2_path)

            # Check if the LM matches any part of the author names
            authors = (author1.lower(), author2.lower())
            lm_is_author = any(keyword in author for keyword in lm_keywords for author in authors)

            if lm_is_author:
                group1_aucs.append(roc_auc)
                group1_diffs.append(diff)
            else:
                group2_aucs.append(roc_auc)
                group2_diffs.append(diff)

    return group1_aucs, group1_diffs, group2_aucs, group2_diffs

def lm_comparison_table(group1_aucs, group1_diffs, group2_aucs, group2_diffs, lm_name):
    """
    Build the table row of a single LM separator.

    :return: [lm_name, mean of group1_aucs, mean of group1_diffs, mean of group2_aucs,
             mean of group2_diffs]; an empty group is reported as 0
    """
    row = [lm_name]
    for values in (group1_aucs, group1_diffs, group2_aucs, group2_diffs):
        # An empty list has no mean; it is reported as 0 instead
        row.append(np.mean(values) if values else 0)
    return row

def all_lms_comparison_table(group1_aucs, group1_diffs, group2_aucs, group2_diffs):
    """
    Build the aggregate table row over all LM separators.

    :return: ["All LMs", mean of group1_aucs, mean of group1_diffs, mean of group2_aucs,
             mean of group2_diffs]; an empty group is reported as 0
    """
    def average_or_zero(values):
        # Empty groups are reported as 0 instead of being averaged
        if not values:
            return 0
        return np.mean(values)

    return [
        "All LMs",
        average_or_zero(group1_aucs),
        average_or_zero(group1_diffs),
        average_or_zero(group2_aucs),
        average_or_zero(group2_diffs),
    ]

def compare_hist(auth1_path1, auth2_path1, auth1_path2, auth2_path2):
    """
    Plot, for two LMs side by side, the response histograms of two authors (top row)
    and the ROC curve separating them (bottom row).

    input: paths of author1 and author2 csv holding responses for each sentence;
           the '...1' paths belong to the first LM and the '...2' paths to the second LM
    output: None; a 2x2 figure is shown with plt.show()

    The dataset name, author1 and the first model name are parsed from auth1_path1, the
    second model name from auth1_path2 and author2 from auth2_path2.
    All histograms share the same bin edges (width 0.1) spanning the global minimum and
    maximum response, so the two LMs can be compared visually.
    """
    auth1_df1 = pd.read_csv(auth1_path1)
    auth2_df1 = pd.read_csv(auth2_path1)
    auth1_df2 = pd.read_csv(auth1_path2)
    auth2_df2 = pd.read_csv(auth2_path2)

    dataset_name, author1, model1, _ = extract_info_from_path(auth1_path1)
    _, _, model2, _ = extract_info_from_path(auth1_path2)
    _, author2, _, _ = extract_info_from_path(auth2_path2)

    # One entry per figure column: model name, both authors' frames and their ROC values
    panels = [
        (model1, auth1_df1, auth2_df1, compute_roc_values(auth1_df1, auth2_df1)),
        (model2, auth1_df2, auth2_df2, compute_roc_values(auth1_df2, auth2_df2)),
    ]

    # Shared bin edges. The last edge must reach the largest response: values beyond the
    # last edge are not counted by the histogram, and np.arange(lowest, highest, width)
    # stops short of `highest`.
    bin_width = 0.1
    all_frames = (auth1_df1, auth2_df1, auth1_df2, auth2_df2)
    lowest = min(df["response"].min() for df in all_frames)
    highest = max(df["response"].max() for df in all_frames)
    n_bins = max(1, int(np.ceil((highest - lowest) / bin_width)))
    bins = lowest + bin_width * np.arange(n_bins + 1)
    bins[-1] = max(bins[-1], highest)  # guard against floating point round-off

    fig, axs = plt.subplots(2, 2, figsize=(12, 10))
    for col, (model, auth1_df, auth2_df, (fpr, tpr, roc_auc)) in enumerate(panels):
        # Top row: overlaid histograms of both authors
        hist_ax = axs[0, col]
        hist_ax.hist(auth1_df["response"], bins=bins, alpha=0.5, label=author1)
        hist_ax.hist(auth2_df["response"], bins=bins, alpha=0.5, label=author2)
        hist_ax.set_title(f"LM response generator - {model}")
        hist_ax.set_xlabel('Log-perplexity')
        hist_ax.set_ylabel('Frequency')
        hist_ax.legend()

        # Bottom row: ROC curve with the diagonal as reference
        roc_ax = axs[1, col]
        roc_ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.4f})')
        roc_ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
        roc_ax.set_xlabel('False Positive Rate')
        roc_ax.set_ylabel('True Positive Rate')
        roc_ax.legend(loc='lower right')
        roc_ax.grid(True, which='both', linestyle='--', linewidth=0.5)

    plt.suptitle(f"Dataset - {dataset_name}", fontsize=16)
    plt.tight_layout()
    plt.show()


# def calc_mean_ppx_instance(author1_responses, author2_responses):
#     """
#     Each instance in the dataset has a unique "name" value with a different number of sentences for author responses
#     This method calculates the mean perplexity for each instance
#     :param author1_responses: df with responses
#     :param author2_responses: df with responses
#     :return: dataframes containing the mean perplexity for each instance
#     """
#     author1_grouped_mean = author1_responses.groupby('name')['response'].mean().reset_index()
#     author1_sorted_df = author1_grouped_mean.sort_values(by='name', ascending=True)
#     author2_grouped_mean = author2_responses.groupby('name')['response'].mean().reset_index()
#     author2_sorted_df = author2_grouped_mean.sort_values(by='name', ascending=True)
#     return author1_sorted_df, author2_sorted_df

# def calc_diff_ppx_instance(author1_responses, author2_responses):
#     """
#     input: author1/author2 mean responses sorted by 'name' (samples)
#     returns df holding name and the difference between author1 and author2 responses
#     Note: the larger the difference, the 'better' the separation
#     """
#     diff_df = author1_responses.copy()
#     diff_df["response"] = author1_responses["response"] - author2_responses["response"]
#     return diff_df

# def compare_lm_results(auth1_path_base, auth2_path_base, auth1_path, auth2_path):
#     """
#     Compares responses from a baseline LM to responses from an additional LM.

#     input: 
#         - auth1_path_base, auth2_path_base: Paths to baseline LM responses for Author1 and Author2.
#         - auth1_path, auth2_path: Paths to comparison LM responses for Author1 and Author2.
#     output: 
#         - List of response results containing: 
#             [Author1 mean response, Author2 mean response, Auth1-Auth2 log-ppx difference, 
#     """
#     auth1_df, auth2_df = pd.read_csv(auth1_path), pd.read_csv(auth2_path)
#     auth1_mean, auth2_mean = auth1_df["response"].mean(), auth2_df["response"].mean()
#     diff = calc_diff(auth1_path, auth2_path)
#     base_diff = calc_diff(auth1_path_base, auth2_path_base)
#     diff_from_base = round((diff - base_diff),4)
#     if diff_from_base > 0:
#         diff_from_base = f"+ {diff_from_base} ↑"
#     elif diff_from_base < 0:
#         diff_from_base =  f"- {-diff_from_base} ↓"
#     else:
#         diff_from_base = "0"

#     _, _, roc_auc = compute_roc_values(auth1_df, auth2_df)

#     return [auth1_mean, auth2_mean, diff, diff_from_base, roc_auc]

In [182]:
# Paths to the csv files holding the response values.
# For every dataset: one inner list per LM separator, one path per author inside it.
_AUTHOR_TAGS = ["Llama3.1_clean", "Falcon_clean", "human_text"]
_SEPARATOR_MODELS = ["Meta-Llama-3.1-8B-Instruct", "falcon-7b", "phi-2"]


def _response_paths(dataset):
    """Return the nested list of response csv paths (per LM, per author) of a dataset."""
    return [
        [f"Responses/{dataset}_{author}_none_{model}.csv" for author in _AUTHOR_TAGS]
        for model in _SEPARATOR_MODELS
    ]


wiki_paths = _response_paths("wiki")

news_paths = _response_paths("news")

abstracts_paths = _response_paths("abstracts")

In [184]:
# Detailed (non-averaged) AUC / diff values of the Wiki dataset, printed per LM separator
print("=== Wiki Dataset ===")
group1_aucs, group1_diffs, group2_aucs, group2_diffs = [], [], [], []
for lm_paths in wiki_paths:
    lm_metrics = lm_aggregate_metrics(lm_paths)
    lm_is_author_aucs, lm_is_author_diffs, lm_not_author_aucs, lm_not_author_diffs = lm_metrics

    # Accumulate this LM's values into the dataset-wide lists
    group1_aucs += lm_is_author_aucs
    group1_diffs += lm_is_author_diffs
    group2_aucs += lm_not_author_aucs
    group2_diffs += lm_not_author_diffs

    lm_name = extract_info_from_path(lm_paths[0])[2]

    print(f"\nResults for {lm_name} separator:")
    for title, aucs, diffs in (
        ("LM-In-Authors:", lm_is_author_aucs, lm_is_author_diffs),
        ("LM-Not-In-Authors:", lm_not_author_aucs, lm_not_author_diffs),
    ):
        print(title)
        print(f"    AUCs: {aucs}")
        print(f"    Diffs: {diffs}")

=== Wiki Dataset ===

Results for Meta llama 3.1 8b instruct separator:
LM-In-Authors:
    AUCs: [0.7471274732960242, 0.8642764178741291]
    Diffs: [0.9553828959238608, 1.3778681585758543]
LM-Not-In-Authors:
    AUCs: [0.5448500220720822]
    Diffs: [0.29174480358125343]

Results for Falcon 7b separator:
LM-In-Authors:
    AUCs: [0.5407859603180524, 0.6216545463316389]
    Diffs: [0.4646867388783228, 0.19062677338740236]
LM-Not-In-Authors:
    AUCs: [0.7715598124812962]
    Diffs: [1.0437651074235406]

Results for Phi 2 separator:
LM-In-Authors:
    AUCs: []
    Diffs: []
LM-Not-In-Authors:
    AUCs: [0.6836347739656425, 0.8873129819026562, 0.6558719387187516]
    Diffs: [0.7384709643308915, 1.6166164499858928, 0.2265489091456213]


In [176]:
# Results for Wiki dataset
print("=== Wiki Dataset ===")
columns = ["LM Separator", "Avg AUC (LM in Authors)", "Avg Diff (LM in Authors)", "Avg AUC (LM not in Authors)", "Avg Diff (LM not in Authors)"]
lm_comparison_results = []
group1_aucs_all, group1_diffs_all, group2_aucs_all, group2_diffs_all = [], [], [], []
# Same order as the tuple returned by lm_aggregate_metrics
accumulators = (group1_aucs_all, group1_diffs_all, group2_aucs_all, group2_diffs_all)

for lm_paths in wiki_paths:
    lm_metrics = lm_aggregate_metrics(lm_paths)
    for accumulator, values in zip(accumulators, lm_metrics):
        accumulator.extend(values)

    lm_name = extract_info_from_path(lm_paths[0])[2]
    lm_comparison_results.append(lm_comparison_table(*lm_metrics, lm_name))

# first table (per LM separator)
print("\nPer-LM Comparison Table")
print(tabulate(lm_comparison_results, headers=columns, tablefmt="psql"))

# second table (aggregate over all LMs)
all_lms_table = all_lms_comparison_table(*accumulators)
print("\nAll LMs Aggregate Table")
print(tabulate([all_lms_table], headers=columns, tablefmt="psql"))


=== Wiki Dataset ===

Per-LM Comparison Table
+----------------------------+---------------------------+----------------------------+-------------------------------+--------------------------------+
| LM Separator               |   Avg AUC (LM in Authors) |   Avg Diff (LM in Authors) |   Avg AUC (LM not in Authors) |   Avg Diff (LM not in Authors) |
|----------------------------+---------------------------+----------------------------+-------------------------------+--------------------------------|
| Meta llama 3.1 8b instruct |                  0.805702 |                   1.16663  |                      0.54485  |                       0.291745 |
| Falcon 7b                  |                  0.58122  |                   0.327657 |                      0.77156  |                       1.04377  |
| Phi 2                      |                  0        |                   0        |                      0.742273 |                       0.860545 |
+----------------------------+------

In [177]:
# Results for news dataset
print("=== News Dataset ===")
columns = ["LM Separator", "Avg AUC (LM in Authors)", "Avg Diff (LM in Authors)", "Avg AUC (LM not in Authors)", "Avg Diff (LM not in Authors)"]
lm_comparison_results = []
group1_aucs_all, group1_diffs_all, group2_aucs_all, group2_diffs_all = [], [], [], []
# Same order as the tuple returned by lm_aggregate_metrics
accumulators = (group1_aucs_all, group1_diffs_all, group2_aucs_all, group2_diffs_all)

for lm_paths in news_paths:
    lm_metrics = lm_aggregate_metrics(lm_paths)
    for accumulator, values in zip(accumulators, lm_metrics):
        accumulator.extend(values)

    lm_name = extract_info_from_path(lm_paths[0])[2]
    lm_comparison_results.append(lm_comparison_table(*lm_metrics, lm_name))

# first table (per LM separator)
print("\nPer-LM Comparison Table")
print(tabulate(lm_comparison_results, headers=columns, tablefmt="psql"))

# second table (aggregate over all LMs)
all_lms_table = all_lms_comparison_table(*accumulators)
print("\nAll LMs Aggregate Table")
print(tabulate([all_lms_table], headers=columns, tablefmt="psql"))

=== News Dataset ===

Per-LM Comparison Table
+----------------------------+---------------------------+----------------------------+-------------------------------+--------------------------------+
| LM Separator               |   Avg AUC (LM in Authors) |   Avg Diff (LM in Authors) |   Avg AUC (LM not in Authors) |   Avg Diff (LM not in Authors) |
|----------------------------+---------------------------+----------------------------+-------------------------------+--------------------------------|
| Meta llama 3.1 8b instruct |                  0.801406 |                   0.891757 |                      0.593834 |                      0.0276665 |
| Falcon 7b                  |                  0.625791 |                   0.452911 |                      0.809437 |                      1.01977   |
| Phi 2                      |                  0        |                   0        |                      0.744389 |                      0.676006  |
+----------------------------+------

In [179]:
# Results for abstracts dataset
print("=== Abstracts Dataset ===")
columns = ["LM Separator", "Avg AUC (LM in Authors)", "Avg Diff (LM in Authors)", "Avg AUC (LM not in Authors)", "Avg Diff (LM not in Authors)"]
lm_comparison_results = []
group1_aucs_all, group1_diffs_all, group2_aucs_all, group2_diffs_all = [], [], [], []
# Same order as the tuple returned by lm_aggregate_metrics
accumulators = (group1_aucs_all, group1_diffs_all, group2_aucs_all, group2_diffs_all)

for lm_paths in abstracts_paths:
    lm_metrics = lm_aggregate_metrics(lm_paths)
    for accumulator, values in zip(accumulators, lm_metrics):
        accumulator.extend(values)

    lm_name = extract_info_from_path(lm_paths[0])[2]
    lm_comparison_results.append(lm_comparison_table(*lm_metrics, lm_name))

# first table (per LM separator)
print("\nPer-LM Comparison Table")
print(tabulate(lm_comparison_results, headers=columns, tablefmt="psql"))

# second table (aggregate over all LMs)
all_lms_table = all_lms_comparison_table(*accumulators)
print("\nAll LMs Aggregate Table")
print(tabulate([all_lms_table], headers=columns, tablefmt="psql"))

=== Abstracts Dataset ===

Per-LM Comparison Table
+----------------------------+---------------------------+----------------------------+-------------------------------+--------------------------------+
| LM Separator               |   Avg AUC (LM in Authors) |   Avg Diff (LM in Authors) |   Avg AUC (LM not in Authors) |   Avg Diff (LM not in Authors) |
|----------------------------+---------------------------+----------------------------+-------------------------------+--------------------------------|
| Meta llama 3.1 8b instruct |                  0.711765 |                   0.724436 |                      0.650887 |                       0.230043 |
| Falcon 7b                  |                  0.599284 |                   0.389243 |                      0.771363 |                       0.91704  |
| Phi 2                      |                  0        |                   0        |                      0.718649 |                       0.779301 |
+----------------------------+-