In [40]:
import numpy as np
import pandas as pd

In [41]:
class SpectrumData():
    def __init__(self, filename):
        self.filename = filename
        self.spectrum_df = self.read_df()
    
    def read_df(self):
        return pd.read_csv(self.filename)
    
    def get_dict(self):
        result = {}
        for sid, sid_df in self.spectrum_df.groupby('sid'):
            result[sid] = {}
            for freq, freq_df in sid_df.groupby('freq'):
                result[sid][freq] = freq_df['power'].values.tolist()
        return result

In [42]:
def classify_pair(x_human: dict, x_model: dict, eps: float = 0.0, higher: str = 'model'):
    """
    逐个sid比较两个频谱结构
    - x_human: dict[sid][freq] = list of power
    - x_model: dict[sid][freq] = list of power
    - k_freq: 取前k个频率
    - higher: 'model' or 'human'
    """
    assert x_human.keys() == x_model.keys()
    correct = 0

    for sid in x_human.keys():
        power_h = np.sum(x_human[sid][1])
        power_m = np.sum(x_model[sid][1])

        if higher == 'model':
            correct += int(power_m > power_h + eps)
        else:
            correct += int(power_h > power_m + eps)
    return correct / len(x_human)

In [43]:
datasets = ['pubmed', 'writing', 'xsum']
models = ['gpt-4', 'gpt-3.5', 'gpt-3']
labels = ['original', 'sampled']
generated_models = ['bigram', 'gpt2xl', 'mistral']

In [44]:
def classify_pair_with_path(original, sampled, higher='model'):
    spec_orig = SpectrumData(original)
    x_human = spec_orig.get_dict()
    spec_samp = SpectrumData(sampled)
    x_model = spec_samp.get_dict()
    accuracy = classify_pair(x_human=x_human, x_model=x_model, higher=higher)
    return accuracy

In [45]:
for dataset in datasets:
    print(f"{dataset}:")
    for model in models:
        print(f"    {model}:")
        for generated_model in generated_models:
            print(f"        {generated_model}:")
            original = f"../data/{dataset}/{dataset}_{model}.original.{generated_model}.nllzs.wavelet.txt"
            sampled = f"../data/{dataset}/{dataset}_{model}.sampled.{generated_model}.nllzs.wavelet.txt"
            accuracy = classify_pair_with_path(original, sampled)
            print(f"            {accuracy:.4f}")

pubmed:
    gpt-4:
        bigram:
            0.4200
        gpt2xl:
            0.4200
        mistral:
            0.4667
    gpt-3.5:
        bigram:
            0.4600
        gpt2xl:
            0.4067
        mistral:
            0.2800
    gpt-3:
        bigram:


KeyError: 1