In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import matplotlib
import os
import sys
# Runtime configuration exported through the process environment.
# NOTE(review): CODE_REPO_PATH is an absolute, machine-specific path; it has
# to be edited by hand when the notebook is run on another machine.
os.environ.update({
    "CODE_REPO_PATH": "/home/haokunliu/past-interaction-learning",
    "SERVER": 'dsi',
    "PORT": '6379',
})
# Root of the code repository; every results/figures path below is built from it.
code_repo_path = os.environ["CODE_REPO_PATH"]

In [4]:
import json
def load_jsonl(file_path):
    """Read a JSON Lines file and return its records as a list.

    Each non-blank line of the file is parsed with ``json.loads``. Blank or
    whitespace-only lines (for example a trailing empty line) are skipped
    instead of raising a decode error.

    Args:
        file_path: Path of the ``.jsonl`` file, opened as UTF-8 text.

    Returns:
        A list with one parsed JSON value per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    json_objects = []

    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            stripped = line.strip()
            if not stripped:
                # Nothing to parse on this line; json.loads('') would raise.
                continue
            json_objects.append(json.loads(stripped))

    return json_objects

def filter_dictionaries(dict_list, filter):
    """Keep the dictionaries that agree with every key/value pair in `filter`.

    A dictionary is kept when ``d.get(key) == value`` holds for each item of
    `filter`; a missing key is therefore compared as ``None``. An empty
    `filter` keeps everything. The input order is preserved.
    """
    def satisfies(candidate):
        for key, expected in filter.items():
            if not candidate.get(key) == expected:
                return False
        return True

    kept = []
    for candidate in dict_list:
        if satisfies(candidate):
            kept.append(candidate)
    return kept

def get_acc_by_config(config):
    """Load the result file selected by `config` and return the matching records.

    `config` must provide the keys 'path', 'inference_method', 'model',
    'task', 'train_size' and 'hyp_size'. The file and the filter depend on
    the model / method:

    * model 'RoBERTa': ``{path}/RoBERTa.jsonl``, filtered on train_size and task.
    * method 'zero_shot' or 'few_shot': ``{path}/few_shot_{model}.jsonl``,
      filtered on task and inference_method.
    * anything else: ``{path}/{method}_{model}.jsonl``, filtered on
      train_size, task and hyp_size.

    Returns:
        The list of records (dicts) from the file that match the filter.
    """
    path = config['path']
    method = config['inference_method']
    model = config['model']
    task = config['task']
    train_size = config['train_size']
    hyp_size = config['hyp_size']

    # Pick the file name and the filter criteria together, one case at a time.
    if model == 'RoBERTa':
        filename = f'{path}/RoBERTa.jsonl'
        criteria = {'train_size': train_size, 'task': task}
    elif method in ['zero_shot', 'few_shot']:
        filename = f'{path}/few_shot_{model}.jsonl'
        criteria = {'task': task, 'inference_method': method}
    else:
        filename = f'{path}/{method}_{model}.jsonl'
        criteria = {'train_size': train_size, 'task': task, 'hyp_size': hyp_size}

    return filter_dictionaries(load_jsonl(filename), criteria)

def get_result_arrays(results):
    """Turn the result records of one model into an array of accuracies in percent.

    The model is read from ``results[0]['inference_model']``.

    * 'RoBERTa': one value per record, in record order.
    * 'claude_2', 'Mixtral-8x7B', 'turbo35_0613': six values in the order
      zero_shot, few_shot, default, filter_and_weight, knn,
      knn_separate_steps. zero_shot and few_shot use the first matching
      record; the other four use the maximum 'acc' over their records.

    Args:
        results: List of record dicts with at least the keys
            'inference_model' and 'acc' (LLM records also need
            'inference_method').

    Returns:
        A numpy array of accuracies multiplied by 100 and rounded to 3 decimals.

    Raises:
        ValueError: If `results` is empty, the model is not recognised, or an
            LLM result set has no record for one of the six inference methods.
    """
    if len(results) == 0:
        # results[0] below would otherwise fail with a bare IndexError.
        raise ValueError("Invalid results")

    model = results[0]['inference_model']
    out = None
    if model == 'RoBERTa':
        out = np.array([entry['acc'] for entry in results])
    elif model in ['claude_2', 'Mixtral-8x7B', 'turbo35_0613']:
        def records_for(method):
            # Records of one inference method; fail with a clear message if absent.
            matching = filter_dictionaries(results, {'inference_method': method})
            if not matching:
                raise ValueError(
                    f"Invalid results: no '{method}' entries for model '{model}'"
                )
            return matching

        def best_acc(method):
            return max(entry['acc'] for entry in records_for(method))

        zero_shot = records_for('zero_shot')[0]['acc']
        few_shot = records_for('few_shot')[0]['acc']
        default = best_acc('default')
        knn = best_acc('knn')
        knn_separate_steps = best_acc('knn_separate_steps')
        filter_and_weight = best_acc('filter_and_weight')

        out = np.array([zero_shot, few_shot, default, filter_and_weight, knn, knn_separate_steps])

    if out is not None:
        return np.round(out * 100, 3)
    raise ValueError("Invalid results")

In [5]:
# Inference methods looked up for every LLM, in the order they are stored in
# each result row. Alternative, smaller selection: ['default', 'knn'].
methods_to_include = [
    'zero_shot',
    'few_shot',
    'default',
    'filter_and_weight',
    'knn',
    'knn_separate_steps',
]

In [6]:
ind_file_path = f'{code_repo_path}/results_final'

# RoBERTa on hotel_reviews: accuracy of the first matching record for each
# training-set size (200, then 1000).
roberta_ind = np.array([
    get_acc_by_config({
        'model': 'RoBERTa',
        'inference_method': 'RoBERTa',
        'train_size': n_train,
        'hyp_size': 3,
        'task': 'hotel_reviews',
        'path': ind_file_path,
    })[0]['acc']
    for n_train in (200, 1000)
])

# One row per LLM, one column per entry of `methods_to_include`; each cell is
# the better accuracy of the two hypothesis-bank sizes (3 and 20).
llms_array_ind = np.array([
    [
        max(
            get_acc_by_config({
                'model': llm_name,
                'inference_method': method_name,
                'train_size': 200,
                'hyp_size': bank_size,
                'task': 'hotel_reviews',
                'path': ind_file_path,
            })[0]['acc']
            for bank_size in (3, 20)
        )
        for method_name in methods_to_include
    ]
    for llm_name in ('claude_2', 'Mixtral-8x7B', 'turbo35_0613')
])

# Report accuracies as percentages with three decimals.
llms_array_ind = np.round(llms_array_ind * 100, 3)
roberta_ind = np.round(roberta_ind * 100, 3)
print(llms_array_ind)
print(roberta_ind)

[[31.  51.  67.3 68.  70.  67.7]
 [55.  56.3 61.3 62.  63.  61.3]
 [50.  55.  57.3 55.3 55.7 54.7]]
[84. 91.]


In [7]:
ood_file_path = f'{code_repo_path}/results_final'
ood_results_all = load_jsonl(f'{ood_file_path}/ood_reviews.jsonl')

# For each inference model: select its records from the OOD file, then
# convert them into an array of percentage accuracies.
roberta, gpt, claude, mixtral = (
    get_result_arrays(
        filter_dictionaries(ood_results_all, {'inference_model': model_key})
    )
    for model_key in ('RoBERTa', 'turbo35_0613', 'claude_2', 'Mixtral-8x7B')
)

# Row order used by the table cells below: claude, mixtral, gpt.
llms_array = [claude, mixtral, gpt]


In [13]:
llms = ['claude_2', 'Mixtral-8x7B', 'turbo35_0613']
inf_methods = ['default', 'knn']
methods = ['Zero shot', 'Few shot', '\\ours (default)', '\\ours (filter)', '\\ours (one-step adaptive)', '\\ours (two-step adaptive)']
METHOD_NAME = {
    'Zero shot': 'zero_shot',
    'Few shot': 'few_shot',
}
MODEL_NAME = {
    'claude_2': '\\claude',
    'Mixtral-8x7B': '\\mixtral',
    'turbo35_0613': '\\gpt'
}

# Tabs written after a method label so the '&' separators line up in the
# generated source; labels not listed here get no padding.
full_table_padding = {
    'Zero shot': '\t\t\t\t',
    'Few shot': '\t\t\t\t\t',
    '\\ours (default)': '\t\t\t',
    '\\ours (filter)': '\t\t\t',
}


def _arrow_diff(ood_acc, ind_acc):
    """Format the OOD-vs-IND accuracy change as '($\\uparrow d$)' or '($\\downarrow d$)'.

    An up arrow is used only when the OOD accuracy is strictly higher; equal
    values are reported with a down arrow and a difference of 0.
    """
    if ood_acc > ind_acc:
        return f'($\\uparrow{round(ood_acc - ind_acc, 3)}$)'
    return f'($\\downarrow{round(ind_acc - ood_acc, 3)}$)'


# Write the full IND/OOD comparison table as LaTeX source.
table_latex_file = f'{code_repo_path}/figures/ood_table_full.txt'
with open(table_latex_file, 'w', encoding='utf-8') as f:
    # NOTE(review): the tabular spec below declares six columns (llcccc) while
    # every row written here has four cells -- confirm this is intended.
    f.write('''\\begin{table}[t]
\\centering
\\resizebox{\\columnwidth}{!}{%
\\begin{tabular}{@{}llcccc@{}}
    \\toprule
    Models           & Methods               \t & \\textsc{IND Deceptive Reviews} & \\textsc{OOD Deceptive Reviews} \\\\ \\midrule \\midrule
''')

    # RoBERTa rows, one per training-set size.
    # Assumes roberta[i] / roberta_ind[i] hold the train-200 result at index 0
    # and the train-1000 result at index 1 -- TODO confirm the record order of
    # the OOD results file.
    f.write('\tRoBERTa (Oracle)')
    for roberta_idx, train_size in enumerate([200, 1000]):
        if roberta_idx > 0:
            # Continuation row: pad where the model name would be.
            f.write('\t\t\t\t\t')
        f.write(f' & Train {train_size}')
        f.write('\t\t\t\t')
        result = roberta[roberta_idx]
        result_ind = roberta_ind[roberta_idx]
        diff = _arrow_diff(result, result_ind)
        f.write(f' & {result_ind}')
        f.write(f' & {result} {diff}')
        f.write(' \\\\')
        if roberta_idx == 0:
            f.write('\n')

    f.write('\\midrule \n')

    # One group of rows per LLM, one row per method.
    for model_idx, model in enumerate(llms):
        f.write(f'\t{MODEL_NAME[model]}\t')
        if model in ['claude_2', 'turbo35_0613']:
            f.write('\t')
        for method_idx, method in enumerate(methods):
            f.write(f'\t & {method}')
            # Exact label lookup (the previous `method in 'Zero shot'` was a
            # substring test that only worked by accident).
            f.write(full_table_padding.get(method, ''))
            result = llms_array[model_idx][method_idx]
            result_ind = llms_array_ind[model_idx, method_idx]
            diff = _arrow_diff(result, result_ind)
            f.write(f' & {result_ind}')
            f.write(f' & {result} {diff}')

            if method != methods[-1]:
                f.write(' \\\\ \n\t\t\t\t')
            else:
                f.write(' \\\\ ')
        if model != llms[-1]:
            f.write('\\midrule \n')

    f.write('''\\bottomrule
    \\end{tabular}
    }
    \\caption{
        Performance of baselines compared to our methods on the out-of-distribution deceptive reviews.
    }
    \\label{tab:ood_results_full}\n''')
    f.write('\\end{table}')

In [16]:
# Display the OOD accuracy arrays, one per LLM (list order: claude, mixtral, gpt).
llms_array

[array([27.7, 41.7, 71.7, 74.7, 68.3, 70.7]),
 array([49.7, 49. , 64.7, 61. , 54.7, 64.7]),
 array([49. , 52. , 60.7, 55.7, 51.7, 59. ])]

In [58]:
llms = ['claude_2', 'Mixtral-8x7B', 'turbo35_0613']
inf_methods = ['default', 'knn']
methods = ['Zero shot', 'Few shot', '\\ours']
METHOD_NAME = {
    'Zero shot': 'zero_shot',
    'Few shot': 'few_shot',
}
MODEL_NAME = {
    'claude_2': '\\claude',
    'Mixtral-8x7B': '\\mixtral',
    'turbo35_0613': '\\gpt'
}

# Tabs written after a row label so the '&' separators line up in the
# generated source.
compact_table_padding = {
    'Few shot': '\t',
    '\\ours': '\t\t',
}


def _macro_diff(ood_acc, ind_acc):
    """Format the OOD-vs-IND accuracy change with the \\increase / \\decrease macros.

    \\increase is used only when the OOD accuracy is strictly higher; equal
    values are reported with \\decrease and a difference of 0.
    """
    if ood_acc > ind_acc:
        return f'(\\increase ${round(ood_acc - ind_acc, 3)}$)'
    return f'(\\decrease ${round(ind_acc - ood_acc, 3)}$)'


# Collect the (method, OOD accuracy, IND accuracy) rows shown for each LLM
# before writing anything, so the best OOD value can be determined up front.
compact_rows = {}
for model_idx, model in enumerate(llms):
    model_rows = []
    for method_idx, method in enumerate(methods):
        if method == 'Zero shot':
            # Zero-shot results are left out of the compact table.
            continue
        if method == '\\ours':
            # '\ours' stands for the best of all remaining method columns.
            # NOTE(review): the OOD and IND maxima are taken independently, so
            # the reported difference may compare two different variants.
            result = np.max(llms_array[model_idx][method_idx:])
            result_ind = np.max(llms_array_ind[model_idx, method_idx:])
        else:
            result = llms_array[model_idx][method_idx]
            result_ind = llms_array_ind[model_idx, method_idx]
        model_rows.append((method, result, result_ind))
    compact_rows[model] = model_rows

# Highest OOD accuracy among the displayed LLM rows; it is typeset in bold.
# (Previously the bold value was hard-coded as 74.7.)
best_ood_acc = max(
    result for model_rows in compact_rows.values() for _, result, _ in model_rows
)

# Write the compact OOD table as LaTeX source.
table_latex_file = f'{code_repo_path}/figures/ood_table.txt'
with open(table_latex_file, 'w', encoding='utf-8') as f:
    # NOTE(review): the tabular spec below declares six columns (llcccc) while
    # every row written here has two cells -- confirm this is intended.
    f.write('''\\begin{table}[t]
\\centering
%\\resizebox{\\columnwidth}{!}{%
\\begin{tabular}{@{}llcccc@{}}
    \\toprule
    Models           \t & \\textsc{OOD} \\\\ \\midrule \\midrule
''')

    # RoBERTa row (train size 200 only).
    # Assumes index 0 of roberta / roberta_ind is the train-200 result -- TODO
    # confirm the record order of the OOD results file.
    f.write('\tRoBERTa (Oracle)\t')
    for roberta_idx, train_size in enumerate([200]):
        if roberta_idx > 0:
            f.write('\t\t\t\t\t')
        result = roberta[roberta_idx]
        result_ind = roberta_ind[roberta_idx]
        diff = _macro_diff(result, result_ind)
        f.write(f' & {result} {diff}')
        f.write(' \\\\')

    f.write(' \\midrule \n')

    # One group of rows per LLM.
    for model in llms:
        f.write('\t')
        for method, result, result_ind in compact_rows[model]:
            f.write(f'{MODEL_NAME[model]}\\ {method}')
            f.write(compact_table_padding.get(method, ''))
            if model == 'turbo35_0613':
                f.write('\t')
            diff = _macro_diff(result, result_ind)
            if result == best_ood_acc:
                f.write(f' & {{\\bf {result}}} {diff}')
            else:
                f.write(f' & {result} {diff}')

            if method != methods[-1]:
                f.write(' \\\\ \n\t')
            else:
                f.write(' \\\\ ')
        if model != llms[-1]:
            f.write('\\midrule \n')

    f.write('''\\bottomrule
    \\end{tabular}
%    }
    \\caption{
        Performance of baselines compared to our methods on the out-of-distribution deceptive reviews.
    }
    \\label{tab:ood_results}\n''')
    f.write('\\end{table}')