In [1]:
import pandas as pd
from pprint import pprint
import sys
import os
import numpy as np
import sys
import json
import re
import jsonlines

# Environment variables set before the project modules are imported, followed
# by making the repo's code folders importable.
# NOTE(review): the repo path is an absolute, user-specific location — anyone
# else running this notebook has to edit it.
os.environ.update({
    "CODE_REPO_PATH": "/home/haokunliu/past-interaction-learning",
    "SERVER": 'dsi',
    "PORT": '6379',
})
code_repo_path = os.environ["CODE_REPO_PATH"]

# Appended (not prepended) so already-importable modules keep priority.
sys.path.extend([
    f'{code_repo_path}/code',
    f'{code_repo_path}/code/algorithm',
])

from utils import create_directory
from tasks import TASKS

# Keys of the project's TASKS mapping, in its own iteration order.
TASK_NAMES = [task_name for task_name in TASKS.keys()]

# Values substituted into the `_train_{...}` and `_hyp_{...}` parts of the
# log file names that the cells below look for.
TRAIN_SIZES = [10, 25, 50, 100, 200]
HYP_SIZES = [3, 20]
# Maps an inference method to the text that precedes the accuracy value in
# that method's log file. 'upperbound' is not a literal prefix: extract_acc
# treats it as a sentinel and searches for a dedicated pattern instead.
ACC_PREFIX = {
    **dict.fromkeys(
        ['default', 'knn', 'knn_separate_steps', 'filter_and_weight'],
        'Averaged accuracy: ',
    ),
    **dict.fromkeys(
        ['few_shot', 'zero_shot', 'RoBERTa'],
        'Accuracy: ',
    ),
    # The two trailing spaces are part of the prefix as written in SOURCE.
    'no_update': 'Test accuracy of best hypothesis:  ',
    'upperbound': 'upperbound',
}

%load_ext autoreload
%autoreload 2

Code repo path: /home/haokunliu/past-interaction-learning
Code repo path: /home/haokunliu/past-interaction-learning
Server: dsi


In [2]:
def extract_acc(text, prefix='Averaged accuracy: '):
    """Extract the single accuracy value that follows ``prefix`` in ``text``.

    Args:
        text: Log contents to search.
        prefix: Literal text expected immediately before a ``digits.digits``
            number. The special value ``'upperbound'`` instead selects the
            number following ``(if one hyp is correct):`` and one whitespace
            character.

    Returns:
        The matched number rounded to 3 decimals, or ``0.0`` when the pattern
        does not occur exactly once (no match, or several matches).
    """
    if prefix == 'upperbound':
        pattern = r'\(if one hyp is correct\):\s(\d+\.\d+)'
    else:
        # Escape the prefix so punctuation such as '(', '+' or '.' is matched
        # literally instead of being interpreted as regex syntax.
        pattern = re.escape(prefix) + r'(\d+\.\d+)'

    # With one capture group, findall returns just the numeric part, so no
    # slicing by len(prefix) is needed.
    matches = re.findall(pattern, text)
    if len(matches) != 1:
        return 0.0
    return round(float(matches[0]), 3)

def get_result_from_file(filename, method='default'):
    """Read a log file and return the accuracy reported for ``method``.

    The whole file is read into memory, then ``extract_acc`` is called with
    the prefix registered for ``method`` in ``ACC_PREFIX``. A ``method`` that
    is not a key of ``ACC_PREFIX`` raises ``KeyError``.
    """
    with open(filename, 'r') as handle:
        contents = handle.read()
    acc_prefix = ACC_PREFIX[method]
    return extract_acc(contents, prefix=acc_prefix)

def save_model_inf_results(prefix_folder, model_name, inference_method):
    """Collect accuracies for one model / inference method into a JSONL file.

    For every combination of TASK_NAMES x TRAIN_SIZES x HYP_SIZES the log
    ``{task}_train_{train_size}_hyp_{hyp_size}.txt`` is looked up under
    ``{code_repo_path}/{prefix_folder}/{inference_method}/{model_name}``.
    A missing log is recorded with an accuracy of 0.0. One JSON object per
    combination is written to
    ``{code_repo_path}/results_final/{inference_method}_{model_name}.jsonl``.
    """
    log_dir = f'{code_repo_path}/{prefix_folder}/{inference_method}/{model_name}'
    out_dir = f'{code_repo_path}/results_final'
    create_directory(out_dir)

    def _accuracy_for(task, train_size, hyp_size):
        # 0.0 stands in for runs whose log file is absent.
        path = f'{log_dir}/{task}_train_{train_size}_hyp_{hyp_size}.txt'
        if not os.path.exists(path):
            return 0.0
        return get_result_from_file(path, inference_method)

    records = [
        {
            'inference_method': inference_method,
            'model_name': model_name,
            'task': task,
            'train_size': train_size,
            'hyp_size': hyp_size,
            "acc": _accuracy_for(task, train_size, hyp_size),
        }
        for task in TASK_NAMES
        for train_size in TRAIN_SIZES
        for hyp_size in HYP_SIZES
    ]

    out_path = f'{out_dir}/{inference_method}_{model_name}.jsonl'
    with jsonlines.open(out_path, mode='w') as writer:
        for record in records:
            writer.write(record)

def save_few_shot_results(prefix_folder, model_name):
    """Collect zero-shot and few-shot accuracies for one model into a JSONL file.

    For every task in TASK_NAMES, the logs ``{task}_zero_shot.txt`` and
    ``{task}_few_shot.txt`` are looked up under
    ``{code_repo_path}/{prefix_folder}/few_shot/{model_name}``. A missing log
    is recorded with an accuracy of 0.0. Results are written to
    ``{code_repo_path}/results_final/few_shot_{model_name}.jsonl``.
    """
    log_dir = f'{code_repo_path}/{prefix_folder}/few_shot/{model_name}'
    out_dir = f'{code_repo_path}/results_final'
    create_directory(out_dir)

    records = []
    for task in TASK_NAMES:
        for num_shot in ('zero_shot', 'few_shot'):
            path = f'{log_dir}/{task}_{num_shot}.txt'
            # The shot setting doubles as the ACC_PREFIX key for parsing.
            acc = get_result_from_file(path, num_shot) if os.path.exists(path) else 0.0
            records.append({
                'inference_method': num_shot,
                'model_name': model_name,
                'task': task,
                "acc": acc,
            })

    out_path = f'{out_dir}/few_shot_{model_name}.jsonl'
    with jsonlines.open(out_path, mode='w') as writer:
        for record in records:
            writer.write(record)

def save_no_update_results(prefix_folder, model_name):
    """Collect 'no_update' accuracies for one model into a JSONL file.

    For every combination of TASK_NAMES x TRAIN_SIZES the log
    ``{task}_train_{train_size}.txt`` is looked up under
    ``{code_repo_path}/{prefix_folder}/no_update/{model_name}``. A missing log
    keeps the placeholder accuracy of 0.0. Results are written to
    ``{code_repo_path}/results_final/no_update_{model_name}.jsonl``.
    """
    log_dir = f'{code_repo_path}/{prefix_folder}/no_update/{model_name}'
    out_dir = f'{code_repo_path}/results_final'
    create_directory(out_dir)

    records = []
    for task in TASK_NAMES:
        for train_size in TRAIN_SIZES:
            # Start from the "log missing" placeholder and overwrite it when
            # the log is actually present.
            record = {
                'inference_method': 'no_update',
                'model_name': model_name,
                'task': task,
                'train_size': train_size,
                "acc": 0.0,
            }
            path = f'{log_dir}/{task}_train_{train_size}.txt'
            if os.path.exists(path):
                record["acc"] = get_result_from_file(path, 'no_update')
            records.append(record)

    out_path = f'{out_dir}/no_update_{model_name}.jsonl'
    with jsonlines.open(out_path, mode='w') as writer:
        for record in records:
            writer.write(record)



In [5]:
# Cross-model results: one entry per (generation model, inference model, task,
# hyp size) at train size 200, written to results_final/cross_models.jsonl.
# When both models are the same, the log is read from logs/default/{model};
# otherwise from logs/ablation_default/{gen_model}-{inf_model}.
# A missing log is recorded with an accuracy of 0.0.
cross_model_results_list = []
for gen_model in ['turbo35_0613','claude_2', 'Mixtral-8x7B']:
    for inf_model in ['turbo35_0613','claude_2', 'Mixtral-8x7B']:
        for task in TASK_NAMES:
            for hyp_size in HYP_SIZES:
                for train_size in [200]:
                    if gen_model == inf_model:
                        log_file = f'{code_repo_path}/logs/default/{gen_model}/{task}_train_{train_size}_hyp_{hyp_size}.txt'
                    else:
                        log_file = f'{code_repo_path}/logs/ablation_default/{gen_model}-{inf_model}/{task}_train_{train_size}_hyp_{hyp_size}.txt'
                    if os.path.exists(log_file):
                        log_acc = get_result_from_file(log_file, 'default')
                    else:
                        log_acc = 0.0

                    log_entry = {
                        'inference_method': 'default',
                        'generation_model': gen_model,
                        'inference_model': inf_model,
                        'task': task,
                        'train_size': train_size,
                        'hyp_size': hyp_size,
                        "acc": log_acc
                    }
                    cross_model_results_list.append(log_entry)

# Ensure the output folder exists so this cell does not fail with
# FileNotFoundError when results_final/ has not been created yet.
os.makedirs(f'{code_repo_path}/results_final', exist_ok=True)
output_file_name = f'{code_repo_path}/results_final/cross_models.jsonl'
with jsonlines.open(output_file_name, mode='w') as writer:
    for result_entry in cross_model_results_list:
        writer.write(result_entry)

In [3]:
# Out-of-distribution (hotel reviews) results, written to
# results_final/ood_reviews.jsonl. Three groups of logs are collected from
# logs/OOD_hotel; any missing log is recorded with an accuracy of 0.0.
ood_results_list = []
ood_path = f'{code_repo_path}/logs/OOD_hotel'

# 1) Hypothesis-based inference methods, per model and hyp size, train size 200.
for model_name in ['turbo35_0613','claude_2', 'Mixtral-8x7B']:
    for inference_method in ['default', 'knn', 'knn_separate_steps', 'filter_and_weight']:
        for hyp_size in HYP_SIZES:
            for train_size in [200]:
                log_file = f'{ood_path}/{model_name}/OOD_hotel_reviews_{inference_method}_train_{train_size}_hyp_{hyp_size}.txt'
                if os.path.exists(log_file):
                    log_acc = get_result_from_file(log_file, inference_method)
                else:
                    log_acc = 0.0

                log_entry = {
                    'inference_method': inference_method,
                    'inference_model': model_name,
                    'train_size': train_size,
                    'hyp_size': hyp_size,
                    "acc": log_acc
                }
                ood_results_list.append(log_entry)

# 2) RoBERTa logs at train sizes 200 and 1000 (no hyp size applies).
for train_size in [200,1000]:
    log_file = f'{ood_path}/RoBERTa/OOD_hotel_reviews_RoBERTa_train_{train_size}.txt'
    if os.path.exists(log_file):
        log_acc = get_result_from_file(log_file, 'RoBERTa')
    else:
        log_acc = 0.0
    log_entry = {
        'inference_method': 'RoBERTa',
        'inference_model': 'RoBERTa',
        'train_size': train_size,
        'hyp_size': None,
        "acc": log_acc
    }
    ood_results_list.append(log_entry)

# 3) Few-shot and zero-shot logs per model (no train or hyp size applies).
for model_name in ['turbo35_0613','claude_2', 'Mixtral-8x7B']:
    for few_shot in ['few_shot', 'zero_shot']:
        log_file = f'{ood_path}/{model_name}/OOD_hotel_reviews_{few_shot}.txt'
        if os.path.exists(log_file):
            log_acc = get_result_from_file(log_file, few_shot)
        else:
            log_acc = 0.0
        log_entry = {
            'inference_method': few_shot,
            'inference_model': model_name,
            'train_size': None,
            'hyp_size': None,
            "acc": log_acc
        }
        ood_results_list.append(log_entry)

# Ensure the output folder exists so this cell does not fail with
# FileNotFoundError when results_final/ has not been created yet.
os.makedirs(f'{code_repo_path}/results_final', exist_ok=True)
output_file_name = f'{code_repo_path}/results_final/ood_reviews.jsonl'
with jsonlines.open(output_file_name, mode='w') as writer:
    for result_entry in ood_results_list:
        writer.write(result_entry)

In [59]:
# Upper-bound results: for each model and task, read the value following
# "(if one hyp is correct):" from logs/upperbound/{model}/{task}_train_200_hyp_20.txt
# and write the entries to results_final/upperbound.jsonl.
# A missing log is recorded with an accuracy of 0.0.
upperbound_results_list = []
upperbound_path = f'{code_repo_path}/logs/upperbound'
for model_name in ['turbo35_0613','claude_2', 'Mixtral-8x7B']:
    for task in TASK_NAMES:
        log_file = f'{upperbound_path}/{model_name}/{task}_train_200_hyp_20.txt'
        if os.path.exists(log_file):
            log_acc = get_result_from_file(log_file, 'upperbound')
        else:
            log_acc = 0.0

        log_entry = {
            'inference_method': 'upperbound',
            'inference_model': model_name,
            'task': task,
            'train_size': 200,
            'hyp_size': 20,
            "acc": log_acc
        }
        upperbound_results_list.append(log_entry)

# Ensure the output folder exists so this cell does not fail with
# FileNotFoundError when results_final/ has not been created yet.
os.makedirs(f'{code_repo_path}/results_final', exist_ok=True)
output_file_name = f'{code_repo_path}/results_final/upperbound.jsonl'
with jsonlines.open(output_file_name, mode='w') as writer:
    for result_entry in upperbound_results_list:
        writer.write(result_entry)

In [11]:
# Regenerate the per-model result files under results_final/ from logs/.
prefix_folder = 'logs'
for model_name in ['turbo35_0613','claude_2', 'Mixtral-8x7B']:
    # 'no_update' is deliberately not in this list: its logs are named
    # {task}_train_{train_size}.txt (no _hyp_ part), and running it through
    # save_model_inf_results would only write no_update_{model_name}.jsonl,
    # the same file that save_no_update_results below writes.
    for inference_method in ['default', 'knn', 'knn_separate_steps', 'filter_and_weight']:
        save_model_inf_results(prefix_folder,model_name,inference_method)
    save_few_shot_results(prefix_folder,model_name)
    save_no_update_results(prefix_folder,model_name)

Directory '/home/haokunliu/past-interaction-learning/results_final' already exists.
Directory '/home/haokunliu/past-interaction-learning/results_final' already exists.
Directory '/home/haokunliu/past-interaction-learning/results_final' already exists.
Directory '/home/haokunliu/past-interaction-learning/results_final' already exists.
Directory '/home/haokunliu/past-interaction-learning/results_final' already exists.
Directory '/home/haokunliu/past-interaction-learning/results_final' already exists.
Directory '/home/haokunliu/past-interaction-learning/results_final' already exists.
Directory '/home/haokunliu/past-interaction-learning/results_final' already exists.
Directory '/home/haokunliu/past-interaction-learning/results_final' already exists.
Directory '/home/haokunliu/past-interaction-learning/results_final' already exists.
Directory '/home/haokunliu/past-interaction-learning/results_final' already exists.
Directory '/home/haokunliu/past-interaction-learning/results_final' already 