In [10]:
import matplotlib.pyplot as plt
import numpy as np
import os
import re

# Per-LLM reference scores, keyed by LLM name and then by dataset name
# (presumably standalone accuracies -- confirm against the paper tables).
llm_performance = dict(
    GPT=dict(IMDB=0.9415, ISEAR=0.70345, FEVER=0.7998),
    LLAMA=dict(IMDB=0.93332, ISEAR=0.6823, FEVER=0.7715),
)
# Name of the LLM; interpolated into the log directory paths further down.
llm_name = "LLAMA"

In [11]:
dataset_name = 'ISEAR'
log_dir = f"./paper_logs/{llm_name}/{dataset_name.lower()}-llama-small/"
# Every .log file in the directory, in lexicographic order.
files = sorted(name for name in os.listdir(log_dir) if name.endswith('.log'))
# Group the logs by cascade. The key is the file name up to (not including)
# its last '_', e.g. 'LR_BERT-base_0.001400.log' -> 'LR_BERT-base'.
# A name without any '_' falls under the empty-string key.
cascades = {}
for f in files:
    cascades.setdefault(f.rpartition('_')[0], []).append(f)
cascades

{'LR_BERT-base': ['LR_BERT-base_0.001400.log',
  'LR_BERT-base_0.001420.log',
  'LR_BERT-base_0.001440.log',
  'LR_BERT-base_0.001460.log',
  'LR_BERT-base_0.001480.log',
  'LR_BERT-base_0.001500.log',
  'LR_BERT-base_0.001520.log',
  'LR_BERT-base_0.001540.log',
  'LR_BERT-base_0.001560.log',
  'LR_BERT-base_0.001580.log',
  'LR_BERT-base_0.001600.log',
  'LR_BERT-base_0.001620.log',
  'LR_BERT-base_0.001640.log',
  'LR_BERT-base_0.001660.log',
  'LR_BERT-base_0.001680.log',
  'LR_BERT-base_0.001700.log',
  'LR_BERT-base_0.001720.log',
  'LR_BERT-base_0.001740.log',
  'LR_BERT-base_0.001760.log',
  'LR_BERT-base_0.001780.log',
  'LR_BERT-base_0.001800.log',
  'LR_BERT-base_0.001820.log',
  'LR_BERT-base_0.001840.log',
  'LR_BERT-base_0.001860.log',
  'LR_BERT-base_0.001880.log',
  'LR_BERT-base_0.001900.log',
  'LR_BERT-base_0.001920.log',
  'LR_BERT-base_0.001940.log',
  'LR_BERT-base_0.001960.log',
  'LR_BERT-base_0.001980.log',
  'LR_BERT-base_0.002000.log',
  'LR_BERT-base_0.00202

In [5]:
def stats(cascade_logs, cascade_costs, logs_dir=None):
    """Print the LLM cost and accuracy figures for each log file of a cascade.

    The second-to-last line of every log file is parsed as a comma-separated
    record laid out as follows (n = number of small models in the cascade):

        m1_prediction, ..., mn_prediction, llm_prediction, ground_truth,
        m1_proportion, ..., mn_proportion,
        m1_defer_prob, ..., mn_defer_prob,
        m1_score, ..., mn_score,
        m1_accuracy, ..., mn_accuracy, llm_accuracy, overall_accuracy

    For each file one line is printed with:
      * "llm cost": (1 - sum of model proportions) * number of lines in the
        file, truncated to an int;
      * "overall accuracy": the overall_accuracy field of the record;
      * "online ensemble accuracy": every accuracy weighted by its proportion
        (the LLM is weighted by 1 - sum of model proportions).

    Args:
        cascade_logs: log file names. n is inferred from the first name as the
            number of '_'-separated tokens minus one.
        cascade_costs: accepted for call compatibility; not used here.
        logs_dir: directory that contains the log files. When omitted, the
            notebook-level ``log_dir`` variable is used.

    Returns:
        None. Results are printed.

    Raises:
        IndexError: if ``cascade_logs`` is empty, a file has fewer than two
            lines, or the parsed record has too few fields.
        ValueError: if a numeric field cannot be parsed as a float.
    """
    base_dir = log_dir if logs_dir is None else logs_dir

    cascade_size = len(cascade_logs[0].split('_')) - 1
    print("cascade size: ", cascade_size)

    # Field offsets inside the record (see the layout in the docstring).
    prop_start = cascade_size + 2
    acc_start = cascade_size * 4 + 2
    llm_acc_idx = cascade_size * 5 + 2
    overall_acc_idx = cascade_size * 5 + 3

    for f in cascade_logs:
        # Read the file once; the context manager guarantees it is closed.
        with open(os.path.join(base_dir, f), 'r') as file:
            lines = file.readlines()
        num_lines = len(lines)
        if num_lines < 2:
            raise IndexError(
                f"{f}: expected at least 2 lines, found {num_lines}")

        # The record of interest is the second-to-last line of the log.
        data = lines[-2].strip().split(',')

        model_prop = []
        linear_combination_acc = 0
        for k in range(cascade_size):
            proportion = float(data[prop_start + k])
            model_prop.append(proportion)
            linear_combination_acc += float(data[acc_start + k]) * proportion

        # Whatever share is not handled by the small models goes to the LLM.
        llm_cost = 1 - sum(model_prop)
        llm_accuracy = float(data[llm_acc_idx])
        overall_accuracy = float(data[overall_acc_idx])
        linear_combination_acc += llm_accuracy * llm_cost
        print('file name: ', f, 'llm cost: ', int(llm_cost * num_lines), 'overall accuracy: ', overall_accuracy, 'online ensemble accuracy: ', linear_combination_acc)

In [6]:
# Cascade to report on (a key of `cascades`) and the log files that belong to it.
cascade_name = 'LR_BERT-base'
cascade_logs = cascades[cascade_name]
# Per-LLM cost pairs handed to stats(); meaning of the two numbers is not
# documented in this notebook -- TODO confirm (looks like [small-model, LLM] cost).
cascade_costs = dict(GPT=[1, 1182], LLAMA=[1, 636])

stats(cascade_logs, cascade_costs)

cascade size:  2
file name:  LR_BERT-base_0.001400.log llm cost:  3833 overall accuracy:  0.6892 linear combination accuracy:  0.6892
file name:  LR_BERT-base_0.001420.log llm cost:  3833 overall accuracy:  0.6892 linear combination accuracy:  0.6892
file name:  LR_BERT-base_0.001440.log llm cost:  3833 overall accuracy:  0.6892 linear combination accuracy:  0.6892
file name:  LR_BERT-base_0.001460.log llm cost:  3833 overall accuracy:  0.6892 linear combination accuracy:  0.6892
file name:  LR_BERT-base_0.001480.log llm cost:  3833 overall accuracy:  0.6892 linear combination accuracy:  0.6892
file name:  LR_BERT-base_0.001500.log llm cost:  3833 overall accuracy:  0.6892 linear combination accuracy:  0.6892
file name:  LR_BERT-base_0.001520.log llm cost:  3833 overall accuracy:  0.6892 linear combination accuracy:  0.6892
file name:  LR_BERT-base_0.001540.log llm cost:  3833 overall accuracy:  0.6892 linear combination accuracy:  0.6892
file name:  LR_BERT-base_0.001560.log llm cost:

In [16]:
# example: LR: 0.3024;0.3987;0.3439 -> 0.3987
def get_recall(model_string):
    """Return the second ';'-separated component of ``model_string`` as a float.

    For instance ``"0.3024;0.3987;0.3439"`` yields ``0.3987``.
    """
    components = model_string.split(';')
    return float(components[1])

def stats_hatespeech(cascade_logs, cascade_costs, logs_dir=None):
    """Print LLM cost, recall and accuracy figures for each log of a cascade.

    The second-to-last line of every log file is parsed as a comma-separated
    record. With n = number of small models in the cascade, the fields read
    here are:

        index n+2 .. 2n+1   per-model proportions
        index 4n+2 .. 5n+1  per-model accuracies
        index 5n+2          llm_accuracy
        index 5n+3          overall_accuracy
        index 6n+3 .. 7n+2  per-model ';'-separated metric strings
        index -2 / -1       LLM / overall ';'-separated metric strings

    The second component of each metric string is taken as the recall via
    ``get_recall`` (NOTE(review): the exact meaning of the three components is
    not documented in this notebook -- confirm against the log writer).

    For each file one line is printed with the LLM cost
    ((1 - sum of proportions) * number of lines, truncated to an int), the
    overall recall and accuracy from the record, and the proportion-weighted
    ("online ensemble") recall and accuracy.

    Args:
        cascade_logs: log file names. n is inferred from the first name as the
            number of '_'-separated tokens minus one.
        cascade_costs: accepted for call compatibility; not used here.
        logs_dir: directory that contains the log files. When omitted, the
            notebook-level ``log_dir`` variable is used.

    Returns:
        None. Results are printed.

    Raises:
        IndexError: if ``cascade_logs`` is empty, a file has fewer than two
            lines, or the parsed record has too few fields.
        ValueError: if a numeric field cannot be parsed as a float.
    """
    base_dir = log_dir if logs_dir is None else logs_dir

    cascade_size = len(cascade_logs[0].split('_')) - 1
    print("cascade size: ", cascade_size)

    # Field offsets inside the record (see the table in the docstring).
    prop_start = cascade_size + 2
    acc_start = cascade_size * 4 + 2
    llm_acc_idx = cascade_size * 5 + 2
    overall_acc_idx = cascade_size * 5 + 3
    recall_start = cascade_size * 6 + 3

    for f in cascade_logs:
        # Read the file once; the context manager guarantees it is closed.
        with open(os.path.join(base_dir, f), 'r') as file:
            lines = file.readlines()
        num_lines = len(lines)
        if num_lines < 2:
            raise IndexError(
                f"{f}: expected at least 2 lines, found {num_lines}")

        # The record of interest is the second-to-last line of the log.
        data = lines[-2].strip().split(',')

        model_prop = []
        linear_combination_acc = 0
        my_recall = 0
        for k in range(cascade_size):
            proportion = float(data[prop_start + k])
            model_prop.append(proportion)
            linear_combination_acc += float(data[acc_start + k]) * proportion
            my_recall += get_recall(data[recall_start + k]) * proportion

        # Whatever share is not handled by the small models goes to the LLM.
        llm_cost = 1 - sum(model_prop)

        llm_accuracy = float(data[llm_acc_idx])
        overall_accuracy = float(data[overall_acc_idx])
        linear_combination_acc += llm_accuracy * llm_cost

        llm_recall = get_recall(data[-2])
        my_recall += llm_recall * llm_cost
        overall_recall = get_recall(data[-1])
        # print overall cost and recall
        print('file name: ', f, 'llm cost: ', int(llm_cost * num_lines), 'overall recall: ', overall_recall, 
              'online ensemble recall: ', float(my_recall), 'overall accuracy: ', overall_accuracy, 'online ensemble accuracy: ', float(linear_combination_acc))

In [17]:
# NOTE: this cell rebinds dataset_name, log_dir, files and cascades, so any
# function that reads the notebook-level log_dir sees the hatespeech directory
# from here on.
dataset_name = 'hatespeech'
log_dir = f"./paper_logs/{llm_name}/{dataset_name.lower()}-llama-small/"
# Every .log file in the directory, in lexicographic order.
files = sorted(name for name in os.listdir(log_dir) if name.endswith('.log'))
# Group the logs by cascade: the key is the file name up to its last '_'.
cascades = {}
for f in files:
    cascades.setdefault(f.rpartition('_')[0], []).append(f)

# Cascade to report on and the log files that belong to it.
cascade_name = 'LR_BERT-base'
cascade_logs = cascades[cascade_name]
cascade_costs = dict(GPT=[1, 1182], LLAMA=[1, 636])

stats_hatespeech(cascade_logs, cascade_costs)

cascade size:  2
file name:  LR_BERT-base_0.000001.log llm cost:  5103 overall recall:  0.827 linear combination recall:  0.8272132400000001 overall accuracy:  0.7815 linear combination accuracy:  0.77940018
file name:  LR_BERT-base_0.000002.log llm cost:  5115 overall recall:  0.827 linear combination recall:  0.82845618 overall accuracy:  0.7808 linear combination accuracy:  0.7791023899999999
file name:  LR_BERT-base_0.000003.log llm cost:  5115 overall recall:  0.827 linear combination recall:  0.82834512 overall accuracy:  0.7808 linear combination accuracy:  0.77910347
file name:  LR_BERT-base_0.000004.log llm cost:  5117 overall recall:  0.827 linear combination recall:  0.82841692 overall accuracy:  0.7808 linear combination accuracy:  0.7790732899999999
file name:  LR_BERT-base_0.000005.log llm cost:  5114 overall recall:  0.827 linear combination recall:  0.8282451000000001 overall accuracy:  0.7808 linear combination accuracy:  0.7790180800000001
file name:  LR_BERT-base_0.0