In [2]:
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ast
from constants import *
from sft import *
from utils import *
from datasets import load_dataset, concatenate_datasets, Dataset, DatasetDict

  from .autonotebook import tqdm as notebook_tqdm


# Looking at results from previous runs

In [10]:
def formatting_prompts_func(example, training=True):
    """Build one prompt string per row of a batched example.

    Relies on names that must already exist in the notebook namespace:
    ``args`` (only ``args.sampling`` is read), ``BINARY_INSTRUCTION``,
    ``SYSTEM_INSTRUCTION``, and, when ``training`` is true,
    ``LABEL_CONVERSIONS`` and ``tokenizer``.

    Args:
        example: Mapping with parallel sequences under the keys ``"doc"``,
            ``"summ"`` and ``"error_type"``. The number of prompts produced
            equals the length of ``example["error_type"]``.
        training: When true, the converted label followed by `` .`` and the
            tokenizer's EOS token is appended after ``### Output: ``. When
            false, the prompt ends right after ``### Output: ``.

    Returns:
        A list of prompt strings, in row order.
    """
    # The instruction header is chosen once per call, before any row is read.
    if args.sampling == 'binary':
        instruction = BINARY_INSTRUCTION
    else:
        instruction = SYSTEM_INSTRUCTION

    prompts = []
    for row, error_type in enumerate(example["error_type"]):
        document = example['doc'][row]
        summary = example['summ'][row]
        prompt = f"{instruction}\n ### Text1: {document}\n ### Text2: {summary}\n ### Output: "
        if training:
            # Target completion: converted label, " .", then the EOS token.
            completion = f"{LABEL_CONVERSIONS[error_type]} ." + tokenizer.eos_token
            prompt = prompt + completion
        prompts.append(prompt)
    return prompts

def extract(json_file_path):
    """Read ground-truth labels and predictions from a JSON results file.

    The file is expected to hold a JSON array of objects, each carrying a
    ``'label'`` and a ``'prediction'`` key.

    Args:
        json_file_path: Path of the JSON file to read.

    Returns:
        A ``(true_labels, predicted_labels)`` tuple of two lists, both in the
        order the entries appear in the file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If an entry lacks ``'label'`` or ``'prediction'``.
    """
    # JSON is UTF-8 by specification; name the encoding explicitly so decoding
    # does not depend on the platform's locale default.
    with open(json_file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    true_labels = [entry['label'] for entry in data]
    predicted_labels = [entry['prediction'] for entry in data]

    return true_labels, predicted_labels

In [15]:
# Read the per-example ground-truth labels and model predictions stored in a
# summary.json results file (presumably written by an earlier fine-tuning run).
# The path is relative to the notebook's working directory.
labels, preds = extract("fine_tuning safe copy/meta-llama/Meta-Llama-3-8B-Instruct/whole_dataset/summary.json")

In [16]:
# Fixed seed so both train_test_split calls below produce the same partition on
# every Restart & Run All; without it the train/validation/test membership
# changes on each execution.
# NOTE(review): if the run that produced the saved predictions used a different
# seed (or none), this test split will not line up with them -- confirm against
# the training script.
SPLIT_SEED = 42

# Requesting a list of splits returns one Dataset per requested split.
validation_split, test_split = load_dataset("Lislaam/AggreFact", split=['validation[:]', 'test[:]'])
dataset = concatenate_datasets([validation_split, test_split]) # Turn into one dataset to make new split
dataset = reformat_data_split_labels(dataset, "Lislaam/AggreFact") # Get rid of non-standard error_type examples and split data

# NOTE(review): oversampling happens before the train/test split. If
# `oversampling` duplicates rows, copies of the same example can end up in both
# train and test -- confirm this is intended.
dataset = oversampling(dataset)

# Split the dataset into train and test sets (80% train, 20% test)
train_test = dataset.train_test_split(test_size=0.2, seed=SPLIT_SEED)

# Further split the train set into train and validation sets (75% train, 25% validation of the original 80%)
train_valid = train_test['train'].train_test_split(test_size=0.25, seed=SPLIT_SEED)

# Combine the splits into a DatasetDict
dataset = DatasetDict({
    'train': train_valid['train'],
    'validation': train_valid['test'],
    'test': train_test['test']
})

(1185, 1185)

In [None]:
# Add a "formatted_text" column to the dataset. training=False makes
# formatting_prompts_func stop right after "### Output: ", so no label or EOS
# token is appended to the prompt. batched=True means the lambda receives a
# dict of column lists rather than a single row.
dataset = dataset.map(
    lambda x: {"formatted_text": formatting_prompts_func(x, False)},
    batched=True,
)
# Iterate over the held-out test split four examples at a time.
# NOTE(review): DataLoader (and tqdm, used below) are not imported in the
# visible import cell -- presumably they arrive via one of the star imports;
# confirm.
dataloader = DataLoader(dataset['test'], batch_size=4)

# Make predictions
predictions = []
for batch in tqdm(dataloader):