In [3]:
import os
import random
import numpy as np
import torch

# A CUDA-enabled GPU is the intended device for this project. When no GPU is
# visible we fall back to the CPU so that later `.to(device)` calls do not fail.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# A fixed seed value keeps the results reproducible across different runs
seed_val = 10

# Seed Python's hashing, the `random` module, NumPy and PyTorch.
# NOTE: PYTHONHASHSEED is read when an interpreter starts, so setting it here
# affects processes spawned from this kernel, not the already-running kernel.
os.environ['PYTHONHASHSEED'] = str(seed_val)
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)

<torch._C.Generator at 0x1f2dbf654d0>

In [13]:
# Run configuration: which model checkpoint to use and the experiment identifier
params = dict(
    model_version='opt-125m',
    experiment_id='run_1',
)

In [14]:
import wandb

# Experiments are tracked with wandb. The run id comes from `params`, the whole
# `params` dict is stored as the run config, and resume='allow' is requested.
wandb.init(
    project='nlg_uncertainty',
    id=params['experiment_id'],
    config=params,
    resume='allow',
)

# Name of the active wandb run, kept for use in later cells
run_version = wandb.run.name

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

In [7]:
from transformers import AutoTokenizer

model = params['model_version']

# Two tokenizer objects are created from the same checkpoint with identical options.
# The downloaded files go to a local cache directory so that other notebooks
# using the same tokenizer can reuse them.
tokenizer_options = {'use_fast': False, 'cache_dir': './hf_cache_dir'}
generation_tokenizer = AutoTokenizer.from_pretrained(f"facebook/{model}", **tokenizer_options)
tokenizer = AutoTokenizer.from_pretrained(f"facebook/{model}", **tokenizer_options)

In [15]:
import pickle

# The generation notebook has to be run first so that this file exists.
# The pickle is looked up by run name and model version.
generations_path = f'./sequences/{run_version}/{model}_generations.pkl'
with open(generations_path, 'rb') as infile:
    responses = pickle.load(infile)

In [16]:
from tqdm import tqdm

# Markers at which a generated text is cut: everything from the first occurrence
# of a marker onwards is dropped. The list never changes, so it is built once
# here instead of once per sample.
strings_to_filter_on = [
    '.', '\n', 'Q:', 'A:', 'question:', 'answer:', 'Question:', 'Answer:', 'Questions:', 'questions:', 'QUESTION:',
    'ANSWER:'
]

cleaned_responses_list = []

# Iterating through the responses for the given model
for sample in tqdm(responses):

    # Tensor with the same shape/dtype/device as 'generations', filled with ones.
    # Positions that are not overwritten below keep the value 1.
    cleaned_generations_tensor = torch.ones_like(sample['generations'])
    max_len_of_generations = cleaned_generations_tensor.shape[-1]

    # Token ids are built on the device of the tensor they are written into, and
    # the prompt is moved there once per sample rather than once per generation.
    target_device = cleaned_generations_tensor.device
    prompt_ids = sample['prompt'].to(target_device)

    cleaned_generated_texts_list = []

    # Iterating through the generated texts
    for i, generated_text in enumerate(sample['generated_texts']):
        # Keep only the part before each marker. split() returns the whole text
        # as its first element when the marker is absent, so no membership test
        # is needed.
        for string in strings_to_filter_on:
            generated_text = generated_text.split(string)[0]
        cleaned_generated_texts_list.append(generated_text)

        # [1:] skips the first id returned by the tokenizer
        # (presumably a BOS token added by the tokenizer - TODO confirm).
        # dtype is explicit because an empty id list would otherwise be inferred
        # as float32 and force torch.cat to mix float and integer tensors.
        continuation_ids = torch.tensor(
            tokenizer(generated_text)['input_ids'][1:], dtype=torch.long, device=target_device
        )

        # Prompt followed by the cleaned continuation, truncated to the row width
        clean_ids = torch.cat([prompt_ids, continuation_ids])[:max_len_of_generations]
        cleaned_generations_tensor[i, :len(clean_ids)] = clean_ids

    # The sample dict is updated in place, so `responses` and
    # `cleaned_responses_list` refer to the same sample objects.
    sample['cleaned_generated_texts'] = cleaned_generated_texts_list
    sample['cleaned_generations'] = cleaned_generations_tensor

    # Appending the cleaned sample to the list
    cleaned_responses_list.append(sample)

100%|██████████| 7184/7184 [00:12<00:00, 561.37it/s]


In [12]:
# Persist the cleaned samples under the directory of the current run
cleaned_path = f'./sequences/{run_version}/{model}_cleaned_generations.pkl'
with open(cleaned_path, 'wb') as outfile:
    pickle.dump(cleaned_responses_list, outfile)