In [3]:
# Notebook to send and run the llama3 predictions.
import csv
import io
import time

import numpy as np

from src.llm_client import parallel_generator
from src.utils.data_utility import process_squadv2_dataset

In [4]:
# Llama3 chat templates.
# Each template wraps one chat turn: a role header (system / user / assistant) between
# <|start_header_id|> and <|end_header_id|>, a blank line, the turn text, a single
# space, and a closing <|eot_id|>.
# Only the instruction (system) template starts with <|begin_of_text|>.
# The {instruction}, {input} and {output} placeholders are left unfilled here; the
# templates are handed to process_squadv2_dataset as-is.
instruction_template = "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{instruction} <|eot_id|>"
input_template = "<|start_header_id|>user<|end_header_id|>\n\n{input} <|eot_id|>"
output_template = "<|start_header_id|>assistant<|end_header_id|>\n\n{output} <|eot_id|>"

In [9]:
# Model name and server address
# NOTE(review): the server address and the absolute output path below are hard-coded;
# they have to be edited by hand when the notebook is run in another environment.
model_name = "/model-weights/Meta-Llama-3-8B-Instruct"
server_address = "http://172.17.8.9:51813/v1"
# Directory that receives the prediction CSV files.
output_path = "/scratch/ssd004/scratch/snajafi/checkpoints/llama3-predictions"
# Experiment variant: passed to process_squadv2_dataset and embedded in the output
# file name. Keep exactly one of the assignments below active.
# experiment_type = "normal_no_icl"
experiment_type = "explanation_no_icl"
# experiment_type = "normal_icl"
# experiment_type = "explanation_icl"

In [10]:
# TSV file with the validation examples; the path is relative, so it is resolved
# against the kernel's current working directory.
input_file = "../data/0.1-shot-datasets/squad/original_validation.tsv"
# Bare file name (no directory); it is joined with output_path when the file is opened.
# The experiment type is part of the name, so each variant gets its own file.
output_file = f"squadv2_predictions_original_validation.{experiment_type}.csv"

In [12]:
# Load the validation examples for the currently selected experiment type.
# The third value returned by process_squadv2_dataset is not needed in this notebook.
squad_inputs, squad_ids, _, gold_outputs = process_squadv2_dataset(
    input_file,
    experiment_type,
    instruction_template,
    input_template,
    output_template,
)

# Sanity check 1: the three sizes, one per line, so a mismatch is easy to spot.
print(len(squad_inputs), len(squad_ids), len(gold_outputs), sep="\n")

# Sanity check 2: the first prompt, its id and its gold answer, one per line.
print(squad_inputs[0], squad_ids[0], gold_outputs[0], sep="\n")

11873
11873
11873
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

This task is about writing a correct answer for the reading comprehension task. Based on the information provided in a given passage, you should identify the shortest continuous text span from the passage that serves as an answer to the given question. Avoid answers that are incorrect or have incomplete justification. Generate your explanations and thought process before generating the final answer. If you cannot find the answer from the passage for the given question, then generate the <no_answer> tag as the final answer. <|eot_id|><|start_header_id|>user<|end_header_id|>

Passage: The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse ("Norman" comes from "Norseman") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear 

In [13]:
# Send every prompt in squad_inputs to the model server and save one CSV row per
# response: generated text, score, example id and gold answer.
start_time = time.perf_counter()
# The file is opened (and truncated) before generation starts, so an unwritable
# path fails before any request is sent.
# newline="" is what the csv module requires of files handed to csv.writer: the
# writer emits its own row terminators, and the text layer must not translate them
# (or any line break embedded in a quoted field, which generated text may contain).
with io.open(f"{output_path}/{output_file}", mode="w", encoding="utf-8", newline="") as out_fp:
    writer = csv.writer(out_fp, quotechar='"', quoting=csv.QUOTE_ALL)
    headers = ["potential_answer", "prediction_score", "row_id", "gold_answer"]
    writer.writerow(headers)
    responses = parallel_generator(
        server_url=server_address,
        model_name=model_name,
        inputs=squad_inputs,
        num_threads=10,
        max_new_tokens=256,
        max_retries=3,
        seconds_between_retries=5,
        request_batch_size=16,
    )
    # Taken right after parallel_generator returns; also the start of the i/o timing.
    end_time = time.perf_counter()
    print(f"Finished prediction in {end_time - start_time} seconds!")
    # Rows are paired with ids and gold answers by position.
    # NOTE(review): this assumes responses come back in the same order as
    # squad_inputs -- confirm against parallel_generator.
    for idx, response in enumerate(responses):
        # prediction_score is the mean of response.logprobs.token_logprobs.
        to_write = [response.text, np.mean(response.logprobs.token_logprobs), squad_ids[idx], gold_outputs[idx]]
        writer.writerow(to_write)

# Printed after the with-block, so the measured time includes closing the file.
print(f"Finished i/o in {time.perf_counter() - end_time} seconds!")

KeyboardInterrupt: 

In [None]:
# Run the two in-context-learning variants back to back. Each iteration reloads the
# dataset for that variant, queries the model server and writes its own CSV file.
# Note that the loop rebinds experiment_type, output_file, squad_inputs, squad_ids and
# gold_outputs, so after this cell they hold the values of the last variant.
experiment_types = ["normal_icl", "explanation_icl"]
for experiment_type in experiment_types:
    output_file = f"squadv2_predictions_original_validation.{experiment_type}.csv"
    # read the input data.
    squad_inputs, squad_ids, _, gold_outputs = process_squadv2_dataset(
        input_file, experiment_type, instruction_template, input_template, output_template
    )
    # Sanity checks: sizes first, then the first entry of each returned sequence.
    print(len(squad_inputs))
    print(len(squad_ids))
    print(len(gold_outputs))
    print(squad_inputs[0])
    print(squad_ids[0])
    print(gold_outputs[0])
    start_time = time.perf_counter()
    # newline="" is what the csv module requires of files handed to csv.writer: the
    # writer emits its own row terminators, and the text layer must not translate
    # them (or any line break embedded in a quoted field, which generated text may
    # contain).
    with io.open(f"{output_path}/{output_file}", mode="w", encoding="utf-8", newline="") as out_fp:
        writer = csv.writer(out_fp, quotechar='"', quoting=csv.QUOTE_ALL)
        headers = ["potential_answer", "prediction_score", "row_id", "gold_answer"]
        writer.writerow(headers)
        responses = parallel_generator(
            server_url=server_address,
            model_name=model_name,
            inputs=squad_inputs,
            num_threads=4,
            max_new_tokens=256,
            max_retries=3,
            seconds_between_retries=5,
            request_batch_size=16,
        )
        # Taken right after parallel_generator returns; also the start of the i/o timing.
        end_time = time.perf_counter()
        print(f"Finished prediction in {end_time - start_time} seconds!")
        # Rows are paired with ids and gold answers by position.
        # NOTE(review): this assumes responses come back in the same order as
        # squad_inputs -- confirm against parallel_generator.
        for idx, response in enumerate(responses):
            # prediction_score is the mean of response.logprobs.token_logprobs.
            to_write = [response.text, np.mean(response.logprobs.token_logprobs), squad_ids[idx], gold_outputs[idx]]
            writer.writerow(to_write)

    # Printed after the with-block, so the measured time includes closing the file.
    print(f"Finished i/o in {time.perf_counter() - end_time} seconds!")