In [50]:
import csv
import io
import json
import os
import pathlib
import time
from typing import Any

import google.generativeai as genai
import numpy as np
from absl import app, flags

from src.utils.data_utility import process_squadv2_dataset

In [51]:
# Read the Gemini API key from the environment instead of embedding it in the
# notebook: a key saved in a cell is exposed to everyone who can read the file
# or its history. Any key that was previously committed here should be revoked.
if not os.environ.get("GOOGLE_API_KEY"):
    raise RuntimeError("Set the GOOGLE_API_KEY environment variable before running this notebook.")
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

In [52]:
# Print the name of every model that lists "generateContent" among its
# supported generation methods.
content_model_names = (
    candidate.name
    for candidate in genai.list_models()
    if "generateContent" in candidate.supported_generation_methods
)
for content_model_name in content_model_names:
    print(content_model_name)

models/gemini-1.0-pro
models/gemini-1.0-pro-001
models/gemini-1.0-pro-latest
models/gemini-1.0-pro-vision-latest
models/gemini-1.5-flash
models/gemini-1.5-flash-001
models/gemini-1.5-flash-latest
models/gemini-1.5-pro
models/gemini-1.5-pro-001
models/gemini-1.5-pro-latest
models/gemini-pro
models/gemini-pro-vision


In [53]:
# Model handle used by every generate_content call in the cells below.
model = genai.GenerativeModel(model_name="models/gemini-1.0-pro-latest")

In [54]:
# Decoding settings shared by every generate_content call in this notebook.
generation_config = genai.types.GenerationConfig(
    candidate_count=1,  # one completion per prompt
    stop_sequences=["<end_of_turn>"],  # same marker that closes a turn in the chat templates
    temperature=0.001,
    max_output_tokens=256,
    top_p=0.9,
)

In [56]:
# Gemma2 chat templates.
instruction_template = "<bos><start_of_turn>user\n{instruction}"
input_template = "\n{input}<end_of_turn>\n<start_of_turn>model"
output_template = "\n{output} <end_of_turn>"

input_file = "../data/0.1-shot-datasets/squad/original_validation_part1.tsv"

# Column order of the results file; DictWriter uses it to place each value.
headers = ["potential_answer", "prediction_score", "row_id", "gold_answer"]


def _generate_with_retry(prompt, retry_delay_seconds=5.0):
    """Return the model's text for `prompt`, or None if two attempts both fail.

    The first failure is reported and followed by a pause of
    `retry_delay_seconds` before the single retry; the second failure is
    reported and turned into None so the caller can record a placeholder.
    Reading `response.text` stays inside the try blocks so that an exception
    raised there is handled the same way as a failed request.
    """
    try:
        response = model.generate_content(prompt, generation_config=generation_config)
        return response.text
    except Exception as first_error:
        # Previously this error was swallowed silently; surface it for debugging.
        print(f"first attempt failed, retrying in {retry_delay_seconds}s: {first_error}")
        time.sleep(retry_delay_seconds)
    try:
        response = model.generate_content(prompt, generation_config=generation_config)
        return response.text
    except Exception as second_error:
        print(second_error)
        return None


experiment_types = ["normal_no_icl"]
for experiment_type in experiment_types:
    # NOTE(review): the path does not depend on experiment_type, so a second
    # experiment type would overwrite this file. The extension is .tsv but no
    # delimiter is passed, so the csv default (comma) is used - confirm that
    # downstream readers expect that.
    output_file = "../data/0.1-shot-datasets/squad/original_validation_part1.gemini-1.0-pro.tsv"
    # read the input data.
    squad_inputs, squad_ids, _, gold_outputs = process_squadv2_dataset(
        input_file, experiment_type, instruction_template, input_template, output_template
    )
    print(len(squad_inputs))
    print(len(squad_ids))
    print(len(gold_outputs))
    print(squad_inputs[0])
    print(squad_ids[0])
    print(gold_outputs[0])
    squad_results = []
    start_time = time.perf_counter()
    # newline="" is required by the csv module so it controls line endings itself.
    with io.open(output_file, mode="w", encoding="utf-8", newline="") as out_fp:
        # DictWriter maps each result dict onto `headers`. The earlier
        # csv.writer.writerow(dict) call iterated the dict and therefore wrote
        # its keys, so every data row repeated the column names.
        writer = csv.DictWriter(out_fp, fieldnames=headers, quotechar='"', quoting=csv.QUOTE_ALL)
        writer.writeheader()
        # Only the first example is sent ([0:1]); widen the slice for a full run.
        for idx, squad_input in enumerate(squad_inputs[0:1]):
            potential_answer = _generate_with_retry(squad_input)
            if potential_answer is None:
                potential_answer = "<API failed>"
                print(f"skipped this idx {idx}")
            else:
                print("processed", idx)
            squad_results.append(
                {
                    "prediction_score": str(0.0),
                    "gold_answer": gold_outputs[idx],
                    "potential_answer": potential_answer,
                    "row_id": squad_ids[idx],
                }
            )
            # Short pause between consecutive requests.
            time.sleep(0.5)
        end_time = time.perf_counter()
        print(f"Finished prediction in {end_time - start_time} seconds!")
        writer.writerows(squad_results)
    print(f"Finished i/o in {time.perf_counter() - end_time} seconds!")

3918
3918
3918
<bos><start_of_turn>user
This task is about writing a correct answer for the reading comprehension task. Based on the information provided in a given passage, you should identify the shortest continuous text span from the passage that serves as an answer to the given question. Avoid answers that are incorrect or have incomplete justification. If you cannot find the answer from the passage for the given question, then generate the <no_answer> tag as the final answer.
Passage: The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse ("Norman" comes from "Norseman") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of assimilation and mixing with the native Frankish and Roman-Gaulish populations, their descendants would gra

In [10]:
# Single hand-picked passage/question pair used to try the prompt format once.
# NOTE(review): `icl_input` is only assigned in a later cell of this notebook,
# so this cell fails on a fresh kernel unless that cell has been run first.
test_passage = """The Panthers offense, which led the NFL in scoring (500 points), was loaded with talent, boasting six Pro Bowl selections. Pro Bowl quarterback Cam Newton had one of his best seasons, throwing for 3,837 yards and rushing for 636, while recording a career-high and league-leading 45 total touchdowns (35 passing, 10 rushing), a career-low 10 interceptions, and a career-best quarterback rating of 99.4. Newton's leading receivers were tight end Greg Olsen, who caught a career-high 77 passes for 1,104 yards and seven touchdowns, and wide receiver Ted Ginn, Jr., who caught 44 passes for 739 yards and 10 touchdowns; Ginn also rushed for 60 yards and returned 27 punts for 277 yards. Other key receivers included veteran Jerricho Cotchery (39 receptions for 485 yards), rookie Devin Funchess (31 receptions for 473 yards and five touchdowns), and second-year receiver Corey Brown (31 receptions for 447 yards). The Panthers backfield featured Pro Bowl running back Jonathan Stewart, who led the team with 989 rushing yards and six touchdowns in 13 games, along with Pro Bowl fullback Mike Tolbert, who rushed for 256 yards and caught 18 passes for another 154 yards. Carolina's offensive line also featured two Pro Bowl selections: center Ryan Kalil and guard Trai Turner."""
test_question = "Who started at tight end for the Panthers?"
# Reference answers for the question above; nothing in the visible cells compares against them.
test_gold_answer = ["Greg Olsen", "Greg Olsen", "Olsen"]

# Prompt layout: few-shot prefix, blank line, then Passage / Question / Answer.
test_input = f"{icl_input}\n\nPassage: {test_passage}\nQuestion: {test_question}\nAnswer:"
response = model.generate_content(test_input, generation_config=generation_config)
text = response.text
print(text)

Greg Olsen


In [14]:
# prompt: Given the previous cells, convert the context, question from the rows in the dataset and then call the model prediction from gemini api to generate the responses. Enumerate over all data from the squad dataset. Save output to a separate list and then save to disk.

# NOTE(review): `dataset` and `icl_input` are not assigned before this cell in
# the visible notebook; the only visible assignments are in later cells.
# Confirm which dataset (rows with "context", "question", "answers", "id")
# is intended before re-running.
squad_results = []
for idx, row in enumerate(dataset):
    context = row["context"]
    question = row["question"]
    squad_input = f"{icl_input}\n\nPassage: {context}\nQuestion: {question}\nAnswer:"
    try:
        response = model.generate_content(squad_input, generation_config=generation_config)
        # Reading .text is kept inside the try so an exception raised there is
        # handled the same way as a failed request.
        answer_text = response.text
        print("processed", idx)
    except Exception as error:
        # Record a placeholder and keep going so one failed request does not
        # abort the whole run (same placeholder as the TSV prediction cell).
        answer_text = "<API failed>"
        print(f"skipped this idx {idx}: {error}")
    squad_results.append(
        {
            "context": context,
            "question": question,
            "answers": row["answers"],
            "gemini-1.0-pro-latest_answer": answer_text,
            "id": row["id"],
        }
    )
    # Stop after the row with index 100, i.e. at most 101 rows are processed.
    if idx == 100:
        break

processed 0
processed 1
processed 2
processed 3
processed 4
processed 5
processed 6
processed 7
processed 8
processed 9
processed 10
processed 11
processed 12
processed 13
processed 14
processed 15
processed 16
processed 17
processed 18
processed 19
processed 20
processed 21
processed 22
processed 23
processed 24
processed 25
processed 26
processed 27
processed 28
processed 29
processed 30
processed 31
processed 32
processed 33
processed 34
processed 35
processed 36
processed 37
processed 38
processed 39
processed 40
processed 41
processed 42
processed 43
processed 44
processed 45
processed 46
processed 47
processed 48
processed 49
processed 50
processed 51
processed 52
processed 53
processed 54
processed 55
processed 56
processed 57
processed 58
processed 59
processed 60
processed 61
processed 62
processed 63
processed 64
processed 65
processed 66
processed 67
processed 68
processed 69
processed 70
processed 71
processed 72
processed 73
processed 74
processed 75
processed 76
processed

In [15]:
# Persist the collected predictions as a single JSON document.
results_path = "gemini-1.0-pro-latest_squad1.0_validation.json"
with open(results_path, "w") as results_fp:
    results_fp.write(json.dumps(squad_results))

In [15]:
# Measure context lengths (whitespace-delimited words) in the HotpotQA train
# and validation splits and print the longest context found in each.
# NOTE(review): load_dataset is not imported in the visible cells
# (presumably `from datasets import load_dataset` - confirm).
dataset = load_dataset("hotpot_qa", "distractor")

train_split = dataset["train"]
dev_split = dataset["validation"]


def _context_word_count(example):
    """Return the number of whitespace-separated words across all context sentences, as a float."""
    return float(sum(len(sent.split()) for para in example["context"]["sentences"] for sent in para))


train_lengths = [_context_word_count(example) for example in train_split]
dev_lengths = [_context_word_count(example) for example in dev_split]

print(max(train_lengths))
print(max(dev_lengths))

2792.0
2587.0


In [24]:
def white_space_fix(text: str) -> str:
    """Collapse every run of whitespace to one space and trim both ends."""
    tokens = text.split()
    return " ".join(tokens)

In [33]:
# Task instruction placed at the top of every prompt. It is written one
# sentence per line for readability and flattened to a single line by
# white_space_fix.
instruction = white_space_fix(
    """In this task, you are given a set of context paragraphs to answer a question.
Your task is to generate answer for the given question based on context paragraphs.
You should generate Yes or No, or the shortest continuous text span from the passage that serves as an answer to the given question.
Avoid answers that are incorrect or provides incomplete justification for the question.
Do not generate the explanations for your answer.
You may want to combine information from two relevant passages to answer correctly for the two-hop questions."""
)

In [36]:
# Draw the in-context examples from the train split with a fixed seed.
# random.choices samples with replacement, so a row can in principle repeat.
import random

random.seed(42)
icl_rows = random.choices(train_split, k=5)

# The prompt is the instruction followed by one block per example, with a
# blank line between consecutive blocks.
prompt_sections = [f"{instruction}"]
for row_idx, row_example in enumerate(icl_rows):
    context_parts = []
    for passage_idx, para in enumerate(row_example["context"]["sentences"]):
        # A "Context_N: " label, then that paragraph's sentences.
        context_parts.append(f"Context_{passage_idx+1}: ")
        context_parts.extend(para)
    prompt_sections.append(
        f"(Example_{row_idx+1})\n{' '.join(context_parts)}\nQuestion_{row_idx+1}: {row_example['question']}\nAnswer_{row_idx+1}: {row_example['answer']}"
    )
icl_input = "\n\n".join(prompt_sections)

print(icl_input)

In this task, you are given a set of context paragraphs to answer a question. Your task is to generate answer for the given question based on context paragraphs. You should generate Yes or No, or the shortest continuous text span from the passage that serves as an answer to the given question. Avoid answers that are incorrect or provides incomplete justification for the question. Do not generate the explanations for your answer. You may want to combine information from two relevant passages to answer correctly for the two-hop questions.

(Example_1)
Context_1:  The Harvard Advocate, the art and literary magazine of Harvard College, is the oldest continuously published college art and literary magazine in the United States.  The magazine (published then in newspaper format) was founded by Charles S. Gage and William G. Peckham in 1866 and, except for a hiatus during the last years of World War II, has published continuously since then.  In 1916, "The New York Times" published a commemor

In [39]:
# Spot-check the few-shot prompt on two validation rows. The random module is
# not re-seeded in this cell, so the rows drawn depend on earlier draws.
dev_rows = random.choices(dev_split, k=2)
test_inputs = []
test_gold_outputs = []
for row_example in dev_rows:
    context_parts = []
    for passage_number, para in enumerate(row_example["context"]["sentences"], start=1):
        # A "Context_N: " label, then that paragraph's sentences.
        context_parts.append(f"Context_{passage_number}: ")
        context_parts.extend(para)
    test_inputs.append(
        f"{icl_input}\n\n(Example)\n{' '.join(context_parts)}\nQuestion: {row_example['question']}\nAnswer:"
    )
    test_gold_outputs.append(row_example["answer"])

# Print each prediction followed by its gold answer and a separator line.
for test_input, gold_answer in zip(test_inputs, test_gold_outputs):
    response = model.generate_content(test_input, generation_config=generation_config)
    print(response.text)
    print(gold_answer)
    print("####")

Osaka International Airport
Osaka International Airport
####
delivery
delivery service company
####
