In [3]:
#datasets = load_dataset("squad_v2" if squad_v2 else "squad")
import pandas as pd
from tqdm.auto import tqdm
import plotly.express as px
from plotly.subplots import make_subplots
import os
import nest_asyncio
import dspy

from dspy.evaluate import Evaluate
from dspy.teleprompt import BootstrapFewShot, BootstrapFewShotWithRandomSearch
import numpy as np
from utils import f1_score
# Patch asyncio via nest_asyncio before any other work is done in the notebook.
nest_asyncio.apply()

# The relative paths used later ("dataset/...", "code/...") are resolved against
# the working directory, so step up one level when the kernel was started from a
# directory whose path ends with "code"; otherwise just report where we are.
current_dir = os.getcwd()
if not current_dir.endswith("code"):
    print("current dir", current_dir)
else:
    os.chdir("..")

# Show the directory that all relative paths will be resolved against.
print(os.getcwd())

/Users/HansPeter/Dev/ku_local/nlp_course/src/nlp_course


In [57]:
def _to_examples(df, answer_col):
    """Convert the rows of `df` into a list of dspy Examples.

    Each example carries `question`, `context` and `answer` fields, where
    `answer` is read from the column named `answer_col`. `question` and
    `context` are marked as the inputs.
    """
    return [
        dspy.Example(question=question, context=context, answer=answer).with_inputs('question', 'context')
        for question, context, answer in zip(df['question'], df['context'], df[answer_col])
    ]


TRAIN_FRACTION = 0.8  # leading share of each pool used for training; the rest is validation
EN_POOL_SIZE = 150    # number of leading train rows used for the English train/val pool
EN_TEST_SIZE = 300    # number of leading validation-file rows used as the English test set

ds_train = pd.read_parquet("dataset/train_df.parquet")
ds_val = pd.read_parquet("dataset/val_df.parquet")

# English pool: the first EN_POOL_SIZE training rows, split positionally into train/val.
# NOTE(review): the original comment here read "same datasets size for inlang and en";
# the sizes are hard-coded and nothing enforces that they match the in-language
# row counts -- confirm against the data.
eng_orig = ds_train.iloc[:EN_POOL_SIZE]
en_idx = int(len(eng_orig) * TRAIN_FRACTION)
en_train_df = eng_orig.iloc[:en_idx]
en_val_df = eng_orig.iloc[en_idx:]
en_test_df = ds_val.iloc[:EN_TEST_SIZE]

# In-language pool: only rows that have a value in `answer_inlang`.
inlang_orig = ds_train[~ds_train['answer_inlang'].isna()]
idx = int(len(inlang_orig) * TRAIN_FRACTION)

inlang_train_df = inlang_orig.iloc[:idx]
inlang_val_df = inlang_orig.iloc[idx:]
inlang_test_df = ds_val[~ds_val['answer_inlang'].isna()]

# In-language examples use `answer_inlang` as the gold answer ...
inlang_train = _to_examples(inlang_train_df, 'answer_inlang')
inlang_val = _to_examples(inlang_val_df, 'answer_inlang')
inlang_test = _to_examples(inlang_test_df, 'answer_inlang')

# ... while the English examples use the `answer` column.
en_train = _to_examples(en_train_df, 'answer')
en_val = _to_examples(en_val_df, 'answer')
en_test = _to_examples(en_test_df, 'answer')


In [60]:

# Create the OpenAI client for the "gpt-4o-mini" model and register it as the
# language model in DSPy's global settings.
# NOTE(review): presumably the API key is picked up from the environment -- confirm.
gpt4o_mini = dspy.OpenAI(model="gpt-4o-mini")
dspy.settings.configure(lm=gpt4o_mini)

from collections import Counter
from utils import normalize_text

def f1_score(prediction, ground_truth):
    """Token-overlap F1 between a predicted answer and a reference answer.

    Both strings are passed through `normalize_text` (imported from `utils`)
    and split on whitespace. The overlap is the multiset intersection of the
    two token lists.

    NOTE(review): this definition shadows the `f1_score` imported from `utils`
    at the top of the notebook.

    Args:
        prediction: predicted answer string.
        ground_truth: reference answer string.

    Returns:
        0 when the two token lists share no token (this includes the case
        where both are empty, which additionally prints a notice); otherwise
        the harmonic mean of token precision and recall as a float.
    """
    pred_toks = normalize_text(prediction).split()
    gold_toks = normalize_text(ground_truth).split()

    if not pred_toks and not gold_toks:
        # Unlike most tasks, QReCC and SQuAD-2.0 assign 1.0 in this edge case. We don't for uniformity.
        print(
            "\n#> F1 Metric: Rare edge case of len(prediction_tokens) == len(ground_truth_tokens) == 0.\n")

    # Counter & Counter keeps, per token, the smaller of the two counts.
    overlap = sum((Counter(pred_toks) & Counter(gold_toks)).values())
    if overlap == 0:
        return 0

    precision = overlap / len(pred_toks)
    recall = overlap / len(gold_toks)
    return (2 * precision * recall) / (precision + recall)

def answer_f1_score(example, pred, trace=None, frac=1.0):
    """DSPy-style metric: token F1 of `pred.answer` against `example.answer`.

    Args:
        example: object whose `answer` attribute is the gold answer; it must
            be exactly of type `str`, otherwise an AssertionError is raised.
        pred: object whose `answer` attribute is the predicted answer.
        trace: accepted for signature compatibility; not used here.
        frac: accepted for signature compatibility; not used here.

    Returns:
        Whatever `f1_score(pred.answer, example.answer)` returns.
    """
    gold_answer = example.answer
    assert type(gold_answer) is str

    return f1_score(pred.answer, gold_answer)

# The two metrics used in this notebook, both for compiling programs and for
# scoring them: DSPy's exact-match metric and the F1 wrapper defined in this cell.
metric_EM = dspy.evaluate.answer_exact_match
metric_F1 = answer_f1_score

# NOTE: the F1 metric is implemented (see answer_f1_score / metric_F1); the earlier TODO is resolved.

class CoT(dspy.Module):
    """Single-step chain-of-thought QA program: (question, context) -> answer."""

    def __init__(self):
        super().__init__()

        # Keep the chain-of-thought predictor as a sub-module attribute so it
        # can later be compiled (e.g., taught a prompt) by a teleprompter.
        self.generate_answer = dspy.ChainOfThought('question, context -> answer')

    def forward(self, question, context):
        """Run the chain-of-thought predictor on one question/context pair and return its result."""
        prediction = self.generate_answer(question=question, context=context)
        return prediction
    

In [None]:
from tqdm import tqdm
from itertools import product

# One compiled program per (data split, optimisation metric) pair; each is saved
# to code/<lang>_compiled_cot_<metric>.json.
compile_splits = [('inlang', inlang_train, inlang_val), ('en', en_train, en_val)]
compile_metrics = [('F1', metric_F1), ('EM', metric_EM)]
compile_jobs = list(product(compile_splits, compile_metrics))

for (lang, train_set, val_set), (metric_name, metric_func) in tqdm(compile_jobs, desc="Compiling models"):
    # Optimise a fresh CoT program against this metric using the split's train/val sets.
    teleprompter = BootstrapFewShotWithRandomSearch(metric=metric_func, max_bootstrapped_demos=1)
    cot_compiled = teleprompter.compile(CoT(), trainset=train_set, valset=val_set)
    cot_compiled.save(f'code/{lang}_compiled_cot_{metric_name.lower()}.json')


In [67]:


# Imported here as well so this cell does not depend on the compile cell having been run.
from itertools import product

# Switch for the evaluation below. Named `run_eval` so the built-in `eval` is not shadowed.
run_eval = True

if run_eval:
    # os.cpu_count() returns None when the CPU count cannot be determined;
    # fall back to a single thread instead of failing on int(None).
    num_threads = os.cpu_count() or 1

    for (lang, test_set), (metric_name, metric_func) in tqdm(list(product(
        [('inlang', inlang_test), ('en', en_test)],
        [('F1', metric_F1), ('EM', metric_EM)]
    )), desc="Evaluating models"):

        # `metric_name` only selects which saved program to load (the metric it
        # was compiled with); `metric_func` is not used here because every
        # loaded program is scored with both EM and F1 below.
        cot_compiled = CoT()
        cot_compiled.load(f'code/{lang}_compiled_cot_{metric_name.lower()}.json')

        evaluate_exact_match = Evaluate(devset=test_set, metric=metric_EM, num_threads=num_threads, display_progress=True, display_table=15)
        evaluate_f1 = Evaluate(devset=test_set, metric=metric_F1, num_threads=num_threads, display_progress=True, display_table=15)
        res_em = evaluate_exact_match(cot_compiled)
        res_f1 = evaluate_f1(cot_compiled)

        print(lang, metric_name, "exact match", res_em, "f1", res_f1)



Going to sample between 1 and 1 traces per predictor.
Will attempt to bootstrap 16 candidate sets.


Average Metric: 48 / 300  (16.0): 100%|██████████| 300/300 [00:44<00:00,  6.69it/s]


Unnamed: 0,question,context,example_answer,rationale,pred_answer,answer_exact_match
0,Kuinka monta väriä on Nigerian lipussa?,The flag of Nigeria is the official flag of the Federal Republic of Nigeria. It was designed in 1959 and was officially put into use...,kaksi,"produce the answer. The context describes the flag of Nigeria, which consists of two green vertical stripes and one white vertical stripe in the middle....",3,
1,Kuka johti konfederaatiota Yhdysvaltojen sisällissodassa?,"New Orleans, however, surrendered to the Union on May 1, 1862. The economy was a problem for the Confederacy throughout its existence. Politicians did not...",Jefferson Davis,"produce the answer. The context provides information about the Confederacy, mentioning that it was formed on February 4, 1861, and that Jefferson Davis was elected...",Jefferson Davis,✔️ [True]
2,MItä tarkoittaa siirtymäalkuaine?,The transition state is the point on the reaction coordinate of a chemical reaction where the group of atoms has the highest Gibbs energy value....,elementtien siirtymäsarja,"produce the answer. The context discusses the concept of a transition state in chemical reactions, which is a temporary state that occurs during the transformation...","siirtymäalkuaineet ovat alkuaineita, joilla on osittain täytetty d-orbitaali.",
3,Miksi Liettuan sosialistinen neuvostotasavalta toimi vain vuoden?,"From 1940 to 1990, the Lithuanian Soviet Socialist Republic (, ) or Lithuanian SNT (, ) was one of the Soviet republics belonging to the...",Operaatio Barbarossa,"produce the answer. The context explains that the Lithuanian Soviet Socialist Republic was established after the Soviet Union occupied Lithuania in 1940. However, it was...",Saksan miehityksen vuoksi.,
4,Milloin Dibleyn kirkon herraa aloitettiin esittämään Suomessa?,The Vicar of Dibley () is a British sitcom created by Richard Curtis and written for its lead actress Dawn French. The series was shown...,10. marraskuuta 1994,"produce the answer. The context provides information about ""The Vicar of Dibley,"" a British sitcom that aired from 1994 to 2007. It mentions that the...",1994,
5,Milloin Itä-Rooma perustettiin?,"around the 14th century BC. The urbanization of Rome must have started in the 7th century BC. Over the centuries, Rome developed into a world...",395,produce the answer. The context provides information about the split of the Roman Empire into Western and Eastern Rome in 395 AD. It also mentions...,395年,
6,Milloin Rooman perustamistarina sai alkunsa?,"The story of the founding of Rome is a fable that describes the founding of the city of Rome. According to the legend, Rome was...",21. huhtikuuta 753 eaa,"produce the answer. The context provides a specific date for the founding of Rome, which is mentioned as April 21, 753 BC. This date is...",753年4月21日,
7,Missä on Nintendon pääkonttori?,"by October 2, 2008. In addition to its hardware, Nintendo has become known for its many game titles such as ""Mario"", ""Donkey Kong"", ""Kirby"", ""Metroid"",...",Kioto ja Tokio,"produce the answer. The context states that Nintendo was founded in Kyoto, Japan. Therefore, the answer to the question about the location of Nintendo's headquarters...","Kioto, Japani",
8,Mistä hevoset ovat tulleet Suomeen?,"they could not be scheduled. Linguists also consider it possible that already in the Bronze Age they knew how to ride, based on the Germanic...",1200-luvulla,produce the answer. The context provides two main theories regarding the origin of horses in Finland. One theory suggests that horses of northern European origin...,Lännestä viikingeiltä ja etelästä ja kaakosta mongolialaiselta alkuperältä.,
9,Miten määritellään pienoismalli?,"A miniature model is a significantly smaller copy of an object, building or other object than the original. Common are e.g. Vehicle-themed miniature models, as...","pieni kuva, jota käytetään korostamaan muinaista tai keskiaikaista valaistua käsikirjoitusta","produce the answer. We know that a miniature model is defined as a significantly smaller copy of an object, building, or other item compared to...",原物の大幅に小さなコピー,


Average Metric: 59.64999982219661 / 300  (19.9): 100%|██████████| 300/300 [00:00<00:00, 4749.52it/s] 


Unnamed: 0,question,context,example_answer,rationale,pred_answer,answer_f1_score
0,Kuinka monta väriä on Nigerian lipussa?,The flag of Nigeria is the official flag of the Federal Republic of Nigeria. It was designed in 1959 and was officially put into use...,kaksi,"produce the answer. The context describes the flag of Nigeria, which consists of two green vertical stripes and one white vertical stripe in the middle....",3,
1,Kuka johti konfederaatiota Yhdysvaltojen sisällissodassa?,"New Orleans, however, surrendered to the Union on May 1, 1862. The economy was a problem for the Confederacy throughout its existence. Politicians did not...",Jefferson Davis,"produce the answer. The context provides information about the Confederacy, mentioning that it was formed on February 4, 1861, and that Jefferson Davis was elected...",Jefferson Davis,✔️ [1.0]
2,MItä tarkoittaa siirtymäalkuaine?,The transition state is the point on the reaction coordinate of a chemical reaction where the group of atoms has the highest Gibbs energy value....,elementtien siirtymäsarja,"produce the answer. The context discusses the concept of a transition state in chemical reactions, which is a temporary state that occurs during the transformation...","siirtymäalkuaineet ovat alkuaineita, joilla on osittain täytetty d-orbitaali.",
3,Miksi Liettuan sosialistinen neuvostotasavalta toimi vain vuoden?,"From 1940 to 1990, the Lithuanian Soviet Socialist Republic (, ) or Lithuanian SNT (, ) was one of the Soviet republics belonging to the...",Operaatio Barbarossa,"produce the answer. The context explains that the Lithuanian Soviet Socialist Republic was established after the Soviet Union occupied Lithuania in 1940. However, it was...",Saksan miehityksen vuoksi.,
4,Milloin Dibleyn kirkon herraa aloitettiin esittämään Suomessa?,The Vicar of Dibley () is a British sitcom created by Richard Curtis and written for its lead actress Dawn French. The series was shown...,10. marraskuuta 1994,"produce the answer. The context provides information about ""The Vicar of Dibley,"" a British sitcom that aired from 1994 to 2007. It mentions that the...",1994,✔️ [0.5]
5,Milloin Itä-Rooma perustettiin?,"around the 14th century BC. The urbanization of Rome must have started in the 7th century BC. Over the centuries, Rome developed into a world...",395,produce the answer. The context provides information about the split of the Roman Empire into Western and Eastern Rome in 395 AD. It also mentions...,395年,
6,Milloin Rooman perustamistarina sai alkunsa?,"The story of the founding of Rome is a fable that describes the founding of the city of Rome. According to the legend, Rome was...",21. huhtikuuta 753 eaa,"produce the answer. The context provides a specific date for the founding of Rome, which is mentioned as April 21, 753 BC. This date is...",753年4月21日,
7,Missä on Nintendon pääkonttori?,"by October 2, 2008. In addition to its hardware, Nintendo has become known for its many game titles such as ""Mario"", ""Donkey Kong"", ""Kirby"", ""Metroid"",...",Kioto ja Tokio,"produce the answer. The context states that Nintendo was founded in Kyoto, Japan. Therefore, the answer to the question about the location of Nintendo's headquarters...","Kioto, Japani",✔️ [0.4]
8,Mistä hevoset ovat tulleet Suomeen?,"they could not be scheduled. Linguists also consider it possible that already in the Bronze Age they knew how to ride, based on the Germanic...",1200-luvulla,produce the answer. The context provides two main theories regarding the origin of horses in Finland. One theory suggests that horses of northern European origin...,Lännestä viikingeiltä ja etelästä ja kaakosta mongolialaiselta alkuperältä.,
9,Miten määritellään pienoismalli?,"A miniature model is a significantly smaller copy of an object, building or other object than the original. Common are e.g. Vehicle-themed miniature models, as...","pieni kuva, jota käytetään korostamaan muinaista tai keskiaikaista valaistua käsikirjoitusta","produce the answer. We know that a miniature model is defined as a significantly smaller copy of an object, building, or other item compared to...",原物の大幅に小さなコピー,




inlang F1 exact match 16.0 f1 19.88
Going to sample between 1 and 1 traces per predictor.
Will attempt to bootstrap 16 candidate sets.


Average Metric: 50 / 300  (16.7): 100%|██████████| 300/300 [01:04<00:00,  4.64it/s]


Unnamed: 0,question,context,example_answer,rationale,pred_answer,answer_exact_match
0,Kuinka monta väriä on Nigerian lipussa?,The flag of Nigeria is the official flag of the Federal Republic of Nigeria. It was designed in 1959 and was officially put into use...,kaksi,"determine the number of colors in the Nigerian flag. The context states that the flag consists of green and white colors, with green representing forests...",2,
1,Kuka johti konfederaatiota Yhdysvaltojen sisällissodassa?,"New Orleans, however, surrendered to the Union on May 1, 1862. The economy was a problem for the Confederacy throughout its existence. Politicians did not...",Jefferson Davis,determine who led the Confederacy during the American Civil War. The context provides information about the formation of the Confederacy and mentions that Jefferson Davis...,Jefferson Davis,✔️ [True]
2,MItä tarkoittaa siirtymäalkuaine?,The transition state is the point on the reaction coordinate of a chemical reaction where the group of atoms has the highest Gibbs energy value....,elementtien siirtymäsarja,define what a transition state means in chemistry. The context explains that a transition state is a point in a chemical reaction where the energy...,"Kemiallisessa reaktiossa korkein energiatila, joka on epävakaa ja lyhytkestoinen.",
3,Miksi Liettuan sosialistinen neuvostotasavalta toimi vain vuoden?,"From 1940 to 1990, the Lithuanian Soviet Socialist Republic (, ) or Lithuanian SNT (, ) was one of the Soviet republics belonging to the...",Operaatio Barbarossa,determine why the Lithuanian Soviet Socialist Republic only functioned for a year. The context explains that Lithuania was occupied by the Soviet Union in June...,Koska se oli miehitetty Saksan toimesta.,
4,Milloin Dibleyn kirkon herraa aloitettiin esittämään Suomessa?,The Vicar of Dibley () is a British sitcom created by Richard Curtis and written for its lead actress Dawn French. The series was shown...,10. marraskuuta 1994,"determine when ""The Vicar of Dibley"" was first aired in Finland. The context provides information about the show's original airing dates, which were from 1994...",10. marraskuuta 1994,✔️ [True]
5,Milloin Itä-Rooma perustettiin?,"around the 14th century BC. The urbanization of Rome must have started in the 7th century BC. Over the centuries, Rome developed into a world...",395,determine when Eastern Rome was established. The context mentions that Rome split into Western Rome and Eastern Rome in 395 AD. This indicates that Eastern...,395年,
6,Milloin Rooman perustamistarina sai alkunsa?,"The story of the founding of Rome is a fable that describes the founding of the city of Rome. According to the legend, Rome was...",21. huhtikuuta 753 eaa,"determine when the founding story of Rome originated. The context provides information that the legend states Rome was founded on April 21, 753 BC. It...",21. huhtikuuta 753 eaa.,✔️ [True]
7,Missä on Nintendon pääkonttori?,"by October 2, 2008. In addition to its hardware, Nintendo has become known for its many game titles such as ""Mario"", ""Donkey Kong"", ""Kirby"", ""Metroid"",...",Kioto ja Tokio,"determine the location of Nintendo's headquarters. The context mentions that Nintendo was founded in Kyoto, Japan, and it is known for its various game titles...","Kiotossa, Japan",
8,Mistä hevoset ovat tulleet Suomeen?,"they could not be scheduled. Linguists also consider it possible that already in the Bronze Age they knew how to ride, based on the Germanic...",1200-luvulla,determine the origins of horses in Finland. The context provides two main theories regarding how horses arrived in Finland. The first theory suggests that horses...,Pohjois-Euroopasta ja Mongolian alueelta.,
9,Miten määritellään pienoismalli?,"A miniature model is a significantly smaller copy of an object, building or other object than the original. Common are e.g. Vehicle-themed miniature models, as...","pieni kuva, jota käytetään korostamaan muinaista tai keskiaikaista valaistua käsikirjoitusta","define what a miniature model is. The context provides a clear definition, stating that a miniature model is a significantly smaller copy of an object,...","Pienoismalli on merkittävästi pienempi kopio alkuperäisestä esineestä, rakennuksesta tai muusta objektista.",


Average Metric: 67.40578933669676 / 300  (22.5): 100%|██████████| 300/300 [00:00<00:00, 4233.04it/s] 


Unnamed: 0,question,context,example_answer,rationale,pred_answer,answer_f1_score
0,Kuinka monta väriä on Nigerian lipussa?,The flag of Nigeria is the official flag of the Federal Republic of Nigeria. It was designed in 1959 and was officially put into use...,kaksi,"determine the number of colors in the Nigerian flag. The context states that the flag consists of green and white colors, with green representing forests...",2,
1,Kuka johti konfederaatiota Yhdysvaltojen sisällissodassa?,"New Orleans, however, surrendered to the Union on May 1, 1862. The economy was a problem for the Confederacy throughout its existence. Politicians did not...",Jefferson Davis,determine who led the Confederacy during the American Civil War. The context provides information about the formation of the Confederacy and mentions that Jefferson Davis...,Jefferson Davis,✔️ [1.0]
2,MItä tarkoittaa siirtymäalkuaine?,The transition state is the point on the reaction coordinate of a chemical reaction where the group of atoms has the highest Gibbs energy value....,elementtien siirtymäsarja,define what a transition state means in chemistry. The context explains that a transition state is a point in a chemical reaction where the energy...,"Kemiallisessa reaktiossa korkein energiatila, joka on epävakaa ja lyhytkestoinen.",
3,Miksi Liettuan sosialistinen neuvostotasavalta toimi vain vuoden?,"From 1940 to 1990, the Lithuanian Soviet Socialist Republic (, ) or Lithuanian SNT (, ) was one of the Soviet republics belonging to the...",Operaatio Barbarossa,determine why the Lithuanian Soviet Socialist Republic only functioned for a year. The context explains that Lithuania was occupied by the Soviet Union in June...,Koska se oli miehitetty Saksan toimesta.,
4,Milloin Dibleyn kirkon herraa aloitettiin esittämään Suomessa?,The Vicar of Dibley () is a British sitcom created by Richard Curtis and written for its lead actress Dawn French. The series was shown...,10. marraskuuta 1994,"determine when ""The Vicar of Dibley"" was first aired in Finland. The context provides information about the show's original airing dates, which were from 1994...",10. marraskuuta 1994,✔️ [1.0]
5,Milloin Itä-Rooma perustettiin?,"around the 14th century BC. The urbanization of Rome must have started in the 7th century BC. Over the centuries, Rome developed into a world...",395,determine when Eastern Rome was established. The context mentions that Rome split into Western Rome and Eastern Rome in 395 AD. This indicates that Eastern...,395年,
6,Milloin Rooman perustamistarina sai alkunsa?,"The story of the founding of Rome is a fable that describes the founding of the city of Rome. According to the legend, Rome was...",21. huhtikuuta 753 eaa,"determine when the founding story of Rome originated. The context provides information that the legend states Rome was founded on April 21, 753 BC. It...",21. huhtikuuta 753 eaa.,✔️ [1.0]
7,Missä on Nintendon pääkonttori?,"by October 2, 2008. In addition to its hardware, Nintendo has become known for its many game titles such as ""Mario"", ""Donkey Kong"", ""Kirby"", ""Metroid"",...",Kioto ja Tokio,"determine the location of Nintendo's headquarters. The context mentions that Nintendo was founded in Kyoto, Japan, and it is known for its various game titles...","Kiotossa, Japan",
8,Mistä hevoset ovat tulleet Suomeen?,"they could not be scheduled. Linguists also consider it possible that already in the Bronze Age they knew how to ride, based on the Germanic...",1200-luvulla,determine the origins of horses in Finland. The context provides two main theories regarding how horses arrived in Finland. The first theory suggests that horses...,Pohjois-Euroopasta ja Mongolian alueelta.,
9,Miten määritellään pienoismalli?,"A miniature model is a significantly smaller copy of an object, building or other object than the original. Common are e.g. Vehicle-themed miniature models, as...","pieni kuva, jota käytetään korostamaan muinaista tai keskiaikaista valaistua käsikirjoitusta","define what a miniature model is. The context provides a clear definition, stating that a miniature model is a significantly smaller copy of an object,...","Pienoismalli on merkittävästi pienempi kopio alkuperäisestä esineestä, rakennuksesta tai muusta objektista.",✔️ [0.09523809523809525]




inlang EM exact match 16.67 f1 22.47
Going to sample between 1 and 1 traces per predictor.
Will attempt to bootstrap 16 candidate sets.


Average Metric: 136 / 300  (45.3): 100%|██████████| 300/300 [00:56<00:00,  5.34it/s]


Unnamed: 0,question,context,example_answer,rationale,pred_answer,answer_exact_match
0,Missä maassa Jack Churchill syntyi?,"Churchill was born at Colombo, British Ceylon to Alec Fleming ""Alex"" Churchill (1876–1961), later of Hove, East Sussex and Elinor Elizabeth, daughter of John Alexander...","Colombo, British Ceylon","identify the country where Jack Churchill was born. We know that he was born in Colombo, which is in British Ceylon. Ceylon is the former...",Sri Lanka,
1,Mikä on yleisin uskonto maailmassa?,"The five largest religious groups by world population, estimated to account for 5.8 billion people and 84% of the population, are Christianity, Islam, Buddhism, Hinduism...",Christianity,identify the most common religion in the world. We know that the five largest religious groups account for a significant portion of the global population....,Christianity,✔️ [True]
2,Kuka oli Glee sarjan pääosassa?,"Rachel Barbra Berry (Lea Michele) is the lead character and is a ""strong, driven"" member of the glee club, who is misunderstood by her peers....",Rachel Barbra Berry,"identify the lead character of the show ""Glee."" The context mentions Rachel Barbra Berry as the lead character, played by Lea Michele. It describes her...",Lea Michele,
3,Milloin Killzone-sarjan peli julkaistiin ensimmäisen kerran?,Killzone is a series of first-person shooter and twin sticks shooter video games for Sony Computer Entertainment's (SCE) video game consoles. The main series and...,November 2004,"identify when the first game in the Killzone series was released. The context states that the first game, simply titled ""Killzone,"" was released on the...",November 2004,✔️ [True]
4,Milloin Pennsylvania liitty USA?,The state is one of the 13 original founding states of the United States; it came into being in 1681 as a result of a...,"December 12, 1787",determine when Pennsylvania joined the United States. We know that Pennsylvania was one of the original 13 colonies and was established in 1681. It ratified...,"December 12, 1787",✔️ [True]
5,Missä Richard Attenborough kuoli?,"In June 2012, shortly before her 90th birthday, Sheila Sim entered the professional actors' retirement home Denville Hall, for which she and Attenborough had helped...",Denville Hall,determine where Richard Attenborough passed away. The context mentions that he moved into a nursing home in London to be with his wife due to...,London,
6,Milloin ateismi on syntynyt?,"In early ancient Greek, the adjective ' (, from the privative ἀ- + ""god"") meant ""godless"". It was first used as a term of censure...",5th century BCE,"identify the origins of atheism. We see that the term 'atheism' has its roots in ancient Greek, where it was used to describe a state...",5th century BCE,✔️ [True]
7,Mistä tulee nimitys digitelevisio?,"Digital television (DTV) is the transmission of television signals, including the sound channel, using digital encoding, in contrast to the earlier television technology, analog television,...","transmission of television signals, including the sound channel, using digital encoding","identify the origin of the term ""digitelevisio."" The context explains that digital television (DTV) is a significant advancement in television technology, which contrasts with analog...","Finnish language (from ""digitaalinen"" meaning digital and ""televisio"" meaning television)",
8,Ovatko suomen kielen murteet olleet aina olemassa?,Westrobothnian () is a number of closely related non-standardized Scandinavian dialects spoken natively along the coast of the historical province of Westrobothnia in co-existence with...,no,"determine whether Finnish dialects have always existed. The context discusses the Westrobothnian dialects, which are Scandinavian and not directly related to Finnish. It does not...",no,✔️ [True]
9,Kuka näytteli pääosaa Glee sarjassa?,"Rachel Barbra Berry (Lea Michele) is the lead character and is a ""strong, driven"" member of the glee club, who is misunderstood by her peers....",Lea Michele,"identify the lead actor in the series ""Glee."" The context mentions Rachel Barbra Berry as the lead character, played by Lea Michele. This indicates that...",Lea Michele,✔️ [True]


Average Metric: 172.35903258159183 / 300  (57.5): 100%|██████████| 300/300 [00:00<00:00, 4517.24it/s]


Unnamed: 0,question,context,example_answer,rationale,pred_answer,answer_f1_score
0,Missä maassa Jack Churchill syntyi?,"Churchill was born at Colombo, British Ceylon to Alec Fleming ""Alex"" Churchill (1876–1961), later of Hove, East Sussex and Elinor Elizabeth, daughter of John Alexander...","Colombo, British Ceylon","identify the country where Jack Churchill was born. We know that he was born in Colombo, which is in British Ceylon. Ceylon is the former...",Sri Lanka,
1,Mikä on yleisin uskonto maailmassa?,"The five largest religious groups by world population, estimated to account for 5.8 billion people and 84% of the population, are Christianity, Islam, Buddhism, Hinduism...",Christianity,identify the most common religion in the world. We know that the five largest religious groups account for a significant portion of the global population....,Christianity,✔️ [1.0]
2,Kuka oli Glee sarjan pääosassa?,"Rachel Barbra Berry (Lea Michele) is the lead character and is a ""strong, driven"" member of the glee club, who is misunderstood by her peers....",Rachel Barbra Berry,"identify the lead character of the show ""Glee."" The context mentions Rachel Barbra Berry as the lead character, played by Lea Michele. It describes her...",Lea Michele,
3,Milloin Killzone-sarjan peli julkaistiin ensimmäisen kerran?,Killzone is a series of first-person shooter and twin sticks shooter video games for Sony Computer Entertainment's (SCE) video game consoles. The main series and...,November 2004,"identify when the first game in the Killzone series was released. The context states that the first game, simply titled ""Killzone,"" was released on the...",November 2004,✔️ [1.0]
4,Milloin Pennsylvania liitty USA?,The state is one of the 13 original founding states of the United States; it came into being in 1681 as a result of a...,"December 12, 1787",determine when Pennsylvania joined the United States. We know that Pennsylvania was one of the original 13 colonies and was established in 1681. It ratified...,"December 12, 1787",✔️ [1.0]
5,Missä Richard Attenborough kuoli?,"In June 2012, shortly before her 90th birthday, Sheila Sim entered the professional actors' retirement home Denville Hall, for which she and Attenborough had helped...",Denville Hall,determine where Richard Attenborough passed away. The context mentions that he moved into a nursing home in London to be with his wife due to...,London,
6,Milloin ateismi on syntynyt?,"In early ancient Greek, the adjective ' (, from the privative ἀ- + ""god"") meant ""godless"". It was first used as a term of censure...",5th century BCE,"identify the origins of atheism. We see that the term 'atheism' has its roots in ancient Greek, where it was used to describe a state...",5th century BCE,✔️ [1.0]
7,Mistä tulee nimitys digitelevisio?,"Digital television (DTV) is the transmission of television signals, including the sound channel, using digital encoding, in contrast to the earlier television technology, analog television,...","transmission of television signals, including the sound channel, using digital encoding","identify the origin of the term ""digitelevisio."" The context explains that digital television (DTV) is a significant advancement in television technology, which contrasts with analog...","Finnish language (from ""digitaalinen"" meaning digital and ""televisio"" meaning television)",✔️ [0.1904761904761905]
8,Ovatko suomen kielen murteet olleet aina olemassa?,Westrobothnian () is a number of closely related non-standardized Scandinavian dialects spoken natively along the coast of the historical province of Westrobothnia in co-existence with...,no,"determine whether Finnish dialects have always existed. The context discusses the Westrobothnian dialects, which are Scandinavian and not directly related to Finnish. It does not...",no,✔️ [1.0]
9,Kuka näytteli pääosaa Glee sarjassa?,"Rachel Barbra Berry (Lea Michele) is the lead character and is a ""strong, driven"" member of the glee club, who is misunderstood by her peers....",Lea Michele,"identify the lead actor in the series ""Glee."" The context mentions Rachel Barbra Berry as the lead character, played by Lea Michele. This indicates that...",Lea Michele,✔️ [1.0]




en F1 exact match 45.33 f1 57.45
Going to sample between 1 and 1 traces per predictor.
Will attempt to bootstrap 16 candidate sets.


Average Metric: 136 / 300  (45.3): 100%|██████████| 300/300 [00:00<00:00, 4022.10it/s]


Unnamed: 0,question,context,example_answer,rationale,pred_answer,answer_exact_match
0,Missä maassa Jack Churchill syntyi?,"Churchill was born at Colombo, British Ceylon to Alec Fleming ""Alex"" Churchill (1876–1961), later of Hove, East Sussex and Elinor Elizabeth, daughter of John Alexander...","Colombo, British Ceylon","identify the country where Jack Churchill was born. We know that he was born in Colombo, which is in British Ceylon. Ceylon is the former...",Sri Lanka,
1,Mikä on yleisin uskonto maailmassa?,"The five largest religious groups by world population, estimated to account for 5.8 billion people and 84% of the population, are Christianity, Islam, Buddhism, Hinduism...",Christianity,identify the most common religion in the world. We know that the five largest religious groups account for a significant portion of the global population....,Christianity,✔️ [True]
2,Kuka oli Glee sarjan pääosassa?,"Rachel Barbra Berry (Lea Michele) is the lead character and is a ""strong, driven"" member of the glee club, who is misunderstood by her peers....",Rachel Barbra Berry,"identify the lead character of the show ""Glee."" The context mentions Rachel Barbra Berry as the lead character, played by Lea Michele. It describes her...",Lea Michele,
3,Milloin Killzone-sarjan peli julkaistiin ensimmäisen kerran?,Killzone is a series of first-person shooter and twin sticks shooter video games for Sony Computer Entertainment's (SCE) video game consoles. The main series and...,November 2004,"identify when the first game in the Killzone series was released. The context states that the first game, simply titled ""Killzone,"" was released on the...",November 2004,✔️ [True]
4,Milloin Pennsylvania liitty USA?,The state is one of the 13 original founding states of the United States; it came into being in 1681 as a result of a...,"December 12, 1787",determine when Pennsylvania joined the United States. We know that Pennsylvania was one of the original 13 colonies and was established in 1681. It ratified...,"December 12, 1787",✔️ [True]
5,Missä Richard Attenborough kuoli?,"In June 2012, shortly before her 90th birthday, Sheila Sim entered the professional actors' retirement home Denville Hall, for which she and Attenborough had helped...",Denville Hall,determine where Richard Attenborough passed away. The context mentions that he moved into a nursing home in London to be with his wife due to...,London,
6,Milloin ateismi on syntynyt?,"In early ancient Greek, the adjective ' (, from the privative ἀ- + ""god"") meant ""godless"". It was first used as a term of censure...",5th century BCE,"identify the origins of atheism. We see that the term 'atheism' has its roots in ancient Greek, where it was used to describe a state...",5th century BCE,✔️ [True]
7,Mistä tulee nimitys digitelevisio?,"Digital television (DTV) is the transmission of television signals, including the sound channel, using digital encoding, in contrast to the earlier television technology, analog television,...","transmission of television signals, including the sound channel, using digital encoding","identify the origin of the term ""digitelevisio."" The context explains that digital television (DTV) is a significant advancement in television technology, which contrasts with analog...","Finnish language (from ""digitaalinen"" meaning digital and ""televisio"" meaning television)",
8,Ovatko suomen kielen murteet olleet aina olemassa?,Westrobothnian () is a number of closely related non-standardized Scandinavian dialects spoken natively along the coast of the historical province of Westrobothnia in co-existence with...,no,"determine whether Finnish dialects have always existed. The context discusses the Westrobothnian dialects, which are Scandinavian and not directly related to Finnish. It does not...",no,✔️ [True]
9,Kuka näytteli pääosaa Glee sarjassa?,"Rachel Barbra Berry (Lea Michele) is the lead character and is a ""strong, driven"" member of the glee club, who is misunderstood by her peers....",Lea Michele,"identify the lead actor in the series ""Glee."" The context mentions Rachel Barbra Berry as the lead character, played by Lea Michele. This indicates that...",Lea Michele,✔️ [True]


Average Metric: 172.35903258159183 / 300  (57.5): 100%|██████████| 300/300 [00:00<00:00, 2664.94it/s]


Unnamed: 0,question,context,example_answer,rationale,pred_answer,answer_f1_score
0,Missä maassa Jack Churchill syntyi?,"Churchill was born at Colombo, British Ceylon to Alec Fleming ""Alex"" Churchill (1876–1961), later of Hove, East Sussex and Elinor Elizabeth, daughter of John Alexander...","Colombo, British Ceylon","identify the country where Jack Churchill was born. We know that he was born in Colombo, which is in British Ceylon. Ceylon is the former...",Sri Lanka,
1,Mikä on yleisin uskonto maailmassa?,"The five largest religious groups by world population, estimated to account for 5.8 billion people and 84% of the population, are Christianity, Islam, Buddhism, Hinduism...",Christianity,identify the most common religion in the world. We know that the five largest religious groups account for a significant portion of the global population....,Christianity,✔️ [1.0]
2,Kuka oli Glee sarjan pääosassa?,"Rachel Barbra Berry (Lea Michele) is the lead character and is a ""strong, driven"" member of the glee club, who is misunderstood by her peers....",Rachel Barbra Berry,"identify the lead character of the show ""Glee."" The context mentions Rachel Barbra Berry as the lead character, played by Lea Michele. It describes her...",Lea Michele,
3,Milloin Killzone-sarjan peli julkaistiin ensimmäisen kerran?,Killzone is a series of first-person shooter and twin sticks shooter video games for Sony Computer Entertainment's (SCE) video game consoles. The main series and...,November 2004,"identify when the first game in the Killzone series was released. The context states that the first game, simply titled ""Killzone,"" was released on the...",November 2004,✔️ [1.0]
4,Milloin Pennsylvania liitty USA?,The state is one of the 13 original founding states of the United States; it came into being in 1681 as a result of a...,"December 12, 1787",determine when Pennsylvania joined the United States. We know that Pennsylvania was one of the original 13 colonies and was established in 1681. It ratified...,"December 12, 1787",✔️ [1.0]
5,Missä Richard Attenborough kuoli?,"In June 2012, shortly before her 90th birthday, Sheila Sim entered the professional actors' retirement home Denville Hall, for which she and Attenborough had helped...",Denville Hall,determine where Richard Attenborough passed away. The context mentions that he moved into a nursing home in London to be with his wife due to...,London,
6,Milloin ateismi on syntynyt?,"In early ancient Greek, the adjective ' (, from the privative ἀ- + ""god"") meant ""godless"". It was first used as a term of censure...",5th century BCE,"identify the origins of atheism. We see that the term 'atheism' has its roots in ancient Greek, where it was used to describe a state...",5th century BCE,✔️ [1.0]
7,Mistä tulee nimitys digitelevisio?,"Digital television (DTV) is the transmission of television signals, including the sound channel, using digital encoding, in contrast to the earlier television technology, analog television,...","transmission of television signals, including the sound channel, using digital encoding","identify the origin of the term ""digitelevisio."" The context explains that digital television (DTV) is a significant advancement in television technology, which contrasts with analog...","Finnish language (from ""digitaalinen"" meaning digital and ""televisio"" meaning television)",✔️ [0.1904761904761905]
8,Ovatko suomen kielen murteet olleet aina olemassa?,Westrobothnian () is a number of closely related non-standardized Scandinavian dialects spoken natively along the coast of the historical province of Westrobothnia in co-existence with...,no,"determine whether Finnish dialects have always existed. The context discusses the Westrobothnian dialects, which are Scandinavian and not directly related to Finnish. It does not...",no,✔️ [1.0]
9,Kuka näytteli pääosaa Glee sarjassa?,"Rachel Barbra Berry (Lea Michele) is the lead character and is a ""strong, driven"" member of the glee club, who is misunderstood by her peers....",Lea Michele,"identify the lead actor in the series ""Glee."" The context mentions Rachel Barbra Berry as the lead character, played by Lea Michele. This indicates that...",Lea Michele,✔️ [1.0]


Evaluating models: 100%|██████████| 4/4 [02:46<00:00, 41.70s/it]

en EM exact match 45.33 f1 57.45



