In [1]:
import evaluate
from datasets import *
import numpy as np
import pathlib as pl
import pandas as pd
import torch
import ast
import os
from haystack.document_stores import FAISSDocumentStore
from haystack.nodes import DensePassageRetriever, TransformersReader, Seq2SeqGenerator
from haystack.nodes.answer_generator.transformers import _BartEli5Converter
from haystack.pipelines import Pipeline

# Year tag interpolated into the dataset file name and the DPR model directory below.
YEAR = 2042
# NOTE(review): not referenced anywhere in the visible notebook - confirm whether it is still needed.
DPR_FINE_TUNE = False

# Base directories of the fine-tuned checkpoints: MODEL_PATH_B is used for the
# TransformersReader models, MODEL_PATH_T for the Seq2SeqGenerator (T5) models.
MODEL_PATH_B = "../../data/models/BERT"
MODEL_PATH_T = "../../data/models/T5"

In [2]:
def _to_squad_example(example):
    """Convert one raw CSV row into a SQuAD-style example.

    The "answers" column is stored as the string repr of a dict with "text" and
    "answer_start" entries, so it is parsed with ``ast.literal_eval``. Newlines in
    the question, the context and the first answer text are replaced by spaces;
    only the first answer text is kept, while "answer_start" is passed through.
    """
    parsed_answers = ast.literal_eval(example["answers"])
    return {
        "question": example["question"].replace("\n", " "),
        "context": example["context"].replace("\n", " "),
        "answers": {
            "text": [parsed_answers["text"][0].replace("\n", " ")],
            "answer_start": parsed_answers["answer_start"],
        },
    }


# Load the semicolon-separated SQuAD-format CSV and make a reproducible 70/30 split.
generated_data = load_dataset(
    "csv",
    data_files=f"../../data/clean/sustainability-report-{YEAR}-squad-format.csv",
    delimiter=";",
    split="train",
).train_test_split(test_size=0.3, shuffle=True, seed=42)

# Normalise the test split in a single pass. The previous version created helper
# columns ("text", "answer_start") and then called remove_columns() without
# assigning its result, so the stored split silently kept those columns.
generated_data["test"] = generated_data["test"].map(_to_squad_example)

# Display the cleaned test split as the cell output.
generated_data["test"]

Found cached dataset csv (C:/Users/rjutr/.cache/huggingface/datasets/csv/default-dc46deea403e6d7a/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1)
Loading cached split indices for dataset at C:\Users\rjutr\.cache\huggingface\datasets\csv\default-dc46deea403e6d7a\0.0.0\6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1\cache-8c4226c3883b9f9f.arrow and C:\Users\rjutr\.cache\huggingface\datasets\csv\default-dc46deea403e6d7a\0.0.0\6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1\cache-24d9770f6403af3d.arrow
Loading cached processed dataset at C:\Users\rjutr\.cache\huggingface\datasets\csv\default-dc46deea403e6d7a\0.0.0\6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1\cache-47e77242f5a0d387.arrow
Loading cached processed dataset at C:\Users\rjutr\.cache\huggingface\datasets\csv\default-dc46deea403e6d7a\0.0.0\6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1\cache-cb788cd2e517f2b5.arrow
Loading cached process

Dataset({
    features: ['question', 'context', 'answers', 'id'],
    num_rows: 162
})

In [3]:
# Restore the FAISS document store from its saved index and config files.
document_store = FAISSDocumentStore.load(
    index_path="document_store.faiss",
    config_path="document_store.json",
)

In [4]:
# Load the DPR retriever saved for the configured year and attach it to the document store.
retriever = DensePassageRetriever.load(
    load_dir=f"../../data/models/DPR/{YEAR}",
    document_store=document_store,
    use_gpu=True,
)

  return self.fget.__get__(instance, owner)()


In [5]:
# Pick the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define the prediction function
def inference_answer(model, question):
    """Run one query through a pipeline and return its top answer and top document.

    Args:
        model: Object exposing ``run(query=..., params=...)`` that returns a dict
            with "answers" and "documents" sequences. In this notebook it is a
            pipeline whose nodes are named "Retriever" and "Model".
        question: The query string.

    Returns:
        A ``(answer, context)`` tuple holding the ``.answer`` of the first answer
        and the ``.content`` of the first document. Either element is ``None``
        when the pipeline returned no answer / no document, so that one empty
        result does not abort a whole evaluation loop with an ``IndexError``.
    """
    # Gradients are never needed for inference.
    with torch.no_grad():
        outputs = model.run(
            query=question,
            params={"Model": {"top_k": 1}, "Retriever": {"top_k": 1}},
        )

    answers = outputs.get("answers") or []
    documents = outputs.get("documents") or []
    top_answer = answers[0].answer if answers else None
    top_context = documents[0].content if documents else None
    return top_answer, top_context

In [6]:
# Column-oriented accumulator: one list per output column, filled row by row
# by the per-model evaluation cells.
results = {
    column: []
    for column in (
        "Model",
        "Question",
        "Ground Truth Context",
        "Ground Truth Answer",
        "Retrieved Context",
        "Extracted/Generated Answer",
    )
}

In [7]:
# Extractive QA: fine-tuned RoBERTa reader on top of the shared DPR retriever.
# The checkpoint name follows YEAR instead of repeating the year as a literal.
reader = TransformersReader(
    model_name_or_path=f"{MODEL_PATH_B}/roberta-base-squad2-finetuned-NLB-QA-{YEAR}-full_combined",
    use_gpu=True,
)
pipe = Pipeline()
pipe.add_node(component=retriever, name="Retriever", inputs=["Query"])
pipe.add_node(component=reader, name="Model", inputs=["Retriever"])

for row in generated_data["test"]:
    question = row["question"].strip()
    # Run inference before touching `results`, so an exception cannot leave the
    # accumulator with columns of different lengths.
    gotten_answer, gotten_context = inference_answer(pipe, question)
    results["Model"].append("roberta")
    results["Question"].append(question)
    results["Ground Truth Context"].append(row["context"])
    results["Ground Truth Answer"].append(row["answers"]["text"][0])
    results["Retrieved Context"].append(gotten_context)
    results["Extracted/Generated Answer"].append(gotten_answer)

# Drop the model references and release cached GPU memory before the next model is loaded.
del reader
del pipe
torch.cuda.empty_cache()



In [8]:
# Extractive QA: fine-tuned DistilBERT reader on top of the shared DPR retriever.
# The checkpoint name follows YEAR instead of repeating the year as a literal.
reader = TransformersReader(
    model_name_or_path=f"{MODEL_PATH_B}/distilbert-base-cased-distilled-squad-finetuned-NLB-QA-{YEAR}-full_combined",
    use_gpu=True,
)
pipe = Pipeline()
pipe.add_node(component=retriever, name="Retriever", inputs=["Query"])
pipe.add_node(component=reader, name="Model", inputs=["Retriever"])

for row in generated_data["test"]:
    question = row["question"].strip()
    # Run inference before touching `results`, so an exception cannot leave the
    # accumulator with columns of different lengths.
    gotten_answer, gotten_context = inference_answer(pipe, question)
    results["Model"].append("distilbert")
    results["Question"].append(question)
    results["Ground Truth Context"].append(row["context"])
    results["Ground Truth Answer"].append(row["answers"]["text"][0])
    results["Retrieved Context"].append(gotten_context)
    results["Extracted/Generated Answer"].append(gotten_answer)

# Drop the model references and release cached GPU memory before the next model is loaded.
del reader
del pipe
torch.cuda.empty_cache()

In [9]:
# Generative QA: fine-tuned T5-base generator on top of the shared DPR retriever.
# The checkpoint name follows YEAR instead of repeating the year as a literal.
generator = Seq2SeqGenerator(
    model_name_or_path=f"{MODEL_PATH_T}/t5-base-finetuned-squadv2-finetuned-NLB-QA-{YEAR}-full_combined",
    input_converter=_BartEli5Converter(),
    use_gpu=True,
)
pipe = Pipeline()
pipe.add_node(component=retriever, name="Retriever", inputs=["Query"])
pipe.add_node(component=generator, name="Model", inputs=["Retriever"])

for row in generated_data["test"]:
    question = row["question"].strip()
    # Run inference before touching `results`, so an exception cannot leave the
    # accumulator with columns of different lengths.
    gotten_answer, gotten_context = inference_answer(pipe, question)
    results["Model"].append("t5-base")
    results["Question"].append(question)
    results["Ground Truth Context"].append(row["context"])
    results["Ground Truth Answer"].append(row["answers"]["text"][0])
    results["Retrieved Context"].append(gotten_context)
    results["Extracted/Generated Answer"].append(gotten_answer)

# Drop the model references and release cached GPU memory before the next model is loaded.
del generator
del pipe
torch.cuda.empty_cache()

In [10]:
# Generative QA: fine-tuned T5-small generator on top of the shared DPR retriever.
# The checkpoint name follows YEAR instead of repeating the year as a literal.
generator = Seq2SeqGenerator(
    model_name_or_path=f"{MODEL_PATH_T}/t5-small-finetuned-squadv2-finetuned-NLB-QA-{YEAR}-full_combined",
    input_converter=_BartEli5Converter(),
    use_gpu=True,
)
pipe = Pipeline()
pipe.add_node(component=retriever, name="Retriever", inputs=["Query"])
pipe.add_node(component=generator, name="Model", inputs=["Retriever"])

for row in generated_data["test"]:
    question = row["question"].strip()
    # Run inference before touching `results`, so an exception cannot leave the
    # accumulator with columns of different lengths.
    gotten_answer, gotten_context = inference_answer(pipe, question)
    results["Model"].append("t5-small")
    results["Question"].append(question)
    results["Ground Truth Context"].append(row["context"])
    results["Ground Truth Answer"].append(row["answers"]["text"][0])
    results["Retrieved Context"].append(gotten_context)
    results["Extracted/Generated Answer"].append(gotten_answer)

# Drop the model references and release cached GPU memory now that the run is done.
del generator
del pipe
torch.cuda.empty_cache()

In [11]:
# Turn the column-oriented accumulator into a single table (one row per model/question pair).
results_df = pd.DataFrame(data=results)

In [12]:
def get_answers_for_question(question):
    """Display every row of ``results_df`` whose "Question" equals ``question``.

    The rows are rendered with pandas' row and column display limits lifted, so
    nothing is truncated. Nothing is returned; the table is shown via ``display``.
    """
    matching_rows = results_df.loc[results_df["Question"] == question]
    with pd.option_context("display.max_rows", None, "display.max_columns", None):
        display(matching_rows)

In [13]:
# Compare what every model answered for one question from the test split.
get_answers_for_question(
    "How many modules did the 2022 ECB Climate Stress test exercise consist of?"
)

Unnamed: 0,Model,Question,Ground Truth Context,Ground Truth Answer,Retrieved Context,Extracted/Generated Answer
1,roberta,How many modules did the 2022 ECB Climate Stress test exercise consist of?,"was includedinto 2022 ECB Climate Stress test exercise, consisted of threemo...",three,The results of climate stress tests showed no material\nimpacts on the Group...,three
163,distilbert,How many modules did the 2022 ECB Climate Stress test exercise consist of?,"was includedinto 2022 ECB Climate Stress test exercise, consisted of threemo...",three,The results of climate stress tests showed no material\nimpacts on the Group...,three
325,t5-base,How many modules did the 2022 ECB Climate Stress test exercise consist of?,"was includedinto 2022 ECB Climate Stress test exercise, consisted of threemo...",three,The results of climate stress tests showed no material\nimpacts on the Group...,three
487,t5-small,How many modules did the 2022 ECB Climate Stress test exercise consist of?,"was includedinto 2022 ECB Climate Stress test exercise, consisted of threemo...",three,The results of climate stress tests showed no material\nimpacts on the Group...,three


In [14]:
# Show the complete results table with pandas' row and column display limits lifted.
with pd.option_context(
    "display.max_rows", None,
    "display.max_columns", None,
):
    display(results_df)

Unnamed: 0,Model,Question,Ground Truth Context,Ground Truth Answer,Retrieved Context,Extracted/Generated Answer
0,roberta,How much CO2 did we reduce by using electric vehicles instead of liquid fuels?,"siness purposes car sharing. Thus, we contributed toa reduction of 3,709 kg ...","3,709 kg",Heating\nMeasurement included consumption (converted to MWh) of various reso...,15%
1,roberta,How many modules did the 2022 ECB Climate Stress test exercise consist of?,"was includedinto 2022 ECB Climate Stress test exercise, consisted of threemo...",three,The results of climate stress tests showed no material\nimpacts on the Group...,three
2,roberta,How many behaviours were defined?,"ith several implementationinitiatives. As a result, 3 key values and 15 beha...",15,Set governance and different risk management\ntools enable adequate oversigh...,ESG risks
3,roberta,How much did the bank reduce the use of paper in 2022 compared to the previo...,"our initiatives,the bank managed to reduce the use of paper by upto 19% in 2...",19%,"In 2022, another step to achieving this goal has been made as\npaper usage (...",17%
4,roberta,What are the key strategic areas of operations of the Group in 2020?,ey strategic areas of operations of theGroup in 2020 continued to be Retail ...,"Retail Banking in Slovenia, Corporate Bankingin Slovenia, and Strategic Fore...",We want to take care of\nour home to the fullest and thus improve the qualit...,"Digitalisation, client centricity, and\ncost efficiency"
5,roberta,What percentage of NLB Group employees have a tertiary education level?,"onStructure of employees by education in the NLB GroupIn NLB Group, 70% of t...",70%,"Continuous on-job trainings, internal\nrotations, internal and external\nedu...",98%
6,roberta,In what year did the Bank sign a Contract of Guarantees with the Multilatera...,th theEuropean Bank for Reconstruction and Development EBRD and in 2020the C...,2020,"In 2020, the Bank faced the biggest strategic\nmilestone, when it became a s...",2020
7,roberta,What is the name of the Non-Financial Reporting Directive?,Investment Guarantee AgencyNACE Nomenclature of Economic ActivitiesNFRD The...,NFRD,"Upon taking\nthe position, any change and once every year, the members of\nt...",Statement of Independence
8,roberta,How many targets did the NLB Bank announce in 2021?,reaching the target? Pleasedisclose.The NLB Bank publicly announced four tar...,four,Activities and results in 2022\nNLB Group continued its focus on fulfilling ...,4
9,roberta,How many branches does NLB Group have in all markets where it operates?,B Group in numbers as at 31 December 2022 An extensive network of 440 branch...,440,NLB Group in numbers (as at 31 December 2022)\n• An extensive network of 440...,440


In [15]:
# Split the results by model and write one CSV file per model.
# The directory name follows YEAR instead of repeating the year as a literal.
output_dir = pl.Path(f"../../data/results/{YEAR}-combined")
# DataFrame.to_csv does not create missing parent directories, so make sure the
# target directory exists before writing.
output_dir.mkdir(parents=True, exist_ok=True)

# sort=False keeps the models in order of first appearance, like .unique() did.
for model, model_df in results_df.groupby("Model", sort=False):
    model_df.to_csv(output_dir / f"{model}-outputs.csv", index=False)