<a href="https://colab.research.google.com/github/FarhanDhanani/IBCAST/blob/main/TABLE_QA_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **ENVIRONMENT SETUP**

## Permission Flag for Sampling Tables

In [None]:
# Guard flag checked before sampleTables() is called: leave False to reuse the JSON file
# already saved under base_path, set True to sample again and overwrite it.
permToSampleTables = False

## Installation of Relevant Dependencies

In [None]:
# install datasets
%tensorflow_version 2.x
!pip install datasets
!pip install --upgrade langchain openai -q
#!pip install "pinecone-client[grpc]"
!pip install pinecone-client
!pip install transformers sentence_transformers torch-scatter
!pip install --upgrade protobuf

Colab only includes TensorFlow 2.x; %tensorflow_version has no effect.
Collecting datasets
  Downloading datasets-2.14.5-py3-none-any.whl (519 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m519.6/519.6 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.8,>=0.3.0 (from datasets)
  Downloading dill-0.3.7-py3-none-any.whl (115 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
Collecting xxhash (from datasets)
  Downloading xxhash-3.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting multiprocess (from datasets)
  Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0.0,>

## Required Imports

In [None]:
import json
import time
import torch
import pinecone
import pandas as pd
from tqdm.auto import tqdm
from datasets import load_dataset
from langchain.embeddings.openai import OpenAIEmbeddings

from sentence_transformers import SentenceTransformer
from transformers import pipeline, TapasTokenizer, TapasForQuestionAnswering

  from tqdm.autonotebook import tqdm


## Setup Google-Drive

In [None]:
import os

from google.colab import drive
from google.colab import files

# Mount Google Drive, then point base_path at the folder that holds every JSON/CSV
# artifact this notebook reads or writes.
mount_point = '/content/drive'
drive.mount(mount_point)

# Built in a single step: the previous step-by-step concatenation produced
# ".../My Drive//dataset/...". The trailing slash is kept on purpose because
# later cells build file paths as `base_path + fileName`.
base_path = mount_point + '/My Drive/dataset/TABLEQA/attempt2/'

# Make sure the folder exists so the first write does not fail on a fresh Drive.
os.makedirs(base_path, exist_ok=True)

Mounted at /content/drive


## Loading Dataset

In [None]:
# Load the FeTaQA dataset by its Hub id. Later cells read records as dataset['train'][i]
# and use their 'table_array', 'table_page_title', 'table_section_title', 'question',
# 'answer' and 'highlighted_cell_ids' fields.
dataset = load_dataset("DongfuTingle/FeTaQA")

Downloading readme:   0%|          | 0.00/663 [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/14.3M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.77M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/3.61M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

# **PROCESSING DATA-SETS**

## Sampling Records from the Loaded Data-Sets

In [None]:
def _buildTableChunks(record):
  """Turn one dataset record into its list of row, column and title chunks.

  record: mapping with 'table_array' (first row = headers, remaining rows = cells),
          'table_page_title' and 'table_section_title'.
  Returns a list of dicts: one per table row, one ``{column: [values]}`` per column,
  then ``{'table_page_title': ...}`` and ``{'table_section_title': ...}``.
  """
  tableArray = record['table_array']
  headers = tableArray[0]
  table = pd.DataFrame(tableArray[1:], columns=headers)

  # Renaming duplicate column names: the first occurrence keeps its name, the k-th
  # repeat becomes "<name> k" (e.g. "Party", "Party 1"). Without this, to_dict()
  # would silently collapse columns that share a header.
  seenCounts = {}
  uniqueHeaders = []
  for name in headers:
    occurrence = seenCounts.get(name, 0)
    uniqueHeaders.append(name if occurrence == 0 else f"{name} {occurrence}")
    seenCounts[name] = occurrence + 1
  table.columns = uniqueHeaders

  # Creating rows & cols chunks
  allChunks = list(table.to_dict('records'))
  for colName, colValues in table.to_dict('list').items():
    allChunks.append({colName: colValues})

  allChunks.append({'table_page_title': record['table_page_title']})
  allChunks.append({'table_section_title': record['table_section_title']})
  return allChunks


def sampleTables(datasetFileName, totalRecordsToSample):
  """Chunk the first ``totalRecordsToSample`` 'train' records and save them as JSON.

  datasetFileName: file name, appended to the global ``base_path``, to write to.
  totalRecordsToSample: number of leading records of ``dataset['train']`` to process.

  The file holds a list with one single-key dict per record: the key is
  "<table_page_title> <table_section_title>" and the value is that record's chunk list.
  Returns None; an existing file with the same name is overwritten.
  """
  trainSplit = dataset['train']
  processedDataSet = []
  for index in range(totalRecordsToSample):
    # Decode the row once instead of once per field access.
    record = trainSplit[index]
    recordName = record['table_page_title'] + " " + record['table_section_title']
    processedDataSet.append({recordName: _buildTableChunks(record)})

  # Saving extracted rows & cols chunks
  with open(base_path+datasetFileName, "w", encoding='utf8') as outfile:
    outfile.write(json.dumps(processedDataSet, indent = 4, ensure_ascii=False))

## Pre-Processing Sampled Records

In [None]:
def _preprocess_tables(datasetFileName):
  """Flatten the saved table chunks into a list of JSON strings.

  Reads ``base_path + datasetFileName``, tags every chunk with a "record" key holding
  the name of the table entry it came from, and returns each chunk serialized with
  ``json.dumps(..., indent=4, ensure_ascii=False)``, in file order.
  """
  sourcePath = base_path + datasetFileName
  with open(sourcePath, 'r', encoding='utf8') as handle:
    sampledTables = json.load(handle)

  serialized = []
  for tableEntry in sampledTables:
    for recordName, recordChunks in tableEntry.items():
      for piece in recordChunks:
        # The tag lets a retrieved chunk be traced back to its table.
        piece["record"] = recordName
        serialized.append(json.dumps(piece, indent=4, ensure_ascii=False))
  return serialized

## Saving Pre-Processed Sampled Records

In [None]:
# File (under base_path) that stores the sampled table chunks, and how many leading
# 'train' records are sampled, queried and evaluated.
datasetFileName = "TQA.json"
totalRecordsToSample = 50

# Sampling rewrites the file, so it only runs when explicitly permitted.
if permToSampleTables:
  sampleTables(datasetFileName, totalRecordsToSample)

# **GENERATING EMBEDDINGS**

## Loading Open-AI Modules

In [None]:
import os
from getpass import getpass

import openai

# Never keep a key in the notebook source: read it from the environment, or prompt for it.
openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")

# Connectivity check: list the engines visible to this key, but show only their ids
# instead of dumping the full JSON payload into the notebook output.
sorted(engine["id"] for engine in openai.Engine.list()["data"])

<OpenAIObject list at 0x7e2bca120ae0> JSON: {
  "object": "list",
  "data": [
    {
      "object": "engine",
      "id": "davinci",
      "ready": true,
      "owner": "openai",
      "permissions": null,
      "created": null
    },
    {
      "object": "engine",
      "id": "gpt-3.5-turbo-16k-0613",
      "ready": true,
      "owner": "openai",
      "permissions": null,
      "created": null
    },
    {
      "object": "engine",
      "id": "text-davinci-001",
      "ready": true,
      "owner": "openai",
      "permissions": null,
      "created": null
    },
    {
      "object": "engine",
      "id": "text-search-curie-query-001",
      "ready": true,
      "owner": "openai-dev",
      "permissions": null,
      "created": null
    },
    {
      "object": "engine",
      "id": "babbage",
      "ready": true,
      "owner": "openai",
      "permissions": null,
      "created": null
    },
    {
      "object": "engine",
      "id": "text-babbage-001",
      "ready": true,
    

## Initializing Pine-Cone

In [None]:
import os
from getpass import getpass

# Credentials come from the environment (or an interactive prompt), never from the notebook source.
pinecone.init(
    api_key=os.environ.get("PINECONE_API_KEY") or getpass("Pinecone API key: "),
    environment=os.environ.get("PINECONE_ENVIRONMENT") or input("Pinecone environment: ")
)

## Loading Text Embedding Models

In [None]:
# Embedding model id, reused by every openai.Embedding.create call in this notebook.
MODEL = "text-embedding-ada-002"

# One throw-away request; its response is inspected below to learn the vector length.
probe_texts = ["checking emeding"]
embeder = openai.Embedding.create(input=probe_texts, engine=MODEL)

## Evaluating Functionality with Dummy Records

In [None]:
# Pull the raw vectors out of the probe response.
embeds = [record['embedding'] for record in embeder['data']]

# Preview only the first few components; displaying the whole vector floods the output.
embeds[0][:5]

[[-0.034399937838315964,
  0.0006195668247528374,
  -0.008419615216553211,
  -0.03783416002988815,
  -0.015453997999429703,
  0.019869426265358925,
  -0.03685295209288597,
  -0.031138869002461433,
  0.0016720187850296497,
  -0.010115082375705242,
  0.005241516977548599,
  0.01792144402861595,
  0.00796508602797985,
  0.015468427911400795,
  -0.0301576629281044,
  -0.001481729676015675,
  0.030013367533683777,
  0.012669104151427746,
  -0.015555004589259624,
  -0.018974797800183296,
  0.004588582087308168,
  0.0027398020029067993,
  -0.013982188887894154,
  -0.009444110095500946,
  -0.004711233079433441,
  -0.01942211203277111,
  0.004202593117952347,
  -0.022697608917951584,
  -0.0301576629281044,
  -0.0003600612108130008,
  0.0051765842363238335,
  -0.012380514293909073,
  -0.011687898077070713,
  -0.03099457360804081,
  -0.03592946380376816,
  -0.010887060314416885,
  -0.0023520090617239475,
  -0.026247266680002213,
  0.047905951738357544,
  0.004617440979927778,
  0.0108149126172065

In [None]:
# Length of one embedding vector; the same expression is passed as `dimension`
# when the Pinecone index is created.
len(embeds[0])

1536

## Initializing Pine-Cone Index

In [None]:
index_name = "table-qa-openai-2"

# Create the index only when it does not exist yet; its dimension is taken from the
# length of the probe embedding.
existing_indexes = pinecone.list_indexes()
if index_name not in existing_indexes:
    pinecone.create_index(index_name, dimension=len(embeds[0]), metric="cosine")

# Handle used by the upsert and query cells.
index = pinecone.Index(index_name)

## Storing Generated Embeddings in Pine-Cone Index

In [None]:
batch_size = 4
processed_tables = _preprocess_tables(datasetFileName)
total_chunks = len(processed_tables)

for i in tqdm(range(0, total_chunks, batch_size)):
    # Clamp the final batch to the end of the list.
    i_end = min(i + batch_size, total_chunks)
    batch = processed_tables[i:i_end]

    # Embed all serialized chunks of this batch in a single request.
    res = openai.Embedding.create(input=batch, engine=MODEL)
    embeds = [record['embedding'] for record in res['data']]

    # Vector id = position of the chunk in processed_tables; the chunk text itself
    # is stored as metadata so it can be returned by queries.
    ids = [str(idx) for idx in range(i, i_end)]
    meta = [{'chunk': chunk} for chunk in batch]

    _ = index.upsert(vectors=list(zip(ids, embeds, meta)))
    time.sleep(2)

# check that we have all vectors in index
index.describe_index_stats()

  0%|          | 0/279 [00:00<?, ?it/s]

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1113}},
 'total_vector_count': 1113}

## Querying Pine-Cone Index


---



Retrieving relevant records from the Pine-Cone Index for the sampled queries and storing the results back in Google-Drive.

In [None]:
# File (under base_path) where the per-question Pinecone query results are saved.
retreivedResultsFileName = "retreivedResults.json"

In [None]:
responses = []
for idx in tqdm(range(totalRecordsToSample)):
    query = dataset['train'][idx]['question']

    # Embed the question with the same model that produced the stored chunk vectors.
    embedding_response = openai.Embedding.create(input=query, engine=MODEL)
    xq = embedding_response['data'][0]['embedding']

    # Fetch the five nearest chunks together with their stored metadata.
    res = index.query([xq], top_k=5, include_metadata=True)
    responses.append({'question': query, 'retrievedResults': res.to_dict()})
    time.sleep(2)

# Persist the results so later cells can run without querying Pinecone again.
with open(base_path+retreivedResultsFileName, "w", encoding='utf8') as outfile:
    outfile.write(json.dumps(responses, indent = 4, ensure_ascii=False))


  0%|          | 0/50 [00:00<?, ?it/s]

# **QUESTION ANSWERING**

## Generating Dialogue-Like Answers with Text Davinci


---



Generating Dialogue-Like Answers for the Sample Queries and the Relevant Retrieved Records from the Pine-Cone Index

In [None]:
def answer_question(
    extractedRecords,
    question,
    model="text-davinci-003",
    max_len=1800,
    size="ada",
    debug=False,
    max_tokens=500,
    stop_sequence=None
):

    """
    Answer a question using the chunks retrieved for it as context.

    extractedRecords: query result dict; each entry of extractedRecords['matches']
                      must carry the chunk text at ['metadata']['chunk'].
    question:         the question to answer.
    model:            completion model name passed to openai.Completion.create.
    max_len, size:    accepted for backward compatibility; not used by this function.
    debug:            when True, print the context chunks before calling the model.
    max_tokens:       completion length limit passed to the API.
    stop_sequence:    stop sequence(s) passed to the API.

    Returns the stripped text of the first completion choice, or "" when building
    or sending the request raises (the exception is printed, not re-raised).
    Raises KeyError/TypeError if extractedRecords does not have the expected shape.
    """
    contextDocs = [ rec['metadata']['chunk'] for rec in extractedRecords['matches'] ]

    # If debug, print the context that will be handed to the model
    if debug:
        print(contextDocs)
        print("\n\n")

    try:
        # Create a completion using the question and context
        response = openai.Completion.create(
            prompt=f"Answer the question based on the context below \n\nContext: {contextDocs}\n\n---\n\nQuestion: {question}\nAnswer:",
            temperature=0,
            max_tokens=max_tokens,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
            stop=stop_sequence,
            model=model,
        )
        return response["choices"][0]["text"].strip()
    except Exception as e:
        # Best effort: report the failure and return an empty answer so a batch run keeps going.
        print(e)
        return ""

## Storing Generated Answers


---


Saving the Generated Dialogue-Like Answers for the Sampled Queries from the Pine-Cone Index

In [None]:
# Reload the saved retrieval results so answers can be generated without re-querying Pinecone.
retrievedResultsPath = base_path + retreivedResultsFileName
with open(retrievedResultsPath, 'r', encoding='utf8') as infile:
    retreivedResults = json.load(infile)

In [None]:
outputs = []
for results in tqdm(retreivedResults):
  question = results['question']
  # debug is left at its default (False): with debug=True every retrieved context is
  # printed and the notebook output is flooded.
  gen_ans = answer_question(results['retrievedResults'], question)
  outputs.append({'question': question, 'gen_ans': gen_ans})
  # Pause between requests.
  time.sleep(2)

  0%|          | 0/50 [00:00<?, ?it/s]

['{\n    "Candidate": [\n        "James R. Thompson (incumbent)",\n        "Adlai Stevenson III",\n        "Bea Armstrong",\n        "John E. Roche",\n        "write-ins",\n        "Majority",\n        "Turnout",\n        "Republican hold"\n    ],\n    "record": "1982 Illinois gubernatorial election Results"\n}', '{\n    "table_page_title": "1982 Illinois gubernatorial election",\n    "record": "1982 Illinois gubernatorial election Results"\n}', '{\n    "Party": [\n        "-",\n        "-",\n        "-",\n        "-",\n        "-",\n        "Majority",\n        "Turnout",\n        "-"\n    ],\n    "record": "1982 Illinois gubernatorial election Results"\n}', '{\n    "Votes": [\n        "1,816,101",\n        "1,811,027",\n        "24,417",\n        "22,001",\n        "161",\n        "5,074",\n        "3,673,707",\n        "Swing"\n    ],\n    "record": "1982 Illinois gubernatorial election Results"\n}', '{\n    "Party": "Majority",\n    "Party 1": "Majority",\n    "Candidate": "Majorit

In [None]:
# File (under base_path) where the generated answers are saved. The spelling of the
# file name is kept as is because later cells reopen the file by this literal name.
sentenceLikeOutputsFileName = "senteceLikeOutputs.json"

In [None]:
# Written the same way as the other JSON artifacts (UTF-8, non-ASCII characters kept
# readable); the cells that reload this file already open it with encoding='utf8'.
with open(base_path+sentenceLikeOutputsFileName, "w", encoding='utf8') as outfile:
    outfile.write(json.dumps(outputs, indent = 4, ensure_ascii=False))

# EVALUATIONS

## Testing Pine-Cone


---



Assessing the quality of the retrieved records against the sampled queries from the Pine-Cone Index.

In [None]:
def GetUniqueDictionaries(listofDicts):
    """Return the dictionaries of ``listofDicts`` with duplicates removed.

    Two dictionaries are duplicates when they hold the same key/value pairs,
    regardless of key order. The first occurrence is kept and the input order
    is preserved; the returned list references the original dict objects.

    Each dictionary is fingerprinted as a sorted tuple of ``(repr(key), repr(value))``
    pairs. Keeping the pairs separate (instead of gluing them into one "_"-joined
    string) means distinct dictionaries such as ``{"a": "b_c_d"}`` and
    ``{"a": "b", "c": "d"}`` no longer collide, ``repr`` keeps ``1`` and ``"1"``
    apart, and sorting strings works even when keys have mixed types.

    Values may be unhashable (lists, dicts). Nested dict values are compared by
    their ``repr``, so two equal nested dicts with different insertion order are
    still treated as different.
    """
    listOfUniqueDicts = []
    seenFingerprints = set()
    for dictObj in listofDicts:
        fingerprint = tuple(sorted(
            (repr(key), repr(value)) for key, value in dictObj.items()
        ))
        if fingerprint not in seenFingerprints:
            # First time this exact content is seen: keep the dictionary.
            seenFingerprints.add(fingerprint)
            listOfUniqueDicts.append(dictObj)

    return listOfUniqueDicts

In [None]:
import warnings
# NOTE(review): this silences every warning for the rest of the session - presumably
# including any pandas emits for tables whose headers repeat. Confirm that is intended.
warnings.filterwarnings("ignore")


def _isRelevantChunk(groundTruth, retrievedChunk):
  """True when a retrieved chunk is exactly the row or the column of a highlighted cell.

  The chunk must contain the cell's label in one of its values AND be equal,
  key for key, to the ground-truth row chunk or column chunk.
  """
  labelFound = any(groundTruth['label'] in value for value in retrievedChunk.values())
  if not labelFound:
    return False
  chunkItems = retrievedChunk.items()
  return groundTruth['row'].items() == chunkItems or groundTruth['col'].items() == chunkItems


with open(base_path+'retreivedResults.json', 'r', encoding='utf8') as infile:
  result = json.load(infile)

avg_recall = 0
avg_precission = 0

# The loop variable is deliberately not called `index`: that name holds the Pinecone
# index handle, and overwriting it breaks any later re-run of the query cells.
for sampleIdx in range(50):
  sample = dataset['train'][sampleIdx]
  cell_ids = sample['highlighted_cell_ids']
  headers = sample['table_array'][0]
  content = sample['table_array'][1:]
  record_sub = sample['table_page_title'] + " " + sample['table_section_title']
  table = pd.DataFrame(content, columns=headers)

  # NOTE(review): duplicate headers are not renamed here, whereas the indexed chunks were
  # built from a table whose repeated headers got a numeric suffix ("Party 1"). Rows and
  # columns of such tables may therefore never compare equal - confirm whether intended.
  tableRows = table.to_dict('records')
  tableCols = table.to_dict('list')

  # Ground truth: for every highlighted cell, its value plus the row chunk and the
  # column chunk that contain it, tagged with the record name like the stored chunks.
  ground_truth_ans = []
  for cell_id in cell_ids:
    # Row ids are shifted by one to skip the header row of table_array.
    # NOTE(review): a row id of 0 would wrap around to the last data row - confirm
    # that header cells are never highlighted.
    rowIndex = cell_id[0]-1
    colIndex = cell_id[1]
    colName = headers[colIndex]
    ground_truth_ans.append({
        "label": table.iloc[rowIndex,colIndex],
        "row": dict(tableRows[rowIndex], record=record_sub),
        "col": {colName: tableCols[colName], 'record': record_sub},
    })

  # Parse every retrieved chunk once; both metrics below reuse the parsed form.
  retrieved_chunks = [
      json.loads(record['metadata']['chunk'])
      for record in result[sampleIdx]['retrievedResults']['matches']
  ]
  num_of_retrieved_docs = len(retrieved_chunks)
  required_matches = len(ground_truth_ans)

  # Calculation for Precision: retrieved chunks that are the row/column of at least
  # one highlighted cell.
  good_chunks = [
      chunk for chunk in retrieved_chunks
      if any(_isRelevantChunk(groundTruth, chunk) for groundTruth in ground_truth_ans)
  ]

  # Calculation for Recall: highlighted cells whose row/column is among the retrieved
  # chunks. Every (cell, chunk) pair is judged on its own.
  found_matches = sum(
      1 for groundTruth in ground_truth_ans
      if any(_isRelevantChunk(groundTruth, chunk) for chunk in retrieved_chunks)
  )

  # Guard both ratios: a sample may have no highlighted cells or no retrieved chunks.
  recall = found_matches / required_matches * 100 if required_matches else 0.0
  precission = (
      len(GetUniqueDictionaries(good_chunks)) / num_of_retrieved_docs * 100
      if num_of_retrieved_docs else 0.0
  )
  avg_recall += recall
  avg_precission += precission
  print(recall, precission)

100.0 40.0
100.0 40.0
83.33333333333334 60.0
25.0 20.0
60.0 40.0
100.0 20.0
90.0 80.0
0.0 0.0
66.66666666666666 60.0
0.0 0.0
100.0 40.0
0.0 0.0
57.14285714285714 20.0
47.82608695652174 20.0
50.0 20.0
100.0 60.0
0.0 0.0
100.0 40.0
100.0 40.0
0.0 0.0
50.0 20.0
44.44444444444444 20.0
81.81818181818183 100.0
100.0 40.0
100.0 80.0
0.0 0.0
66.66666666666666 40.0
0.0 0.0
33.33333333333333 80.0
0.0 0.0
100.0 40.0
0.0 0.0
0.0 0.0
100.0 60.0
100.0 100.0
71.42857142857143 40.0
33.33333333333333 20.0
50.0 20.0
100.0 20.0
100.0 80.0
33.33333333333333 20.0
57.14285714285714 20.0
81.81818181818183 80.0
100.0 80.0
80.0 40.0
100.0 40.0
50.0 20.0
66.66666666666666 40.0
50.0 40.0
100.0 40.0


In [None]:
# avg_recall / avg_precission hold per-sample sums; 50 is the number of evaluated samples.
mean_recall = avg_recall / 50
mean_precission = avg_precission / 50
print("average_recall =", mean_recall, " average_precission = ", mean_precission)

average_recall = 60.599090281698984  average_precission =  35.6


## Testing Open AI Text Davinci


---


Assessing the quality of the generated dialogue-like answers against the sampled queries.

In [None]:
from nltk.translate.bleu_score import sentence_bleu
from datasets import load_metric

In [None]:
# Reload the generated answers from Drive so the evaluation can run without regenerating them.
generatedAnswersPath = base_path + 'senteceLikeOutputs.json'
with open(generatedAnswersPath, 'r', encoding='utf8') as infile:
    outputs = json.load(infile)

In [None]:
eval_outputs = []

# The loop variable is deliberately not called `index`, so the Pinecone index handle
# of that name is not overwritten. Each dataset row is decoded once.
for sampleIdx in range(50):
  sample = dataset['train'][sampleIdx]
  eval_outputs.append({
      "question": sample['question'],
      "expected_ans": sample['answer'],
      # Generated answers are aligned with the dataset by position.
      "generated_ans": outputs[sampleIdx]['gen_ans']
  })

with open(base_path+"eval_outputs.json", "w", encoding='utf8') as outfile:
    outfile.write(json.dumps(eval_outputs, indent = 4, ensure_ascii=False))

In [None]:
expected_answers = []
generated_answers = []
questions = []

# The loop variable is deliberately not called `index`, so the Pinecone index handle
# of that name is not overwritten. Each dataset row is decoded once.
for sampleIdx in range(50):
  sample = dataset['train'][sampleIdx]
  questions.append(sample['question'])
  expected_answers.append(sample['answer'])
  # Generated answers are aligned with the dataset by position.
  generated_answers.append(outputs[sampleIdx]['gen_ans'])

# One CSV row per question: reference answer next to the generated one.
headers = ["Question", "Expected Answer", "Generated Answer"]
content = list(zip(questions, expected_answers, generated_answers))
table = pd.DataFrame(content, columns=headers)
table.to_csv(base_path+'outputs.csv', encoding='utf-8', index = False)

## Performance without RETRO Architecture



---



Assessing the performance of the Open AI Text-Davinci model when the complete table records, together with their associated page and section titles, are supplied directly as context (no retrieval step).

In [None]:
totalRecordsToSample = 50

outputs_without_retro = []

# The loop variable is deliberately not called `index`, so the Pinecone index handle
# of that name is not overwritten.
for sampleIdx in tqdm(range(totalRecordsToSample)):
    # Decode the dataset row once instead of once per field access.
    sample = dataset['train'][sampleIdx]

    # Loading data-frame
    headers = sample['table_array'][0]
    content = sample['table_array'][1:]
    table = pd.DataFrame(content, columns=headers)

    # Renaming duplicate column names (first occurrence keeps its name, repeats get " 1", " 2", ...)
    s = pd.Series(table.columns)
    table.columns = table.columns+((" "+s.groupby(s).cumcount().astype(str)).replace(' 0','').astype(str))

    # Creating rows & cols chunks: the whole table is used as context, no retrieval.
    allChunks = list(table.to_dict('records'))
    for col, colChunk in table.to_dict('list').items():
        allChunks.append({col:colChunk})

    allChunks.append({'table_page_title' :sample['table_page_title']})
    allChunks.append({'table_section_title' :sample['table_section_title']})

    contextDocs = allChunks
    question = sample['question']

    try:
        # Create a completion using the question and context
        response = openai.Completion.create(
            model="text-davinci-003",
            prompt=f"Answer the question based on the context below \n\nContext: {contextDocs}\n\n---\n\nQuestion: {question}\nAnswer:",
            temperature=0,
            # Same limit as answer_question(); without it the API default of 16 tokens
            # cuts every answer short and makes the comparison unfair.
            max_tokens=500,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
            stop=None,
        )
        gen_ans_without_retro = response["choices"][0]["text"].strip()
    except Exception as e:
        print(e)
        print('error')
        # Keep a placeholder so the list stays aligned with the dataset positions.
        gen_ans_without_retro = "No Response"

    output_without_retro = {'question': question, 'gen_ans': gen_ans_without_retro}
    outputs_without_retro.append(output_without_retro)

    time.sleep(20)

  0%|          | 0/50 [00:00<?, ?it/s]

This model's maximum context length is 4097 tokens, however you requested 4524 tokens (4508 in your prompt; 16 for the completion). Please reduce your prompt; or completion length.
error


In [None]:
# The answer for the 50th question (index 49) is missing: its prompt exceeded the
# model's maximum context length, so that request failed.
outputs_without_retro

[{'question': 'Who won the 1982 Illinois gubernatorial election, and how many votes was the margin?',
  'gen_ans': 'James R. Thompson (incumbent) won the 1982 Illinois gubernatorial election with'},
 {'question': 'How did Michael and Mario Andretti do?',
  'gen_ans': 'Michael Andretti finished in 6th place and Mario Andretti finished in 32nd'},
 {'question': 'How many copies did "Pleasure" sell in 1998 alone, and how long was it the best selling album in Japan?',
  'gen_ans': '"Pleasure" sold 5,136,000 copies in 1998 and it'},
 {'question': 'How many passengers can that plane hold?',
  'gen_ans': 'The Airbus A320-200 can hold 180 passengers.'},
 {'question': 'When and in what play did Platt appear at the Music Box Theatre?',
  'gen_ans': 'In 2016–2017, Platt appeared in the play Dear Evan Hansen at the'},
 {'question': 'What are the download rates of EUTRAN?',
  'gen_ans': 'The download rates of EUTRAN range from 0.68 Mbit/'},
 {'question': 'What roles did Melina Kanakaredes play in th

In [None]:
sentenceLikeOutputsWithOutRETROFileName = "senteceLikeOutputsWithOutRetro.json"
# Written the same way as the other JSON artifacts (UTF-8, non-ASCII characters kept
# readable); the cell that reloads this file opens it with encoding='utf8'.
with open(base_path+sentenceLikeOutputsWithOutRETROFileName, "w", encoding='utf8') as outfile:
    outfile.write(json.dumps(outputs_without_retro, indent = 4, ensure_ascii=False))

In [None]:
with open(base_path+'senteceLikeOutputs.json', 'r', encoding='utf8') as infile:
  outputs = json.load(infile)
with open(base_path+'senteceLikeOutputsWithOutRetro.json', 'r', encoding='utf8') as infile:
  outputs_without_retro = json.load(infile)

# The no-retrieval list may be shorter than the sample (failed requests may have no
# entry), so it cannot be aligned by position; answers are looked up by question text.
# NOTE(review): assumes questions are unique within the sampled records - confirm.
answers_without_retro = {entry['question']: entry['gen_ans'] for entry in outputs_without_retro}

questions = []
expected_answers = []
generated_answers = []
generated_answers_without_retro = []

# The loop variable is deliberately not called `index`, so the Pinecone index handle
# of that name is not overwritten.
for sampleIdx in tqdm(range(totalRecordsToSample)):
  sample = dataset['train'][sampleIdx]
  question = sample['question']

  questions.append(question)
  expected_answers.append(sample['answer'])
  generated_answers.append(outputs[sampleIdx]['gen_ans'])
  generated_answers_without_retro.append(answers_without_retro.get(question, "No Response"))

# Side-by-side view: reference answer, answer from the full table, answer from retrieved chunks.
headers = ["Question", "Expected Answer", "Generated Answer Without RETRO", "Generated Answer With RETRO"]
content = list(zip(questions, expected_answers, generated_answers_without_retro, generated_answers))
table = pd.DataFrame(content, columns=headers)
table

  0%|          | 0/50 [00:00<?, ?it/s]

Unnamed: 0,Question,Expected Answer,Generated Answer Without RETRO,Generated Answer With RETRO
0,Who won the 1982 Illinois gubernatorial electi...,Thompson prevailed in the 1982 Illinois gubern...,James R. Thompson (incumbent) won the 1982 Ill...,James R. Thompson (incumbent) won the 1982 Ill...
1,How did Michael and Mario Andretti do?,Michael Andretti finished with a run of 214.52...,Michael Andretti finished in 6th place and Mar...,Michael Andretti finished in 6th place and Mar...
2,"How many copies did ""Pleasure"" sell in 1998 al...","B'z The Best ""Pleasure"" sold more than 5 milli...","""Pleasure"" sold 5,136,000 copies in 1998 and it","""Pleasure"" sold 5,136,000 copies in 1998 and i..."
3,How many passengers can that plane hold?,The Tigerair Australia fleet consists of the f...,The Airbus A320-200 can hold 180 passengers.,The plane can hold a maximum of 180 passengers.
4,When and in what play did Platt appear at the ...,"In 2016 and 2017, Platt played in Dear Evan Ha...","In 2016–2017, Platt appeared in the play Dear ...","Platt appeared in the play ""Dear Evan Hansen"" ..."
5,What are the download rates of EUTRAN?,EUTRAN has download rates of 299.6 Mbit/s and ...,The download rates of EUTRAN range from 0.68 M...,The download rates of EUTRAN range from 0.68 M...
6,What roles did Melina Kanakaredes play in the ...,Melina Kanakaredes played in television dramas...,Melina Kanakaredes played the lead role of Dr....,Melina Kanakaredes played the lead role of Dr....
7,What two teams did Austin Fyten play for durin...,"In the 2015–16 season, Austin Fyten spent with...",Austin Fyten played for the South Carolina Sti...,Austin Fyten played for the South Carolina Sti...
8,What countries did the World U-17 Hockey Chall...,The World U-17 Hockey Challenge attracted U-17...,"After 2016, the World U-17 Hockey Challenge at...","Sweden, Canada Black, Russia, United States, C..."
9,Which club did Renato Hyshmeri play with in th...,"After playing with Partizani, Hyshmeri played ...",Tirana,Bylis Ballsh
