In [27]:
import torch
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, AutoModelForQuestionAnswering, pipeline
import os

In [None]:

def load_roberta_squad2(model_name="deepset/roberta-base-squad2", save_dir="roberta_squad2"):
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForQuestionAnswering.from_pretrained(model_name)
        tokenizer.save_pretrained(save_dir)
        model.save_pretrained(save_dir)
    else:
        tokenizer = AutoTokenizer.from_pretrained(save_dir)
        model = AutoModelForQuestionAnswering.from_pretrained(save_dir)
    return tokenizer, model

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
tokenizer, model = load_roberta_squad2()

In [None]:
# data = [
#     {
#         "context": "The capital of France is Paris.",
#         "question": "What is the capital of France?",
#         "answer_text": "Paris",
#         "answer_start": 25
#     },
#     {
#         "context": "Python is a popular programming language created by Guido van Rossum.",
#         "question": "Who created Python?",
#         "answer_text": "Guido van Rossum",
#         "answer_start": 52
#     },
#     {
#         "context": "The Great Wall of China is visible from space.",
#         "question": "What is visible from space?",
#         "answer_text": "The Great Wall of China",
#         "answer_start": 0
#     },
#     {
#         "context": "Mount Everest is the highest mountain in the world.",
#         "question": "Which is the highest mountain in the world?",
#         "answer_text": "Mount Everest",
#         "answer_start": 0
#     },
#     {
#         "context": "The Pacific Ocean is the largest ocean on Earth.",
#         "question": "Which is the largest ocean on Earth?",
#         "answer_text": "Pacific Ocean",
#         "answer_start": 4
#     }
# ]

# df = pd.DataFrame(data)
# #df.to_csv("sample_qa.csv", index=False)

In [32]:
data = [
    {
        "context": "The objective of the 2022 marketing campaign was to increase customer retention by 15% through personalized email outreach and loyalty programs.",
        "question": "What was the objective of the 2022 marketing campaign?",
        "answer_text": "to increase customer retention by 15%",
        "answer_start": 42
    },
    {
        "context": "Tesla's long-term vision includes reducing global carbon emissions by accelerating the transition to electric vehicles and renewable energy solutions.",
        "question": "What is Tesla's long-term vision?",
        "answer_text": "reducing global carbon emissions by accelerating the transition to electric vehicles and renewable energy solutions",
        "answer_start": 34
    },
    {
        "context": "One of the primary goals of the merger between Company A and Company B was to expand their market share in Southeast Asia.",
        "question": "What was the goal of the merger between Company A and Company B?",
        "answer_text": "to expand their market share in Southeast Asia",
        "answer_start": 80
    },
    {
        "context": "The digital transformation initiative focused on migrating legacy systems to cloud infrastructure to improve scalability and reduce operational costs.",
        "question": "What was the focus of the digital transformation initiative?",
        "answer_text": "migrating legacy systems to cloud infrastructure",
        "answer_start": 49
    },
    {
        "context": "By implementing data-driven pricing strategies, the company aimed to improve profit margins while staying competitive in the market.",
        "question": "What was the company’s aim in implementing data-driven pricing strategies?",
        "answer_text": "to improve profit margins while staying competitive in the market",
        "answer_start": 58
    },
    {
        "context": "The product team launched a beta version of the software to gather user feedback and refine the features based on real-world usage.",
        "question": "Why did the product team launch a beta version of the software?",
        "answer_text": "to gather user feedback and refine the features based on real-world usage",
        "answer_start": 58
    },
    {
        "context": "To enhance operational efficiency, the logistics department automated the inventory tracking system using RFID technology.",
        "question": "How did the logistics department enhance operational efficiency?",
        "answer_text": "automated the inventory tracking system using RFID technology",
        "answer_start": 61
    },
    {
        "context": "The customer support team aimed to reduce average response time from 24 hours to under 6 hours by introducing AI-powered chatbots.",
        "question": "What was the customer support team’s objective regarding response time?",
        "answer_text": "reduce average response time from 24 hours to under 6 hours",
        "answer_start": 45
    },
    {
        "context": "To address declining sales, the company introduced a direct-to-consumer model which eliminated intermediaries and increased profit margins.",
        "question": "What model did the company introduce to address declining sales?",
        "answer_text": "direct-to-consumer model",
        "answer_start": 52
    },
    {
        "context": "The ESG report highlighted efforts in reducing carbon footprint, improving employee well-being, and promoting ethical sourcing practices.",
        "question": "What efforts were highlighted in the ESG report?",
        "answer_text": "reducing carbon footprint, improving employee well-being, and promoting ethical sourcing practices",
        "answer_start": 39
    },
    {
        "context": "Through predictive maintenance, the manufacturing unit minimized equipment downtime and significantly extended the lifespan of machinery.",
        "question": "How did predictive maintenance benefit the manufacturing unit?",
        "answer_text": "minimized equipment downtime and significantly extended the lifespan of machinery",
        "answer_start": 39
    },
    {
        "context": "By adopting agile methodologies, the development team was able to release product updates every two weeks and respond quickly to customer feedback.",
        "question": "What was the impact of adopting agile methodologies?",
        "answer_text": "release product updates every two weeks and respond quickly to customer feedback",
        "answer_start": 65
    },
    {
        "context": "The new HR policy introduced flexible work hours and remote work options to enhance employee satisfaction and attract top talent.",
        "question": "Why were flexible work hours and remote options introduced?",
        "answer_text": "to enhance employee satisfaction and attract top talent",
        "answer_start": 71
    },
    {
        "context": "During Q1, the finance department restructured its budgeting process to allow for more accurate forecasting and better resource allocation.",
        "question": "Why did the finance department restructure its budgeting process?",
        "answer_text": "to allow for more accurate forecasting and better resource allocation",
        "answer_start": 67
    },
    {
        "context": "The company reduced its churn rate by launching a proactive customer success program that provided tailored solutions to high-risk clients.",
        "question": "How did the company reduce its churn rate?",
        "answer_text": "by launching a proactive customer success program that provided tailored solutions to high-risk clients",
        "answer_start": 40
    },
    {
        "context": "To improve sustainability, the supply chain was redesigned to source raw materials locally, reducing transportation emissions and costs.",
        "question": "What changes were made to improve supply chain sustainability?",
        "answer_text": "source raw materials locally, reducing transportation emissions and costs",
        "answer_start": 62
    },
    {
        "context": "A key business outcome of the customer loyalty program was a 20% increase in repeat purchases within the first six months.",
        "question": "What was a key outcome of the customer loyalty program?",
        "answer_text": "20% increase in repeat purchases within the first six months",
        "answer_start": 63
    },
    {
        "context": "As part of its digital-first strategy, the bank developed a mobile app that allowed customers to open accounts and apply for loans entirely online.",
        "question": "How did the bank implement its digital-first strategy?",
        "answer_text": "developed a mobile app that allowed customers to open accounts and apply for loans entirely online",
        "answer_start": 55
    },
    {
        "context": "The objective of the annual leadership summit was to align senior executives on company priorities and foster cross-functional collaboration.",
        "question": "What was the objective of the annual leadership summit?",
        "answer_text": "to align senior executives on company priorities and foster cross-functional collaboration",
        "answer_start": 49
    },
    {
        "context": "The new branding strategy emphasized sustainability and innovation to better appeal to environmentally conscious consumers and younger demographics.",
        "question": "What did the new branding strategy emphasize?",
        "answer_text": "sustainability and innovation",
        "answer_start": 37
    }
]
df = pd.DataFrame(data)


In [21]:
display(df)

Unnamed: 0,context,question,answer_text,answer_start
0,The capital of France is Paris.,What is the capital of France?,Paris,25
1,Python is a popular programming language creat...,Who created Python?,Guido van Rossum,52
2,The Great Wall of China is visible from space.,What is visible from space?,The Great Wall of China,0
3,Mount Everest is the highest mountain in the w...,Which is the highest mountain in the world?,Mount Everest,0
4,The Pacific Ocean is the largest ocean on Earth.,Which is the largest ocean on Earth?,Pacific Ocean,4


In [None]:
# df.loc[1, "context"][df.loc[1, "answer_start"]:]

'Guido van Rossum.'

In [33]:
def qa_inference(tokenizer, model, context, question):
    inputs = tokenizer.encode_plus(question, context, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
        answer_start = torch.argmax(outputs.start_logits)
        answer_end = torch.argmax(outputs.end_logits) + 1
        answer = tokenizer.convert_tokens_to_string(
            tokenizer.convert_ids_to_tokens(inputs["input_ids"][0][answer_start:answer_end])
        )
    return answer

def pipeline_qa_inference(model_dir="roberta_squad2"):
    qa_pipe = pipeline("question-answering", model=model_dir, tokenizer=model_dir)
    return qa_pipe

qa_pipe = pipeline_qa_inference()

Device set to use mps:0


In [34]:
for idx, row in df.iterrows():
    pred_answer = qa_inference(tokenizer, model, row["context"], row["question"])
    df.loc[idx,"pred_answer"] = pred_answer

    result = qa_pipe({"context": row["context"], "question": row["question"]})
    df.loc[idx, "pipeline_pred_answer"] = result["answer"]

  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*ar

In [35]:
df

Unnamed: 0,context,question,answer_text,answer_start,pred_answer,pipeline_pred_answer
0,The objective of the 2022 marketing campaign w...,What was the objective of the 2022 marketing c...,to increase customer retention by 15%,42,to increase customer retention by 15%,to increase customer retention by 15%
1,Tesla's long-term vision includes reducing glo...,What is Tesla's long-term vision?,reducing global carbon emissions by accelerati...,34,reducing global carbon emissions by accelerat...,reducing global carbon emissions by accelerati...
2,One of the primary goals of the merger between...,What was the goal of the merger between Compan...,to expand their market share in Southeast Asia,80,to expand their market share in Southeast Asia,to expand their market share in Southeast Asia
3,The digital transformation initiative focused ...,What was the focus of the digital transformati...,migrating legacy systems to cloud infrastructure,49,migrating legacy systems to cloud infrastructure,migrating legacy systems to cloud infrastructure
4,By implementing data-driven pricing strategies...,What was the company’s aim in implementing dat...,to improve profit margins while staying compet...,58,improve profit margins,improve profit margins
5,The product team launched a beta version of th...,Why did the product team launch a beta version...,to gather user feedback and refine the feature...,58,to gather user feedback and refine the featur...,to gather user feedback and refine the feature...
6,"To enhance operational efficiency, the logisti...",How did the logistics department enhance opera...,automated the inventory tracking system using ...,61,automated the inventory tracking system using...,automated the inventory tracking system using ...
7,The customer support team aimed to reduce aver...,What was the customer support team’s objective...,reduce average response time from 24 hours to ...,45,reduce average response time from 24 hours to...,reduce average response time from 24 hours to ...
8,"To address declining sales, the company introd...",What model did the company introduce to addres...,direct-to-consumer model,52,direct-to-consumer,direct-to-consumer
9,The ESG report highlighted efforts in reducing...,What efforts were highlighted in the ESG report?,"reducing carbon footprint, improving employee ...",39,"reducing carbon footprint, improving employee...","reducing carbon footprint, improving employee ..."


In [38]:
df["pred_answer"]==df["pipeline_pred_answer"]

0     False
1     False
2     False
3     False
4     False
5     False
6     False
7     False
8     False
9     False
10    False
11    False
12    False
13    False
14    False
15    False
16    False
17    False
18    False
19    False
dtype: bool