In [3]:
# Reading passage used as the QA context.
# Source: https://www.ets.org/toefl_junior/prepare/standard_sample_questions/reading_comprehension/
# Adjacent string literals are concatenated by Python, so `context` is still one
# single-line string; each literal below holds one sentence (with its trailing space).
context = (
    "When another old cave is discovered in the south of France, it is not usually news. "
    "Rather, it is an ordinary event. "
    "Such discoveries are so frequent these days that hardly anybody pays heed to them. "
    "However, when the Lascaux cave complex was discovered in 1940, the world was amazed. "
    "Painted directly on its walls were hundreds of scenes showing how people lived thousands of years ago. "
    "The scenes show people hunting animals, such as bison or wild cats. "
    "Other images depict birds and, most noticeably, horses, which appear in more than 300 wall images, by far outnumbering all other animals. "
    "Early artists drawing these animals accomplished a monumental and difficult task. "
    "They did not limit themselves to the easily accessible walls but carried their painting materials to spaces that required climbing steep walls or crawling into narrow passages in the Lascaux complex. "
    "Unfortunately, the paintings have been exposed to the destructive action of water and temperature changes, which easily wear the images away. "
    "Because the Lascaux caves have many entrances, air movement has also damaged the images inside. "
    "Although they are not out in the open air, where natural light would have destroyed them long ago, many of the images have deteriorated and are barely recognizable. "
    "To prevent further damage, the site was closed to tourists in 1963, 23 years after it was discovered."
)
context

'When another old cave is discovered in the south of France, it is not usually news. Rather, it is an ordinary event. Such discoveries are so frequent these days that hardly anybody pays heed to them. However, when the Lascaux cave complex was discovered in 1940, the world was amazed. Painted directly on its walls were hundreds of scenes showing how people lived thousands of years ago. The scenes show people hunting animals, such as bison or wild cats. Other images depict birds and, most noticeably, horses, which appear in more than 300 wall images, by far outnumbering all other animals. Early artists drawing these animals accomplished a monumental and difficult task. They did not limit themselves to the easily accessible walls but carried their painting materials to spaces that required climbing steep walls or crawling into narrow passages in the Lascaux complex. Unfortunately, the paintings have been exposed to the destructive action of water and temperature changes, which easily wea

In [6]:
# Each sample question is paired with the reference answer it is expected to yield,
# so the model outputs further down can be compared against something concrete.
reference_answers = {
    "Which animals appear most often on the cave walls?":
        "horses",
    "Why was painting inside the Lascaux complex a difficult task?":
        "Many painting spaces were difficult to reach",
    "What does the passage say happened at the Lascaux caves in 1963?":
        "Visitors were prohibited from entering",
}

# Dicts keep insertion order, so the questions stay in the order written above.
questions = list(reference_answers)
questions

['Which animals appear most often on the cave walls?',
 'Why was painting inside the Lascaux complex a difficult task?',
 'What does the passage say happened at the Lascaux caves in 1963?']

In [16]:
# Load the baseline question-answering checkpoint.
# Other QA checkpoints can be browsed at:
# https://huggingface.co/models?pipeline_tag=question-answering&sort=downloads
# Requires the `transformers` package (pip install transformers).
from transformers import AutoModelForQuestionAnswering, AutoTokenizer

# One name for the checkpoint so the tokenizer and the model cannot drift apart.
checkpoint = "deepset/bert-base-cased-squad2"

# Tokenizer and pre-trained Q&A model are both fetched from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForQuestionAnswering.from_pretrained(checkpoint)

In [18]:
# Sanity check: encode the first question into token ids with the baseline tokenizer.
first_question = questions[0]
tokenizer.encode(first_question, padding=True, truncation=True)

[101, 5979, 3551, 2845, 1211, 1510, 1113, 1103, 5812, 2928, 136, 102]

In [20]:
from transformers import pipeline

In [21]:
# Wrap the baseline model and its tokenizer in a question-answering pipeline.
nlp = pipeline(
    task='question-answering',
    tokenizer=tokenizer,
    model=model,
)

In [32]:
# Ask the baseline pipeline the second question ("Why was painting ... difficult?").
nlp(question=questions[1], context=context)

{'answer': 'the paintings have been exposed to the destructive action of water and temperature changes',
 'end': 981,
 'score': 0.00807664729654789,
 'start': 891}

In [38]:
# Alternative model 1 (an ELECTRA-large SQuAD2 checkpoint, going by its name).
# The import repeats an earlier cell so this cell can also be run on its own.
from transformers import AutoModelForQuestionAnswering, AutoTokenizer

# Single source for the checkpoint id shared by tokenizer and model.
checkpoint_1 = "ahotrod/electra_large_discriminator_squad2_512"

tokenizer_1 = AutoTokenizer.from_pretrained(checkpoint_1)
model_1 = AutoModelForQuestionAnswering.from_pretrained(checkpoint_1)

Downloading tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

Downloading config.json:   0%|          | 0.00/475 [00:00<?, ?B/s]

Downloading vocab.txt:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.24G [00:00<?, ?B/s]

In [39]:
# Question-answering pipeline built on alternative model 1.
nlp_1 = pipeline(
    task='question-answering',
    tokenizer=tokenizer_1,
    model=model_1,
)

In [40]:
# Same question as for the baseline, now answered by alternative model 1.
nlp_1(question=questions[1], context=context)

{'answer': 'required climbing steep walls or crawling into narrow passages',
 'end': 851,
 'score': 0.0891437828540802,
 'start': 789}

In [34]:
# Alternative model 2 (a distilled BERT SQuAD checkpoint, going by its name).
# The import repeats an earlier cell so this cell can also be run on its own.
from transformers import AutoModelForQuestionAnswering, AutoTokenizer

# Single source for the checkpoint id shared by tokenizer and model.
checkpoint_2 = "distilbert-base-cased-distilled-squad"

tokenizer_2 = AutoTokenizer.from_pretrained(checkpoint_2)
model_2 = AutoModelForQuestionAnswering.from_pretrained(checkpoint_2)

Downloading tokenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading config.json:   0%|          | 0.00/473 [00:00<?, ?B/s]

Downloading vocab.txt:   0%|          | 0.00/208k [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/426k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/249M [00:00<?, ?B/s]

In [35]:
# Question-answering pipeline built on alternative model 2.
nlp_2 = pipeline(
    task='question-answering',
    tokenizer=tokenizer_2,
    model=model_2,
)

In [37]:
# Same question as for the baseline, now answered by alternative model 2.
nlp_2(question=questions[1], context=context)

{'answer': 'They did not limit themselves to the easily accessible walls',
 'end': 736,
 'score': 0.18797005712985992,
 'start': 676}