In [3]:
import torch
from transformers import DistilBertForQuestionAnswering, DistilBertTokenizer

In [4]:
model = DistilBertForQuestionAnswering.from_pretrained('distilbert-base-uncased')



Some weights of DistilBertForQuestionAnswering were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
model_path = './models/distilbert_qa_finetuned.pt'
model.load_state_dict(torch.load(model_path))

<All keys matched successfully>

In [6]:
model.eval()

DistilBertForQuestionAnswering(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
      

In [7]:
import pandas as pd

In [8]:
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
eval_data_path = 'datasets/squad_valid.parquet'  
eval_df = pd.read_parquet(eval_data_path)
eval_data = eval_df.to_dict(orient='records')

In [9]:
data = eval_df.iloc[1100]
print(data['context'])
print(data['question'])
print((data['answers']['text']))

The further decline of Byzantine state-of-affairs paved the road to a third attack in 1185, when a large Norman army invaded Dyrrachium, owing to the betrayal of high Byzantine officials. Some time later, Dyrrachium—one of the most important naval bases of the Adriatic—fell again to Byzantine hands.
What was the naval base called?
['Dyrrachium' 'Dyrrachium' 'Dyrrachium']


In [8]:
def evaluate_model(model, tokenizer, eval_data, device):
    em_total = 0
    f1_total = 0
    n = len(eval_data)

    print(eval_data)
    
    for item in eval_data:
        question = item['question']
        context = item['context']
        true_answers = item['answers']['text']  # Assuming true_answers is a list of strings
        
        # Tokenize inputs
        inputs = tokenizer(question, context, return_tensors='pt', truncation=True, padding=True)
        
        # Move inputs to GPU
        inputs = {key: val.to(device) for key, val in inputs.items()}
        
        with torch.no_grad():
            # Move model to GPU
            model = model.to(device)
            # Forward pass
            outputs = model(**inputs)
        
        start_scores = outputs.start_logits
        end_scores = outputs.end_logits
        
        # Move logits to CPU for decoding
        start_scores = start_scores.cpu()
        end_scores = end_scores.cpu()
        
        start_idx = torch.argmax(start_scores)
        end_idx = torch.argmax(end_scores)
        
        predicted_answer = tokenizer.decode(inputs['input_ids'][0][start_idx:end_idx+1], skip_special_tokens=True)
        
        # Initialize flags for EM and F1 calculation for each QA pair
        em_matched = False
        f1_matched = False
        
        # Check predicted answer against each true answer
        for true_answer in true_answers:
            true_answer = true_answer.lower()  # Convert true answer to lowercase
            
            # Calculate Exact Match (EM)
            if not em_matched:
                em = 1 if predicted_answer.strip().lower() == true_answer.strip().lower() else 0
                if em == 1:
                    em_matched = True
                    em_total += 1  # Count as 1 if any true answer matches
            
            # Calculate F1 Score
            predicted_tokens = set(predicted_answer.lower().split())  # Convert predicted answer tokens to lowercase set
            true_tokens = set(true_answer.lower().split())  # Convert true answer tokens to lowercase set
            
            if len(predicted_tokens) == 0 or len(true_tokens) == 0:
                continue  # Skip empty predictions or true answers
            
            common_tokens = predicted_tokens.intersection(true_tokens)
            
            if len(common_tokens) == 0:
                f1 = 0
            else:
                precision = len(common_tokens) / len(predicted_tokens)
                recall = len(common_tokens) / len(true_tokens)
                if precision + recall == 0:
                    f1 = 0
                else:
                    f1 = 2 * (precision * recall) / (precision + recall)
                f1_total += f1
                if f1 > 0:
                    f1_matched = True
        
        # If no match found for EM, count as 0
        if not em_matched:
            em_total += 0
        
        # If no match found for F1, count as 0
        if not f1_matched:
            f1_total += 0
    
    em_score = em_total / n
    f1_score = f1_total / n
    
    return em_score, f1_score


In [9]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
em_score, f1_score = evaluate_model(model, tokenizer, eval_data, device)

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
B

In [11]:
print(f'Exact Match (EM): {em_score:.2f}')
print(f'F1 Score: {f1_score:.2f}')

Exact Match (EM): 0.62
F1 Score: 2.17


In [12]:
from transformers import AutoTokenizer, AutoModelForQuestionAnswering

model_name = "distilbert-base-uncased-distilled-squad"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)

In [13]:
def predicted_answer(model, tokenizer, context, question, device):
    inputs = tokenizer(question, context, return_tensors='pt', truncation=True, padding=True, return_offsets_mapping=True)
    offset_mapping = inputs.pop('offset_mapping').cpu().numpy()[0]
    with torch.no_grad():
        outputs = model(**inputs)
    start_scores = outputs.start_logits
    end_scores = outputs.end_logits
    start_idx = torch.argmax(start_scores)
    end_idx = torch.argmax(end_scores)
    actual_start_idx = offset_mapping[start_idx][0]
    actual_end_idx = offset_mapping[end_idx][1]
    
    
    predicted_answer = tokenizer.decode(inputs['input_ids'][0][start_idx:end_idx+1], skip_special_tokens=True)

    return predicted_answer, actual_start_idx, actual_end_idx

In [14]:
context = """
Marshall Bruce Mathers III was born on October 17, 1972, in St. Joseph, Missouri, the only child of Marshall Bruce Mathers Jr. and Deborah Rae "Debbie" (née Nelson).[12][13] His mother nearly died during her 73-hour labor with him.[14] Eminem's parents were in a band called Daddy Warbucks, playing in Ramada Inns along the Dakotas–Montana border before they separated. His father abandoned his family when Eminem was a year and a half old, and Eminem was raised only by his mother, Debbie, in poverty.[12] His mother later had a son named Nathan "Nate" Kane Samara.[15] By age twelve, Eminem and his mother had moved several times and lived in several towns and cities in Missouri (including St. Joseph, Savannah, and Kansas City) before settling in Warren, Michigan, a suburb of Detroit.[16] Eminem frequently fought with his mother, whom a social worker described as having a "very suspicious, almost paranoid personality".[17] He wrote letters to his father, but Debbie said that they all came back marked "return to sender".[18]
When he was a child, a bully named D'Angelo Bailey severely injured Eminem's head in an assault,[19] an incident which Eminem later recounted (with comic exaggeration) on the song "Brain Damage". Debbie filed a lawsuit against the public school for this in 1982. The suit was dismissed the following year by a Macomb County, Michigan, judge, who said the schools were immune from lawsuits.[14] For much of his youth, Eminem and his mother lived in a working-class, primarily black, Detroit neighborhood. He and Debbie were one of three white households on their block, and Eminem was beaten several times by black youths.[18]
Eminem was interested in storytelling, aspiring to be a comic book artist before discovering hip hop.[20] He heard his first rap song ("Reckless", featuring Ice-T) on the Breakin' soundtrack, a gift from Debbie's half-brother, Ronnie Polkingharn. His uncle was close to the boy and later became a musical mentor to him. When Polkingharn committed suicide in 1991, Eminem stopped speaking publicly for days and did not attend his funeral.[18][21]
At age 14, Eminem began rapping with high-school friend Mike Ruby; they adopted the names "Manix" and "M&M", the latter evolving into "Eminem".[21][14] Eminem snuck into neighboring Osborn High School with friend and fellow rapper Proof for lunchroom freestyle rap battles.[22] On Saturdays, they attended open mic contests at the Hip-Hop Shop on West 7 Mile Road, considered "ground zero" for the Detroit rap scene.[18] Struggling to succeed in a predominantly black industry, Eminem was appreciated by underground hip hop audiences.[21][23][24] When he wrote verses, he wanted most of the words to rhyme; he wrote long words or phrases on paper and, underneath, worked on rhymes for each syllable. Although the words often made little sense, the drill helped Eminem practice sounds and rhymes.[25]
In 1987, Debbie allowed runaway Kimberly Anne "Kim" Scott to stay at their home. Several years later, Eminem began an on-and-off relationship with Scott.[14] After spending three years in ninth grade due to truancy and poor grades,[26] he dropped out of Lincoln High School at age 17. Although interested in English, Eminem never explored literature (preferring comic books) and he disliked math and social studies.[25] Eminem worked at several jobs to help his mother pay the bills. One of the jobs he had was with Little Caesar's Pizza in Warren.[27] He later said she often threw him out of the house anyway, often after taking most of his paycheck. When she left to play bingo, he would blast the stereo and write songs.[18]
"""

question = "Who injured eminem in his childhood"
model.to('cpu')
answer, start,end = predicted_answer(model, tokenizer, context, question, '')

In [15]:
print(answer)

d'angelo bailey
