In [46]:
!pip install transformers datasets



In [47]:
import numpy as np
import pandas as pd
import json
from transformers import pipeline
from datasets import Dataset


In [48]:
import numbers # Used to check for any numeric type (int, float, etc.)

def scores(prediction, ground_truth):
    """
    Calculates exact match, precision, recall, and F1 score for either
    numeric (float, int) or string inputs.
    """
    if isinstance(ground_truth, numbers.Number) and isinstance(prediction, numbers.Number):
        if prediction == ground_truth:
            exact_match = 1.0
            precision = 1.0
            recall = 1.0
            f1_score = 1.0
        else:
            exact_match = 0.0
            precision = 0.0
            recall = 0.0
            f1_score = 0.0

        return {
            "exact_match": exact_match, 
            "precision": precision, 
            "recall": recall, 
            "f1_score": f1_score
        }

    elif isinstance(ground_truth, str) and isinstance(prediction, str):
        exact_match = 1.0 if ground_truth.strip() == prediction.strip() else 0.0

        truth_tokens = set(ground_truth.strip().split())
        pred_tokens = set(prediction.strip().split())

       
        if not truth_tokens or not pred_tokens:
            
            if not truth_tokens and not pred_tokens:
                 return {"exact_match": 1.0, "precision": 1.0, "recall": 1.0, "f1_score": 1.0}
           
            return {"exact_match": 0.0, "precision": 0.0, "recall": 0.0, "f1_score": 0.0}

        common_tokens = pred_tokens.intersection(truth_tokens)

        
        precision = len(common_tokens) / len(pred_tokens)

        
        recall = len(common_tokens) / len(truth_tokens)


        if precision + recall == 0:
            f1_score = 0.0
        else:
            f1_score = (2 * precision * recall) / (precision + recall)

        return {
            "exact_match": exact_match, 
            "precision": precision, 
            "recall": recall, 
            "f1_score": f1_score
        }
        
    else:
        # Handle type mismatch
        return {"exact_match": 0.0, "precision": 0.0, "recall": 0.0, "f1_score": 0.0}

In [49]:
def json_extraction(files):
    dataset = []
    for file in files:
        with open(file, 'r') as File:
            data = dict(json.load(File))
        data = data['data']
        for doc in data:
            j = doc['paragraphs']
            for n in range(len(j)):
                for sample in (j[n]['qas']):
                    dataset.append([j[n]['context'], sample['question'], sample['answers'][0]['text']])

    dataset = pd.DataFrame(columns=["context", "question", "answer"], data=dataset)
    return dataset

In [50]:
data1 = json_extraction(['/kaggle/input/stanford-question-answering-dataset/dev-v1.1.json', '/kaggle/input/stanford-question-answering-dataset/train-v1.1.json'])

In [51]:
data2 = pd.read_csv("/kaggle/input/squad-v11/SQuAD-v1.1.csv")

In [52]:
data2 = data2.drop(columns=['title', 'answer_start', 'answer_end'])

In [53]:
full_data = pd.concat([data1, data2], axis=0, ignore_index=True)
full_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 185768 entries, 0 to 185767
Data columns (total 3 columns):
 #   Column    Non-Null Count   Dtype 
---  ------    --------------   ----- 
 0   context   185768 non-null  object
 1   question  185768 non-null  object
 2   answer    185765 non-null  object
dtypes: object(3)
memory usage: 4.3+ MB


In [54]:
batched_data=np.array(full_data)

In [55]:
print(batched_data.shape)

(185768, 3)


In [56]:
shuffled_data=batched_data[np.random.choice(len(batched_data), size=len(full_data), replace=False)]

In [57]:
dataset = Dataset.from_dict({"question":list(shuffled_data[:,1]),"context":list(shuffled_data[:,0])})

In [58]:
print(dataset)

Dataset({
    features: ['question', 'context'],
    num_rows: 185768
})


In [59]:
qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")

Device set to use cuda:0


In [60]:
results=(list(qa_pipeline(dataset,batch_size=64)))



In [61]:
print(len(results))
print(results[0])
print(shuffled_data[0])

185768
{'score': 0.5454137325286865, 'start': 38, 'end': 45, 'answer': 'Namaqua'}
["From 1904 to 1907, the Herero and the Namaqua took up arms against the Germans and in calculated punitive action by the German occupiers, the 'first genocide of the Twentieth Century' was committed. In the Herero and Namaqua genocide, 10,000 Nama (half the population) and approximately 65,000 Hereros (about 80% of the population) were systematically murdered. The survivors, when finally released from detention, were subjected to a policy of dispossession, deportation, forced labour, racial segregation and discrimination in a system that in many ways anticipated apartheid."
 'Herero and what other group took action against German occupiers?'
 'Namaqua']


In [62]:
final_result = {"exact_match": 0, "precision": 0, "recall": 0, "f1_score": 0}

In [63]:
for i in range(len(results)):
    score=scores(results[i]['answer'],shuffled_data[i,2])
    final_result["exact_match"]+=score['exact_match']
    final_result["precision"]+=score['precision']
    final_result["recall"]+=score['recall']
    final_result["f1_score"]+=score['f1_score']

In [64]:
print(f"""Final Results on dataset of {len(full_data)} samples 
Count of samples predication exact match the ground truth = {final_result["exact_match"]}
precision = {final_result["precision"]/len(full_data)} 
recall = {final_result["recall"]/len(full_data)}
F1 Score ={final_result["f1_score"]/len(full_data)} """)

Final Results on dataset of 185768 samples 
Count of samples predication exact match the ground truth = 135833.0
precision = 0.9171602729442243 
recall = 0.8858116150470757
F1 Score =0.8818370763011365 


In [65]:
print(results[456])
print(shuffled_data[456])

{'score': 0.9939223527908325, 'start': 336, 'end': 340, 'answer': '1791'}
['Napoleon, expecting to win the war, delayed too long and lost this opportunity; by December the Allies had withdrawn the offer. When his back was to the wall in 1814 he tried to reopen peace negotiations on the basis of accepting the Frankfurt proposals. The Allies now had new, harsher terms that included the retreat of France to its 1791 boundaries, which meant the loss of Belgium. Napoleon would remain Emperor, however he rejected the term. The British wanted Napoleon permanently removed; they prevailed. Napoleon adamantly refused.'
 'The new Allied demands for peace insisted France return to its boundaries of what year?'
 '1791']
