C. Question Answering on SQUAD dataset using transformers 

In [15]:
import torch
import pandas as pd
from datasets import Dataset
from transformers import DistilBertForQuestionAnswering, DistilBertTokenizer, Trainer, TrainingArguments

# Step 1: Load the dataset from CSV using pandas
train_df = pd.read_csv('/kaggle/input/squad-20-csv-file/squad_csv/train-squad.csv')
validation_df = pd.read_csv('/kaggle/input/squad-20-csv-file/squad_csv/validation-squad.csv')

# Convert pandas DataFrames to Huggingface Datasets
train_dataset = Dataset.from_pandas(train_df)
validation_dataset = Dataset.from_pandas(validation_df)

# Step 2: Initialize the pre-trained model and tokenizer
model = DistilBertForQuestionAnswering.from_pretrained('distilbert-base-uncased')
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

# Check if a GPU is available and move the model to GPU if it is
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

# Step 3: Preprocessing function
def preprocess_function(examples):
    questions = [q.strip() for q in examples["question"]]
    contexts = examples["context"]

    # Tokenizing questions and context
    inputs = tokenizer(
        questions, contexts,
        max_length=384,
        truncation="only_second",
        padding="max_length"
    )

    # Using the "answer_start" and "text" from the dataset to create labels
    start_positions = examples["answer_start"]
    end_positions = []

    for start, answer in zip(examples["answer_start"], examples["text"]):
        if answer is not None:
            end_positions.append(start + len(answer))
        else:
            end_positions.append(start)  # Default to start if answer is None

    # Add start and end positions to the inputs
    inputs["start_positions"] = start_positions
    inputs["end_positions"] = end_positions

    return inputs

# Step 4: Preprocess the dataset
train_dataset = train_dataset.map(preprocess_function, batched=True)
validation_dataset = validation_dataset.map(preprocess_function, batched=True)

# Step 5: Training Arguments
training_args = TrainingArguments(
    output_dir='./results',
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    num_train_epochs=2,
    weight_decay=0.01
)

# Step 6: Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=validation_dataset
)

# Step 7: Fine-tune the model
trainer.train()

# Step 8: Inference (Sample Input)
question = "Who developed BERT?"
context = "BERT was developed by researchers at Google."

# Tokenize the input question and context
inputs = tokenizer(question, context, return_tensors='pt').to(device)  # Move inputs to the same device as the model

# Perform inference
with torch.no_grad():
    outputs = model(**inputs)

# Get start and end logits
start_scores = outputs.start_logits
end_scores = outputs.end_logits

# Find the start and end positions of the answer
answer_start = torch.argmax(start_scores)
answer_end = torch.argmax(end_scores) + 1

# Convert tokens back to the answer string
answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(inputs['input_ids'][0][answer_start:answer_end]))

# Display the result
print("Question:", question)
print("Answer:", answer)


Some weights of DistilBertForQuestionAnswering were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/86821 [00:00<?, ? examples/s]

Map:   0%|          | 0/10388 [00:00<?, ? examples/s]

  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Epoch,Training Loss,Validation Loss
1,5.3038,
2,5.0609,


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(devic

Question: Who developed BERT?
Answer: [CLS] who developed bert ? [SEP] bert was developed by researchers at google . [SEP]
