<a href="https://colab.research.google.com/github/Aleena24/Large-Language-Model/blob/main/activity3_fineTuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Q/A Model: Extractive Question Answering with a Pre-trained DistilBERT

In [1]:
from transformers import AutoModelForQuestionAnswering, AutoTokenizer
import torch

In [2]:
# Checkpoint used for the question-answering demo (by its name, a DistilBERT
# model distilled on SQuAD).
model_name = "distilbert-base-uncased-distilled-squad"

# Tokenizer and model are both resolved from the same checkpoint name.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForQuestionAnswering.from_pretrained(model_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [3]:

def answer_question(question, context, max_answer_len=30):
    """Extract the answer to ``question`` from ``context`` with the loaded QA model.

    Relies on the notebook-level ``tokenizer`` and ``model``. The answer is the
    token span whose combined start + end logit is highest among all spans that
    are well-formed (start <= end) and at most ``max_answer_len`` tokens long.
    Picking start and end independently can yield an end before the start,
    which silently produces an empty answer.

    Args:
        question: The question text.
        context: The passage in which to look for the answer.
        max_answer_len: Maximum number of tokens in the returned span (>= 1).

    Returns:
        The decoded answer string, or ``"No answer found"`` when the selected
        span decodes to an empty string.

    Raises:
        ValueError: If ``max_answer_len`` is smaller than 1.
    """
    if max_answer_len < 1:
        raise ValueError("max_answer_len must be at least 1")

    inputs = tokenizer(question, context, truncation=True, padding=True, return_tensors='pt')
    with torch.no_grad():
        outputs = model(**inputs)

    # Only one (question, context) pair is encoded, so use row 0 of the batch.
    start_logits = outputs.start_logits[0]
    end_logits = outputs.end_logits[0]

    # span_scores[i, j] is the score of the span starting at token i and ending at token j.
    span_scores = start_logits[:, None] + end_logits[None, :]
    positions = torch.arange(span_scores.size(0), device=span_scores.device)
    span_lengths = positions[None, :] - positions[:, None] + 1
    # Rule out spans that end before they start or that exceed the length limit.
    invalid = (span_lengths < 1) | (span_lengths > max_answer_len)
    span_scores = span_scores.masked_fill(invalid, float('-inf'))

    # argmax works on the flattened matrix; recover (row, column) = (start, end).
    best_flat_index = int(torch.argmax(span_scores))
    answer_start, answer_end = divmod(best_flat_index, span_scores.size(1))

    token_ids = inputs['input_ids'][0][answer_start:answer_end + 1].tolist()
    answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(token_ids))
    return answer if answer else "No answer found"


In [4]:
# Passage the model must extract its answers from.
context = """
Hugging Face is an artificial intelligence company that specializes in natural language processing (NLP) and transformer models.
They are best known for releasing pre-trained language models like BERT, RoBERTa, and DistilBERT, which have achieved state-of-the-art
results in various NLP tasks. Hugging Face also provides a popular open-source library called Transformers, which allows developers
to easily integrate these models into their applications. Additionally, they offer a range of tools and platforms for building, training,
and deploying AI models, making it easier for developers to leverage the power of artificial intelligence in their projects.
"""

# Demo questions asked against `context`; each one is answerable from the passage.
questions = [
    "What is hugging face?",
    "What is transformers?",
    "which pre-trained model are do they release?"
]

In [5]:
# Ask every demo question against the shared passage and show the extracted answer.
for question in questions:
    answer = answer_question(question, context)
    print(f"Question: {question}", f"Answer: {answer}\n", sep="\n")

Question: What is hugging face?
Answer: an artificial intelligence company

Question: What is transformers?
Answer: open - source library

Question: which pre-trained model are do they release?
Answer: bert, roberta, and distilbert



# Fine-tuning BART for question answering using the Hugging Face Transformers library

In [6]:
pip install transformers datasets



In [7]:
from datasets import load_dataset

# Load the SQuAD dataset; its "train" and "validation" splits are used for
# fine-tuning and evaluation further down.
dataset = load_dataset("squad")


Downloading readme:   0%|          | 0.00/7.62k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/14.5M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.82M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/87599 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/10570 [00:00<?, ? examples/s]

# Tokenizing the Data

In [8]:
from transformers import AutoTokenizer

# Switch to the BART checkpoint that will be fine-tuned.
# NOTE: this rebinds the notebook-level `model_name` and `tokenizer`, which
# `answer_question` above also reads; re-running that demo after this cell
# would use the BART tokenizer.
model_name = "facebook/bart-large"
tokenizer = AutoTokenizer.from_pretrained(model_name)

def preprocess_function(examples, max_input_length=1024, max_target_length=32):
    """Tokenize a batch of SQuAD examples into seq2seq inputs and labels.

    Each question is paired with its context to form the encoder input, and
    the first reference answer is tokenized as the target sequence. Without a
    ``labels`` column the trainer has nothing to compute a loss from.

    Args:
        examples: A batch from ``dataset.map(..., batched=True)`` with the
            columns ``"question"``, ``"context"`` and ``"answers"`` (each
            answer entry being a dict with a ``"text"`` list).
        max_input_length: Length every encoder input is truncated/padded to.
        max_target_length: Length every label sequence is truncated/padded to.

    Returns:
        The tokenizer output for the inputs with an added ``"labels"`` entry:
        one list of token ids per example, padding positions set to -100.
    """
    questions = [q.strip() for q in examples["question"]]
    # Plain Python lists are returned (no return_tensors): `map` stores the
    # result column-wise, so building framework tensors here is unnecessary.
    model_inputs = tokenizer(
        questions,
        examples["context"],
        max_length=max_input_length,
        truncation=True,
        padding="max_length",
    )

    # Use the first reference answer as the target; fall back to an empty
    # string when an example has no answer text.
    answers = [a["text"][0] if a["text"] else "" for a in examples["answers"]]
    targets = tokenizer(
        answers,
        max_length=max_target_length,
        truncation=True,
        padding="max_length",
    )

    # -100 is the index ignored by the loss, so padding does not contribute to it.
    pad_id = tokenizer.pad_token_id
    model_inputs["labels"] = [
        [-100 if token_id == pad_id else token_id for token_id in label_ids]
        for label_ids in targets["input_ids"]
    ]
    return model_inputs

# Tokenize the dataset in batches; `map` returns a new dataset holding the
# columns produced by `preprocess_function`.
tokenized_datasets = dataset.map(preprocess_function, batched=True)


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/1.63k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Map:   0%|          | 0/87599 [00:00<?, ? examples/s]

Map:   0%|          | 0/10570 [00:00<?, ? examples/s]

# Defining the Model and Training

In [None]:
from transformers import AutoModelForSeq2SeqLM, Seq2SeqTrainer, Seq2SeqTrainingArguments

# Pretrained BART checkpoint (named by `model_name`) that will be fine-tuned.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Hyperparameters for the fine-tuning run, grouped by concern.
training_args = Seq2SeqTrainingArguments(
    # Output location and evaluation cadence
    output_dir="./results",
    evaluation_strategy="epoch",
    # Length of the run and batch sizes
    num_train_epochs=3,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    # Optimizer settings
    learning_rate=2e-5,
    weight_decay=0.01,
)

# Bind the model, the arguments and the tokenized splits into a trainer.
trainer = Seq2SeqTrainer(
    args=training_args,
    model=model,
    eval_dataset=tokenized_datasets["validation"],
    train_dataset=tokenized_datasets["train"],
)


pytorch_model.bin:   0%|          | 0.00/1.02G [00:00<?, ?B/s]

# Fine-Tuning the Model

In [None]:
# Run fine-tuning with the model, arguments and datasets given to `trainer`.
trainer.train()


# Saving the Fine-Tuned Model

In [None]:
# Write the model weights and the tokenizer files into the same directory so
# both can later be reloaded from one path.
for artifact in (model, tokenizer):
    artifact.save_pretrained("./fine-tuned-bart")


# Testing the Fine-Tuned Model

In [None]:
from transformers import pipeline

# Load the fine-tuned model and tokenizer
model = AutoModelForSeq2SeqLM.from_pretrained("./fine-tuned-bart")
tokenizer = AutoTokenizer.from_pretrained("./fine-tuned-bart")


def qa_pipeline(question, context, max_new_tokens=32):
    """Answer ``question`` about ``context`` by generating text with the seq2seq model.

    The "question-answering" pipeline is extractive: it reads start/end
    logits, which a sequence-to-sequence model does not produce. The answer is
    therefore generated directly, encoding the (question, context) pair the
    same way it is encoded for training in this notebook.

    Args:
        question: The question text.
        context: The passage the answer should be drawn from.
        max_new_tokens: Upper bound on the number of generated answer tokens.

    Returns:
        A dict with a single key ``"answer"`` holding the generated text.
    """
    inputs = tokenizer(
        question,
        context,
        max_length=1024,
        truncation=True,
        return_tensors="pt",
    ).to(model.device)
    output_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)
    answer = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return {"answer": answer.strip()}

# Passage the fine-tuned model is questioned about (same text as the earlier demo).
context = """
Hugging Face is an artificial intelligence company that specializes in natural language processing (NLP) and transformer models.
They are best known for releasing pre-trained language models like BERT, RoBERTa, and DistilBERT, which have achieved state-of-the-art
results in various NLP tasks. Hugging Face also provides a popular open-source library called Transformers, which allows developers
to easily integrate these models into their applications. Additionally, they offer a range of tools and platforms for building, training,
and deploying AI models, making it easier for developers to leverage the power of artificial intelligence in their projects.
"""

# Demo questions asked against `context`.
questions = [
    "What is hugging face?",
    "What is transformers?",
    "Which pre-trained model are do they release?"
]

# Query the fine-tuned model with each question and print what it answers.
for question in questions:
    result = qa_pipeline(question=question, context=context)
    print(f"Question: {question}", f"Answer: {result['answer']}\n", sep="\n")
