# Aim: 
Comparative Analysis of LLM Architectures – BERT, GPT, BART

Tasks:

· Load pretrained BERT, GPT-2, and BART using Hugging Face.

· Fine-tune:

· BERT for sentiment classification.

· GPT-2 for text generation.

· BART for text summarization or question-answering.

In [None]:
!pip install evaluate datasets numpy transformers torch accelerate


In [None]:
import os
from dataclasses import dataclass, field
from typing import Optional
import evaluate
import numpy as np
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    AutoModelForCausalLM,
    AutoModelForSeq2SeqLM,
    Trainer,
    TrainingArguments,
    DataCollatorWithPadding,
    DataCollatorForLanguageModeling,
    DataCollatorForSeq2Seq,
    Seq2SeqTrainer,
    pipeline,
    Seq2SeqTrainingArguments
)

PROJECT_DIR = os.getcwd()
DEVICE = "cuda" if os.environ.get("CUDA_VISIBLE_DEVICES", None) is not None else "cpu"

print("Device hint:", DEVICE)

@dataclass
class Config:
    bert_model: str = "bert-base-uncased"
    gpt2_model: str = "gpt2"
    bart_model: str = "facebook/bart-base"

    bert_dataset: str = "imdb"  
    gpt2_dataset: str = "wikitext"
    gpt2_dataset_config: str = "wikitext-2-raw-v1"
    bart_dataset: str = "cnn_dailymail"
    bart_dataset_config: str = "3.0.0"
    output_dir: str = "./outputs"
    per_device_train_batch_size: int = 8
    per_device_eval_batch_size: int = 8
    num_train_epochs: int = 1
    learning_rate: float = 5e-5

cfg = Config()

Device hint: cpu


In [None]:
def prepare_bert():
    ds = load_dataset(cfg.bert_dataset)
    tokenizer = AutoTokenizer.from_pretrained(cfg.bert_model)

    def tokenize_fn(example):
        return tokenizer(example["text"], truncation=True, padding=False)

    ds = ds.map(tokenize_fn, batched=True, remove_columns=["text"])

    num_labels = len(set(ds["train"]["label"]))
    model = AutoModelForSequenceClassification.from_pretrained(cfg.bert_model, num_labels=num_labels)

    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    import evaluate
    accuracy = evaluate.load("accuracy")
    f1 = evaluate.load("f1")

    def compute_metrics(eval_pred):
        logits, labels = eval_pred
        preds = np.argmax(logits, axis=-1)
        return {
            "accuracy": accuracy.compute(predictions=preds, references=labels)["accuracy"],
            "f1": f1.compute(predictions=preds, references=labels, average="binary")["f1"],
        }

    training_args = TrainingArguments(
        output_dir=os.path.join(cfg.output_dir, "bert-sentiment"),
        per_device_train_batch_size=cfg.per_device_train_batch_size,
        per_device_eval_batch_size=cfg.per_device_eval_batch_size,
        num_train_epochs=cfg.num_train_epochs,
        learning_rate=cfg.learning_rate,
        save_total_limit=1,
        logging_steps=100,
        seed=42,
        do_eval=True,  
        report_to=[]  
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=ds["train"].shuffle().select(range(2000)),
        eval_dataset=ds["test"].shuffle().select(range(1000)),
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )

    return trainer, tokenizer

In [None]:
def prepare_gpt2():
    ds = load_dataset(cfg.gpt2_dataset, cfg.gpt2_dataset_config)
    tokenizer = AutoTokenizer.from_pretrained(cfg.gpt2_model)

    if tokenizer.pad_token is None:
        tokenizer.add_special_tokens({"pad_token": "<|pad|>"})

    model = AutoModelForCausalLM.from_pretrained(cfg.gpt2_model)
    model.resize_token_embeddings(len(tokenizer))

    block_size = 128
    from itertools import chain

    small_train = ds["train"].select(range(2000)) if "train" in ds else ds["validation"].select(range(2000))
    tokenized = small_train.map(lambda ex: tokenizer(ex["text"], truncation=True, padding=False), batched=True)

    all_input_ids = list(chain.from_iterable(tokenized["input_ids"]))
    examples = {"input_ids": [], "attention_mask": []}
    for i in range(0, len(all_input_ids) - block_size + 1, block_size):
        chunk = all_input_ids[i : i + block_size]
        examples["input_ids"].append(chunk)
        examples["attention_mask"].append([1] * len(chunk))

    import datasets
    token_ds = datasets.Dataset.from_dict(examples)

    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

    training_args = TrainingArguments(
        output_dir=os.path.join(cfg.output_dir, "gpt2-lm"),
        per_device_train_batch_size=cfg.per_device_train_batch_size,
        num_train_epochs=cfg.num_train_epochs,
        logging_steps=100,
        save_total_limit=1,
        learning_rate=cfg.learning_rate,
        fp16=False,
        report_to=[],  
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=token_ds.shuffle(seed=42).select(range(min(2000, len(token_ds)))),
        data_collator=data_collator,
        tokenizer=tokenizer,
    )

    return trainer, tokenizer


In [None]:
def prepare_bart():
    ds = load_dataset(cfg.bart_dataset, cfg.bart_dataset_config)
    tokenizer = AutoTokenizer.from_pretrained(cfg.bart_model)
    model = AutoModelForSeq2SeqLM.from_pretrained(cfg.bart_model)

    max_input_length = 512
    max_target_length = 128

    def preprocess_function(examples):
        inputs = examples["article"] if "article" in examples else examples["document"]
        model_inputs = tokenizer(inputs, max_length=max_input_length, truncation=True)
        with tokenizer.as_target_tokenizer():
            labels = tokenizer(examples["highlights"], max_length=max_target_length, truncation=True)
        model_inputs["labels"] = labels["input_ids"]
        return model_inputs

    small_train = ds["train"].select(range(2000))
    small_val = ds["validation"].select(range(500))

    tokenized_train = small_train.map(preprocess_function, batched=True, remove_columns=small_train.column_names)
    tokenized_val = small_val.map(preprocess_function, batched=True, remove_columns=small_val.column_names)

    data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

    training_args = Seq2SeqTrainingArguments(
        output_dir=os.path.join(cfg.output_dir, "bart-sum"),
        per_device_train_batch_size=cfg.per_device_train_batch_size,
        num_train_epochs=cfg.num_train_epochs,
        logging_steps=100,
        save_total_limit=1,
        learning_rate=cfg.learning_rate,
        fp16=False,
        report_to=[],  
    )

    trainer = Seq2SeqTrainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_train,
        eval_dataset=tokenized_val,
        tokenizer=tokenizer,
        data_collator=data_collator,
    )

    return trainer, tokenizer


    trainer = Seq2SeqTrainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_train,
        eval_dataset=tokenized_val,
        tokenizer=tokenizer,
        data_collator=data_collator,
    )

    return trainer, tokenizer


BERT for sentiment classification

In [None]:
print('\n--- Preparing BERT for sentiment classification ---')
bert_trainer, bert_tokenizer = prepare_bert()
print('Training BERT (this will run a short demo training)...')
bert_trainer.train()
print('Evaluating BERT...')
bert_metrics = bert_trainer.evaluate()
print('BERT metrics:', bert_metrics)

In [22]:
from transformers import pipeline

sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model=bert_trainer.model,
    tokenizer=bert_tokenizer
)

texts = [
    "I absolutely loved the movie, it was fantastic!",
    "This is the worst service I have ever experienced."
]

results = sentiment_pipeline(texts)
for text, result in zip(texts, results):
    print(f"Text: {text}")
    print(f"Predicted Sentiment: {result['label']} (Score: {result['score']:.4f})")

Device set to use cuda:0


Text: I absolutely loved the movie, it was fantastic!
Predicted Sentiment: LABEL_1 (Score: 0.9910)
Text: This is the worst service I have ever experienced.
Predicted Sentiment: LABEL_0 (Score: 0.9861)


GPT-2 for text generation

In [None]:
print('\n--- Preparing GPT-2 for LM fine-tuning ---')
gpt2_trainer, gpt2_tokenizer = prepare_gpt2()
print('Training GPT-2 (short demo)...')
gpt2_trainer.train()
gpt2_model = gpt2_trainer.model
gpt2_tokenizer = gpt2_tokenizer
gpt2_pipe = pipeline('text-generation', model=gpt2_model, tokenizer=gpt2_tokenizer)
prompts = [
        "In the year 2050,",
        "The experiment showed that",
    ]
print('\nGPT-2 generation examples:')
for p in prompts:
    out = gpt2_pipe(p, max_length=80, do_sample=True, top_k=50, top_p=0.95, num_return_sequences=1)
    print('-', p, '->', out[0]['generated_text'])




--- Preparing GPT-2 for LM fine-tuning ---


  trainer = Trainer(


Training GPT-2 (short demo)...


`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Step,Training Loss
100,3.7383


Device set to use cuda:0
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Both `max_new_tokens` (=256) and `max_length`(=80) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)



GPT-2 generation examples:


Both `max_new_tokens` (=256) and `max_length`(=80) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


- In the year 2050, -> In the year 2050, the global total number of human beings living on Earth has increased by a quarter. In other words, the number of species living on Earth has increased by more than a third since the early 1900s. The number of species living on Earth is expected to increase to about 4 billion by the end of the century, with the remainder of the population on the planet and the remaining animals and plants on Earth under control. As a result, many of the species on Earth are more vulnerable than those that are on the planet, and many of the species that are on the planet are less able to withstand the effects of climate change. 
 The human population is projected to increase to about 13 billion by the end of the century, and to double to 14 billion by the end of the century, with the remaining remaining species on the planet under control. A total population of about 800 billion is expected to be in the range of about 1 billion individuals, and the rate of human 

BART for text summarization or question-answering

In [None]:
print('\n--- Preparing BART for summarization ---')
bart_trainer, bart_tokenizer = prepare_bart()
print('Training BART (short demo)...')
bart_trainer.train()
bart_model = bart_trainer.model
bart_pipe = pipeline('summarization', model=bart_model, tokenizer=bart_tokenizer)
example_article = (
        "The stock market experienced a turbulent day as investors digested the latest earnings reports. "
        "Several major tech firms reported mixed results, leading to increased volatility. Analysts said that "
        "the market reaction was in part due to uncertainty around consumer spending and supply chain concerns."
    )
print('\nBART summarization example:')
print(bart_pipe(example_article, max_length=60, min_length=20, do_sample=False)[0]['summary_text'])


--- Preparing BART for summarization ---


Map:   0%|          | 0/500 [00:00<?, ? examples/s]

  trainer = Seq2SeqTrainer(


Training BART (short demo)...


Step,Training Loss
100,2.7403
200,2.4144


Device set to use cuda:0
Your max_length is set to 60, but your input_length is only 52. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=26)
Both `max_new_tokens` (=256) and `max_length`(=60) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)



BART summarization example:
NEW: Market reaction was in part due to uncertainty around consumer spending and supply chain concerns .
The stock market experienced a turbulent day as investors digested earnings reports .


#Summary

BERT is a model that’s great at understanding text. It looks at a sentence in both directions — left and right — so it really gets the meaning of each word in context. Because of this, it’s perfect for tasks where you need to put text into categories, like figuring out if a review is positive or negative (sentiment analysis), spotting names and places in a paragraph (NER), or working out what someone wants from a message (intent classification).

GPT-2 is more of a text generator. It writes by guessing the next word over and over, based on what’s already written. This makes it good for open-ended things like writing stories, continuing conversations, or creating any text that doesn’t have one “correct” answer. If you train it on a certain style or topic, it can adapt its writing to match.

BART is like a middle ground between the two. It can read some text, understand it, and then rewrite it in a new form. That makes it handy for things like shortening an article into a summary, answering questions using given information, or translating between languages. It’s strong at tasks where you start with some text and want a different version of it as the output.