In [3]:
# Install transformers if not already installed
!pip install -q transformers torch sentencepiece



In [4]:
from transformers import pipeline

In [5]:
# Models to test
bert_model = "bert-base-uncased"
roberta_model = "roberta-base"
bart_model = "facebook/bart-base"

#  Experiment 1: Text Generation 

In [None]:
prompt = "The future of Artificial Intelligence is"

models = {
    "BERT": bert_model,
    "RoBERTa": roberta_model,
    "BART": bart_model
}

for name, model in models.items():
    print(f"\n--- {name} ---")
    try:
        generator = pipeline("text-generation", model=model)
        output = generator(prompt, max_length=30)
        print(output)
    except Exception as e:
        print("Error:", e)


--- BERT ---


config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

If you want to use `BertLMHeadModel` as a standalone, add `is_decoder=True.`


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Device set to use cpu
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Both `max_new_tokens` (=256) and `max_length`(=30) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[{'generated_text': 'The future of Artificial Intelligence is................................................................................................................................................................................................................................................................'}]

--- RoBERTa ---


config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

If you want to use `RobertaLMHeadModel` as a standalone, add `is_decoder=True.`


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cpu
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Both `max_new_tokens` (=256) and `max_length`(=30) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[{'generated_text': 'The future of Artificial Intelligence is'}]

--- BART ---


config.json: 0.00B [00:00, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/558M [00:00<?, ?B/s]

Some weights of BartForCausalLM were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['lm_head.weight', 'model.decoder.embed_tokens.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cpu
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Both `max_new_tokens` (=256) and `max_length`(=30) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


# Experiment 2: Masked Language Modeling 

In [None]:
# BERT
bert_fill = pipeline("fill-mask", model=bert_model)
print("BERT:", bert_fill("The goal of Generative AI is to [MASK] new content."))

# RoBERTa
roberta_fill = pipeline("fill-mask", model=roberta_model)
print("RoBERTa:", roberta_fill("The goal of Generative AI is to <mask> new content."))

# BART
bart_fill = pipeline("fill-mask", model=bart_model)
print("BART:", bart_fill("The goal of Generative AI is to <mask> new content."))


# Experiment 3: Question Answering

In [None]:
context = "Generative AI poses significant risks such as hallucinations, bias, and deepfakes."
question = "What are the risks?"

for name, model in models.items():
    print(f"\n--- {name} ---")
    try:
        qa_pipeline = pipeline("question-answering", model=model)
        answer = qa_pipeline(question=question, context=context)
        print(answer)
    except Exception as e:
        print("Error:", e)

# Observation Table 

| Task        | Model     | Classification (Success/Failure) | Observation (What actually happened?) | Why did this happen? (Architectural Reason) |
|------------|-----------|----------------------------------|---------------------------------------|---------------------------------------------|
| Generation | BERT      | Failure | Pipeline throws error or cannot generate text. | BERT is an encoder-only model and is not designed for autoregressive token generation. |
| Generation | RoBERTa   | Failure | Similar failure as BERT; text-generation not supported. | RoBERTa is also encoder-only and lacks a decoder to generate next tokens. |
| Generation | BART      | Success | Generated a continuation of the prompt, though somewhat generic. | BART is an encoder-decoder model trained for sequence-to-sequence generation tasks. |
| Fill-Mask  | BERT      | Success | Correctly predicted words like "create" and "generate". | BERT is trained using Masked Language Modeling (MLM). |
| Fill-Mask  | RoBERTa   | Success | Predicted contextually strong words like "generate". | RoBERTa improves upon BERT’s MLM training with more data and no NSP task. |
| Fill-Mask  | BART      | Partial Success | Predicts reasonable tokens but with less confidence than BERT/RoBERTa. | BART supports masking but is optimized more for sequence generation than token filling. |
| QA         | BERT      | Partial Success | Returned a fragment or vague answer. | Base BERT is not fine-tuned for extractive QA (e.g., SQuAD). |
| QA         | RoBERTa   | Partial Success | Answer was slightly better but still inconsistent. | Better pretraining than BERT, but still not QA-fine-tuned. |
| QA         | BART      | Failure | Returned incorrect or empty answer. | BART is generative and not designed for extractive QA without fine-tuning. |

