In [None]:
# Install packages
!pip install -q transformers datasets torch accelerate evaluate
!pip install -q gradio bertviz wordcloud matplotlib seaborn pandas peft

In [None]:
# Imports
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
import warnings
warnings.filterwarnings('ignore')

from transformers import (
    AutoTokenizer, AutoModelForQuestionAnswering, AutoModelForCausalLM,
    TrainingArguments, Trainer, DefaultDataCollator, pipeline,
    DataCollatorForLanguageModeling
)
from datasets import load_dataset
import evaluate
from wordcloud import WordCloud
import gradio as gr

# Reproducibility: seed numpy and torch from one shared constant
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Device: use CUDA when it is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
    print(f"Using device: {device}")
    print(f"GPU: {torch.cuda.get_device_name(0)}")
else:
    device = torch.device('cpu')
    print(f"Using device: {device}")

In [None]:
# 1. Data Preparation (15%)

# Load SQuAD 2.0 and report the size of each split
print("Loading SQuAD 2.0...")
squad = load_dataset("squad_v2")

print("\nDataset structure:")
print(squad)
for split_label, split_key in (("Training", "train"), ("Validation", "validation")):
    print(f"{split_label} samples: {len(squad[split_key])}")

In [None]:
# Print the first 5 training examples (question, context preview, first answer)
banner = "=" * 80
print(banner)
print("SAMPLE QUESTIONS FROM SQUAD 2.0")
print(banner)

for position in range(5):
    sample = squad['train'][position]
    answer_texts = sample['answers']['text']

    print(f"\n--- EXAMPLE {position + 1} ---")
    print(f"Question: {sample['question']}")
    print(f"Context (first 200 chars): {sample['context'][:200]}...")

    # An empty answer list marks the question as unanswerable
    if len(answer_texts) == 0:
        print("Answer: [UNANSWERABLE]")
    else:
        print(f"Answer: {answer_texts[0]}")
        print(f"Answer Start: {sample['answers']['answer_start'][0]}")
    print("-" * 80)

In [None]:
# Statistics
def count_answerable(dataset):
    """Count answerable and unanswerable examples in a dataset.

    Args:
        dataset: Sized iterable of examples; each example exposes
            ``example['answers']['text']`` as a sequence of answer strings.

    Returns:
        Tuple ``(answerable, unanswerable)`` whose sum is ``len(dataset)``.
    """
    answerable = 0
    for example in dataset:
        # An example counts as answerable when it has at least one answer text
        if len(example['answers']['text']) > 0:
            answerable += 1
    unanswerable = len(dataset) - answerable
    return answerable, unanswerable

train_ans, train_unans = count_answerable(squad['train'])
val_ans, val_unans = count_answerable(squad['validation'])

print("\nDataset Statistics:")
print("="*50)
split_summaries = (
    ("Training Set:", 'train', train_ans, train_unans),
    ("\nValidation Set:", 'validation', val_ans, val_unans),
)
for heading, split_key, n_answerable, n_unanswerable in split_summaries:
    split_size = len(squad[split_key])
    print(heading)
    print(f"  Answerable: {n_answerable} ({n_answerable/split_size*100:.1f}%)")
    print(f"  Unanswerable: {n_unanswerable} ({n_unanswerable/split_size*100:.1f}%)")

# Lengths: whitespace-token counts over the first 5000 training rows.
# The text columns are read directly instead of iterating over a row slice.
passage_lengths = [len(text.split()) for text in squad['train']['context'][:5000]]
question_lengths = [len(text.split()) for text in squad['train']['question'][:5000]]

print("\nText Length Statistics:")
print(f"  Avg passage length: {np.mean(passage_lengths):.1f} words")
print(f"  Avg question length: {np.mean(question_lengths):.1f} words")

In [None]:
# Visualizations
def _draw_length_histogram(ax, lengths, bins, color, xlabel, title):
    """Draw a histogram of `lengths` on `ax` with a dashed red line at the mean."""
    mean_length = np.mean(lengths)
    ax.hist(lengths, bins=bins, color=color, edgecolor='black', alpha=0.7)
    ax.axvline(mean_length, color='red', linestyle='--',
               label=f'Mean: {mean_length:.1f}')
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Frequency')
    ax.set_title(title)
    ax.legend()
    ax.grid(alpha=0.3)


def _draw_answerability_bars(ax, labels, counts, label_offset, title):
    """Draw one bar per label on `ax` and write each count above its bar."""
    ax.bar(labels, counts, color=['#4CAF50', '#FF5252'], alpha=0.7)
    ax.set_ylabel('Count')
    ax.set_title(title)
    for position, count in enumerate(counts):
        ax.text(position, count + label_offset, str(count),
                ha='center', va='bottom', fontweight='bold')


# 2x2 grid: length histograms on the top row, class balance on the bottom row
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

_draw_length_histogram(axes[0, 0], passage_lengths, 50, 'skyblue',
                       'Passage Length (words)', 'Distribution of Passage Lengths')
_draw_length_histogram(axes[0, 1], question_lengths, 30, 'lightcoral',
                       'Question Length (words)', 'Distribution of Question Lengths')

categories = ['Answerable', 'Unanswerable']
train_counts = [train_ans, train_unans]
val_counts = [val_ans, val_unans]
# The label offset differs per split so the text clears the bars at each scale
_draw_answerability_bars(axes[1, 0], categories, train_counts, 1000,
                         'Training Set Distribution')
_draw_answerability_bars(axes[1, 1], categories, val_counts, 200,
                         'Validation Set Distribution')

plt.tight_layout()
plt.savefig('data_exploration.png', dpi=300, bbox_inches='tight')
plt.show()
print("Saved: data_exploration.png")

In [None]:
# Word cloud built from the first 5000 training questions
all_questions = ' '.join(squad['train']['question'][:5000])

plt.figure(figsize=(12, 6))
cloud_builder = WordCloud(
    width=1200,
    height=600,
    background_color='white',
    colormap='viridis',
    max_words=100,
)
wordcloud = cloud_builder.generate(all_questions)
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.title('Question Word Cloud', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.savefig('question_wordcloud.png', dpi=300, bbox_inches='tight')
plt.show()
print("Saved: question_wordcloud.png")

In [None]:
# Subsample for efficiency (OPTIMIZED FOR FAST EXECUTION)
# Note: Using small sample sizes for quick testing
# For better accuracy, increase to TRAIN_SAMPLES=10000, VAL_SAMPLES=2000
TRAIN_SAMPLES = 500   # Reduced for faster training
VAL_SAMPLES = 300     # Reduced for faster evaluation

# Shuffle with the shared seed first so the selected subset is reproducible
shuffled_train = squad['train'].shuffle(seed=SEED)
shuffled_validation = squad['validation'].shuffle(seed=SEED)
train_dataset = shuffled_train.select(range(TRAIN_SAMPLES))
val_dataset = shuffled_validation.select(range(VAL_SAMPLES))

print("Using {} training samples (FAST MODE)".format(len(train_dataset)))
print("Using {} validation samples (FAST MODE)".format(len(val_dataset)))
print("\nNote: This will train quickly but with lower accuracy.")
print("For production: increase TRAIN_SAMPLES to 10000+")

In [None]:
# Initialize the tokenizer that matches the extractive QA checkpoint
model_checkpoint = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

print("Loaded tokenizer:", model_checkpoint)
print("Vocabulary size:", tokenizer.vocab_size)

In [None]:
# Preprocessing function
max_length = 384  # passed to the tokenizer as `max_length`; every feature is padded to it
doc_stride = 128  # passed to the tokenizer as `stride` for overflowing context windows

def preprocess_training_examples(examples):
    """Tokenize a batch of QA examples and attach answer-span token labels.

    Each (question, context) pair is tokenized with truncation applied only to
    the context; overflowing tokens are returned as additional features, so one
    example may produce several features.

    Args:
        examples: Batch with "question", "context" and "answers" columns, where
            each answers entry has "answer_start" and "text" lists.

    Returns:
        The tokenizer output with "offset_mapping" and
        "overflow_to_sample_mapping" removed and "start_positions" /
        "end_positions" added (one pair of token indices per feature).
        Features with no answer, or whose context window does not fully
        contain the answer, are labelled (0, 0).
    """
    questions = [q.strip() for q in examples["question"]]
    inputs = tokenizer(
        questions,
        examples["context"],
        max_length=max_length,
        truncation="only_second",
        stride=doc_stride,
        return_overflowing_tokens=True,
        return_offsets_mapping=True,
        padding="max_length",
    )

    # Both mappings are only needed here to compute labels, so remove them
    # from the returned features.
    offset_mapping = inputs.pop("offset_mapping")
    sample_map = inputs.pop("overflow_to_sample_mapping")
    answers = examples["answers"]
    start_positions = []
    end_positions = []

    for i, offset in enumerate(offset_mapping):
        # Map this feature back to the example it was produced from
        sample_idx = sample_map[i]
        answer = answers[sample_idx]

        if len(answer["answer_start"]) == 0:
            # Unanswerable example: label both positions with index 0
            # (presumably the [CLS] token for this tokenizer -- TODO confirm)
            start_positions.append(0)
            end_positions.append(0)
        else:
            # Character span of the first listed answer within the context
            start_char = answer["answer_start"][0]
            end_char = start_char + len(answer["text"][0])
            sequence_ids = inputs.sequence_ids(i)

            # Find context bounds
            # (the tokens whose sequence id is 1, i.e. the second input)
            idx = 0
            while sequence_ids[idx] != 1:
                idx += 1
            context_start = idx
            while sequence_ids[idx] == 1:
                idx += 1
            context_end = idx - 1

            # Answer not fully inside this feature's context window -> (0, 0)
            if offset[context_start][0] > start_char or offset[context_end][1] < end_char:
                start_positions.append(0)
                end_positions.append(0)
            else:
                # Scan forward to the last token that starts at or before start_char
                idx = context_start
                while idx <= context_end and offset[idx][0] <= start_char:
                    idx += 1
                start_positions.append(idx - 1)

                # Scan backward to the first token that ends at or after end_char
                idx = context_end
                while idx >= context_start and offset[idx][1] >= end_char:
                    idx -= 1
                end_positions.append(idx + 1)

    inputs["start_positions"] = start_positions
    inputs["end_positions"] = end_positions
    return inputs

print("Tokenizing training data...")
# Drop every original column so only the tokenizer outputs and labels remain
original_columns = train_dataset.column_names
train_dataset_tokenized = train_dataset.map(
    preprocess_training_examples,
    batched=True,
    remove_columns=original_columns,
)

print("Tokenized {} examples".format(len(train_dataset_tokenized)))

In [None]:
#2. Extractive QA with Transformers (25%)

# Load the QA model from the same checkpoint as the tokenizer
model = AutoModelForQuestionAnswering.from_pretrained(model_checkpoint)
model.to(device)

parameter_count = sum(weight.numel() for weight in model.parameters())
print("Model:", model_checkpoint)
print(f"Parameters: {parameter_count:,}")

In [None]:
# Training configuration
# Fine-tuning setup for the extractive QA model. No eval_dataset is passed to
# the Trainer, so in-training evaluation and best-model loading are disabled.
training_args = TrainingArguments(
    output_dir="./results_qa",
    eval_strategy="no",  # Changed from "epoch" since we don't have eval_dataset
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    save_strategy="epoch",
    load_best_model_at_end=False,  # Changed from True since we need eval_dataset for this
    logging_steps=100,
    seed=SEED,
)

# Features were already padded to max_length during preprocessing
data_collator = DefaultDataCollator()

# NOTE(review): newer transformers releases appear to prefer `processing_class`
# over `tokenizer` here -- confirm against the installed version.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset_tokenized,
    data_collator=data_collator,
    tokenizer=tokenizer,
)

print("Training configuration:")
print(f"  Learning rate: {training_args.learning_rate}")
print(f"  Batch size: {training_args.per_device_train_batch_size}")
print(f"  Epochs: {training_args.num_train_epochs}")

In [None]:
# Train the QA model, report runtime and loss, then save the weights
print("Starting training...\n")
train_result = trainer.train()

train_metrics = train_result.metrics
print("\nTraining completed!")
print(f"Time: {train_metrics['train_runtime']:.2f}s")
print(f"Loss: {train_metrics['train_loss']:.4f}")

trainer.save_model("./qa_model")
print("Model saved to './qa_model'")

In [None]:
# Evaluation functions
def normalize_answer(s):
    """Normalize an answer string for SQuAD-style comparison.

    Applies, in the order used by the official SQuAD evaluation script:
    lowercasing, punctuation removal, removal of the articles a/an/the, and
    whitespace collapsing.

    Args:
        s: Raw answer text.

    Returns:
        The normalized text with tokens separated by single spaces.
    """
    import re, string
    s = s.lower()
    # Strip punctuation BEFORE articles: otherwise a letter inside a dotted or
    # hyphenated token (the final "a" in "u.s.a") is matched as an article.
    s = ''.join(ch for ch in s if ch not in string.punctuation)
    s = re.sub(r'\b(a|an|the)\b', ' ', s)
    return ' '.join(s.split())

def compute_exact_match(pred, truth):
    """Return 1 when prediction and truth are equal after normalization, else 0."""
    normalized_pred = normalize_answer(pred)
    normalized_truth = normalize_answer(truth)
    return 1 if normalized_pred == normalized_truth else 0

def compute_f1(pred, truth):
    """Token-level F1 between a predicted and a reference answer.

    Both strings are normalized with ``normalize_answer`` and split on
    whitespace. Overlap is counted as a multiset intersection, so a token that
    occurs twice in both answers contributes two matches.

    Args:
        pred: Predicted answer text.
        truth: Reference answer text.

    Returns:
        1 or 0 when either side has no tokens (1 only if both are empty),
        0 when there is no overlap, otherwise the F1 score as a float.
    """
    pred_tokens = normalize_answer(pred).split()
    truth_tokens = normalize_answer(truth).split()

    if len(pred_tokens) == 0 or len(truth_tokens) == 0:
        return int(pred_tokens == truth_tokens)

    # Counter & Counter keeps the minimum count per token; a plain set
    # intersection would under-count repeated tokens.
    overlap = Counter(pred_tokens) & Counter(truth_tokens)
    num_same = sum(overlap.values())
    if num_same == 0:
        return 0

    prec = num_same / len(pred_tokens)
    rec = num_same / len(truth_tokens)
    return 2 * (prec * rec) / (prec + rec)

print("Evaluation functions defined")

In [None]:
# Create QA pipeline and evaluate
qa_pipeline = pipeline(
    "question-answering",
    model=model,
    tokenizer=tokenizer,
    device=0 if torch.cuda.is_available() else -1
)

exact_matches = []
f1_scores = []

print("Evaluating on validation set...")
for example in val_dataset:
    # SQuAD 2.0 contains unanswerable questions. Let the pipeline return an
    # empty answer; otherwise it always extracts a span and every unanswerable
    # example is scored 0 against its "" ground truth.
    prediction = qa_pipeline(
        question=example['question'],
        context=example['context'],
        handle_impossible_answer=True,
    )['answer']

    # Unanswerable examples have no reference text; compare against ""
    if len(example['answers']['text']) > 0:
        ground_truths = example['answers']['text']
    else:
        ground_truths = [""]

    # Score against every reference answer and keep the best match
    em = max(compute_exact_match(prediction, gt) for gt in ground_truths)
    f1 = max(compute_f1(prediction, gt) for gt in ground_truths)

    exact_matches.append(em)
    f1_scores.append(f1)

em_score = np.mean(exact_matches) * 100
f1_score = np.mean(f1_scores) * 100

print("\n" + "="*50)
print("EXTRACTIVE QA EVALUATION RESULTS")
print("="*50)
print(f"Exact Match (EM): {em_score:.2f}%")
print(f"F1 Score: {f1_score:.2f}%")
print("="*50)

In [None]:
# Show sample predictions
print("\n" + "="*80)
print("SAMPLE PREDICTIONS")
print("="*80)

for position in range(5):
    example = val_dataset[position]
    prediction = qa_pipeline(question=example['question'], context=example['context'])

    # Show the first reference answer, or a marker for unanswerable questions
    gold_answers = example['answers']['text']
    ground_truth = gold_answers[0] if len(gold_answers) > 0 else "[UNANSWERABLE]"

    print(f"\n--- EXAMPLE {position + 1} ---")
    print(f"Question: {example['question']}")
    print(f"Context (first 150 chars): {example['context'][:150]}...")
    print(f"Ground Truth: {ground_truth}")
    print(f"Prediction: {prediction['answer']}")
    print(f"Confidence: {prediction['score']:.2%}")
    print("-" * 80)

In [None]:
# 3. Response Generation with Transformers (25%)

# Prepare generation data (OPTIMIZED FOR FAST EXECUTION)
def create_generation_data(dataset, num_samples=400):  # Reduced from 5000
    """Build templated training texts for the explanation generator.

    Unanswerable examples are skipped. Each remaining example becomes one
    string: a prompt (question, first 300 characters of the context, first
    answer), a fixed one-sentence explanation, and the generation tokenizer's
    EOS token.

    Args:
        dataset: Iterable of examples with "question", "context" and "answers".
        num_samples: Maximum number of texts to return; values <= 0 give [].

    Returns:
        List of at most ``num_samples`` training strings.
    """
    generation_data = []

    for example in dataset:
        # Check the cap before appending so num_samples <= 0 yields no texts
        # (checking only after an append would always return at least one).
        if len(generation_data) >= num_samples:
            break

        if len(example['answers']['text']) == 0:
            continue

        question = example['question']
        context = example['context'][:300]
        answer = example['answers']['text'][0]

        prompt = f"Question: {question}\nContext: {context}\nAnswer: {answer}\n\nExplanation:"
        response = f"The answer is '{answer}' based on the given context."

        # tokenizer_gen is a module-level name assigned before this is called
        full_text = prompt + " " + response + tokenizer_gen.eos_token
        generation_data.append(full_text)

    return generation_data

model_checkpoint_gen = "distilgpt2"
tokenizer_gen = AutoTokenizer.from_pretrained(model_checkpoint_gen)
# Reuse the EOS token as the padding token
tokenizer_gen.pad_token = tokenizer_gen.eos_token

print("Loaded tokenizer:", model_checkpoint_gen)

train_texts = create_generation_data(train_dataset, num_samples=400)
print("Created {} training examples (FAST MODE)".format(len(train_texts)))
print("Note: Reduced from 5000 for faster training")

In [None]:
# Tokenize
from datasets import Dataset

def tokenize_function(examples):
    """Tokenize the 'text' column, truncating and padding each entry to 512 tokens."""
    encoded = tokenizer_gen(
        examples['text'],
        max_length=512,
        truncation=True,
        padding='max_length',
    )
    return encoded

# Wrap the raw strings in a Dataset, then replace the text column with token ids
train_gen_dataset = Dataset.from_dict({'text': train_texts})
train_gen_tokenized = train_gen_dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=['text'],
)

print("Tokenized {} examples".format(len(train_gen_tokenized)))

In [None]:
# Load and train GPT-2
# Fine-tune the causal language model on the templated explanation texts
model_gen = AutoModelForCausalLM.from_pretrained(model_checkpoint_gen)
model_gen.to(device)

training_args_gen = TrainingArguments(
    output_dir="./results_generation",
    num_train_epochs=3,
    per_device_train_batch_size=8,
    learning_rate=5e-5,
    logging_steps=100,
    seed=SEED,
)

# mlm=False selects causal (next-token) language modeling in the collator.
# NOTE(review): pad_token was set to eos_token for this tokenizer; if the
# collator ignores pad positions in the labels, the EOS appended to each
# training text may be ignored as well -- confirm.
trainer_gen = Trainer(
    model=model_gen,
    args=training_args_gen,
    train_dataset=train_gen_tokenized,
    data_collator=DataCollatorForLanguageModeling(tokenizer_gen, mlm=False)
)

print("Training generation model...\n")
train_result_gen = trainer_gen.train()

print("\nGeneration training completed!")
print(f"Time: {train_result_gen.metrics['train_runtime']:.2f}s")

trainer_gen.save_model("./generation_model")
print("Model saved")

In [None]:
# Generate explanations
def generate_explanation(question, context, answer, max_length=100):
    """Generate a free-text explanation for an extracted answer.

    Builds the same prompt layout used for fine-tuning (question, first 300
    characters of the context, answer, "Explanation:") and samples a
    continuation from ``model_gen``.

    Args:
        question: Question text.
        context: Passage text; only its first 300 characters are used.
        answer: Answer text to explain.
        max_length: Maximum number of NEW tokens to generate.

    Returns:
        The generated continuation only, stripped of surrounding whitespace.
    """
    prompt = f"Question: {question}\nContext: {context[:300]}\nAnswer: {answer}\n\nExplanation:"

    inputs = tokenizer_gen(prompt, return_tensors='pt', truncation=True, max_length=400)
    inputs = {k: v.to(device) for k, v in inputs.items()}
    prompt_token_count = inputs['input_ids'].shape[1]

    # Inference only: make sure dropout is off (the model may still be in
    # training mode after fine-tuning). No-op when already in eval mode.
    model_gen.eval()
    with torch.no_grad():
        outputs = model_gen.generate(
            **inputs,
            max_new_tokens=max_length,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer_gen.eos_token_id
        )

    # The output sequence starts with the prompt tokens; decode only what was
    # generated after them. Splitting the decoded text on "Explanation:" is
    # fragile: the marker is lost if the prompt was truncated and can recur
    # inside the context or the generated text.
    new_tokens = outputs[0][prompt_token_count:]
    explanation = tokenizer_gen.decode(new_tokens, skip_special_tokens=True).strip()
    return explanation

print("\n" + "="*80)
print("SAMPLE GENERATED EXPLANATIONS")
print("="*80)

for position in range(5):
    example = val_dataset[position]
    reference_answers = example['answers']['text']

    # Unanswerable questions have nothing to explain
    if len(reference_answers) == 0:
        continue

    question, context = example['question'], example['context']
    answer = reference_answers[0]
    explanation = generate_explanation(question, context, answer)

    print(f"\n--- EXAMPLE {position + 1} ---")
    print(f"Question: {question}")
    print(f"Answer: {answer}")
    print(f"Explanation: {explanation}")
    print("-" * 80)

In [None]:
# 4. Advanced Exploration (15%)

# Attention visualization
try:
    from bertviz import head_view

    # Visualize attention for the first validation example; the context is cut
    # to its first 200 characters before tokenization.
    example = val_dataset[0]
    inputs = tokenizer(example['question'], example['context'][:200], return_tensors='pt').to(device)

    with torch.no_grad():
        outputs = model(**inputs, output_attentions=True)

    tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'][0])
    html = head_view(outputs.attentions, tokens, html_action='return')

    # Write with an explicit encoding: the page embeds the token strings and
    # the platform default encoding may not represent every character.
    with open('attention_viz.html', 'w', encoding='utf-8') as f:
        f.write(html.data)

    print("Attention visualization saved to 'attention_viz.html'")
    print("Open in browser to explore attention patterns")
except ImportError:
    print("bertviz not available, skipping visualization")

In [None]:
# Zero-shot vs Fine-tuned
# Baseline: the same checkpoint loaded fresh, without the fine-tuning done above
model_zeroshot = AutoModelForQuestionAnswering.from_pretrained(model_checkpoint)
model_zeroshot.to(device)

qa_zeroshot = pipeline("question-answering", model=model_zeroshot, tokenizer=tokenizer,
                       device=0 if torch.cuda.is_available() else -1)

zero_ems = []
ft_ems = []

test_samples = val_dataset.select(range(min(100, len(val_dataset))))

for example in test_samples:
    # Only answerable questions are compared
    if len(example['answers']['text']) == 0:
        continue

    question = example['question']
    context = example['context']
    truth = example['answers']['text'][0]

    try:
        zero_pred = qa_zeroshot(question=question, context=context)['answer']
        ft_pred = qa_pipeline(question=question, context=context)['answer']
    except Exception as exc:
        # Skip the example for BOTH models so the two score lists stay aligned,
        # but report the failure instead of hiding it.
        print(f"Skipping example ({type(exc).__name__}: {exc})")
        continue

    zero_ems.append(compute_exact_match(zero_pred, truth))
    ft_ems.append(compute_exact_match(ft_pred, truth))

print("\n" + "="*50)
print("ZERO-SHOT VS FINE-TUNED COMPARISON")
print("="*50)
if zero_ems:
    improvement = (np.mean(ft_ems) - np.mean(zero_ems)) * 100
    print(f"Zero-shot EM: {np.mean(zero_ems)*100:.2f}%")
    print(f"Fine-tuned EM: {np.mean(ft_ems)*100:.2f}%")
    # Signed format so a regression prints as "-x.xx%" rather than "+-x.xx%"
    print(f"Improvement: {improvement:+.2f}%")
else:
    # Avoid taking the mean of an empty list
    print("No answerable examples were scored; nothing to compare.")
print("="*50)

In [None]:
# LoRA parameter-efficient fine-tuning
from peft import get_peft_model, LoraConfig, TaskType

# Wrap a freshly loaded QA model with LoRA adapters on the modules named
# "q_lin" and "v_lin". The wrapped model is only inspected here, not trained.
model_lora = AutoModelForQuestionAnswering.from_pretrained(model_checkpoint)
lora_config = LoraConfig(
    task_type=TaskType.QUESTION_ANS,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["q_lin", "v_lin"]
)

model_lora = get_peft_model(model_lora, lora_config)
model_lora.to(device)

print("\n" + "="*50)
print("LoRA MODEL INFO")
print("="*50)
model_lora.print_trainable_parameters()

# Compare trainable-parameter counts: `model` is the QA model loaded earlier,
# `model_lora` is the LoRA-wrapped copy.
full_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
lora_params = sum(p.numel() for p in model_lora.parameters() if p.requires_grad)

print(f"\nParameter Reduction: {(1 - lora_params/full_params)*100:.1f}%")
print("="*50)

In [None]:
# 5. Integrated Demo (10%)

# Create integrated QA system
def qa_system(question, context):
    """Answer a question from a passage and generate an explanation for it.

    Args:
        question: Question text.
        context: Passage to extract the answer from.

    Returns:
        Tuple of two strings: the formatted answer with its confidence and the
        formatted explanation. On missing input, low confidence, or any
        exception, the first string carries the message and the second is "".
    """
    if not (question and context):
        return "Please provide both question and context.", ""

    try:
        result = qa_pipeline(question=question, context=context)
        answer, confidence = result['answer'], result['score']

        # Treat near-zero scores as "no answer found"
        if confidence < 0.001:
            return "Cannot find a confident answer.", ""

        explanation = generate_explanation(question, context, answer, max_length=80)

        extracted_answer = "\n\n".join(
            [f"**Answer:** {answer}", f"**Confidence:** {confidence:.2%}"]
        )
        generated_explanation = f"**Explanation:** {explanation}"
        return extracted_answer, generated_explanation
    except Exception as e:
        # Errors are reported as text so a UI caller shows them instead of failing
        return f"Error: {str(e)}", ""

print("QA system ready")

In [None]:
# Generate explanations
# NOTE(review): this repeats a definition made in an earlier cell; because it
# runs later, this is the definition in effect for the cells that follow.
def generate_explanation(question, context, answer, max_length=100):
    """Generate a free-text explanation for an extracted answer.

    Builds the same prompt layout used for fine-tuning (question, first 300
    characters of the context, answer, "Explanation:") and samples a
    continuation from ``model_gen``.

    Args:
        question: Question text.
        context: Passage text; only its first 300 characters are used.
        answer: Answer text to explain.
        max_length: Maximum number of NEW tokens to generate.

    Returns:
        The generated continuation only, stripped of surrounding whitespace.
    """
    prompt = f"Question: {question}\nContext: {context[:300]}\nAnswer: {answer}\n\nExplanation:"

    inputs = tokenizer_gen(prompt, return_tensors='pt', truncation=True, max_length=400)
    inputs = {k: v.to(device) for k, v in inputs.items()}
    prompt_token_count = inputs['input_ids'].shape[1]

    # Inference only: make sure dropout is off (the model may still be in
    # training mode after fine-tuning). No-op when already in eval mode.
    model_gen.eval()
    with torch.no_grad():
        outputs = model_gen.generate(
            **inputs,
            max_new_tokens=max_length,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer_gen.eos_token_id
        )

    # The output sequence starts with the prompt tokens; decode only what was
    # generated after them. Splitting the decoded text on "Explanation:" is
    # fragile: the marker is lost if the prompt was truncated and can recur
    # inside the context or the generated text.
    new_tokens = outputs[0][prompt_token_count:]
    explanation = tokenizer_gen.decode(new_tokens, skip_special_tokens=True).strip()
    return explanation

print("\n" + "="*80)
print("SAMPLE GENERATED EXPLANATIONS")
print("="*80)

for position in range(5):
    example = val_dataset[position]
    reference_answers = example['answers']['text']

    # Unanswerable questions have nothing to explain
    if len(reference_answers) == 0:
        continue

    question, context = example['question'], example['context']
    answer = reference_answers[0]
    explanation = generate_explanation(question, context, answer)

    print(f"\n--- EXAMPLE {position + 1} ---")
    print(f"Question: {question}")
    print(f"Answer: {answer}")
    print(f"Explanation: {explanation}")
    print("-" * 80)

In [None]:
# Attention visualization
try:
    from bertviz import head_view

    # Visualize attention for the first validation example; the context is cut
    # to its first 200 characters before tokenization.
    example = val_dataset[0]
    inputs = tokenizer(example['question'], example['context'][:200], return_tensors='pt').to(device)

    with torch.no_grad():
        outputs = model(**inputs, output_attentions=True)

    tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'][0])
    html = head_view(outputs.attentions, tokens, html_action='return')

    # Write with an explicit encoding: the page embeds the token strings and
    # the platform default encoding may not represent every character.
    with open('attention_viz.html', 'w', encoding='utf-8') as f:
        f.write(html.data)

    print("Attention visualization saved to 'attention_viz.html'")
    print("Open in browser to explore attention patterns")
except ImportError:
    print("bertviz not available, skipping visualization")

In [None]:
# Zero-shot vs Fine-tuned
# Baseline: the same checkpoint loaded fresh, without the fine-tuning done above
model_zeroshot = AutoModelForQuestionAnswering.from_pretrained(model_checkpoint)
model_zeroshot.to(device)

qa_zeroshot = pipeline("question-answering", model=model_zeroshot, tokenizer=tokenizer,
                       device=0 if torch.cuda.is_available() else -1)

zero_ems = []
ft_ems = []

test_samples = val_dataset.select(range(min(100, len(val_dataset))))

for example in test_samples:
    # Only answerable questions are compared
    if len(example['answers']['text']) == 0:
        continue

    question = example['question']
    context = example['context']
    truth = example['answers']['text'][0]

    try:
        zero_pred = qa_zeroshot(question=question, context=context)['answer']
        ft_pred = qa_pipeline(question=question, context=context)['answer']
    except Exception as exc:
        # Skip the example for BOTH models so the two score lists stay aligned,
        # but report the failure instead of hiding it.
        print(f"Skipping example ({type(exc).__name__}: {exc})")
        continue

    zero_ems.append(compute_exact_match(zero_pred, truth))
    ft_ems.append(compute_exact_match(ft_pred, truth))

print("\n" + "="*50)
print("ZERO-SHOT VS FINE-TUNED COMPARISON")
print("="*50)
if zero_ems:
    improvement = (np.mean(ft_ems) - np.mean(zero_ems)) * 100
    print(f"Zero-shot EM: {np.mean(zero_ems)*100:.2f}%")
    print(f"Fine-tuned EM: {np.mean(ft_ems)*100:.2f}%")
    # Signed format so a regression prints as "-x.xx%" rather than "+-x.xx%"
    print(f"Improvement: {improvement:+.2f}%")
else:
    # Avoid taking the mean of an empty list
    print("No answerable examples were scored; nothing to compare.")
print("="*50)

In [None]:
# LoRA parameter-efficient fine-tuning
# NOTE(review): this cell repeats an earlier LoRA cell of this notebook.
from peft import get_peft_model, LoraConfig, TaskType

# Wrap a freshly loaded QA model with LoRA adapters on the modules named
# "q_lin" and "v_lin". The wrapped model is only inspected here, not trained.
model_lora = AutoModelForQuestionAnswering.from_pretrained(model_checkpoint)
lora_config = LoraConfig(
    task_type=TaskType.QUESTION_ANS,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["q_lin", "v_lin"]
)

model_lora = get_peft_model(model_lora, lora_config)
model_lora.to(device)

print("\n" + "="*50)
print("LoRA MODEL INFO")
print("="*50)
model_lora.print_trainable_parameters()

# Compare trainable-parameter counts: `model` is the QA model loaded earlier,
# `model_lora` is the LoRA-wrapped copy.
full_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
lora_params = sum(p.numel() for p in model_lora.parameters() if p.requires_grad)

print(f"\nParameter Reduction: {(1 - lora_params/full_params)*100:.1f}%")
print("="*50)

In [None]:
# Create integrated QA system
def qa_system(question, context, min_confidence=0.01):
    """Answer a question from a passage and generate an explanation for it.

    Args:
        question: Question text.
        context: Passage to extract the answer from.
        min_confidence: Pipeline scores below this value are reported as
            "no confident answer". Defaults to 0.01.

    Returns:
        Tuple of two strings: the formatted answer with its confidence and the
        formatted explanation. On missing input, low confidence, or any
        exception, the first string carries the message and the second is "".
    """
    if not question or not context:
        return "Please provide both question and context.", ""

    try:
        result = qa_pipeline(question=question, context=context)
        answer = result['answer']
        confidence = result['score']

        if confidence < min_confidence:
            return "Cannot find a confident answer.", ""

        explanation = generate_explanation(question, context, answer, max_length=80)

        extracted_answer = f"**Answer:** {answer}\n\n**Confidence:** {confidence:.2%}"
        generated_explanation = f"**Explanation:** {explanation}"

        return extracted_answer, generated_explanation
    except Exception as e:
        # Errors are reported as text so a UI caller shows them instead of failing
        return f"Error: {str(e)}", ""

print("QA system ready")

In [None]:
# # Gradio interface
# demo = gr.Interface(
#     fn=qa_system,
#     inputs=[
#         gr.Textbox(label="Question", placeholder="Ask a question...", lines=2),
#         gr.Textbox(label="Context/Passage", placeholder="Enter passage...", lines=8)
#     ],
#     outputs=[
#         gr.Markdown(label="Extracted Answer"),
#         gr.Markdown(label="Generated Explanation")
#     ],
#     title="🤖 Transformer-Based Question Answering System",
#     description="BERT for extraction + GPT-2 for generation | Trained on SQuAD 2.0",
#     examples=[
#         [
#             "Which country contains the majority of the Amazon rainforest?",
#             "The Amazon rainforest covers most of the Amazon basin of South America. The majority of the forest is contained within Brazil, with 60% of the rainforest, followed by Peru with 13%, and Colombia with 10%."
#         ],
#         [
#             "Who developed the theory of relativity?",
#             "The theory of relativity usually encompasses two interrelated theories by Albert Einstein: special relativity and general relativity."
#         ],
#         [
#             "When did construction of the Great Wall begin?",
#             "The Great Wall of China is a series of fortifications. Construction began as early as the 7th century BC."
#         ]
#     ]
# )

# print("\nLaunching Gradio demo...")
# demo.launch(share=False)





from transformers import pipeline
import gradio as gr
import torch

# ✅ Stable SQuAD-trained model for demo
# Model and tokenizer for the demo are loaded from the same published checkpoint
DEMO_CHECKPOINT = "distilbert-base-uncased-distilled-squad"
demo_device = 0 if torch.cuda.is_available() else -1

qa_pipe = pipeline(
    "question-answering",
    model=DEMO_CHECKPOINT,
    tokenizer=DEMO_CHECKPOINT,
    device=demo_device,
)

def answer(context, question):
    """Return the answer span the demo pipeline extracts from `context`.

    Args:
        context: Passage to search.
        question: Question to answer.

    Returns:
        The extracted answer text, or a short prompt message when either
        input is missing or blank.
    """
    # Guard against empty or whitespace-only input instead of handing it to
    # the pipeline; mirrors the message used by qa_system.
    if not context or not context.strip() or not question or not question.strip():
        return "Please provide both question and context."
    return qa_pipe(question=question, context=context)["answer"]

# Input widgets, in the positional order expected by answer(context, question)
context_box = gr.Textbox(lines=8, label="Context")
question_box = gr.Textbox(lines=2, label="Question")

demo = gr.Interface(
    fn=answer,
    inputs=[context_box, question_box],
    outputs="text",
    title="SQuAD 2.0 Question Answering System (Demo)",
)

# share=True requests a shareable link for the running demo
demo.launch(share=True)

In [None]:
# Model comparison table
# Summary table. Only values measured in this notebook are reported as
# results; the LoRA model is built but never trained or evaluated here, so its
# metric and training time are marked accordingly rather than estimated.
# NOTE(review): the fine-tuned EM is computed over the whole validation sample
# while the zero-shot EM covers only answerable examples from its first 100
# rows, so the two figures are not directly comparable.

# Perplexity estimated as exp(mean training loss) of the generation run
gen_train_perplexity = np.exp(train_result_gen.metrics['train_loss'])

comparison_data = pd.DataFrame({
    'Model': ['BERT Fine-tuned', 'BERT Zero-shot', 'GPT-2 Generation', 'LoRA BERT'],
    'Task': ['Extractive QA', 'Extractive QA', 'Explanation', 'Extractive QA'],
    'Primary Metric': [
        f"{em_score:.2f}% (EM)",
        f"{np.mean(zero_ems)*100:.2f}% (EM)",
        f"{gen_train_perplexity:.2f} (train perplexity)",
        "Not evaluated"
    ],
    'Training Time': [
        f"{train_result.metrics['train_runtime']/60:.1f}min",
        "N/A",
        f"{train_result_gen.metrics['train_runtime']/60:.1f}min",
        "Not trained"
    ],
    'Trainable Params': [
        f"{full_params:,}",
        "0",
        f"{sum(p.numel() for p in model_gen.parameters()):,}",
        f"{lora_params:,}"
    ]
})

print("\n" + "="*80)
print("COMPREHENSIVE MODEL COMPARISON")
print("="*80)
print(comparison_data.to_string(index=False))
print("="*80)