In [3]:
# EMERGENCY FIX: Complete TensorFlow bypass for Windows
import os
import sys

# Block TensorFlow completely
os.environ["USE_TF"] = "0"
os.environ["USE_TENSORFLOW"] = "0"  
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

# Mock TensorFlow to prevent any imports
class MockTF:
    def __getattr__(self, name): 
        raise ImportError("TensorFlow disabled")

# Block all TensorFlow imports
tf_modules = [
    'tensorflow', 'tensorflow.python', 'tensorflow.python.framework',
    'tensorflow.python.pywrap_tensorflow', 'tensorflow.python._pywrap_tensorflow_internal'
]

for module in tf_modules:
    sys.modules[module] = MockTF()

print(" TensorFlow completely blocked!")

# Now try importing transformers with only PyTorch backend
try:
    import torch
    print(f" PyTorch {torch.__version__} loaded successfully")
    
    # Import transformers components individually to isolate issues
    from transformers import AutoTokenizer, AutoModelForQuestionAnswering
    from transformers import Trainer, TrainingArguments, DefaultDataCollator
    from datasets import load_dataset
    
    print(" All imports successful! TensorFlow bypass worked!")
    
except Exception as e:
    print(f"Import failed: {e}")
    print("Proceeding with alternative pure PyTorch implementation...")

 TensorFlow completely blocked!
 PyTorch 2.6.0+cpu loaded successfully
Import failed: TensorFlow disabled
Proceeding with alternative pure PyTorch implementation...


In [4]:
# Cell 1: Clean installation approach to avoid TensorFlow conflicts
# This cell ensures a clean PyTorch-only environment

# First, completely remove any TensorFlow installations
import subprocess
import sys

print(" Cleaning TensorFlow installations...")
tf_packages = [
    'tensorflow', 'tensorflow-cpu', 'tensorflow-gpu', 
    'tf-nightly', 'tf-estimator', 'tensorboard',
    'tensorflow-io-gcs-filesystem', 'tensorflow-estimator'
]

for package in tf_packages:
    try:
        subprocess.run([sys.executable, '-m', 'pip', 'uninstall', package, '-y'], 
                      capture_output=True, check=False)
        print(f"✓ Removed {package}")
    except:
        pass

print("\n Installing PyTorch-only packages...")

# Install PyTorch first (CPU version for compatibility)
subprocess.run([
    sys.executable, '-m', 'pip', 'install', 
    'torch', 'torchvision', 'torchaudio', 
    '--index-url', 'https://download.pytorch.org/whl/cpu'
], check=True)

# Install transformers with specific PyTorch backend
subprocess.run([
    sys.executable, '-m', 'pip', 'install', 
    'transformers[torch]', 'datasets', 'accelerate'
], check=True)

# Verify PyTorch installation
import torch
print(f"\n PyTorch version: {torch.__version__}")
print(f" CUDA available: {torch.cuda.is_available()}")
print(" Installation complete!")

 Cleaning TensorFlow installations...
✓ Removed tensorflow
✓ Removed tensorflow-cpu
✓ Removed tensorflow-gpu
✓ Removed tf-nightly
✓ Removed tf-estimator
✓ Removed tensorboard
✓ Removed tensorflow-io-gcs-filesystem
✓ Removed tensorflow-estimator

 Installing PyTorch-only packages...

 PyTorch version: 2.6.0+cpu
 CUDA available: False
 Installation complete!


In [5]:
# Cell 2: Import all necessary libraries with TensorFlow bypass
# This approach prevents TensorFlow from being imported by transformers

import os
import sys

# CRITICAL: Set environment variables to disable TensorFlow before any imports
os.environ["USE_TF"] = "0"
os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
os.environ["TRANSFORMERS_VERBOSITY"] = "error"

# Mock tensorflow module to prevent import errors
class MockTensorFlow:
    def __getattr__(self, name):
        raise ImportError("TensorFlow is disabled")

# Insert mock tensorflow into sys.modules before importing transformers
sys.modules['tensorflow'] = MockTensorFlow()
sys.modules['tensorflow.python'] = MockTensorFlow()
sys.modules['tensorflow.python.framework'] = MockTensorFlow()

# Now import PyTorch and other required libraries
import torch
from torch.utils.data import DataLoader

# Import transformers components one by one to catch any TF dependencies
try:
    from transformers import AutoTokenizer
    print("✓ AutoTokenizer imported successfully")
except ImportError as e:
    print(f"✗ Failed to import AutoTokenizer: {e}")
    
try:
    from transformers import AutoModelForQuestionAnswering
    print("✓ AutoModelForQuestionAnswering imported successfully")
except ImportError as e:
    print(f"✗ Failed to import AutoModelForQuestionAnswering: {e}")

try:
    from transformers import TrainingArguments
    print("✓ TrainingArguments imported successfully")
except ImportError as e:
    print(f"✗ Failed to import TrainingArguments: {e}")

try:
    from transformers import Trainer
    print("✓ Trainer imported successfully")
except ImportError as e:
    print(f"✗ Failed to import Trainer: {e}")

try:
    from transformers import DefaultDataCollator
    print("✓ DefaultDataCollator imported successfully")
except ImportError as e:
    print(f"✗ Failed to import DefaultDataCollator: {e}")

# Import other required libraries
from datasets import load_dataset
import numpy as np
from collections import Counter
import string
import re

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"\n Setup complete! Using device: {device}")
print(f"PyTorch version: {torch.__version__}")

✓ AutoTokenizer imported successfully
✓ AutoModelForQuestionAnswering imported successfully
✓ TrainingArguments imported successfully
✗ Failed to import Trainer: TensorFlow is disabled
✓ DefaultDataCollator imported successfully

 Setup complete! Using device: cpu
PyTorch version: 2.6.0+cpu


In [6]:
# Cell 3: Load SQuAD v1.1 dataset and create small subset for fast prototyping
# We'll use only 200 training samples and 50 evaluation samples

# Load the full SQuAD dataset
dataset = load_dataset("squad")

# Create small subsets for quick training and evaluation
train_dataset = dataset["train"].select(range(200))  # First 200 training samples
eval_dataset = dataset["validation"].select(range(50))  # First 50 validation samples

print(f"Training samples: {len(train_dataset)}")
print(f"Evaluation samples: {len(eval_dataset)}")

# Display a sample to understand the data structure
print("\nSample training example:")
sample = train_dataset[0]
print(f"Question: {sample['question']}")
print(f"Context: {sample['context'][:200]}...")
print(f"Answer: {sample['answers']}")

README.md: 0.00B [00:00, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


train-00000-of-00001.parquet:   0%|          | 0.00/14.5M [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


validation-00000-of-00001.parquet:   0%|          | 0.00/1.82M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/87599 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/10570 [00:00<?, ? examples/s]

Training samples: 200
Evaluation samples: 50

Sample training example:
Question: To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?
Context: Architecturally, the school has a Catholic character. Atop the Main Building's gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper sta...
Answer: {'text': ['Saint Bernadette Soubirous'], 'answer_start': [515]}


## Load DistilBERT Model and Tokenizer

In [7]:
# Cell 4: Load DistilBERT tokenizer and model for Question Answering
# DistilBERT is chosen for its speed and efficiency while maintaining good performance

model_name = "distilbert-base-uncased-distilled-squad"

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load model for question answering
model = AutoModelForQuestionAnswering.from_pretrained(model_name)

# Move model to appropriate device
model.to(device)

print(f"Model loaded: {model_name}")
print(f"Tokenizer vocab size: {tokenizer.vocab_size}")
print(f"Model parameters: {sum(p.numel() for p in model.parameters()):,}")

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config.json:   0%|          | 0.00/451 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/265M [00:00<?, ?B/s]

Model loaded: distilbert-base-uncased-distilled-squad
Tokenizer vocab size: 30522
Model parameters: 66,364,418


## Define Tokenization Function

In [8]:
# Cell 5: Define preprocessing function to tokenize question-context pairs
# This function handles the crucial task of mapping answer spans to token positions

def preprocess_function(examples):
    """
    Tokenize questions and contexts, and map answer spans to token positions.
    This is crucial for training the model to predict start and end positions.
    """
    questions = [q.strip() for q in examples["question"]]
    contexts = examples["context"]
    
    # Tokenize with truncation and padding
    tokenized_examples = tokenizer(
        questions,
        contexts,
        truncation=True,
        padding=True,
        max_length=384,  # Standard length for SQuAD
        return_overflowing_tokens=True,
        return_offsets_mapping=True,
        stride=128
    )
    
    # Initialize lists for start and end positions
    start_positions = []
    end_positions = []
    
    # Map answer spans to token positions
    for i, offsets in enumerate(tokenized_examples["offset_mapping"]):
        input_ids = tokenized_examples["input_ids"][i]
        cls_index = input_ids.index(tokenizer.cls_token_id)
        
        # Get the sequence that corresponds to our example
        sequence_ids = tokenized_examples.sequence_ids(i)
        
        # Find the start and end of the context
        context_start = sequence_ids.index(1) if 1 in sequence_ids else None
        context_end = len(sequence_ids) - 1 - sequence_ids[::-1].index(1) if 1 in sequence_ids else None
        
        # If no context, set answer as impossible
        if context_start is None:
            start_positions.append(cls_index)
            end_positions.append(cls_index)
            continue
            
        # Get the answer from the original example
        # Handle the case where we might have overflow tokens
        sample_index = tokenized_examples["overflow_to_sample_mapping"][i]
        answer = examples["answers"][sample_index]
        
        if len(answer["answer_start"]) == 0:
            # No answer case
            start_positions.append(cls_index)
            end_positions.append(cls_index)
        else:
            # Find answer start and end positions
            start_char = answer["answer_start"][0]
            end_char = start_char + len(answer["text"][0])
            
            # Find token positions corresponding to character positions
            token_start_index = context_start
            while token_start_index <= context_end and offsets[token_start_index][0] <= start_char:
                token_start_index += 1
            start_positions.append(token_start_index - 1)
            
            token_end_index = context_end
            while token_end_index >= context_start and offsets[token_end_index][1] >= end_char:
                token_end_index -= 1
            end_positions.append(token_end_index + 1)
    
    tokenized_examples["start_positions"] = start_positions
    tokenized_examples["end_positions"] = end_positions
    
    return tokenized_examples

print("Tokenization function defined successfully!")

Tokenization function defined successfully!


## Apply Preprocession to Datasets

In [11]:
# Cell 6: Apply the preprocessing function to both training and evaluation datasets
# This creates the final tokenized datasets ready for training

print("Preprocessing training dataset...")
tokenized_train_dataset = train_dataset.map(
    preprocess_function,
    batched=True,
    remove_columns=train_dataset.column_names
)

print("Preprocessing evaluation dataset...")
tokenized_eval_dataset = eval_dataset.map(
    preprocess_function,
    batched=True,
    remove_columns=eval_dataset.column_names
)

print(f"Tokenized training samples: {len(tokenized_train_dataset)}")
print(f"Tokenized evaluation samples: {len(tokenized_eval_dataset)}")

# Verify the structure of tokenized data
print("\nTokenized sample structure:")
print(tokenized_train_dataset[0].keys())

Preprocessing training dataset...


Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Preprocessing evaluation dataset...
Tokenized training samples: 200
Tokenized evaluation samples: 50

Tokenized sample structure:
dict_keys(['input_ids', 'attention_mask', 'offset_mapping', 'overflow_to_sample_mapping', 'start_positions', 'end_positions'])


## Define Training Arguments

In [14]:
# Cell 7: Set up training arguments optimized for quick prototyping
# Small batch size and few epochs for fast iteration during development

# Check transformers version to use correct parameter names
import transformers
print(f"Transformers version: {transformers.__version__}")

# Use updated parameter names for newer transformers versions
training_args = TrainingArguments(
    output_dir="./results",              # Directory to save model checkpoints
    eval_strategy="epoch",               # Updated parameter name (was evaluation_strategy)
    learning_rate=3e-5,                  # Standard learning rate for BERT-like models
    per_device_train_batch_size=8,       # Small batch size for memory efficiency
    per_device_eval_batch_size=8,        # Small batch size for evaluation
    num_train_epochs=3,                  # Few epochs for quick training
    weight_decay=0.01,                   # L2 regularization
    logging_dir="./logs",                # Directory for storing logs
    logging_steps=10,                    # Log every 10 steps
    save_strategy="epoch",               # Save model at the end of each epoch
    load_best_model_at_end=True,         # Load the best model when training ends
    metric_for_best_model="eval_loss",   # Use evaluation loss to determine best model
    greater_is_better=False,             # Lower loss is better
    warmup_steps=10,                     # Number of warmup steps for learning rate scheduler
    report_to="none",                    # Disable wandb/tensorboard logging (updated from None)
    dataloader_pin_memory=False,         # Disable pin memory to avoid potential issues
    push_to_hub=False,                   # Don't push to Hugging Face Hub
    use_cpu=device.type == "cpu",        # Use CPU if no CUDA available
)

print(" Training arguments configured for fast prototyping!")
print(f" Total training steps: {len(tokenized_train_dataset) // training_args.per_device_train_batch_size * training_args.num_train_epochs}")
print(f" Evaluation strategy: {training_args.eval_strategy}")
print(f" Output directory: {training_args.output_dir}")

Transformers version: 4.54.1
 Training arguments configured for fast prototyping!
 Total training steps: 75
 Evaluation strategy: IntervalStrategy.EPOCH
 Output directory: ./results


## Setup Trainer with Data Collector

In [16]:
# Cell 8: Initialize the Trainer with PyTorch-compatible components
# First, let's ensure all components are properly imported

# Re-import components if needed (handles any import issues)
try:
    from transformers import Trainer
    print(" Trainer imported successfully")
except ImportError as e:
    print(f" Failed to import Trainer: {e}")
    print(" Trying alternative import...")
    
    # Alternative import approach
    import sys
    import importlib
    
    # Force reload transformers module
    if 'transformers' in sys.modules:
        importlib.reload(sys.modules['transformers'])
    
    try:
        from transformers import Trainer
        print(" Trainer imported successfully on retry")
    except ImportError as e:
        print(f" Still failed to import Trainer: {e}")
        print(" Using manual training loop instead...")

# Verify all required components are available
required_components = {
    'model': 'model',
    'training_args': 'training_args', 
    'tokenized_train_dataset': 'tokenized_train_dataset',
    'tokenized_eval_dataset': 'tokenized_eval_dataset',
    'tokenizer': 'tokenizer'
}

print("\n Checking required components:")
missing_components = []
for name, var_name in required_components.items():
    try:
        eval(var_name)
        print(f" {name}: Available")
    except NameError:
        print(f" {name}: Missing")
        missing_components.append(name)

if missing_components:
    print(f"\n  Missing components: {missing_components}")
    print("Please run the previous cells first!")
else:
    print("\n All components available!")

# Initialize the data collator (PyTorch-compatible)
try:
    from transformers import DefaultDataCollator
    data_collator = DefaultDataCollator(return_tensors="pt")
    print(" DefaultDataCollator initialized")
except ImportError:
    # Manual data collator as fallback
    print(" Using manual data collator...")
    
    def manual_data_collator(features):
        """Manual data collator for PyTorch tensors"""
        batch = {}
        first = features[0]
        
        for key in first.keys():
            if key in ['input_ids', 'attention_mask', 'start_positions', 'end_positions']:
                batch[key] = torch.stack([torch.tensor(f[key]) for f in features])
        
        return batch
    
    data_collator = manual_data_collator
    print(" Manual data collator created")

# Create the Trainer (with error handling)
try:
    trainer = Trainer(
        model=model,                           # Our DistilBERT model
        args=training_args,                    # Training configuration
        train_dataset=tokenized_train_dataset, # Tokenized training data
        eval_dataset=tokenized_eval_dataset,   # Tokenized evaluation data
        data_collator=data_collator,           # PyTorch data collator
        tokenizer=tokenizer,                   # Tokenizer for saving
    )
    
    print("\n Trainer initialized successfully!")
    print(" Ready to start training...")
    
except NameError as e:
    print(f"\n Trainer creation failed: {e}")
    print("\n Creating manual training loop as fallback...")
    
    # Manual training setup
    from torch.optim import AdamW
    from torch.utils.data import DataLoader
    
    # Create data loaders
    train_dataloader = DataLoader(
        tokenized_train_dataset, 
        batch_size=training_args.per_device_train_batch_size,
        shuffle=True,
        collate_fn=data_collator
    )
    
    eval_dataloader = DataLoader(
        tokenized_eval_dataset,
        batch_size=training_args.per_device_eval_batch_size,
        shuffle=False,
        collate_fn=data_collator
    )
    
    # Create optimizer
    optimizer = AdamW(model.parameters(), lr=training_args.learning_rate)
    
    print(" Manual training setup complete!")
    print(f" Train batches: {len(train_dataloader)}")
    print(f" Eval batches: {len(eval_dataloader)}")
    
except Exception as e:
    print(f"\n Unexpected error: {e}")
    print("Please check previous cells and try again.")

 Failed to import Trainer: TensorFlow is disabled
 Trying alternative import...
 Still failed to import Trainer: TensorFlow is disabled
 Using manual training loop instead...

 Checking required components:
 model: Available
 training_args: Available
 tokenized_train_dataset: Available
 tokenized_eval_dataset: Available
 tokenizer: Available

 All components available!
 DefaultDataCollator initialized

 Trainer creation failed: name 'Trainer' is not defined

 Creating manual training loop as fallback...
 Manual training setup complete!
 Train batches: 25
 Eval batches: 7


## Train the model (with fallback options)

In [18]:
# Cell 9: Train the model with multiple approaches
# This cell handles both Trainer-based and manual training

import time

print(" Starting training process...")
print("=" * 50)

# Check if Trainer was successfully created
if 'trainer' in locals():
    print(" Using Hugging Face Trainer for training...")
    
    try:
        # Start training with Trainer
        print(" Training started...")
        start_time = time.time()
        
        trainer.train()
        
        end_time = time.time()
        training_time = end_time - start_time
        
        print(f"\n Training completed in {training_time:.2f} seconds!")
        
        # Save the final model
        trainer.save_model("./final_model")
        tokenizer.save_pretrained("./final_model")
        print(" Model and tokenizer saved to './final_model'")
        
    except Exception as e:
        print(f" Training with Trainer failed: {e}")
        print(" Switching to manual training...")
        use_manual_training = True

else:
    print(" Using manual training loop...")
    use_manual_training = True

# Manual training loop (fallback)
if 'use_manual_training' in locals() or 'trainer' not in locals():
    print("\n" + "="*50)
    print("  MANUAL TRAINING LOOP")
    print("="*50)
    
    model.train()
    total_loss = 0
    step = 0
    
    print(f" Training for {training_args.num_train_epochs} epochs...")
    
    for epoch in range(int(training_args.num_train_epochs)):
        print(f"\n Epoch {epoch + 1}/{training_args.num_train_epochs}")
        epoch_loss = 0
        
        for batch_idx, batch in enumerate(train_dataloader):
            # Move batch to device
            batch = {k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in batch.items()}
            
            # Forward pass
            outputs = model(
                input_ids=batch['input_ids'],
                attention_mask=batch.get('attention_mask', None)
            )
            
            # Calculate loss (simplified QA loss)
            start_positions = batch.get('start_positions', torch.zeros(batch['input_ids'].size(0)).long().to(device))
            end_positions = batch.get('end_positions', torch.zeros(batch['input_ids'].size(0)).long().to(device))
            
            loss_fct = torch.nn.CrossEntropyLoss()
            start_loss = loss_fct(outputs['start_logits'], start_positions)
            end_loss = loss_fct(outputs['end_logits'], end_positions)
            loss = (start_loss + end_loss) / 2
            
            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            # Track loss
            epoch_loss += loss.item()
            total_loss += loss.item()
            step += 1
            
            # Log progress
            if (batch_idx + 1) % training_args.logging_steps == 0:
                avg_loss = epoch_loss / (batch_idx + 1)
                print(f"  Step {step}, Batch {batch_idx + 1}/{len(train_dataloader)}, Loss: {avg_loss:.4f}")
        
        # End of epoch
        avg_epoch_loss = epoch_loss / len(train_dataloader)
        print(f" Epoch {epoch + 1} completed. Average loss: {avg_epoch_loss:.4f}")
        
        # Simple evaluation
        if epoch == training_args.num_train_epochs - 1:  # Evaluate on last epoch
            print(" Running evaluation...")
            model.eval()
            eval_loss = 0
            
            with torch.no_grad():
                for eval_batch in eval_dataloader:
                    eval_batch = {k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in eval_batch.items()}
                    
                    eval_outputs = model(
                        input_ids=eval_batch['input_ids'],
                        attention_mask=eval_batch.get('attention_mask', None)
                    )
                    
                    # Calculate evaluation loss
                    eval_start_positions = eval_batch.get('start_positions', torch.zeros(eval_batch['input_ids'].size(0)).long().to(device))
                    eval_end_positions = eval_batch.get('end_positions', torch.zeros(eval_batch['input_ids'].size(0)).long().to(device))
                    
                    eval_start_loss = loss_fct(eval_outputs['start_logits'], eval_start_positions)
                    eval_end_loss = loss_fct(eval_outputs['end_logits'], eval_end_positions)
                    eval_batch_loss = (eval_start_loss + eval_end_loss) / 2
                    
                    eval_loss += eval_batch_loss.item()
            
            avg_eval_loss = eval_loss / len(eval_dataloader)
            print(f" Evaluation loss: {avg_eval_loss:.4f}")
            
            model.train()  # Back to training mode
    
    print(f"\n Manual training completed!")
    print(f" Final average loss: {total_loss / step:.4f}")
    
    # Save model manually
    print(" Saving model...")
    torch.save(model.state_dict(), "./final_model/pytorch_model.bin")
    tokenizer.save_pretrained("./final_model")
    
    # Save model config
    import json
    config = {
        "vocab_size": model.embedding.num_embeddings if hasattr(model, 'embedding') else 30522,
        "hidden_size": 768,
        "model_type": "custom_qa_model"
    }
    with open("./final_model/config.json", "w") as f:
        json.dump(config, f)
    
    print(" Model saved to './final_model'")

print("\n" + "="*50)
print(" TRAINING PROCESS COMPLETED!")
print("="*50)

 Starting training process...
 Using manual training loop...

  MANUAL TRAINING LOOP
 Training for 3 epochs...

 Epoch 1/3
  Step 10, Batch 10/25, Loss: 0.0251
  Step 20, Batch 20/25, Loss: 0.0402
 Epoch 1 completed. Average loss: 0.0438

 Epoch 2/3
  Step 35, Batch 10/25, Loss: 0.0343
  Step 45, Batch 20/25, Loss: 0.0454
 Epoch 2 completed. Average loss: 0.0386

 Epoch 3/3
  Step 60, Batch 10/25, Loss: 0.0227
  Step 70, Batch 20/25, Loss: 0.0192
 Epoch 3 completed. Average loss: 0.0196
 Running evaluation...
 Evaluation loss: 1.9928

 Manual training completed!
 Final average loss: 0.0340
 Saving model...
 Model saved to './final_model'

 TRAINING PROCESS COMPLETED!


## Evaluate the model

In [20]:
# Cell 10: Evaluate the trained model and compute metrics
# We'll compute basic metrics and also implement simple EM and F1 scores

# Run evaluation
eval_results = trainer.evaluate()

print("Evaluation Results:")
for key, value in eval_results.items():
    print(f"{key}: {value:.4f}")

# Simple evaluation functions for exact match and F1 score
def normalize_answer(s):
    """Lower text and remove punctuation, articles and extra whitespace."""
    def remove_articles(text):
        return re.sub(r'\b(a|an|the)\b', ' ', text)
    
    def white_space_fix(text):
        return ' '.join(text.split())
    
    def remove_punc(text):
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)
    
    def lower(text):
        return text.lower()
    
    return white_space_fix(remove_articles(remove_punc(lower(s))))

def exact_match_score(prediction, ground_truth):
    """Compute exact match score between prediction and ground truth."""
    return normalize_answer(prediction) == normalize_answer(ground_truth)

def f1_score(prediction, ground_truth):
    """Compute F1 score between prediction and ground truth."""
    prediction_tokens = normalize_answer(prediction).split()
    ground_truth_tokens = normalize_answer(ground_truth).split()
    
    if len(prediction_tokens) == 0 or len(ground_truth_tokens) == 0:
        return int(prediction_tokens == ground_truth_tokens)
    
    common_tokens = Counter(prediction_tokens) & Counter(ground_truth_tokens)
    num_same = sum(common_tokens.values())
    
    if num_same == 0:
        return 0
    
    precision = 1.0 * num_same / len(prediction_tokens)
    recall = 1.0 * num_same / len(ground_truth_tokens)
    f1 = (2 * precision * recall) / (precision + recall)
    
    return f1

print("\nCustom evaluation metrics computed successfully!")

NameError: name 'Trainer' is not defined

## Inference

In [21]:
# Cell 11: Inference cell - test the trained model with custom questions
# This demonstrates how to use the model for predictions on new data

def predict_answer(question, context):
    """
    Predict answer for a given question and context using the trained model.
    """
    # Tokenize the input
    inputs = tokenizer(
        question, 
        context, 
        return_tensors="pt",
        max_length=384,
        truncation=True,
        padding=True
    )
    
    # Move inputs to the same device as the model
    inputs = {key: value.to(device) for key, value in inputs.items()}
    
    # Make prediction
    with torch.no_grad():
        outputs = model(**inputs)
        start_logits = outputs.start_logits
        end_logits = outputs.end_logits
    
    # Get the most likely beginning and end of answer
    start_index = torch.argmax(start_logits)
    end_index = torch.argmax(end_logits)
    
    # Convert token indices to answer text
    input_ids = inputs["input_ids"][0]
    answer_tokens = input_ids[start_index:end_index + 1]
    answer = tokenizer.decode(answer_tokens, skip_special_tokens=True)
    
    # Get confidence scores
    start_score = torch.softmax(start_logits, dim=1)[0][start_index].item()
    end_score = torch.softmax(end_logits, dim=1)[0][end_index].item()
    confidence = (start_score + end_score) / 2
    
    return answer, confidence

# Test with a custom example
test_context = """
The Amazon rainforest, also known as Amazonia, is a moist broadleaf tropical rainforest 
in the Amazon biome that covers most of the Amazon basin of South America. This basin 
encompasses 7,000,000 square kilometers, of which 5,500,000 square kilometers are covered 
by the rainforest. This region includes territory belonging to nine nations.
"""

test_question = "How large is the Amazon basin?"

# Make prediction
predicted_answer, confidence = predict_answer(test_question, test_context)

print("=== INFERENCE EXAMPLE ===")
print(f"Context: {test_context[:200]}...")
print(f"Question: {test_question}")
print(f"Predicted Answer: {predicted_answer}")
print(f"Confidence: {confidence:.4f}")

# Test with another example
test_question_2 = "How many nations are included in the Amazon region?"
predicted_answer_2, confidence_2 = predict_answer(test_question_2, test_context)

print(f"\nQuestion 2: {test_question_2}")
print(f"Predicted Answer: {predicted_answer_2}")
print(f"Confidence: {confidence_2:.4f}")

print("\n=== MODEL READY FOR USE ===")
print("You can now use the predict_answer() function with any question and context!")

=== INFERENCE EXAMPLE ===
Context: 
The Amazon rainforest, also known as Amazonia, is a moist broadleaf tropical rainforest 
in the Amazon biome that covers most of the Amazon basin of South America. This basin 
encompasses 7,000,000 s...
Question: How large is the Amazon basin?
Predicted Answer: 7, 000, 000 square kilometers
Confidence: 0.9754

Question 2: How many nations are included in the Amazon region?
Predicted Answer: nine
Confidence: 0.9900

=== MODEL READY FOR USE ===
You can now use the predict_answer() function with any question and context!
