In [None]:
# ================================================
# ü§ñ BERT Fake News Classifier Training (ULTRA-CONSERVATIVE VERSION)
# 1 Epoch + Lower Learning Rate to Prevent Overfitting
# ================================================

# ================================================
# STEP 0: DISABLE WANDB FIRST (PREVENT HANGING)
# ================================================
import os
# Switch the Weights & Biases integration off through its environment
# variables before any training library is imported further down.
# NOTE(review): the section header says this is to prevent the run from
# hanging — presumably on an interactive W&B login prompt; confirm.
os.environ["WANDB_DISABLED"] = "true"
os.environ["WANDB_MODE"] = "disabled"

print("üöÄ Setting up Google Colab environment...")

# ================================================
# STEP 1: SETUP AND ENVIRONMENT
# ================================================

# Install required packages
!pip install -q transformers torch accelerate pandas scikit-learn matplotlib seaborn

# Mount Google Drive to access your data
from google.colab import drive
drive.mount('/content/drive')

# Import all necessary libraries
import torch
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
from datetime import datetime
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix

from torch.utils.data import Dataset, DataLoader
from transformers import (
    BertTokenizer,
    BertForSequenceClassification,
    TrainingArguments,
    Trainer,
    EarlyStoppingCallback
)

print("‚úÖ Environment setup complete!")

# ================================================
# STEP 2: CHECK GPU AVAILABILITY
# ================================================
# Query CUDA once and reuse the answer for both the device choice and the report.
_cuda_ready = torch.cuda.is_available()
device = torch.device('cuda') if _cuda_ready else torch.device('cpu')

_hw_rule = "=" * 40
print("\nüñ•Ô∏è  Hardware Information:")
print(_hw_rule)
print(f"Device: {device}")

if not _cuda_ready:
    print("‚ö†Ô∏è No GPU detected - training will be very slow on CPU")
else:
    # Describe the first visible GPU; total_memory is reported in bytes.
    gpu_name = torch.cuda.get_device_name(0)
    gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f"GPU: {gpu_name}")
    print(f"GPU Memory: {gpu_memory:.1f}GB")
    print("üéØ Ready for fast BERT training!")

print(_hw_rule)

# ================================================
# STEP 3: LOAD YOUR PREPROCESSED DATA
# ================================================
print("\nüìä Loading your data from Google Drive...")

# Project root on the mounted Drive; change this to match your own layout.
BASE_PATH = "/content/drive/MyDrive/fake_news_project"
DATA_PATH = BASE_PATH + "/data"

try:
    # The three preprocessed splits, read in train / val / test order.
    train_df = pd.read_csv(DATA_PATH + "/train_processed.csv")
    val_df = pd.read_csv(DATA_PATH + "/val_processed.csv")
    test_df = pd.read_csv(DATA_PATH + "/test_processed.csv")

    n_train, n_val, n_test = len(train_df), len(val_df), len(test_df)

    print("‚úÖ Data loaded successfully!")
    print(f"   üìà Training: {n_train:,} samples")
    print(f"   üìä Validation: {n_val:,} samples")
    print(f"   üìã Test: {n_test:,} samples")
    print(f"   üìö Total: {n_train + n_val + n_test:,} samples")

    # Show the schema so a wrong file is obvious at a glance.
    print("\nüìã Data Structure Check:")
    print(f"   Columns: {list(train_df.columns)}")

    # Per-class row counts of the training split, keyed by label value.
    train_labels = dict(train_df['label'].value_counts())
    print(f"   Training labels: {train_labels}")

    # Anything other than exactly the two labels 0 and 1 only triggers a warning.
    if set(train_labels) != {0, 1}:
        print("‚ö†Ô∏è Warning: Unexpected label format detected")
    else:
        print("‚úÖ Perfect! Binary classification data detected (0=REAL, 1=FAKE)")

except FileNotFoundError:
    # Spell out the expected file names, then let the error propagate.
    print("‚ùå ERROR: Could not find data files!")
    print(f"Expected location: {DATA_PATH}")
    print("Please ensure you uploaded your CSV files to the correct Google Drive folder:")
    for position, csv_name in enumerate(
        ("train_processed.csv", "val_processed.csv", "test_processed.csv"), 1
    ):
        print(f"  {position}. {csv_name}")
    raise
except Exception as err:
    print(f"‚ùå ERROR loading data: {err}")
    raise

# ================================================
# STEP 4: DATASET CLASS FOR BERT
# ================================================
class FakeNewsDataset(Dataset):
    """Map-style PyTorch dataset that tokenizes news texts on demand for BERT.

    Args:
        texts: Iterable of article texts (list, tuple, pandas Series, ...).
        labels: Iterable of integer-convertible class ids, aligned with ``texts``.
        tokenizer: Callable invoked as
            ``tokenizer(text, truncation=True, padding='max_length',
            max_length=..., return_tensors='pt')`` that returns a mapping with
            ``'input_ids'`` and ``'attention_mask'`` tensors.
        max_length: Length every sample is truncated/padded to.

    Raises:
        ValueError: If ``texts`` and ``labels`` do not have the same length.
    """

    def __init__(self, texts, labels, tokenizer, max_length=512):
        # Materialise both inputs as lists so that __getitem__ always indexes
        # by position; a pandas Series with a non-default index would
        # otherwise be looked up by index label.
        self.texts = list(texts)
        self.labels = list(labels)
        if len(self.texts) != len(self.labels):
            raise ValueError(
                f"texts and labels must have the same length "
                f"(got {len(self.texts)} and {len(self.labels)})"
            )
        self.tokenizer = tokenizer
        self.max_length = max_length

        print(f"üì¶ Dataset created: {len(self.texts)} samples, max_length={max_length}")

    def __len__(self):
        """Return the number of samples."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize sample ``idx`` and return the tensors the model consumes.

        Returns:
            dict with 1-D ``'input_ids'`` and ``'attention_mask'`` tensors and
            a scalar ``torch.long`` ``'labels'`` tensor.
        """
        raw_text = self.texts[idx]
        # Missing values (None, or the float NaN pandas uses for empty CSV
        # cells) become an empty string; str() would otherwise feed the
        # literal words "None" / "nan" to the model.
        is_missing = raw_text is None or (
            isinstance(raw_text, float) and raw_text != raw_text
        )
        text = "" if is_missing else str(raw_text)
        label = int(self.labels[idx])

        # Tokenize text for BERT
        encoding = self.tokenizer(
            text,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt'
        )

        # The tokenizer returns a batch of one; flatten drops that batch axis.
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }

# ================================================
# STEP 5: ULTRA-CONSERVATIVE TRAINING CONFIGURATION
# ================================================
print("\n‚öôÔ∏è Setting up ULTRA-CONSERVATIVE training configuration...")

# Single source of truth for the run's hyperparameters. The values are chosen
# to regularise hard: one epoch, a small learning rate, heavy weight decay,
# tight gradient clipping and frequent evaluation with early stopping.
CONFIG = dict(
    model_name='bert-base-uncased',
    max_length=512,
    batch_size=16,                # per-device batch size
    learning_rate=5e-6,           # 4x smaller than the earlier 2e-5
    num_epochs=1,                 # a single pass over the training data
    warmup_steps=100,
    weight_decay=0.1,             # 10x the earlier 0.01
    adam_epsilon=1e-8,
    max_grad_norm=0.5,            # gradient-clipping threshold
    eval_steps=200,               # evaluate every 200 steps
    save_steps=200,               # checkpoint every 200 steps
    logging_steps=50,             # log every 50 steps
    early_stopping_patience=1,    # stop after one evaluation without improvement
    output_dir='/content/bert_training_temp',
)

print("üìã ULTRA-CONSERVATIVE Training Configuration:")
print("üéØ Designed to prevent any overfitting!")
for setting in CONFIG:
    print(f"   {setting}: {CONFIG[setting]}")

# ================================================
# STEP 6: INITIALIZE BERT MODEL AND TOKENIZER
# ================================================
print(f"\nü§ñ Loading BERT model: {CONFIG['model_name']}")

# Tokenizer that belongs to the chosen checkpoint.
tokenizer = BertTokenizer.from_pretrained(CONFIG['model_name'])
print("‚úÖ Tokenizer loaded successfully")

# Keyword overrides handed to from_pretrained alongside the checkpoint name:
# two output classes (Real=0 / Fake=1), no attention or hidden-state outputs,
# and both dropout probabilities set to 0.3 as extra regularisation.
_bert_overrides = dict(
    num_labels=2,
    output_attentions=False,
    output_hidden_states=False,
    hidden_dropout_prob=0.3,
    attention_probs_dropout_prob=0.3,
)
model = BertForSequenceClassification.from_pretrained(
    CONFIG['model_name'], **_bert_overrides
)

# Place the weights on the device selected earlier (GPU when available).
model.to(device)

# Count parameters in a single sweep over the model.
total_params = 0
trainable_params = 0
for _param in model.parameters():
    _count = _param.numel()
    total_params += _count
    if _param.requires_grad:
        trainable_params += _count

print("‚úÖ BERT model loaded successfully")
print(f"   üìä Total parameters: {total_params:,}")
print(f"   üéØ Trainable parameters: {trainable_params:,}")
# Size estimate assumes 4 bytes per parameter.
print(f"   üíæ Model size: ~{total_params * 4 / 1e6:.0f}MB")
print("   üõ°Ô∏è Extra dropout added to prevent overfitting")

# ================================================
# STEP 7: CREATE PYTORCH DATASETS
# ================================================
print("\nüì¶ Creating PyTorch datasets...")


def _texts_and_labels(frame):
    """Return the 'text' and 'label' columns of *frame* as plain lists."""
    return frame['text'].tolist(), frame['label'].tolist()


# Pull the raw columns out of each split.
train_texts, train_labels = _texts_and_labels(train_df)
val_texts, val_labels = _texts_and_labels(val_df)
test_texts, test_labels = _texts_and_labels(test_df)

# Wrap each split; all three share the tokenizer and sequence length.
_seq_len = CONFIG['max_length']
train_dataset = FakeNewsDataset(train_texts, train_labels, tokenizer, _seq_len)
val_dataset = FakeNewsDataset(val_texts, val_labels, tokenizer, _seq_len)
test_dataset = FakeNewsDataset(test_texts, test_labels, tokenizer, _seq_len)

print("‚úÖ All datasets created successfully")

# ================================================
# STEP 8: ULTRA-CONSERVATIVE TRAINING ARGUMENTS
# ================================================
print("\n‚öôÔ∏è Configuring ultra-conservative training arguments...")

# Translate CONFIG into Hugging Face TrainingArguments. Evaluation and
# checkpointing use the same step interval so that the best checkpoint
# (by eval accuracy) can be reloaded when training ends.
training_args = TrainingArguments(
    output_dir=CONFIG['output_dir'],
    num_train_epochs=CONFIG['num_epochs'],  # JUST 1 epoch
    per_device_train_batch_size=CONFIG['batch_size'],
    per_device_eval_batch_size=CONFIG['batch_size'],
    learning_rate=CONFIG['learning_rate'],  # Much smaller learning rate
    weight_decay=CONFIG['weight_decay'],    # Strong regularization
    # Previously CONFIG['adam_epsilon'] was declared (and written to the
    # metadata) but never applied; wire it through so editing it has an effect.
    adam_epsilon=CONFIG['adam_epsilon'],
    warmup_steps=CONFIG['warmup_steps'],    # Less warmup
    eval_strategy="steps",
    eval_steps=CONFIG['eval_steps'],        # Very frequent evaluation
    save_strategy="steps",
    save_steps=CONFIG['save_steps'],        # Very frequent saving
    logging_steps=CONFIG['logging_steps'],  # Very frequent logging
    load_best_model_at_end=True,           # Load the best checkpoint
    metric_for_best_model="eval_accuracy",
    greater_is_better=True,
    save_total_limit=2,
    dataloader_num_workers=2,
    dataloader_pin_memory=True,
    report_to=[],  # Disable all external logging (no W&B)
    seed=42,
    fp16=torch.cuda.is_available(),  # Use mixed precision if GPU available
    gradient_accumulation_steps=2,   # Two micro-batches per optimizer step
    max_grad_norm=CONFIG['max_grad_norm'],  # Strong gradient clipping
)

print("‚úÖ Ultra-conservative training arguments configured")

# ================================================
# STEP 9: METRICS COMPUTATION
# ================================================
def compute_metrics(eval_pred):
    """Score one evaluation pass for the Trainer.

    Args:
        eval_pred: Pair ``(predictions, labels)``. The predicted class of
            each row is the argmax of ``predictions`` along axis 1.

    Returns:
        dict with ``'accuracy'`` and the weighted-average ``'f1'``.
    """
    scores, labels = eval_pred
    predicted_classes = np.argmax(scores, axis=1)
    return dict(
        accuracy=accuracy_score(labels, predicted_classes),
        f1=f1_score(labels, predicted_classes, average='weighted'),
    )

# ================================================
# STEP 10: INITIALIZE TRAINER
# ================================================
print("\nüèãÔ∏è Setting up ultra-conservative BERT trainer...")

# Early stopping uses the patience configured in CONFIG.
_early_stop = EarlyStoppingCallback(
    early_stopping_patience=CONFIG['early_stopping_patience']
)

# Collect the Trainer wiring in one place: model, arguments, the train and
# validation splits, the metric function and the early-stopping callback.
_trainer_kwargs = dict(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
    callbacks=[_early_stop],
)
trainer = Trainer(**_trainer_kwargs)

print("‚úÖ Ultra-conservative trainer initialized and ready")

# ================================================
# STEP 11: START ULTRA-CONSERVATIVE TRAINING! üöÄ
# ================================================
_train_rule = "=" * 70

# Opening banner.
print("\n" + _train_rule)
print("üöÄ STARTING ULTRA-CONSERVATIVE BERT TRAINING (1 EPOCH)!")
print(_train_rule)
for _notice in (
    "üìÖ Expected duration: 6-10 minutes on Tesla T4 GPU",
    "üéØ Training for ONLY 1 epoch with tiny learning rate",
    "üõ°Ô∏è Maximum overfitting prevention enabled!",
    "‚òï Time for a quick coffee!",
):
    print(_notice)
print(_train_rule)

# Wall-clock start, used for the duration report below.
training_start_time = datetime.now()
print(f"üïê Training started at: {training_start_time:%H:%M:%S}")

try:
    # Run the fine-tuning loop.
    train_result = trainer.train()

    # Wall-clock end and elapsed seconds.
    training_end_time = datetime.now()
    training_duration = (training_end_time - training_start_time).total_seconds()

    print("\n" + _train_rule)
    print("üéâ ULTRA-CONSERVATIVE TRAINING COMPLETED!")
    print(_train_rule)
    print(f"üïê Training finished at: {training_end_time:%H:%M:%S}")
    print(f"‚è±Ô∏è Total training time: {training_duration/60:.1f} minutes")
    print(f"üìä Final training loss: {train_result.training_loss:.4f}")
    print(_train_rule)

except Exception as err:
    # Report the failure, then let it propagate so the cell stops.
    print(f"\n‚ùå TRAINING FAILED: {err}")
    raise

# ================================================
# STEP 12: EVALUATE ON TEST SET
# ================================================
print("\nüìä Evaluating trained model on test set...")

# Run inference over the test set exactly once. predict() returns the logits,
# the labels and a metrics dict; asking for the "eval" key prefix gives that
# dict the same keys evaluate() would produce (e.g. 'eval_loss'), so a separate
# evaluate() pass over the same data is not needed.
predictions = trainer.predict(test_dataset, metric_key_prefix="eval")
test_results = predictions.metrics

# Detailed predictions for analysis
predicted_labels = np.argmax(predictions.predictions, axis=1)
predicted_probs = torch.softmax(torch.tensor(predictions.predictions), dim=1)
true_labels = predictions.label_ids

# Calculate comprehensive metrics
test_accuracy = accuracy_score(true_labels, predicted_labels)
test_f1 = f1_score(true_labels, predicted_labels, average='weighted')
test_f1_macro = f1_score(true_labels, predicted_labels, average='macro')

# Confusion matrix. Pinning labels=[0, 1] guarantees a 2x2 result even if one
# class never occurs in the labels or predictions; the [row, col] lookups
# below would otherwise raise an IndexError.
conf_matrix = confusion_matrix(true_labels, predicted_labels, labels=[0, 1])

# Confidence analysis: the probability assigned to the predicted class,
# bucketed into >0.9, (0.7, 0.9] and <=0.7.
max_confidences = torch.max(predicted_probs, dim=1)[0].numpy()
avg_confidence = np.mean(max_confidences)
high_conf_count = np.sum(max_confidences > 0.9)
medium_conf_count = np.sum((max_confidences > 0.7) & (max_confidences <= 0.9))
low_conf_count = np.sum(max_confidences <= 0.7)

# Display results
print("üéØ FINAL TEST SET RESULTS:")
print("="*50)
print(f"üìà Accuracy: {test_accuracy:.4f} ({test_accuracy*100:.2f}%)")
print(f"üìä F1-Score (Weighted): {test_f1:.4f}")
print(f"üìä F1-Score (Macro): {test_f1_macro:.4f}")
print(f"üìâ Test Loss: {test_results['eval_loss']:.4f}")

# Heuristic flag only: very high test accuracy is treated as suspicious here.
if test_accuracy > 0.98:
    print("‚ö†Ô∏è WARNING: Accuracy > 98% - possible overfitting detected!")
    print("üí° Consider reducing learning rate further or using fewer training steps")
elif test_accuracy > 0.95:
    print("‚ö†Ô∏è CAUTION: Accuracy > 95% - monitor for overfitting signs")
else:
    print("‚úÖ GOOD: Accuracy in healthy range - likely good generalization")

# Rows are true classes, columns are predicted classes (0=REAL, 1=FAKE).
print(f"\nüìã Confusion Matrix:")
print(f"   True REAL ‚Üí Predicted REAL: {conf_matrix[0,0]:,}")
print(f"   True REAL ‚Üí Predicted FAKE: {conf_matrix[0,1]:,}")
print(f"   True FAKE ‚Üí Predicted REAL: {conf_matrix[1,0]:,}")
print(f"   True FAKE ‚Üí Predicted FAKE: {conf_matrix[1,1]:,}")

print(f"\nüéØ Confidence Analysis:")
print(f"   Average confidence: {avg_confidence:.1%}")
print(f"   High confidence (>90%): {high_conf_count:,}/{len(predicted_labels):,} ({high_conf_count/len(predicted_labels)*100:.1f}%)")
print(f"   Medium confidence (70-90%): {medium_conf_count:,}/{len(predicted_labels):,} ({medium_conf_count/len(predicted_labels)*100:.1f}%)")
print(f"   Low confidence (<70%): {low_conf_count:,}/{len(predicted_labels):,} ({low_conf_count/len(predicted_labels)*100:.1f}%)")

# ================================================
# STEP 13: SAVE MODEL TO GOOGLE DRIVE
# ================================================
print("\nüíæ Saving trained model to Google Drive...")

# Destination folder inside the Drive project directory.
model_save_path = BASE_PATH + "/trained_model"

# Write the model first, then the tokenizer files, into the same folder.
for _persist in (trainer.save_model, tokenizer.save_pretrained):
    _persist(model_save_path)

print(f"‚úÖ Model saved to: {model_save_path}")

# ================================================
# STEP 14: CREATE COMPREHENSIVE METADATA
# ================================================
print("\nüìã Creating training metadata...")

# The report is assembled section by section and merged in this order, which
# fixes the key order of the resulting JSON document.

# Performance metrics
_performance = {
    'test_accuracy': float(test_accuracy),
    'test_f1_score_weighted': float(test_f1),
    'test_f1_score_macro': float(test_f1_macro),
    'test_loss': float(test_results['eval_loss']),
    'training_loss': float(train_result.training_loss),
}

# Confidence analysis
_confidence = {
    'average_confidence': float(avg_confidence),
    'high_confidence_predictions': int(high_conf_count),
    'medium_confidence_predictions': int(medium_conf_count),
    'low_confidence_predictions': int(low_conf_count),
}

# Training details
_run_details = {
    'training_time_minutes': training_duration / 60,
    'training_start_time': training_start_time.isoformat(),
    'training_end_time': training_end_time.isoformat(),
    'training_environment': 'Google_Colab',
    'gpu_used': torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU',
    'training_approach': 'ultra_conservative_anti_overfitting',
}

# Model configuration (full CONFIG plus the most-used values at top level)
_model_setup = {
    'model_config': CONFIG,
    'model_name': CONFIG['model_name'],
    'max_length': CONFIG['max_length'],
    'batch_size': CONFIG['batch_size'],
    'learning_rate': CONFIG['learning_rate'],
    'num_epochs': CONFIG['num_epochs'],
}

# Model statistics (size estimate assumes 4 bytes per parameter)
_model_stats = {
    'total_parameters': total_params,
    'trainable_parameters': trainable_params,
    'model_size_mb': total_params * 4 / 1e6,
}

# Dataset information and per-class breakdown
_split_sizes = {
    'train': len(train_dataset),
    'validation': len(val_dataset),
    'test': len(test_dataset),
}
_split_sizes['total'] = (
    _split_sizes['train'] + _split_sizes['validation'] + _split_sizes['test']
)
_data_and_classes = {
    'dataset_sizes': _split_sizes,
    'confusion_matrix': conf_matrix.tolist(),
    'class_labels': ['REAL', 'FAKE'],
}

# Anti-overfitting measures taken
_safeguards = {
    'overfitting_prevention': {
        'epochs_used': CONFIG['num_epochs'],
        'learning_rate_reduction': 'reduced_from_2e-5_to_5e-6',
        'weight_decay_increased': 'increased_from_0.01_to_0.1',
        'dropout_increased': 'added_extra_dropout_0.3',
        'early_stopping': f"patience_{CONFIG['early_stopping_patience']}",
        'gradient_clipping': CONFIG['max_grad_norm'],
    }
}

training_metadata = {
    **_performance,
    **_confidence,
    **_run_details,
    **_model_setup,
    **_model_stats,
    **_data_and_classes,
    **_safeguards,
}

# Store the report next to the saved model on Google Drive.
metadata_file = model_save_path + "/training_metadata.json"
with open(metadata_file, 'w') as report_handle:
    json.dump(training_metadata, report_handle, indent=2)

print("‚úÖ Training metadata saved")

# ================================================
# STEP 15: CRITICAL MODEL TEST WITH EXPECTED SAMPLES
# ================================================
print("\nüß™ Testing trained model with sample predictions...")
print("üéØ These should predict CORRECTLY if overfitting is fixed!")

# Hand-written (text, expected label) pairs used as a quick sanity check.
test_samples = [
    ("Scientists at Harvard University published groundbreaking research in Nature magazine about climate change impacts.", "REAL"),
    ("SHOCKING: Local doctors hate this one weird trick that cures everything! Click now to discover the secret!", "FAKE"),
    ("The Federal Reserve announced new interest rate policies following economic indicators released this quarter.", "REAL"),
    ("Scientists at MIT have developed a revolutionary new battery technology that could charge electric vehicles in just 30 seconds.", "REAL"),
    ("You won't believe what happened next! This mom's simple trick will change your life forever!", "FAKE")
]

# Inference mode: disables dropout.
model.eval()
print("üìù Sample Predictions (Overfitting Test):")
print("-" * 80)

total_samples = len(test_samples)
correct_predictions = 0

for sample_no, (sample_text, expected) in enumerate(test_samples, 1):
    # Encode the single sample and move every tensor to the model's device.
    encoded = tokenizer(
        sample_text,
        return_tensors='pt',
        truncation=True,
        padding=True,
        max_length=CONFIG['max_length']
    )
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    # Forward pass without gradient tracking.
    with torch.no_grad():
        class_probs = torch.softmax(model(**encoded).logits, dim=1)
        predicted_class = class_probs.argmax(dim=1).item()
        confidence = class_probs.max().item()

    # Class id 1 means FAKE; anything else is reported as REAL.
    pred_text = 'REAL' if predicted_class != 1 else 'FAKE'
    hit = pred_text == expected
    if hit:
        correct_predictions += 1
    verdict_mark = "‚úÖ" if hit else "‚ùå"

    print(f"Sample {sample_no}: {verdict_mark}")
    print(f"   Text: {sample_text[:70]}...")
    print(f"   Expected: {expected} ‚Üí Predicted: {pred_text} (Confidence: {confidence:.1%})")
    print()

# Overall sample test results
sample_accuracy = correct_predictions / total_samples
print(f"üéØ Sample Test Results: {correct_predictions}/{total_samples} correct ({sample_accuracy:.1%})")

if sample_accuracy < 0.6:
    print("‚ö†Ô∏è CONCERN: Model may still be overfitting or undertrained")
elif sample_accuracy < 0.8:
    print("‚úÖ GOOD: Model shows reasonable generalization")
else:
    print("üéâ EXCELLENT: Model appears to be generalizing well!")

# ================================================
# STEP 16: FINAL SUMMARY
# ================================================
print("\n" + "="*80)
print("üéâ ULTRA-CONSERVATIVE BERT TRAINING COMPLETED!")
print("="*80)

print(f"üìä FINAL MODEL PERFORMANCE:")
print(f"   üéØ Test Accuracy: {test_accuracy:.1%} (Target: 85-95%)")
print(f"   üìà F1-Score: {test_f1:.3f}")
print(f"   ‚è±Ô∏è Training Time: {training_duration/60:.1f} minutes")
print(f"   üß† Model Parameters: {total_params:,}")
print(f"   üíæ Model Size: ~{total_params * 4 / 1e6:.0f}MB")
print(f"   üéØ Epochs: {CONFIG['num_epochs']} (ultra-conservative)")
print(f"   üìö Sample Test Accuracy: {sample_accuracy:.1%}")

# Health check: flags a run whose test accuracy is suspiciously close to
# perfect or whose hand-written sample predictions were mostly wrong.
if test_accuracy < 0.99 and sample_accuracy >= 0.6:
    print(f"\nüéâ SUCCESS INDICATORS:")
    print(f"   ‚úÖ Test accuracy < 99% (avoiding overfitting)")
    print(f"   ‚úÖ Sample predictions reasonable")
    print(f"   ‚úÖ Model appears to generalize well")
else:
    print(f"\n‚ö†Ô∏è POTENTIAL ISSUES:")
    if test_accuracy >= 0.99:
        print(f"   ‚ùå Test accuracy ‚â• 99% (possible overfitting)")
    if sample_accuracy < 0.6:
        print(f"   ‚ùå Poor sample predictions (model issues)")

# Report the weights file that is actually on disk instead of a hard-coded
# name: transformers releases that accept `eval_strategy` write
# model.safetensors by default, older ones wrote pytorch_model.bin.
_weights_file = next(
    (
        candidate
        for candidate in ("model.safetensors", "pytorch_model.bin")
        if os.path.exists(os.path.join(model_save_path, candidate))
    ),
    "model.safetensors",
)

print(f"\nüìÅ FILES SAVED TO GOOGLE DRIVE:")
print(f"   üìÇ Model Directory: {model_save_path}")
print(f"   ü§ñ Model File: {_weights_file} (~440MB)")
print(f"   ‚öôÔ∏è Config Files: config.json, tokenizer files")
print(f"   üìä Training Report: training_metadata.json")

print(f"\nüì• DOWNLOAD INSTRUCTIONS:")
print(f"   1. üì± Open Google Drive ‚Üí fake_news_project/trained_model/")
print(f"   2. üì¶ Right-click 'trained_model' folder ‚Üí Download")
print(f"   3. üìÇ Extract to your MacBook: models/bert_fake_news/")
print(f"   4. üß™ Test with: python -m agents.bert_classifier")

print(f"\nüéØ EXPECTED LOCAL TEST RESULTS:")
print(f"   ‚Ä¢ MIT battery article should predict: REAL")
print(f"   ‚Ä¢ Test accuracy should be: 85-95% (not 100%)")
print(f"   ‚Ä¢ Predictions should be reasonable and correct")

print("\nüèÜ Your anti-overfitting BERT model is ready!")
print("="*80)



üöÄ Setting up Google Colab environment...
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m363.4/363.4 MB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m13.8/13.8 MB[0m [31m20.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m24.6/24.6 MB[0m [31m17.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m883.7/883.7 kB[0m [31m18.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m664.8/664.8 MB

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

‚úÖ Tokenizer loaded successfully


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ BERT model loaded successfully
   üìä Total parameters: 109,483,778
   üéØ Trainable parameters: 109,483,778
   üíæ Model size: ~438MB
   üõ°Ô∏è Extra dropout added to prevent overfitting

üì¶ Creating PyTorch datasets...
üì¶ Dataset created: 27416 samples, max_length=512
üì¶ Dataset created: 5876 samples, max_length=512
üì¶ Dataset created: 5876 samples, max_length=512
‚úÖ All datasets created successfully

‚öôÔ∏è Configuring ultra-conservative training arguments...
‚úÖ Ultra-conservative training arguments configured

üèãÔ∏è Setting up ultra-conservative BERT trainer...
‚úÖ Ultra-conservative trainer initialized and ready

üöÄ STARTING ULTRA-CONSERVATIVE BERT TRAINING (1 EPOCH)!
üìÖ Expected duration: 6-10 minutes on Tesla T4 GPU
üéØ Training for ONLY 1 epoch with tiny learning rate
üõ°Ô∏è Maximum overfitting prevention enabled!
‚òï Time for a quick coffee!
üïê Training started at: 18:27:26


Step,Training Loss,Validation Loss,Accuracy,F1
200,0.2169,0.452527,0.792716,0.78382
400,0.0319,0.240435,0.921375,0.920886


Step,Training Loss,Validation Loss,Accuracy,F1
200,0.2169,0.452527,0.792716,0.78382
400,0.0319,0.240435,0.921375,0.920886
600,0.0102,0.227501,0.941627,0.941427
800,0.0129,0.288567,0.929033,0.928674



üéâ ULTRA-CONSERVATIVE TRAINING COMPLETED!
üïê Training finished at: 18:41:54
‚è±Ô∏è Total training time: 14.5 minutes
üìä Final training loss: 0.1439

üìä Evaluating trained model on test set...


üéØ FINAL TEST SET RESULTS:
üìà Accuracy: 0.9479 (94.79%)
üìä F1-Score (Weighted): 0.9478
üìä F1-Score (Macro): 0.9478
üìâ Test Loss: 0.2060
‚úÖ GOOD: Accuracy in healthy range - likely good generalization

üìã Confusion Matrix:
   True REAL ‚Üí Predicted REAL: 2,938
   True REAL ‚Üí Predicted FAKE: 0
   True FAKE ‚Üí Predicted REAL: 306
   True FAKE ‚Üí Predicted FAKE: 2,632

üéØ Confidence Analysis:
   Average confidence: 98.6%
   High confidence (>90%): 5,658/5,876 (96.3%)
   Medium confidence (70-90%): 141/5,876 (2.4%)
   Low confidence (<70%): 77/5,876 (1.3%)

üíæ Saving trained model to Google Drive...
‚úÖ Model saved to: /content/drive/MyDrive/fake_news_project/trained_model

üìã Creating training metadata...
‚úÖ Training metadata saved

üß™ Testing trained model with sample predictions...
üéØ These should predict CORRECTLY if overfitting is fixed!
üìù Sample Predictions (Overfitting Test):
-----------------------------------------------------------------------------