## 📋 Step 1: Check GPU & System Info

In [1]:
import torch
import psutil

print("üîç Checking GPU...")
if torch.cuda.is_available():
    gpu_name = torch.cuda.get_device_name(0)
    print(f"‚úÖ GPU Available: {gpu_name}")
    print(f"   CUDA Version: {torch.version.cuda}")
    print(f"   PyTorch Version: {torch.__version__}")
    print(f"   GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

    if 'A100' in gpu_name or 'V100' in gpu_name:
        print("\n‚ö° High-end GPU detected! Estimated time: 4-8 hours")
    elif 'T4' in gpu_name:
        print("\n‚è∞ T4 GPU detected. Estimated time: 10-12 hours")
    else:
        print(f"\n‚è∞ {gpu_name} detected. Estimated time: 8-10 hours")
else:
    print("‚ùå No GPU available!")
    print("   Please enable GPU: Runtime > Change runtime type > T4 GPU")
    raise RuntimeError("GPU required for training")

ram_gb = psutil.virtual_memory().total / 1e9
print(f"\nüíæ RAM Available: {ram_gb:.2f} GB")
if ram_gb < 12:
    print("‚ö†Ô∏è WARNING: Low RAM. Training may be slower.")

üîç Checking GPU...
‚úÖ GPU Available: Tesla T4
   CUDA Version: 12.6
   PyTorch Version: 2.9.0+cu126
   GPU Memory: 15.83 GB

‚è∞ T4 GPU detected. Estimated time: 10-12 hours

üíæ RAM Available: 13.61 GB


## 📦 Step 2: Install Dependencies

In [2]:
%%capture
# Install all required packages (output suppressed for cleaner display)
!pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install -q transformers>=4.35.0
!pip install -q datasets>=2.14.0
!pip install -q scikit-learn>=1.3.0
!pip install -q matplotlib seaborn pandas numpy
!pip install -q pillow>=10.0.0
!pip install -q timm>=0.9.0
!pip install -q accelerate>=0.24.0

print("‚úÖ All dependencies installed successfully!")

## 📂 Step 3: Clone Repository

In [None]:
import os

if not os.path.exists('/content/FarmFederate-Advisor'):
    print("üì• Cloning repository...")
    !git clone https://github.com/Solventerritory/FarmFederate-Advisor.git
    print("‚úÖ Repository cloned successfully")
else:
    print("‚úÖ Repository already exists")
    print("   Pulling latest changes...")
    !cd FarmFederate-Advisor && git pull

os.chdir('/content/FarmFederate-Advisor/backend')
print(f"\nüìÇ Working directory: {os.getcwd()}")

üì• Cloning repository...
Cloning into 'FarmFederate-Advisor'...
remote: Enumerating objects: 2132, done.[K
remote: Counting objects: 100% (24/24), done.[K
remote: Compressing objects: 100% (18/18), done.[K
remote: Total 2132 (delta 6), reused 18 (delta 6), pack-reused 2108 (from 1)[K
Receiving objects: 100% (2132/2132), 182.90 MiB | 16.06 MiB/s, done.
Resolving deltas: 100% (361/361), done.


## 💾 Step 4: Mount Google Drive (Optional - Recommended)

**Why mount Drive?**
- Saves results even if Colab disconnects
- Easy access to plots and metrics
- Can resume training if interrupted

In [None]:
from google.colab import drive

# Best-effort Drive mount: any failure is reported and recorded in
# DRIVE_MOUNTED so that training can still proceed without a backup target.
try:
    drive.mount('/content/drive')

    # Create the output root and its three sub-folders on Drive.
    output_dir = '/content/drive/MyDrive/FarmFederate_Results'
    os.makedirs(output_dir, exist_ok=True)
    for subfolder in ("checkpoints", "plots", "results"):
        os.makedirs(f"{output_dir}/{subfolder}", exist_ok=True)

    print(f"‚úÖ Google Drive mounted successfully")
    print(f"üìÅ Results will be saved to: {output_dir}")
except Exception as mount_error:
    print(f"‚ö†Ô∏è Could not mount Drive: {mount_error}")
    print("   Training will continue, but results won't be backed up to Drive")
    DRIVE_MOUNTED = False
else:
    # Reached only when mounting and directory creation both succeeded.
    DRIVE_MOUNTED = True

## 🚀 Step 5: Start Complete Training

**This will train:**
- 13 LLM models (T5, GPT-2, BERT, RoBERTa, ALBERT, DistilBERT, XLNet, ELECTRA, DeBERTa)
- 13 ViT models (ViT, DeiT, Swin, BEiT, ConvNeXt, ResNet, EfficientNet, RegNet, MobileNet)
- 2 VLM models (CLIP-Base, CLIP-Large)

**Each model trains in:**
1. **Federated mode** (5 clients, 10 rounds)
2. **Centralized mode** (10 epochs)

**Auto-saves after each model completes!**

---

**⚠️ Training will take 6-12 hours depending on GPU.**

**If interrupted:** Just re-run this cell - training will automatically resume from last checkpoint!

In [None]:
print("="*80)
print("üöÄ STARTING COMPLETE TRAINING PIPELINE")
print("="*80)
print("\nüìä Training Configuration:")
print("   - 28 models (13 LLM + 13 ViT + 2 VLM)")
print("   - 56 total runs (federated + centralized)")
print("   - Auto-checkpoint after each model")
print("   - Results saved continuously")
print("\n‚è±Ô∏è This will take several hours. Progress is logged below.")
print("   You can check results/ folder for completed models.\n")
print("="*80)

# Run the complete training script
!python federated_complete_training.py

print("\n" + "="*80)
print("‚úÖ TRAINING PIPELINE COMPLETED!")
print("="*80)

## 📊 Step 6: Copy Results to Google Drive

In [None]:
if DRIVE_MOUNTED:
    print("üì§ Copying results to Google Drive...\n")

    # Copy results
    !cp -r ../results/* /content/drive/MyDrive/FarmFederate_Results/results/ 2>/dev/null
    print("‚úÖ Results copied")

    # Copy plots
    !cp -r ../plots/* /content/drive/MyDrive/FarmFederate_Results/plots/ 2>/dev/null
    print("‚úÖ Plots copied")

    # Copy checkpoints
    !cp -r ../checkpoints/* /content/drive/MyDrive/FarmFederate_Results/checkpoints/ 2>/dev/null
    print("‚úÖ Checkpoints copied")

    print("\nüìÅ All results saved to: /content/drive/MyDrive/FarmFederate_Results/")
    print("   You can access these files from your Google Drive!")
else:
    print("‚ö†Ô∏è Drive not mounted. Results are in local Colab storage:")
    print("   - /content/FarmFederate-Advisor/results/")
    print("   - /content/FarmFederate-Advisor/plots/")
    print("   - /content/FarmFederate-Advisor/checkpoints/")

## 📈 Step 7: View Training Results Summary

In [None]:
import json
from IPython.display import display, Image, HTML

# Load results
try:
    with open('../results/all_results.json', 'r') as f:
        results = json.load(f)

    print("="*80)
    print("üìä TRAINING RESULTS SUMMARY")
    print("="*80)
    print(f"\n‚úÖ Successfully trained {len(results)} models\n")

    # Separate federated and centralized
    fed_results = [r for r in results if 'centralized' not in r['model_name']]
    cent_results = [r for r in results if 'centralized' in r['model_name']]

    # Top 5 models
    print("üèÜ TOP 5 FEDERATED MODELS:")
    print("-" * 80)
    sorted_fed = sorted(fed_results, key=lambda x: x['final_metrics']['f1_macro'], reverse=True)
    for i, r in enumerate(sorted_fed[:5], 1):
        model_type = r['config']['model_type'].upper()
        print(f"{i}. {r['config']['name']:30} ({model_type:3}) - F1: {r['final_metrics']['f1_macro']:.4f}, Acc: {r['final_metrics']['accuracy']:.4f}")

    print("\nüèÜ TOP 5 CENTRALIZED MODELS:")
    print("-" * 80)
    sorted_cent = sorted(cent_results, key=lambda x: x['final_metrics']['f1_macro'], reverse=True)
    for i, r in enumerate(sorted_cent[:5], 1):
        model_type = r['config']['model_type'].upper()
        base_name = r['config']['name']
        print(f"{i}. {base_name:30} ({model_type:3}) - F1: {r['final_metrics']['f1_macro']:.4f}, Acc: {r['final_metrics']['accuracy']:.4f}")

    # Overall comparison
    avg_fed_f1 = sum(r['final_metrics']['f1_macro'] for r in fed_results) / len(fed_results)
    avg_cent_f1 = sum(r['final_metrics']['f1_macro'] for r in cent_results) / len(cent_results)

    print("\nüìä OVERALL COMPARISON:")
    print("-" * 80)
    print(f"Average Federated F1:    {avg_fed_f1:.4f}")
    print(f"Average Centralized F1:  {avg_cent_f1:.4f}")
    print(f"Difference:              {avg_fed_f1 - avg_cent_f1:+.4f}")

    if avg_fed_f1 > avg_cent_f1:
        print("\n‚ú® Federated learning outperforms centralized on average!")
    else:
        gap = (avg_cent_f1 - avg_fed_f1) / avg_cent_f1 * 100
        print(f"\nüìâ Federated is {gap:.2f}% behind centralized (privacy-accuracy tradeoff)")

    print("\n" + "="*80)

except FileNotFoundError:
    print("‚ùå Results file not found. Make sure training completed successfully.")

## 📊 Step 8: Display Comparison Plots

In [None]:
import glob
from IPython.display import Image, display

print("üìä Displaying comparison plots...\n")

# Get all plot files
plot_files = sorted(glob.glob('../plots/*.png'))

if plot_files:
    print(f"Found {len(plot_files)} plots\n")

    # Display first 10 plots
    for i, plot_path in enumerate(plot_files[:10], 1):
        plot_name = plot_path.split('/')[-1]
        print(f"\n{'='*80}")
        print(f"Plot {i}: {plot_name}")
        print('='*80)
        display(Image(filename=plot_path, width=1000))

    if len(plot_files) > 10:
        print(f"\n... and {len(plot_files) - 10} more plots available in ../plots/")
else:
    print("‚ùå No plots found. Check if training completed successfully.")

## 📥 Step 9: Download Results (Optional)

In [None]:
# Zip all results for easy download
print("üì¶ Creating results archive...\n")

!cd /content/FarmFederate-Advisor && zip -r FarmFederate_Results.zip results plots checkpoints -q

print("‚úÖ Archive created: FarmFederate_Results.zip")
print("\nüì• Downloading...")

from google.colab import files
files.download('/content/FarmFederate-Advisor/FarmFederate_Results.zip')

print("\n‚úÖ Download started! Check your browser downloads.")

## 🔄 Troubleshooting & Tips

### If Training is Interrupted:
Just re-run Step 5. The system automatically:
- Detects completed models
- Skips them
- Continues from where it stopped

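### Out of Memory Error:
The batch sizes are optimized for a T4 GPU. If you get OOM errors:
1. Restart the runtime (this clears GPU memory and all notebook variables).
2. Re-run Steps 1-5 in order. Training will resume from the last checkpoint, and completed models are skipped.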

### Check Training Progress:
```python
# Run this in a new cell to check progress
!ls -lh ../results/*.json | wc -l
```

### Access Results from Drive:
After training, open https://drive.google.com/drive/my-drive and look in the `FarmFederate_Results` folder
(mounted in Colab at `/content/drive/MyDrive/FarmFederate_Results`).