# 🚀 Hyperspectral Plastic Classification Pipeline
## Google Colab Pro+ Edition

This notebook runs the complete pipeline for hyperspectral plastic classification.

**Features:**
- 🎯 6 Model Architectures (CNN, ResNet, Deep, Inception, LSTM, Transformer)
- ⚡ GPU Acceleration (CUDA)
- 📊 Complete Training & Inference Pipeline
- 💾 Automatic Results Download

**Requirements:**
- Google Colab Pro+ (for best performance)
- Data uploaded to Google Drive, inside `MyDrive/plastic-type-classification` (the folder Step 1 switches into)
- ~20-30 GB free space on Drive

## 📋 Step 1: Setup & Mount Google Drive

In [None]:
import os

from google.colab import drive

# Make Google Drive visible under /content/drive.
drive.mount('/content/drive')

# The remaining cells address data and outputs with relative paths,
# so switch into the project folder once here.
os.chdir('/content/drive/MyDrive/plastic-type-classification')
print("✓ Working directory:", os.getcwd())

## 🎮 Step 2: Check GPU Availability

In [None]:
import torch

# Report the PyTorch build and whether a CUDA device is visible to it.
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

if not torch.cuda.is_available():
    print("⚠️ No GPU detected! Go to Runtime > Change runtime type > GPU")
else:
    # Device 0 is the only one inspected; total_memory is in bytes, shown in GB (1e9 bytes).
    gpu_props = torch.cuda.get_device_properties(0)
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {gpu_props.total_memory / 1e9} GB")

## 📦 Step 3: Install Dependencies (if needed)

In [None]:
# Most packages are pre-installed in Colab, but verify
# The leading `!` hands the line to the system shell; `-q` keeps pip's output short.
!pip install -q scipy tqdm matplotlib pillow
# NOTE: this message is printed unconditionally, even if the pip command above failed.
print("✓ Dependencies installed")

## 📂 Step 4: Verify Data Structure

In [None]:
# Check required folders and files
import os
from pathlib import Path

# Number of band images (ImagesStack*.png) each dataset is expected to contain.
EXPECTED_BANDS = 458

# Paths are relative to the project folder selected in Step 1.
required = [
    'training_dataset/header.json',
    'Ground_Truth/labels.png',
    'Ground_Truth/labels.json',
    'Inference_dataset1/header.json',
    'run_pipeline_config.py'
]

print("Checking data structure...")
all_good = True
for item in required:
    exists = Path(item).exists()
    status = "✓" if exists else "✗"
    print(f"{status} {item}")
    if not exists:
        all_good = False

# Count band files
train_bands = len(list(Path('training_dataset').glob('ImagesStack*.png')))
infer_bands = len(list(Path('Inference_dataset1').glob('ImagesStack*.png')))

# Only show a check mark when the count actually matches the expectation;
# a missing folder yields 0 bands and must not be reported as a success.
train_ok = train_bands == EXPECTED_BANDS
infer_ok = infer_bands == EXPECTED_BANDS

print(f"\n{'✓' if train_ok else '✗'} Training bands: {train_bands} (expected {EXPECTED_BANDS})")
print(f"{'✓' if infer_ok else '✗'} Inference bands: {infer_bands} (expected {EXPECTED_BANDS})")

if all_good and train_ok and infer_ok:
    print("\n✅ All data files present!")
else:
    print("\n❌ Some files are missing or band counts are unexpected. Please upload your data to Google Drive.")

## ⚙️ Step 5: Configure Pipeline Parameters

Edit these parameters to customize your training:

In [None]:
# ==================== CONFIGURATION ====================
# These constants are read by the "Run Pipeline" cell, which turns them into
# command-line flags for run_pipeline_config.py.

# --- Mode ---
MODE = "full"  # Options: "full", "normalize", "train", "inference"

# --- Normalization (skip if already normalized) ---
SKIP_NORMALIZE = False  # Set to True if data already normalized
LOWER_PERCENTILE, UPPER_PERCENTILE = 2, 98

# --- Preprocessing ---
SPECTRAL_BINNING = 2      # 2, 5, 10, or None
SPATIAL_BINNING = None    # 2, 4, 8, or None
WAVELENGTH_RANGE = None   # e.g., (450, 700) or None
DENOISE = False
DENOISE_METHOD = "gaussian"  # "gaussian" or "median"
DENOISE_STRENGTH = 1.0

# --- Model ---
MODEL_TYPE = "resnet"  # Options: "cnn", "resnet", "deep", "inception", "lstm", "transformer"
DROPOUT = 0.3

# --- Training ---
EPOCHS = 50
LEARNING_RATE = 0.001
BATCH_SIZE = 512  # Colab can handle larger batches
VAL_RATIO = 0.2

# ======================================================

# Echo the settings most worth double-checking before a long run.
print("\n".join([
    "Configuration:",
    f"  Mode: {MODE}",
    f"  Model: {MODEL_TYPE}",
    f"  Epochs: {EPOCHS}",
    f"  Batch Size: {BATCH_SIZE}",
    f"  Spectral Binning: {SPECTRAL_BINNING}",
    f"  Spatial Binning: {SPATIAL_BINNING}",
]))

## 🚀 Step 6: Run Pipeline

In [None]:
# Build command
cmd = f"python run_pipeline_config.py --mode {MODE}"

if SKIP_NORMALIZE:
    cmd += " --skip-normalize"

cmd += f" --lower-percentile {LOWER_PERCENTILE}"
cmd += f" --upper-percentile {UPPER_PERCENTILE}"

if SPECTRAL_BINNING:
    cmd += f" --spectral-binning {SPECTRAL_BINNING}"

if SPATIAL_BINNING:
    cmd += f" --spatial-binning {SPATIAL_BINNING}"

if WAVELENGTH_RANGE:
    cmd += f" --wavelength-range {WAVELENGTH_RANGE[0]} {WAVELENGTH_RANGE[1]}"

if DENOISE:
    cmd += f" --denoise --denoise-method {DENOISE_METHOD} --denoise-strength {DENOISE_STRENGTH}"

cmd += f" --model-type {MODEL_TYPE}"
cmd += f" --dropout {DROPOUT}"
cmd += f" --epochs {EPOCHS}"
cmd += f" --lr {LEARNING_RATE}"
cmd += f" --batch-size {BATCH_SIZE}"
cmd += f" --val-ratio {VAL_RATIO}"

print("Running command:")
print(cmd)
print("\n" + "="*60)

# Run pipeline
!{cmd}

## 📊 Step 7: View Results

In [None]:
import json
import matplotlib.pyplot as plt
from PIL import Image
from pathlib import Path

# ---- Training curves ----
history_path = Path('output/training/training_history.json')
if not history_path.exists():
    print("Training history not found. Run training first.")
else:
    with open(history_path, 'r') as f:
        history = json.load(f)

    # Two panels side by side: loss on the left, accuracy on the right
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

    # 1-based epoch numbers, one per recorded training loss
    epochs = range(1, len(history['train_losses']) + 1)

    # (axis, train-series key, val-series key, legend suffix, y-label, title)
    panels = [
        (ax1, 'train_losses', 'val_losses', 'Loss', 'Loss', 'Training and Validation Loss'),
        (ax2, 'train_accs', 'val_accs', 'Acc', 'Accuracy (%)', 'Training and Validation Accuracy'),
    ]
    for ax, train_key, val_key, tag, y_label, title in panels:
        ax.plot(epochs, history[train_key], 'b-', label=f'Train {tag}')
        ax.plot(epochs, history[val_key], 'r-', label=f'Val {tag}')
        ax.set_xlabel('Epoch')
        ax.set_ylabel(y_label)
        ax.set_title(title)
        ax.legend()
        ax.grid(True)

    plt.tight_layout()
    plt.show()

    print(f"\nBest Validation Accuracy: {history['best_val_acc']:.2f}% (Epoch {history['best_epoch']})")

# ---- Inference prediction map ----
pred_path = Path('output/inference/predictions.png')
if not pred_path.exists():
    print("\nPredictions not found. Run inference first.")
else:
    banner = "="*60
    print("\n" + banner)
    print("Inference Predictions:")
    print(banner)

    img = Image.open(pred_path)
    plt.figure(figsize=(12, 8))
    plt.imshow(img)
    plt.axis('off')
    plt.title('Predicted Plastic Types')
    plt.tight_layout()
    plt.show()

## 📈 Step 8: View Inference Statistics

In [None]:
# Load inference statistics
# Imported here so this cell also works when run on its own
# (for example after a runtime restart), not only after the results cell.
import json
from pathlib import Path


def class_sort_key(class_id):
    """Return a sort key that orders integer-like class ids numerically.

    JSON object keys are always decoded as strings, so a plain sort would
    place "10" before "2". Ids that cannot be parsed as integers are sorted
    alphabetically after the numeric ones.
    """
    text = str(class_id)
    try:
        return (0, int(text), text)
    except ValueError:
        return (1, 0, text)


stats_path = Path('output/inference/inference_statistics.json')
if stats_path.exists():
    with open(stats_path, 'r') as f:
        stats = json.load(f)

    print("Inference Statistics:")
    print("="*60)
    print(f"Total pixels: {stats['total_pixels']:,}\n")
    print("Class Distribution:")
    print("-"*60)
    print(f"{'Class':<15} {'Pixels':>12} {'Percentage':>12} {'Confidence':>12}")
    print("-"*60)

    # One table row per class, ordered by class id (numerically where possible)
    ordered = sorted(
        stats['class_distribution'].items(),
        key=lambda entry: class_sort_key(entry[0]),
    )
    for class_id, info in ordered:
        print(f"{info['class_name']:<15} {info['pixel_count']:>12,} {info['percentage']:>11.2f}% {info['mean_confidence']:>11.3f}")
else:
    print("Statistics not found. Run inference first.")

## 💾 Step 9: Download Results (Optional)

In [None]:
# Create ZIP of results
# `-r` recurses into output/, the folder the earlier cells read
# training history, predictions and statistics from.
!zip -r results.zip output/

# Download to local machine
# google.colab is only importable inside a Colab runtime.
from google.colab import files
files.download('results.zip')

# NOTE: printed unconditionally once files.download() returns.
print("✓ Results downloaded as results.zip")

## 🧹 Step 10: Cleanup (Optional)

Remove large temporary files to free up space:

In [None]:
# Remove normalized data (can be regenerated)
# `rm -rf` deletes each folder recursively and stays silent if it does not exist.
# NOTE(review): cells that pass --skip-normalize presumably rely on these
# folders — re-run normalization before using them after this cleanup.
!rm -rf training_dataset_normalized/
!rm -rf Inference_dataset1_normalized/

print("✓ Normalized data removed (can be regenerated)")
print("  Model and results are preserved in output/")

## 🔬 Quick Experiments: Try Different Models

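After normalizing once, quickly try different models. This cell passes `--skip-normalize`, so it expects the normalized data to still be present: run it before the cleanup step above, or re-run normalization first.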

In [None]:
# Try different models quickly (data already normalized)
models = ["cnn", "resnet", "deep", "inception", "transformer"]

for model in models:
    print(f"\n{'='*60}")
    print(f"Training with {model.upper()} model")
    print(f"{'='*60}")
    
    cmd = f"python run_pipeline_config.py --mode train --skip-normalize "
    cmd += f"--model-type {model} --epochs 20 --batch-size 512"
    
    !{cmd}
    
    print(f"\n✓ {model.upper()} training complete\n")