## Step 1: Configuration

Set `DATASET_NAME` to the name of your Kaggle dataset, i.e. the folder it is mounted under in `/kaggle/input/`.

In [None]:
import os
import sys
import shutil
import subprocess
from pathlib import Path
import pandas as pd
from datetime import datetime

# ============ CONFIGURATION ============
# Folder name of the dataset under /kaggle/input/ (read in Step 4).
DATASET_NAME = "fsod-coco-data"  # 👈 CHANGE TO YOUR KAGGLE DATASET NAME
# Passed to train.py as --num_episodes (Step 6).
NUM_EPISODES = 100               # 👈 Change to 1000+ for real training
# Passed to train.py / inference.py as --device. "cuda" only works when the
# notebook's GPU accelerator is enabled; Step 5 checks for it.
DEVICE = "cuda"
# =======================================

# Echo the settings so the run log records what this session used.
print("✅ Configuration loaded")
print(f"   Dataset: {DATASET_NAME}")
print(f"   Episodes: {NUM_EPISODES}")
print(f"   Device: {DEVICE}")
print(f"   Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

## Step 2: Clone FSOD Repository

In [None]:
# Clone the FSOD code into /kaggle/working/fsod (skipped when the directory
# already exists) and make the checkout the current working directory.
print("📥 Setting up FSOD repository...")
os.chdir("/kaggle/working")

# Placeholder URL: replace 'yourusername' with your GitHub username.
repo_url = "https://github.com/yourusername/fsod.git"

if not os.path.exists("fsod"):
    print("   Cloning repository...")
    result = subprocess.run(
        ["git", "clone", "--depth", "1", repo_url],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        print(f"❌ Clone failed: {result.stderr}")
        print("   Make sure to replace 'yourusername' with your GitHub username")
        # Stop here: without a checkout the chdir below would only raise a
        # less helpful FileNotFoundError.
        sys.exit(1)
    print("   ✓ Repository cloned")
else:
    print("   ✓ Repository already exists")

os.chdir("fsod")
print(f"\n✅ Working directory: {os.getcwd()}")

## Step 3: Install Dependencies

In [None]:
# Install the packages listed in requirements.txt (looked up in the current
# working directory) into the environment of the interpreter running this
# notebook, by invoking pip through sys.executable.
print("📦 Installing dependencies...")
result = subprocess.run(
    [sys.executable, "-m", "pip", "install", "-q", "-r", "requirements.txt"],
    capture_output=True,
    text=True,
)

if result.returncode == 0:
    print("✅ All dependencies installed successfully")
else:
    # pip's stderr is captured above, so surface it before stopping.
    print(f"❌ Installation failed: {result.stderr}")
    sys.exit(1)

## Step 4: Prepare Dataset

In [None]:
# Copy the COCO annotation files and image folders from the read-only Kaggle
# input mount into ./data. Missing items are reported rather than treated as
# fatal; only a missing dataset folder stops the cell.
print("📊 Preparing dataset...\n")

dataset_path = Path(f"/kaggle/input/{DATASET_NAME}")
print(f"Looking for dataset at: {dataset_path}")

if not dataset_path.exists():
    print("\n❌ ERROR: Dataset not found!")
    print("\nPossible causes:")
    print(f"  1. Wrong dataset name: '{DATASET_NAME}'")
    print("  2. Dataset not added to notebook")
    print("\nFix: Click 'Add Data' → Search your dataset → Click 'Add'")
    sys.exit(1)

# Create data directory
os.makedirs("data", exist_ok=True)

# Names of expected files/folders that were not found in the dataset.
missing = []

# Copy JSON files
print("\nCopying COCO annotations...")
for file in ["train_coco.json", "val_coco.json"]:
    src = dataset_path / file
    dst = Path("data") / file
    if src.exists():
        shutil.copy(src, dst)
        print(f"   ✓ {file}")
    else:
        missing.append(file)
        print(f"   ⚠️  Missing: {file}")

# Copy image directories
print("\nCopying images...")
for dir_name in ["train_images", "val_images"]:
    src = dataset_path / dir_name
    dst = Path("data") / dir_name
    if src.exists():
        # dirs_exist_ok lets the cell be re-run without failing on the copy.
        shutil.copytree(src, dst, dirs_exist_ok=True)
        # Counts every top-level entry in the copied folder.
        count = sum(1 for _ in dst.glob("*"))
        print(f"   ✓ {dir_name}: {count} images")
    else:
        missing.append(dir_name)
        print(f"   ⚠️  Missing: {dir_name}")

# Only claim success when everything expected was actually copied.
if missing:
    print(f"\n⚠️  Data preparation finished with missing items: {', '.join(missing)}")
else:
    print("\n✅ Data preparation complete!")

## Step 5: Check GPU

In [None]:
import torch

# Report the CUDA device PyTorch sees, or stop the notebook when there is none.
print("🎮 GPU Information:")
print(f"   CUDA Available: {torch.cuda.is_available()}")

if torch.cuda.is_available():
    device = torch.device("cuda:0")
    print(f"   GPU Name: {torch.cuda.get_device_name(0)}")

    props = torch.cuda.get_device_properties(0)
    # total_memory is in bytes; reported in decimal gigabytes.
    memory_gb = props.total_memory / 1e9
    print(f"   Memory: {memory_gb:.1f} GB")
    print(f"   CUDA Capability: {props.major}.{props.minor}")
    print("\n✅ GPU ready for training!")
else:
    print("\n❌ No GPU detected!")
    print("   Go to top right → Accelerator → Select 'GPU'")
    print("   Then restart this notebook")
    sys.exit(1)

## Step 6: Train Model

This step trains the FSOD model on your dataset.

In [None]:
# Run train.py as a child process and stop the notebook if it fails.
print("\n" + "="*70)
print("🏋️  STARTING TRAINING")
print("="*70)
print(f"Episodes: {NUM_EPISODES}")
print(f"Device: {DEVICE}")
print(f"Started at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print("="*70)

# Train
# Use the interpreter running this notebook (the one the requirements were
# installed into via `sys.executable -m pip`) rather than whichever `python`
# is first on PATH, and pass arguments as a list so no shell parsing occurs.
train_cmd = [
    sys.executable, "train.py",
    "--device", str(DEVICE),
    "--num_episodes", str(NUM_EPISODES),
    "--pretrained",
]
exit_code = subprocess.run(train_cmd).returncode

if exit_code == 0:
    print("\n" + "="*70)
    print("✅ TRAINING COMPLETED SUCCESSFULLY!")
    print("="*70)
    print("Model saved to: checkpoints/best_model.pth")
    print(f"Finished at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
else:
    print("\n" + "="*70)
    print("❌ TRAINING FAILED!")
    print("="*70)
    sys.exit(1)

## Step 7: Test Inference (Single Image)

In [None]:
# Smoke-test inference.py in single-image mode: two training images form the
# support set and the first validation image is the query.
print("\n🎯 Testing inference on single image...\n")

# Get sample images
train_images = sorted(Path("data/train_images").glob("*.jpg"))[:5]
val_images = sorted(Path("data/val_images").glob("*.jpg"))[:3]

print(f"Found {len(train_images)} training images")
print(f"Found {len(val_images)} validation images")

if len(train_images) >= 2 and len(val_images) >= 1:
    # Prepare support and query images
    support_imgs = [str(img) for img in train_images[:2]]
    query_img = str(val_images[0])

    print(f"\nSupport images: {train_images[0].name}, {train_images[1].name}")
    print(f"Query image: {val_images[0].name}")

    # Run inference
    # Each path is its own argv entry, so file names containing spaces or
    # shell metacharacters are passed through intact.
    cmd = [
        sys.executable, "inference.py",
        "--mode", "single",
        "--model_path", "checkpoints/best_model.pth",
        "--support_img", *support_imgs,
        "--query_image", query_img,
        "--output_dir", "output",
        "--device", str(DEVICE),
        "--score_threshold", "0.3",
    ]

    completed = subprocess.run(cmd)
    # Report the real outcome instead of always printing success.
    if completed.returncode == 0:
        print("\n✅ Inference test complete!")
    else:
        print(f"\n❌ Inference test failed (exit code {completed.returncode})")
else:
    print("\n⚠️  Not enough images for inference test")
    print("   Need: 2 support + 1 query")
    print(f"   Have: {len(train_images)} support + {len(val_images)} query")

## Step 8: Batch Inference (All Validation Images)

In [None]:
# Run inference.py in batch mode over data/val_images/ and summarise the
# CSV it writes.
print("\n📊 Running batch inference...\n")

# Use first 3 training images as support set
train_images = sorted(Path("data/train_images").glob("*.jpg"))[:3]
support_imgs = [str(img) for img in train_images]

print(f"Support set: {len(train_images)} images")
for img in train_images:
    print(f"   • {img.name}")

results_csv = Path("results.csv")

if not support_imgs:
    # Without support images --support_img would be passed no values.
    print("\n⚠️  No .jpg support images found in data/train_images/ - skipping batch inference")
else:
    print("\nProcessing: data/val_images/")

    # Run batch inference
    # Arguments are passed as a list so paths with spaces or shell
    # metacharacters are not re-parsed by a shell.
    cmd = [
        sys.executable, "inference.py",
        "--mode", "batch",
        "--model_path", "checkpoints/best_model.pth",
        "--support_img", *support_imgs,
        "--query_dir", "data/val_images/",
        "--output_csv", str(results_csv),
        "--device", str(DEVICE),
    ]
    completed = subprocess.run(cmd)

    # Show results
    if completed.returncode != 0:
        # A results.csv left over from an earlier run must not be reported
        # as the output of this failed run.
        print(f"\n❌ Batch inference failed (exit code {completed.returncode})")
    elif not results_csv.exists():
        print("⚠️  Results file not created")
    else:
        # Assumes inference.py writes 'filename', 'class_name' and
        # 'similarity_score' columns - TODO confirm against inference.py.
        df = pd.read_csv(results_csv)
        print("\n✅ Batch inference complete!")
        print("\n📈 Results:")
        print(f"   Total detections: {len(df)}")
        print(f"   Unique images: {df['filename'].nunique()}")
        print(f"   Average score: {df['similarity_score'].mean():.4f}")
        print("\n   Top 10 detections:")
        top_detections = df.nlargest(10, 'similarity_score')
        print(top_detections[['filename', 'class_name', 'similarity_score']].to_string(index=False))

## Step 9: Download Results

Your trained model and results are ready to download!

In [None]:
# List the artifacts produced by the earlier steps (model checkpoint,
# inference output folder, results CSV) with their sizes on disk.
print("\n" + "="*70)
print("📥 FILES READY FOR DOWNLOAD")
print("="*70)

# Check what files exist
files_info = []  # (label, relative path, human-readable size) tuples

# Model checkpoint
model_file = Path("checkpoints/best_model.pth")
if model_file.exists():
    size_mb = model_file.stat().st_size / 1e6
    files_info.append(("✓ Model Checkpoint", "checkpoints/best_model.pth", f"{size_mb:.1f} MB"))

# Inference outputs
output_dir = Path("output")
if output_dir.exists():
    # Only regular files are counted and sized; directory entries would
    # otherwise inflate both the count and the byte total.
    output_files = [f for f in output_dir.rglob("*") if f.is_file()]
    count = len(output_files)
    size_mb = sum(f.stat().st_size for f in output_files) / 1e6
    files_info.append(("✓ Inference Output", "output/", f"{count} files, {size_mb:.1f} MB"))

# CSV results
csv_file = Path("results.csv")
if csv_file.exists():
    size_mb = csv_file.stat().st_size / 1e6
    files_info.append(("✓ CSV Results", "results.csv", f"{size_mb:.1f} MB"))

# Print table
print("\nAvailable files:")
if not files_info:
    print("   (none found)")
for name, path, size in files_info:
    print(f"   {name:<25} {path:<30} {size}")

print("\n📝 How to download:")
print("   1. Click 'Output' tab on the right side")
print("   2. Download the 'fsod' folder")
print("   3. Extract and use locally")

print("\n✅ Training & inference complete!")
print(f"   Finished at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")