In [None]:
# ===============================
# 1️⃣ Mount Google Drive
# ===============================
# Colab-only helper: exposes the user's Drive under /content/drive so that the
# dataset zip can be read from it and results can be written back to it.
from google.colab import drive
drive.mount('/content/drive')

# ===============================
# 1.1️⃣ Explore Google Drive Structure
# ===============================
# List the Drive root so the correct dataset location can be identified before
# the dataset paths are configured.
print("📁 Exploring your Google Drive structure:")
!ls /content/drive/MyDrive/
print("\n🔍 If you need to check subdirectories, run:")
# Printed as a hint only; this command is not executed here.
print("!ls /content/drive/MyDrive/your-folder-name/")

# ===============================
# 2️⃣ Install YOLOv5
# ===============================
!git clone https://github.com/ultralytics/yolov5
%cd yolov5
!pip install -r requirements.txt

# ===============================
# 3️⃣ Check GPU and Memory
# ===============================
!nvidia-smi
import torch
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"GPU device: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'None'}")

# ===============================
# 4️⃣ Unzip Dataset from Roboflow
# ===============================
import os

# ===============================
# 📂 CONFIGURE YOUR DATASET PATHS HERE
# ===============================
# UPDATE THESE PATHS TO YOUR ACTUAL DATASET LOCATION

# Example paths - CHANGE THESE TO MATCH YOUR FILES:
# Option 1: Dataset directly in MyDrive root
DATASET_ZIP_PATH = '/content/drive/MyDrive/Datasets/Pipeline/corrosion_images.zip'

# Option 2: Dataset in a subfolder
# DATASET_ZIP_PATH = '/content/drive/MyDrive/datasets/your-dataset-name.zip'

# Option 3: Dataset in nested folders
# DATASET_ZIP_PATH = '/content/drive/MyDrive/AI_Projects/YOLO/corrosion_data.zip'

DATASET_EXTRACT_PATH = '/content/'
DATASET_NAME = 'your-extracted-folder-name'  # This will be the folder name after unzipping

print("🎯 Current dataset configuration:")
print(f"   📦 Looking for zip at: {DATASET_ZIP_PATH}")
print(f"   📂 Will extract to: {DATASET_EXTRACT_PATH}")
print(f"   📁 Expected folder name: {DATASET_NAME}")
print("\n⚠️  IMPORTANT: Update the paths above before running training!")

# Check if dataset zip exists
if os.path.exists(DATASET_ZIP_PATH):
    print(f"✅ Found dataset at: {DATASET_ZIP_PATH}")
    print("📤 Extracting dataset...")
    !unzip {DATASET_ZIP_PATH} -d {DATASET_EXTRACT_PATH}
    print("✅ Dataset extracted successfully!")
else:
    print(f"❌ Dataset not found at: {DATASET_ZIP_PATH}")
    print("\n🔧 TROUBLESHOOTING:")
    print("1. Check if your zip file exists in Google Drive")
    print("2. Update DATASET_ZIP_PATH variable above")
    print("3. Make sure the path starts with '/content/drive/MyDrive/'")
    print("\n📁 Available files in MyDrive root:")
    !ls /content/drive/MyDrive/ | head -20

# ===============================
# 5️⃣ Verify Dataset Structure
# ===============================
DATASET_FULL_PATH = f'{DATASET_EXTRACT_PATH}{DATASET_NAME}'
DATA_YAML_PATH = f'{DATASET_FULL_PATH}/data.yaml'

print(f"Checking dataset structure at: {DATASET_FULL_PATH}")
!ls {DATASET_FULL_PATH}

# Check if data.yaml exists
if os.path.exists(DATA_YAML_PATH):
    print("✅ data.yaml found!")
    print("📋 Dataset configuration:")
    !cat {DATA_YAML_PATH}
else:
    print(f"❌ data.yaml not found at: {DATA_YAML_PATH}")
    print("Please check your dataset structure and update paths accordingly")

# ===============================
# 6️⃣ Train YOLOv5 with On-the-Fly Augmentation
# ===============================
# Only proceed if dataset is properly set up
if os.path.exists(DATA_YAML_PATH):
    print("🚀 Starting YOLOv5 training...")

    # YOLOv5 built-in augmentations: flip, rotation, scale, HSV changes, mosaic, mixup
    # hyp.scratch-high.yaml provides aggressive augmentation settings
    !python train.py \
      --img 640 \
      --batch 8 \
      --epochs 100 \
      --data {DATA_YAML_PATH} \
      --weights yolov5s.pt \
      --hyp data/hyps/hyp.scratch-high.yaml \
      --project /content/drive/MyDrive/yolo_runs \
      --name corrosion_model \
      --save-period 10

    print("🎉 Training completed!")
else:
    print("⚠️ Cannot start training - dataset configuration issues detected")

# ===============================
# 7️⃣ Validate Trained Model
# ===============================
BEST_WEIGHTS_PATH = '/content/drive/MyDrive/yolo_runs/corrosion_model/weights/best.pt'

if os.path.exists(BEST_WEIGHTS_PATH):
    print("✅ Found trained weights! Running validation...")
    !python val.py \
      --weights {BEST_WEIGHTS_PATH} \
      --data {DATA_YAML_PATH} \
      --img 640 \
      --task val

    print("📊 Validation completed!")

    # Show results location
    print(f"📁 Your trained model is saved at: {BEST_WEIGHTS_PATH}")
    print("📈 Training results and plots are in: /content/drive/MyDrive/yolo_runs/corrosion_model/")
else:
    print("❌ Training weights not found. Check if training completed successfully.")

# ===============================
# 8️⃣ Test Inference (Optional)
# ===============================
# Uncomment the lines below to test your model on sample images.
# They are real comments (not a string literal), so removing the leading "# "
# is all that is needed. Paths are quoted so locations with spaces still work.

# Test on webcam - needs a camera device visible to the runtime; a hosted Colab
# VM presumably has none, so use a local runtime for this one.
# !python detect.py --weights "{BEST_WEIGHTS_PATH}" --source 0

# Test on images folder
# !python detect.py --weights "{BEST_WEIGHTS_PATH}" --source /path/to/test/images --save-txt --save-conf

# Test on single image
# !python detect.py --weights "{BEST_WEIGHTS_PATH}" --source /path/to/single/image.jpg --save-txt --save-conf

# Closing help text: common training problems and how to lay out the dataset
# in Google Drive.
print("="*50)
print("🔧 TROUBLESHOOTING TIPS:")
print("- If you get CUDA out of memory: reduce batch size to 4 or 2")
print("- If training is slow: try using a smaller model like yolov5n.pt")
print("- If accuracy is low: increase epochs or adjust augmentation parameters")
# Weights live directly under <project>/<name>/weights/ (there is no extra
# "train" folder), matching the path used to locate best.pt for validation.
print("- Check /content/drive/MyDrive/yolo_runs/corrosion_model/weights/ for all saved weights")
print("\n📂 GOOGLE DRIVE DATASET SETUP GUIDE:")
print("1. Upload your dataset zip to Google Drive")
print("2. Note the exact path (e.g., MyDrive/datasets/my_data.zip)")
print("3. Update DATASET_ZIP_PATH variable at the top")
print("4. Update DATASET_NAME to match your extracted folder")
print("\n🎯 COMMON GOOGLE DRIVE PATHS:")
print("- Root folder: /content/drive/MyDrive/filename.zip")
print("- In subfolder: /content/drive/MyDrive/folder/filename.zip")
print("- Multiple levels: /content/drive/MyDrive/AI/Projects/data.zip")
print("="*50)