# SmartCrop AI - Complete Training Pipeline


**Steps:**
1. Mount Google Drive
2. Set up the project directory
3. Install dependencies
4. Verify dataset structure
5. (Optional) Reduce dataset size
6. Train the MobileNetV3 model
7. Export the model
8. Run predictions
9. Save models to Google Drive


## Step 1: Mount Google Drive


In [None]:
# Mount Google Drive at /content/drive so the project folder stored there
# is reachable from this runtime.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)


## Step 2: Setup Project Directory


In [None]:
import os

# Set your Google Drive folder path
PROJECT_DIR = '/content/drive/MyDrive/SmartCrop-AI'
os.chdir(PROJECT_DIR)

# Extract project if needed (uncomment if you uploaded as zip)
# !unzip -q smartcrop-ai-colab.zip -d .

# Navigate to AI directory
os.chdir('smartcrop-ai/ai')
print(f"Current directory: {os.getcwd()}")
!ls -la


## Step 3: Install Dependencies


In [None]:
# Install every package the notebook needs, one group per pip call.
# `-q` keeps pip's output quiet.

# Install PyTorch, torchvision and torchaudio from the CUDA 11.8 (cu118) wheel index
!pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

# Install computer vision libraries (OpenCV, Albumentations, Ultralytics, Segment Anything)
!pip install -q opencv-python albumentations ultralytics segment-anything

# Install model export tools (ONNX, ONNX Runtime, TensorFlow)
!pip install -q onnx onnxruntime tensorflow

# Install data processing libraries
!pip install -q pandas scikit-learn scikit-image

# Install visualization libraries (grad-cam is presumably used for the prediction heatmaps -- confirm)
!pip install -q matplotlib seaborn grad-cam

# Install utilities (config parsing, progress bars, HTTP)
!pip install -q pyyaml omegaconf tqdm requests

# NOTE: this message prints even if one of the pip commands above failed
print("\n✓ All dependencies installed!")


In [None]:
# Verify installation and GPU
import torch
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
    !nvidia-smi
else:
    print("⚠️  No GPU detected. Training will be slow on CPU.")
    print("Go to Runtime → Change runtime type → GPU")


## Step 4: Verify Dataset Structure


In [None]:
# Verify dataset structure.
#
# Expected layout under data/raw: train/, val/ and test/ split folders, with
# train/<crop>/<disease>/ holding the images. The "ready" message is printed
# only when every split folder exists and the training split contains at
# least one crop folder; otherwise the specific problems are listed.
import os
from pathlib import Path

data_dir = Path('data/raw')
print("Checking dataset structure...")

# Record which split folders are present and report each one
split_found = {
    split: (data_dir / split).exists() for split in ('train', 'val', 'test')
}
for split, found in split_found.items():
    print(f"{split.capitalize()} folder exists: {found}")

# Count samples
train_crops = []
if (data_dir / 'train').is_dir():
    # Sorted so the listing (and the sampled folder) is the same on every run
    train_crops = sorted(
        d.name for d in (data_dir / 'train').iterdir() if d.is_dir()
    )
    print(f"\n✓ Found {len(train_crops)} crops in training set")
    print(f"Sample crops: {train_crops[:5]}")

    # Count images in the first disease folder of the first crop
    if train_crops:
        first_crop = data_dir / 'train' / train_crops[0]
        diseases = sorted(d.name for d in first_crop.iterdir() if d.is_dir())
        if diseases:
            sample_dir = first_crop / diseases[0]
            # Case-insensitive suffix match: counts .jpg/.JPG/.Jpg exactly once
            sample_count = sum(
                1
                for f in sample_dir.iterdir()
                if f.is_file() and f.suffix.lower() == '.jpg'
            )
            print(f"Sample: {train_crops[0]}/{diseases[0]} has {sample_count} images")

# Only claim readiness when the checks above actually passed
missing_splits = [split for split, found in split_found.items() if not found]
dataset_ready = not missing_splits and bool(train_crops)

if dataset_ready:
    print("\n✓ Dataset is ready for training!")
else:
    print("\n⚠️  Dataset is NOT ready for training:")
    if missing_splits:
        print(f"  - Missing split folder(s) under {data_dir}: {', '.join(missing_splits)}")
    if not train_crops:
        print(f"  - No crop folders found in {data_dir / 'train'}")


## Step 5: (Optional) Reduce Dataset Size


In [None]:
# (Optional) Shrink the dataset for faster training.
# Leave this cell as-is to train on the full dataset.
# The reduction keeps small classes intact and reduces large classes.

# To run the reduction, uncomment the next line:
# !python scripts/reduce_dataset.py

# When prompted, type 'y' to proceed
skip_notice = "Skipping dataset reduction. Uncomment the line above to reduce dataset size."
print(skip_notice)


In [None]:
# Placeholder cell: no action is required here.
# The dataset is already organized under data/raw/train/, data/raw/val/ and data/raw/test/.
pass


In [None]:
# Placeholder cell: the dataset is already organized, so nothing runs here.
# If you need to reorganize it, uncomment the line below:
# !python scripts/organize_datasets.py
pass


In [None]:
# Placeholder cell: no action is required; dataset reduction is handled in Step 5 above.
pass


## Step 6: Train MobileNetV3 Model


In [None]:
# Train MobileNetV3 (on-device model)
# This will take 30-60 minutes depending on dataset size

!python train.py --model mobilenet_v3 --data-dir data/raw --epochs 10 --batch-size 32 --lr 0.001

print("\n✓ Training completed!")


In [None]:
# Check training results:
# list the saved checkpoints with human-readable sizes ...
!ls -lh outputs/models/checkpoints/
# ... and show the last 50 lines of the training log
!tail -50 outputs/logs/training.log


## Step 7: Export Model


In [None]:
# Export MobileNetV3 to mobile formats, starting from the mobilenet_v3_best.pth checkpoint
!python export_model.py --model mobilenet_v3 --checkpoint outputs/models/checkpoints/mobilenet_v3_best.pth

# Verify exports: list any .tflite / .onnx files in outputs/models.
# `2>/dev/null` hides ls's own error when nothing matches, and the `|| echo`
# fallback prints a short message instead.
!ls -lh outputs/models/*.tflite 2>/dev/null || echo "No TFLite files"
!ls -lh outputs/models/*.onnx 2>/dev/null || echo "No ONNX files"


## Step 8: Run Predictions


In [None]:
# Upload a test image through the Colab file picker.
# `uploaded` is keyed by the uploaded filename(s).
from google.colab import files
uploaded = files.upload()

# Get uploaded filename
import os
if not uploaded:
    # A cancelled or empty upload would otherwise fail with a bare IndexError
    raise RuntimeError("No file was uploaded. Re-run this cell and choose an image.")

# Use the first uploaded file as the test image for the prediction cell
image_file = next(iter(uploaded))
print(f"Testing on: {image_file}")


In [None]:
# Run prediction with heatmap
!python predict.py --image {image_file} --model outputs/models/checkpoints/mobilenet_v3_best.pth --model-type mobilenet_v3 --heatmap --output outputs/result.jpg

# Display result
from IPython.display import Image, display
display(Image('outputs/result.jpg'))


## Step 9: Save Models to Google Drive


In [None]:
# Create models directory in Drive
!mkdir -p /content/drive/MyDrive/SmartCrop-AI/models

# Copy trained models
!cp -r outputs/models/checkpoints/* /content/drive/MyDrive/SmartCrop-AI/models/

# Copy exported models
!cp outputs/models/*.tflite /content/drive/MyDrive/SmartCrop-AI/models/ 2>/dev/null || echo "No TFLite files"
!cp outputs/models/*.onnx /content/drive/MyDrive/SmartCrop-AI/models/ 2>/dev/null || echo "No ONNX files"

print("✓ Models saved to Google Drive!")
print("Location: /content/drive/MyDrive/SmartCrop-AI/models/")
