# 🛢️ Indian Petrol Pump Analytics - YOLO11 Training

**All-in-One Notebook**: Downloads public datasets, merges with class remapping, and trains YOLO11.

| Class ID | Name | Class ID | Name |
|----------|------|----------|------|
| 0 | person | 9 | testing_jar |
| 1 | car | 10 | du_cover_open |
| 2 | motorcycle | 11 | manhole_open |
| 3 | heavy_vehicle | 12 | air_pump |
| 4 | fire | 13 | uniform |
| 5 | smoke | 14 | helmet |
| 6 | cigarette | 15 | plastic_item |
| 7 | violence | 16 | garbage |
| 8 | nozzle | 17 | cell_phone |

In [None]:
#@title 1️⃣ Setup & Install Dependencies
!pip install -q ultralytics gdown
import os, shutil, random, yaml, gdown, zipfile
from pathlib import Path
from collections import defaultdict
from google.colab import drive

# Attach Google Drive so the trained model can be copied there at the end.
drive.mount('/content/drive')

# Workspace layout: raw downloads and the merged dataset both sit under BASE_DIR.
BASE_DIR = Path('/content')
DATASETS_DIR, OUTPUT_DIR = (
    BASE_DIR.joinpath(folder) for folder in ('source_datasets', 'Final_Dataset')
)

# Only the download folder is created here; the merge step builds OUTPUT_DIR.
DATASETS_DIR.mkdir(exist_ok=True)
print('‚úÖ Setup complete!')

In [None]:
#@title 2️⃣ Master Schema & Mappings

# Unified class table for the merged dataset: position in the tuple == class ID.
MASTER_SCHEMA = dict(enumerate((
    'person',         # 0
    'car',            # 1
    'motorcycle',     # 2
    'heavy_vehicle',  # 3
    'fire',           # 4
    'smoke',          # 5
    'cigarette',      # 6
    'violence',       # 7
    'nozzle',         # 8
    'testing_jar',    # 9
    'du_cover_open',  # 10
    'manhole_open',   # 11
    'air_pump',       # 12
    'uniform',        # 13
    'helmet',         # 14
    'plastic_item',   # 15
    'garbage',        # 16
    'cell_phone',     # 17
)))

# Per-dataset download source plus the class translation table.
# Each 'mapping' is {class ID in the source dataset: class ID in MASTER_SCHEMA}.
DATASET_CONFIGS = {
    'fire_smoke': dict(
        url='https://github.com/spacewalk01/fire-smoke-detection-yolov8/releases/download/v1.0/fire-smoke-dataset.zip',
        mapping={0: 4, 1: 5},  # fire->4, smoke->5
    ),
    'violence': dict(
        url='https://drive.google.com/uc?id=1_2LQj-FhKdXzXxjv_giQ4qv_kA4Lcnqm',
        mapping={0: 7},  # violence->7
    ),
    'ppe': dict(
        url='https://drive.google.com/uc?id=1MGbLfEY_rXvO61dEedG7vK7k7K0aM9ND',
        mapping={0: 14, 1: 13, 2: 0},  # hardhat->14, vest->13, person->0
    ),
    'cigarette': dict(
        url='https://drive.google.com/uc?id=1kZNd78UPtfMfKXSYtLl9wjCHX7lcdQFT',
        mapping={0: 6},  # cigarette->6
    ),
    'phone': dict(
        url='https://drive.google.com/uc?id=1yU9PgFRQRlsD9XA2djBo6xvP8l8P3KU4',
        mapping={0: 17},  # cell_phone->17
    ),
}

print(f'‚úÖ Configured {len(DATASET_CONFIGS)} datasets')

In [None]:
#@title 3️⃣ Download Datasets (Public URLs - No API Key)

def download_and_extract(name, url, dest_dir):
    """Download a zipped dataset and unpack it into ``dest_dir / name``.

    Google Drive links are fetched with ``gdown``; any other URL is fetched
    with ``urllib``. The archive is unpacked into a ``<name>.partial`` staging
    folder that is renamed to ``<name>`` only after extraction succeeds, so an
    interrupted or failed run is never mistaken for a finished download.

    Args:
        name: Dataset key; used for the zip file name and the output folder.
        url: Location of the zip archive.
        dest_dir: Folder that receives the extracted dataset (created if missing).

    Returns:
        Path of the extracted dataset folder, or ``None`` when the download or
        extraction failed (the error is printed, not raised).
    """
    # Local import keeps this function usable without touching the setup cell.
    import urllib.request

    dest_dir = Path(dest_dir)
    dest_dir.mkdir(parents=True, exist_ok=True)
    zip_path = dest_dir / f'{name}.zip'
    extract_dir = dest_dir / name
    staging_dir = dest_dir / f'{name}.partial'

    if extract_dir.exists():
        print(f'  ‚è≠Ô∏è {name} already exists, skipping...')
        return extract_dir

    print(f'  üì• Downloading {name}...')
    try:
        if 'drive.google.com' in url:
            gdown.download(url, str(zip_path), quiet=True)
        else:
            # Plain Python instead of a `!wget` shell magic: HTTP/network
            # errors raise here and are reported by the handler below.
            with urllib.request.urlopen(url) as response, open(zip_path, 'wb') as out:
                shutil.copyfileobj(response, out)

        # Extract into the staging folder, then publish it with a rename.
        shutil.rmtree(staging_dir, ignore_errors=True)
        with zipfile.ZipFile(zip_path, 'r') as z:
            z.extractall(staging_dir)
        staging_dir.rename(extract_dir)
        print(f'  ‚úÖ {name} extracted')
        return extract_dir
    except Exception as e:
        # Best-effort by design: report and let the caller continue with the
        # remaining datasets.
        shutil.rmtree(staging_dir, ignore_errors=True)
        print(f'  ‚ùå Failed: {e}')
        return None
    finally:
        # The archive is never needed again, whether or not extraction worked.
        if zip_path.exists():
            zip_path.unlink()

print('üîÑ Downloading datasets...')
for name, config in DATASET_CONFIGS.items():
    download_and_extract(name, config['url'], DATASETS_DIR)
print('\n‚úÖ All downloads complete!')

In [None]:
#@title 4️⃣ Merge Datasets with Class Remapping

def setup_output_dirs(output_dir):
    """Ensure the images/labels x train/val folder tree exists.

    Args:
        output_dir: Root folder of the merged dataset (str or Path).

    Returns:
        ``output_dir`` as a ``Path``.
    """
    root = Path(output_dir)
    for kind in ('images', 'labels'):
        for split in ('train', 'val'):
            # parents=True also creates the root itself on first use.
            root.joinpath(kind, split).mkdir(parents=True, exist_ok=True)
    return root

def remap_labels(label_path, mapping):
    """Translate the class IDs of one label file.

    Each kept line has its first token (the source class ID) replaced by
    ``mapping[source_id]``; the remaining tokens are passed through unchanged.
    A line is dropped when it has fewer than five tokens, when its class token
    is not an integer, or when its class ID is absent from ``mapping`` or
    mapped to ``None``.

    Args:
        label_path: Path of the label ``.txt`` file (str or Path).
        mapping: ``{source_class_id: target_class_id}``.

    Returns:
        List of remapped label lines (no trailing newlines); empty when the
        file does not exist or nothing survives the filter.
    """
    label_path = Path(label_path)
    if not label_path.exists():
        return []

    remapped = []
    # utf-8-sig strips a leading BOM so the first class token still parses.
    with open(label_path, encoding='utf-8-sig') as f:
        for line in f:
            parts = line.split()
            if len(parts) < 5:
                continue
            try:
                src_id = int(parts[0])
            except ValueError:
                # One malformed line must not abort the whole merge.
                continue
            target_id = mapping.get(src_id)
            if target_id is not None:
                remapped.append(f"{target_id} {' '.join(parts[1:])}")
    return remapped

def find_yolo_structure(base_path):
    """Locate the images/labels folder pair of an extracted dataset.

    Candidates are tried in this order: ``<base>/images``,
    ``<base>/train/images``, ``<base>/data/images``, then ``<sub>/images`` for
    every direct subfolder of ``base_path`` in sorted (deterministic) order.
    A candidate whose labels folder also exists is preferred; if there is
    none, the first candidate whose images folder exists is returned.

    Args:
        base_path: Root folder of the extracted dataset (str or Path).

    Returns:
        ``(images_dir, labels_dir)`` as Paths, or ``(None, None)`` when no
        images folder is found or ``base_path`` is not a directory.
    """
    base_path = Path(base_path)
    if not base_path.is_dir():
        return None, None

    candidates = [
        (base_path / 'images', base_path / 'labels'),
        (base_path / 'train' / 'images', base_path / 'train' / 'labels'),
        (base_path / 'data' / 'images', base_path / 'data' / 'labels'),
    ]
    # Archives often wrap everything in one top-level folder; sorting keeps
    # the choice stable across runs and file systems.
    candidates.extend(
        (subdir / 'images', subdir / 'labels')
        for subdir in sorted(base_path.iterdir())
        if subdir.is_dir()
    )

    # An images folder without labels yields nothing to merge, so a complete
    # pair wins over an earlier images-only candidate.
    for img_dir, lbl_dir in candidates:
        if img_dir.is_dir() and lbl_dir.is_dir():
            return img_dir, lbl_dir

    # Fallback: first existing images folder, labels or not.
    for img_dir, lbl_dir in candidates:
        if img_dir.exists():
            return img_dir, lbl_dir
    return None, None

def merge_dataset(name, source_dir, mapping, output_dir, stats):
    """Copy one source dataset into the merged dataset, remapping class IDs.

    The images folder is scanned for a ``train`` subfolder, a ``val``
    subfolder, and images stored directly in it. Directly stored images are
    written to the ``train`` split. An image is copied only when its label
    file still has at least one line after remapping; copied files are
    renamed ``<name>_<original stem>``.

    Args:
        name: Dataset key, used as the file-name prefix.
        source_dir: Root folder of the extracted source dataset.
        mapping: ``{source_class_id: target_class_id}`` passed to remap_labels.
        output_dir: Root of the merged dataset (images/labels x train/val).
        stats: Mutable counter keyed by target class ID; incremented once per
            written label line.

    Returns:
        Number of images copied.
    """
    src_root, out_root = Path(source_dir), Path(output_dir)

    images_root, labels_root = find_yolo_structure(src_root)
    if images_root is None:
        print(f'  ‚ö†Ô∏è Could not find YOLO structure in {src_root}')
        return 0

    valid_suffixes = {'.jpg', '.jpeg', '.png', '.bmp', '.webp'}
    # (subfolder inside images/labels, destination split); '' = no subfolder.
    split_routes = (('train', 'train'), ('val', 'val'), ('', 'train'))
    merged = 0

    for subfolder, dest_split in split_routes:
        if subfolder:
            image_folder = images_root / subfolder
            label_folder = labels_root / subfolder
        else:
            image_folder, label_folder = images_root, labels_root

        if not image_folder.exists():
            continue

        image_files = (
            entry for entry in image_folder.glob('*')
            if entry.suffix.lower() in valid_suffixes
        )
        for image_file in image_files:
            new_lines = remap_labels(label_folder / f'{image_file.stem}.txt', mapping)
            if not new_lines:
                # No usable annotation left for this image: leave it out.
                continue

            # Prefix with the dataset name so stems from different sources differ.
            merged_stem = f'{name}_{image_file.stem}'
            shutil.copy2(
                image_file,
                out_root / 'images' / dest_split / f'{merged_stem}{image_file.suffix}',
            )
            label_target = out_root / 'labels' / dest_split / f'{merged_stem}.txt'
            label_target.write_text('\n'.join(new_lines))

            merged += 1
            for entry in new_lines:
                stats[int(entry.split()[0])] += 1

    return merged

# Merge every downloaded dataset into OUTPUT_DIR and tally labels per class.
print('üîÑ Merging datasets with class remapping...')
output_dir = setup_output_dirs(OUTPUT_DIR)
stats = defaultdict(int)
total = 0

for name, config in DATASET_CONFIGS.items():
    source_dir = DATASETS_DIR / name
    if not source_dir.exists():
        # Dataset folder is absent (e.g. its download failed): nothing to merge.
        continue
    count = merge_dataset(name, source_dir, config['mapping'], output_dir, stats)
    print(f'  ‚úÖ {name}: {count} images merged')
    total += count

print(f'\nüìä Total: {total} images merged')

# One row per schema class; classes with zero labels get the warning marker.
print('\nüìà Class Distribution:')
for cls_id, cls_name in sorted(MASTER_SCHEMA.items()):
    count = stats.get(cls_id, 0)
    marker = '‚úÖ' if count > 0 else '‚ö†Ô∏è'
    print(f'  {cls_id:2d}: {cls_name:<15} = {count:>5} {marker}')

In [None]:
#@title 5️⃣ Generate data.yaml

yaml_content = {
    'path': str(OUTPUT_DIR),
    'train': 'images/train',
    'val': 'images/val',
    'nc': len(MASTER_SCHEMA),
    'names': MASTER_SCHEMA
}

yaml_path = OUTPUT_DIR / 'data.yaml'
with open(yaml_path, 'w') as f:
    yaml.dump(yaml_content, f, default_flow_style=False, sort_keys=False)

print(f'‚úÖ Generated: {yaml_path}')
print('\nüìÑ data.yaml contents:')
!cat {yaml_path}

In [None]:
#@title 6️⃣ Train YOLO11 Model
from ultralytics import YOLO

# Start from the pretrained YOLO11 nano checkpoint.
model = YOLO('yolo11n.pt')

# Core run settings.
run_settings = dict(
    data=str(yaml_path),
    epochs=100,
    imgsz=640,
    batch=16,
    patience=20,
    device=0,  # GPU
    workers=4,
    project='/content/runs',
    name='petrol_pump_yolo11',
    exist_ok=True,
    amp=True,  # Mixed precision
)

# Augmentation-related settings, kept apart so they are easy to tune.
augmentation = dict(
    augment=True,
    hsv_h=0.015,
    hsv_s=0.7,
    hsv_v=0.4,
    degrees=10,
    translate=0.1,
    scale=0.5,
    flipud=0.0,
    fliplr=0.5,
    mosaic=1.0,
    mixup=0.1,
)

results = model.train(**run_settings, **augmentation)

print('‚úÖ Training complete!')

In [None]:
#@title 7️⃣ Export to ONNX (for Jetson Orin Nano)
from ultralytics import YOLO

best_model = YOLO('/content/runs/petrol_pump_yolo11/weights/best.pt')

# Export to ONNX
best_model.export(format='onnx', imgsz=640, simplify=True, opset=12)

print('‚úÖ ONNX export complete!')
print('üìÅ Model files:')
!ls -la /content/runs/petrol_pump_yolo11/weights/

In [None]:
#@title 8️⃣ Save to Google Drive
import shutil

drive_path = '/content/drive/MyDrive/PetrolPump_YOLO11'
os.makedirs(drive_path, exist_ok=True)

# Copy model files
shutil.copy('/content/runs/petrol_pump_yolo11/weights/best.pt', f'{drive_path}/petrol_pump_yolo11_best.pt')
shutil.copy('/content/runs/petrol_pump_yolo11/weights/best.onnx', f'{drive_path}/petrol_pump_yolo11_best.onnx')
shutil.copy(str(yaml_path), f'{drive_path}/data.yaml')

print(f'‚úÖ Saved to: {drive_path}')
!ls -la "{drive_path}"

In [None]:
#@title 9️⃣ Test Inference (Optional)
from ultralytics import YOLO
import matplotlib.pyplot as plt

# Reload the best checkpoint written by the training run.
model = YOLO('/content/runs/petrol_pump_yolo11/weights/best.pt')

# Pick up to five validation images. The merge step keeps every extension
# listed here (matched case-insensitively), so globbing only '*.jpg' could
# report "no validation images" for a split that holds PNG/JPEG/etc. files.
image_exts = {'.jpg', '.jpeg', '.png', '.bmp', '.webp'}
val_dir = OUTPUT_DIR / 'images' / 'val'
# sorted() makes the sample reproducible between runs.
val_images = sorted(
    p for p in val_dir.glob('*') if p.suffix.lower() in image_exts
)[:5]

if val_images:
    results = model.predict(source=val_images, save=True, conf=0.25)
    print(f'‚úÖ Inference complete on {len(val_images)} images')
    print('üìÅ Results saved to: runs/detect/predict/')
else:
    print('‚ö†Ô∏è No validation images found')