Libraries

In [10]:
# Use the %pip magic (not !pip) so the package is installed into the running kernel's environment.
%pip install -U ipywidgets


Collecting ipywidgets
  Downloading ipywidgets-8.1.7-py3-none-any.whl (139 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.8/139.8 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting jupyterlab_widgets~=3.0.15
  Downloading jupyterlab_widgets-3.0.15-py3-none-any.whl (216 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m216.6/216.6 kB[0m [31m23.8 MB/s[0m eta [36m0:00:00[0m
Collecting widgetsnbextension~=4.0.14
  Downloading widgetsnbextension-4.0.14-py3-none-any.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m30.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: widgetsnbextension, jupyterlab_widgets, ipywidgets
  Attempting uninstall: widgetsnbextension
    Found existing installation: widgetsnbextension 3.0.8
    Uninstalling widgetsnbextension-3.0.8:
      Successfully uninstalled widgetsnbextension-3.0.8
  Attempting uninstall: jupyterlab_widgets
   

In [1]:
import albumentations as A
import cv2
import os
import shutil
import yaml
from pathlib import Path
from ultralytics import YOLO
import numpy as np
from tqdm import tqdm
import tempfile
import json
from datetime import datetime

Defining transforms

In [2]:
def get_simple_transforms():
    """
    Build the Albumentations pipeline used for the smoke/fire augmentations.

    The pipeline applies, each with its own probability, a brightness/contrast
    jitter, a hue/saturation/value shift, a Gaussian blur, a horizontal flip
    and a rotation of up to 10 degrees, and then always resizes to 640x640.
    Bounding boxes are declared as YOLO format and their class ids travel in
    the ``class_labels`` label field.

    Returns:
        The composed ``A.Compose`` transform.
    """
    # Colour transforms
    brightness_contrast = A.RandomBrightnessContrast(
        brightness_limit=0.2, contrast_limit=0.2, p=0.6
    )
    hue_saturation = A.HueSaturationValue(
        hue_shift_limit=10, sat_shift_limit=20, val_shift_limit=10, p=0.5
    )

    # Blur (intended as smoke simulation)
    blur = A.GaussianBlur(blur_limit=(3, 5), p=0.3)

    # Basic geometric transforms
    flip = A.HorizontalFlip(p=0.5)
    rotate = A.Rotate(limit=10, p=0.3)

    # Final resize so every output has the same size
    resize = A.Resize(640, 640, p=1.0)

    box_settings = A.BboxParams(format='yolo', label_fields=['class_labels'])
    steps = [brightness_contrast, hue_saturation, blur, flip, rotate, resize]
    return A.Compose(steps, bbox_params=box_settings)

In [3]:
def get_azure_writable_path(candidates=None):
    """
    Get a writable path in Azure ML Studio.

    Each candidate is probed by creating a uniquely named scratch directory
    inside it and writing a small file there. The scratch directory is always
    removed again, and a pre-existing entry called ``test_write_permission``
    in the candidate is neither touched nor able to make the probe fail.

    Args:
        candidates: Optional iterable of directory paths to probe, in order of
            preference. When omitted, the built-in list below is used.

    Returns:
        The first candidate that accepted the write, or a new directory
        created with ``tempfile.mkdtemp()`` when none of them did.
    """
    if candidates is None:
        # Try different writable locations in Azure ML
        candidates = [
            "/tmp",                    # Temporary directory (usually writable)
            "/home/azureuser",         # User home directory
            os.path.expanduser("~"),   # Home directory expansion
            ".",                       # Current working directory
            "./temp",                  # Local temp directory
        ]

    for location in candidates:
        try:
            # Missing candidates (e.g. "./temp") are created, as the probe always did.
            os.makedirs(location, exist_ok=True)

            # A unique probe directory avoids clashing with existing content and
            # is cleaned up even when the write inside it fails.
            with tempfile.TemporaryDirectory(prefix="test_write_permission_", dir=location) as probe_dir:
                with open(os.path.join(probe_dir, "test.txt"), 'w') as f:
                    f.write("test")
        except OSError as e:  # PermissionError is a subclass of OSError
            print(f"❌ Cannot write to {location}: {e}")
            continue

        print(f"✅ Found writable location: {location}")
        return location

    # Fallback to temp directory
    temp_dir = tempfile.mkdtemp()
    print(f"⚠️ Using temporary directory: {temp_dir}")
    return temp_dir

Augmentation of every image

In [4]:
def _read_yolo_labels(label_path):
    """Parse a YOLO label file into (bboxes, class_labels); a missing or empty file yields two empty lists."""
    bboxes = []
    class_labels = []

    if os.path.exists(label_path) and os.path.getsize(label_path) > 0:
        with open(label_path, 'r') as f:
            for line in f:
                parts = line.split()
                # Lines with fewer than five fields are skipped; extra fields are ignored.
                if len(parts) >= 5:
                    class_id = int(parts[0])
                    x_center, y_center, width, height = map(float, parts[1:5])
                    bboxes.append([x_center, y_center, width, height])
                    class_labels.append(class_id)

    return bboxes, class_labels


def augment_single_image(image_path, label_path, output_dir, transform, num_augmentations=2):
    """
    Augment a single YOLO format image and label pair.

    The image is read with OpenCV, converted to RGB and passed through
    ``transform`` ``num_augmentations`` times. Each result is written to
    ``<output_dir>/images/<stem>_aug_<i>.jpg`` with a matching
    ``<output_dir>/labels/<stem>_aug_<i>.txt``. Images without labels
    (background images) are still augmented and get an empty label file.

    Errors are reported with ``print`` and never raised: a failure in one
    augmentation skips only that augmentation.

    Args:
        image_path: Path of the source image.
        label_path: Path of the YOLO label file; it may be missing or empty.
        output_dir: Directory receiving the ``images`` and ``labels`` folders.
        transform: Callable accepting ``image``, ``bboxes`` and ``class_labels``
            keyword arguments and returning a mapping with the same keys.
        num_augmentations: Number of augmented variants to attempt.

    Returns:
        int: Number of image/label pairs that were actually written.
    """
    written = 0

    try:
        # Read image
        image = cv2.imread(image_path)
        if image is None:
            print(f"Could not read image: {image_path}")
            return written

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Read YOLO format labels (empty lists for background images)
        bboxes, class_labels = _read_yolo_labels(label_path)

        # The output folders do not depend on the loop index, so create them once.
        images_dir = os.path.join(output_dir, 'images')
        labels_dir = os.path.join(output_dir, 'labels')
        os.makedirs(images_dir, exist_ok=True)
        os.makedirs(labels_dir, exist_ok=True)

        base_name = Path(image_path).stem

        for i in range(num_augmentations):
            try:
                transformed = transform(
                    image=image,
                    bboxes=bboxes,
                    class_labels=class_labels
                )

                # Save augmented image
                aug_image_path = os.path.join(images_dir, f"{base_name}_aug_{i}.jpg")
                aug_image_bgr = cv2.cvtColor(transformed['image'], cv2.COLOR_RGB2BGR)
                # cv2.imwrite signals failure through its return value instead of raising.
                if not cv2.imwrite(aug_image_path, aug_image_bgr):
                    raise OSError(f"cv2.imwrite could not write {aug_image_path}")

                # Save augmented labels
                aug_label_path = os.path.join(labels_dir, f"{base_name}_aug_{i}.txt")
                with open(aug_label_path, 'w') as f:
                    for bbox, label in zip(transformed['bboxes'], transformed['class_labels']):
                        # The transform may hand labels back as floats (e.g. 0.0);
                        # int() keeps the class id an integer in the label file.
                        f.write(f"{int(label)} {bbox[0]:.6f} {bbox[1]:.6f} {bbox[2]:.6f} {bbox[3]:.6f}\n")

                written += 1

            except Exception as e:
                print(f"Error creating augmentation {i} for {image_path}: {e}")
                continue

    except Exception as e:
        print(f"Error processing {image_path}: {e}")

    return written

Create the augmented dataset

In [9]:
def augment_dataset(original_dataset_path, augmented_dataset_path, num_augmentations=2, max_train_images=20):
    """
    Augment entire YOLO dataset.

    For each of the ``train``, ``val`` and ``test`` splits found under
    ``original_dataset_path``, the original images and labels are copied to
    ``augmented_dataset_path`` (an empty label file is created for images that
    have none). Augmented variants are then generated for the training split
    only.

    Args:
        original_dataset_path: Root containing ``<split>/images`` and
            ``<split>/labels``.
        augmented_dataset_path: Output root; created if missing.
        num_augmentations: Augmented variants requested per training image.
        max_train_images: Upper bound on how many training images are
            augmented, taken in sorted filename order. The default of 20 is
            the cap that used to be hard-coded for test runs; pass ``None``
            to augment every training image.

    Raises:
        PermissionError: If the output root cannot be created.
    """
    print("Starting dataset augmentation...")
    print(f"Source: {original_dataset_path}")
    print(f"Target: {augmented_dataset_path}")

    # Create output directories with error handling
    try:
        os.makedirs(augmented_dataset_path, exist_ok=True)
        print(f"✅ Created output directory: {augmented_dataset_path}")
    except PermissionError as e:
        print(f"❌ Permission error creating {augmented_dataset_path}: {e}")
        raise

    # Get transform
    transform = get_simple_transforms()

    # Process each split (train, val, test)
    for split in ['train', 'val', 'test']:
        print(f"\nProcessing {split} split...")

        # Create directories
        split_images_dir = os.path.join(augmented_dataset_path, split, 'images')
        split_labels_dir = os.path.join(augmented_dataset_path, split, 'labels')

        try:
            os.makedirs(split_images_dir, exist_ok=True)
            os.makedirs(split_labels_dir, exist_ok=True)
            print(f"✅ Created {split} directories")
        except PermissionError as e:
            print(f"❌ Permission error creating {split} directories: {e}")
            continue

        # Source directories
        src_images_dir = os.path.join(original_dataset_path, split, 'images')
        src_labels_dir = os.path.join(original_dataset_path, split, 'labels')

        if not os.path.exists(src_images_dir):
            print(f"❌ Source directory not found: {src_images_dir}")
            continue

        # Get all image files. Sorting makes the order (and therefore the
        # subset selected by max_train_images) reproducible between runs.
        image_files = sorted(
            img_path
            for ext in ['*.jpg', '*.jpeg', '*.png', '*.bmp']
            for img_path in Path(src_images_dir).glob(ext)
        )

        print(f"Found {len(image_files)} images in {split}")

        if len(image_files) == 0:
            print(f"⚠️ No images found in {src_images_dir}")
            continue

        # First, copy original files
        print(f"Copying original {split} files...")
        copied_count = 0
        for img_path in tqdm(image_files):
            try:
                # Copy image
                dst_img = os.path.join(split_images_dir, img_path.name)
                shutil.copy2(img_path, dst_img)

                # Copy label if exists, otherwise create an empty label file
                label_path = os.path.join(src_labels_dir, img_path.stem + '.txt')
                dst_label = os.path.join(split_labels_dir, img_path.stem + '.txt')
                if os.path.exists(label_path):
                    shutil.copy2(label_path, dst_label)
                else:
                    with open(dst_label, 'w'):
                        pass

                copied_count += 1

            except Exception as e:
                print(f"Error copying {img_path}: {e}")
                continue

        print(f"✅ Copied {copied_count}/{len(image_files)} original files")

        # Then create augmented versions (only for training set)
        if split == 'train':
            print(f"Creating {num_augmentations} augmentations per training image...")
            augmented_count = 0

            # A slice bound of None selects every image.
            train_subset = image_files[:max_train_images]
            if len(train_subset) < len(image_files):
                print(f"⚠️ Augmenting only {len(train_subset)} of {len(image_files)} training images (max_train_images={max_train_images})")

            for img_path in tqdm(train_subset):
                try:
                    label_path = os.path.join(src_labels_dir, img_path.stem + '.txt')
                    augment_single_image(
                        str(img_path),
                        label_path,
                        os.path.join(augmented_dataset_path, split),
                        transform,
                        num_augmentations
                    )
                    augmented_count += 1
                except Exception as e:
                    print(f"Error augmenting {img_path}: {e}")
                    continue

            print(f"✅ Augmented {augmented_count} training images")

    print("Dataset augmentation completed!")


Defining data.yaml for augmented dataset

In [6]:
def create_augmented_data_yaml(original_yaml_path, augmented_dataset_path, output_yaml_path):
    """
    Create data.yaml file for augmented dataset.

    The original YAML is loaded, its ``train``, ``val`` and ``test`` entries
    are replaced with the absolute paths of the matching ``<split>/images``
    folders under ``augmented_dataset_path``, and the result is written to
    ``output_yaml_path``. All other keys are carried over unchanged. The
    written file is then re-read and its three paths are printed.

    Args:
        original_yaml_path: Path of the source data.yaml.
        augmented_dataset_path: Root of the augmented dataset.
        output_yaml_path: Destination file; its parent directory is created
            when the path has one.

    Returns:
        ``output_yaml_path``.

    Raises:
        ValueError: If the original YAML does not contain a mapping.
        Exception: Any other read/parse/write error is printed and re-raised.
    """
    try:
        # Read original yaml
        with open(original_yaml_path, 'r') as f:
            data = yaml.safe_load(f)

        # An empty or non-mapping file would otherwise fail below with an opaque TypeError.
        if not isinstance(data, dict):
            raise ValueError(
                f"Expected a mapping in {original_yaml_path}, got {type(data).__name__}"
            )

        # Update paths to use absolute paths
        for split in ('train', 'val', 'test'):
            data[split] = os.path.abspath(os.path.join(augmented_dataset_path, split, 'images'))

        # Ensure output directory exists. A bare file name has an empty dirname,
        # which os.makedirs rejects, so only create it when there is one.
        output_dir = os.path.dirname(output_yaml_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        # Save new yaml
        with open(output_yaml_path, 'w') as f:
            yaml.dump(data, f, default_flow_style=False)

        print(f"✅ Created augmented data.yaml at: {output_yaml_path}")

        # Verify the yaml file
        with open(output_yaml_path, 'r') as f:
            verify_data = yaml.safe_load(f)
            print(f"Verification - Train path: {verify_data.get('train', 'Not found')}")
            print(f"Verification - Val path: {verify_data.get('val', 'Not found')}")
            print(f"Verification - Test path: {verify_data.get('test', 'Not found')}")

        return output_yaml_path

    except Exception as e:
        print(f"❌ Error creating data.yaml: {e}")
        raise

In [7]:
def explore_current_directory():
    """
    Print an overview of the working directory to help locate the dataset.

    Lists every entry of the current directory (folders and files) followed by
    the sub-directories of the parent directory, and flags folders whose name
    matches one of a few dataset-like names. Listing errors are printed, not
    raised. Nothing is returned.
    """
    print("🔍 Exploring current directory structure...")
    print(f"Current working directory: {os.getcwd()}")

    # Folder names (compared in lower case) that hint at a dataset
    local_hints = ('dataset', 'data', 'yolo', 'train', 'val', 'test')
    parent_hints = ('dataset', 'data', 'yolo')

    # Entries of the current directory
    print("\nCurrent directory contents:")
    try:
        for entry in os.listdir("."):
            if not os.path.isdir(entry):
                print(f"  📄 {entry}")
                continue
            print(f"  📁 {entry}/")
            if entry.lower() in local_hints:
                print("     → Potential dataset directory!")
    except Exception as e:
        print(f"❌ Error listing directory: {e}")

    # Sub-directories of the parent directory (files are not shown)
    print("\nParent directory contents:")
    try:
        for entry in os.listdir(".."):
            if not os.path.isdir(os.path.join("..", entry)):
                continue
            print(f"  📁 ../{entry}/")
            if entry.lower() in parent_hints:
                print("     → Potential dataset directory!")
    except Exception as e:
        print(f"❌ Error listing parent directory: {e}")

In [8]:
def find_dataset_path():
    """
    Locate the dataset root among a fixed list of candidate folders.

    A candidate is accepted when it exists and contains ``train``, ``val``,
    ``test`` and ``data.yaml``. Candidates are checked in order and the first
    complete one wins; the result of every check is printed.

    Returns:
        The accepted candidate path as written in the list, or ``None`` when
        no candidate is complete.
    """
    print("🔍 Searching for dataset...")

    # Candidate locations, in order of preference
    candidates = (
        "./dataset",
        "./dataset/data",
        "../dataset",
        "../dataset/data",
        "/tmp/dataset",
        "/home/azureuser/dataset",
    )
    required_entries = ('train', 'val', 'test', 'data.yaml')

    for path in candidates:
        if not os.path.exists(path):
            print(f"❌ Not found: {path}")
            continue

        print(f"✅ Found dataset at: {path}")

        # Check each entry of the expected YOLO layout and report it
        present = {
            entry: os.path.exists(os.path.join(path, entry))
            for entry in required_entries
        }
        for entry in required_entries:
            mark = '✅' if present[entry] else '❌'
            if entry == 'data.yaml':
                print(f"  📄 {entry}: {mark}")
            else:
                print(f"  📁 {entry}/: {mark}")

        if all(present.values()):
            return path
        print("  ⚠️ Missing YOLO structure, continuing search...")

    print("❌ No valid dataset found!")
    return None

Train with Albumentations

In [None]:
def _to_jsonable(value):
    """Fallback for json.dump: unwrap scalar-like objects through .item(), otherwise use str()."""
    item = getattr(value, "item", None)
    if callable(item):
        try:
            return item()
        except (TypeError, ValueError):
            pass
    return str(value)


def train_with_albumentations(
    model_path="runs_best_no_aug/runs/detect/baseline_builtin2/weights/best.pt",
    epochs=3,
    batch=16,
    device="cpu",
    num_augmentations=2,
):
    """
    Complete training pipeline with Albumentations for Azure ML.

    Steps: pick a writable working directory, locate the original dataset,
    build the augmented dataset there (skipped when the folder already
    exists), write its data.yaml, verify the split folders, train a YOLO model
    and store a JSON summary of the results in the working directory.

    Args:
        model_path: Weights to start from. When the file does not exist,
            ``yolov8s.pt`` is used instead.
        epochs: Number of training epochs.
        batch: Training batch size.
        device: Device string passed to the trainer.
        num_augmentations: Augmented variants per training image when the
            augmented dataset has to be created.

    Returns:
        The object returned by ``model.train(...)``, or ``None`` when a step
        before or during training failed. A failure to write the JSON summary
        is only reported; the training results are still returned.
    """
    print("=== YOLO Training with Albumentations (Azure ML) ===")

    # Get writable location
    writable_path = get_azure_writable_path()

    # Find the actual dataset location
    original_dataset = find_dataset_path()
    if not original_dataset:
        print("❌ Cannot proceed without dataset!")
        return None

    original_yaml = os.path.join(original_dataset, "data.yaml")

    # New paths (writable location)
    augmented_dataset = os.path.join(writable_path, "augmented_dataset")
    augmented_yaml = os.path.join(augmented_dataset, "data.yaml")

    print(f"Original dataset: {original_dataset}")
    print(f"Augmented dataset: {augmented_dataset}")
    print(f"Working directory: {writable_path}")

    # Verify original dataset exists
    if not os.path.exists(original_dataset):
        print(f"❌ Original dataset not found: {original_dataset}")
        return None

    if not os.path.exists(original_yaml):
        print(f"❌ Original data.yaml not found: {original_yaml}")
        return None

    # Step 1: Create augmented dataset
    # NOTE(review): only the existence of the folder is checked, so a folder left
    # behind by an interrupted run is reused as-is; delete it to force a rebuild.
    if not os.path.exists(augmented_dataset):
        print("\nStep 1: Creating augmented dataset...")
        try:
            augment_dataset(original_dataset, augmented_dataset, num_augmentations=num_augmentations)
        except Exception as e:
            print(f"❌ Failed to create augmented dataset: {e}")
            return None
    else:
        print("\nStep 1: Augmented dataset already exists, skipping...")

    # Step 2: Create data.yaml for augmented dataset
    print("\nStep 2: Creating data.yaml for augmented dataset...")
    try:
        create_augmented_data_yaml(original_yaml, augmented_dataset, augmented_yaml)
    except Exception as e:
        print(f"❌ Failed to create data.yaml: {e}")
        return None

    # Step 3: Verify dataset structure
    print("\nStep 3: Verifying dataset structure...")
    # Count every extension that augment_dataset copies, not only *.jpg.
    image_patterns = ('*.jpg', '*.jpeg', '*.png', '*.bmp')
    for label, split in (("Training", "train"), ("Validation", "val"), ("Testing", "test")):
        images_dir = os.path.join(augmented_dataset, split, 'images')
        if not os.path.exists(images_dir):
            print(f"❌ {label} images directory not found: {images_dir}")
            return None
        image_count = sum(len(list(Path(images_dir).glob(pattern))) for pattern in image_patterns)
        print(f"✅ {label} images: {image_count}")

    # Step 4: Train with Albumentations-augmented dataset
    print("\nStep 4: Training with Albumentations-augmented dataset...")

    try:
        # Load your pre-trained model
        if os.path.exists(model_path):
            print(f"Loading model from: {model_path}")
            albumentations_model = YOLO(model_path)
        else:
            print("Model path not found, using yolov8s.pt")
            albumentations_model = YOLO("yolov8s.pt")

        print(f"Using YAML file: {augmented_yaml}")
        with open(augmented_yaml) as f:
            print(f.read())

        # Train the model
        albumentations_results = albumentations_model.train(
            data=augmented_yaml,
            epochs=epochs,
            batch=batch,
            # Intended to disable built-in augmentation since we pre-augmented.
            # NOTE(review): confirm against the Ultralytics docs that `augment`
            # controls training-time augmentation and not only prediction.
            augment=False,
            patience=15,
            workers=0,
            device=device,
            name="albumentations_augmented",
            verbose=True,
            project=os.path.join(writable_path, "runs")  # Save to writable location
        )

        print(f"✅ Albumentations training completed!")

        has_results_dict = hasattr(albumentations_results, 'results_dict')

        # Extract results
        if has_results_dict:
            map50 = albumentations_results.results_dict.get('metrics/mAP50(B)', 'N/A')
            precision = albumentations_results.results_dict.get('metrics/precision(B)', 'N/A')
            recall = albumentations_results.results_dict.get('metrics/recall(B)', 'N/A')

            print(f"Final Results:")
            print(f"  mAP50: {map50}")
            print(f"  Precision: {precision}")
            print(f"  Recall: {recall}")

        # Save results summary
        results_file = os.path.join(writable_path, "training_results.json")
        results_summary = {
            'timestamp': datetime.now().isoformat(),
            'model_path': model_path,
            'augmented_dataset': augmented_dataset,
            'results': albumentations_results.results_dict if has_results_dict else str(albumentations_results)
        }

        # Saving the summary is best-effort: a serialisation or disk error here
        # must not turn a finished training run into a reported failure.
        try:
            with open(results_file, 'w') as f:
                # default=: metric values may not be plain Python numbers
                # (for example NumPy scalars), which json cannot encode itself.
                json.dump(results_summary, f, indent=2, default=_to_jsonable)
            print(f"✅ Results saved to: {results_file}")
        except (OSError, TypeError, ValueError) as e:
            print(f"⚠️ Could not save results summary to {results_file}: {e}")

        return albumentations_results

    except Exception as e:
        print(f"❌ Training failed: {e}")
        import traceback
        traceback.print_exc()
        return None

def test_transforms():
    """
    Smoke-test the augmentation pipeline on synthetic data.

    Runs the pipeline from ``get_simple_transforms()`` once on a random
    480x640 RGB image with two YOLO-format boxes and prints the shapes, boxes
    and labels before and after.

    Returns:
        bool: ``True`` when the transform ran, ``False`` (after printing the
        traceback) when it raised.
    """
    print("Testing Albumentations transforms...")

    try:
        pipeline = get_simple_transforms()

        # Synthetic inputs: random pixels plus two boxes in YOLO format
        sample_image = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
        sample_boxes = [[0.5, 0.5, 0.2, 0.3], [0.3, 0.7, 0.1, 0.1]]
        sample_labels = [0, 1]  # smoke, fire

        outcome = pipeline(
            image=sample_image,
            bboxes=sample_boxes,
            class_labels=sample_labels,
        )

        print("✅ Transform test successful!")
        print(f"Input image shape: {sample_image.shape}")
        print(f"Output image shape: {outcome['image'].shape}")
        print(f"Input bboxes: {sample_boxes}")
        print(f"Output bboxes: {outcome['bboxes']}")
        print(f"Labels preserved: {outcome['class_labels']}")

        return True

    except Exception as e:
        print(f"❌ Transform test failed: {e}")
        import traceback
        traceback.print_exc()
        return False

def test_write_permissions(locations=None):
    """
    Test write permissions in different locations.

    Each location is probed by creating a uniquely named scratch directory
    inside it and writing a small file there. The scratch directory is always
    removed again, so an existing ``permission_test`` entry in the location is
    left alone and cannot make the probe fail.

    Args:
        locations: Optional iterable of directory paths to probe. When
            omitted, the built-in list below is used.

    Returns:
        dict: Maps each probed location to ``True`` (writable) or ``False``.
    """
    print("Testing write permissions in Azure ML...")

    if locations is None:
        locations = [
            "/tmp",
            "/home/azureuser",
            os.path.expanduser("~"),
            ".",
            "./temp"
        ]

    results = {}
    for location in locations:
        try:
            # Missing locations (e.g. "./temp") are created, as the probe always did.
            os.makedirs(location, exist_ok=True)

            # Unique probe directory, removed automatically even if the write fails.
            with tempfile.TemporaryDirectory(prefix="permission_test_", dir=location) as probe_dir:
                with open(os.path.join(probe_dir, "test.txt"), 'w') as f:
                    f.write("test write permission")

        except Exception as e:
            print(f"❌ {location} - NOT WRITABLE: {e}")
            results[location] = False
        else:
            print(f"✅ {location} - WRITABLE")
            results[location] = True

    return results

# Entry point: run the diagnostics first, then the training pipeline
if __name__ == "__main__":
    print("🔥 YOLO Smoke & Fire Detection with Albumentations (Azure ML) 🔥")
    print()

    # Diagnostic 1: where can we write?
    print("Testing write permissions...")
    test_write_permissions()
    print()

    # Diagnostic 2: what does the working directory look like?
    print("Exploring directory structure...")
    explore_current_directory()
    print()

    # Diagnostic 3: the transforms must work before any training is attempted
    if not test_transforms():
        print("❌ Fix transform issues before proceeding with training.")
    else:
        print("\n" + "=" * 50)
        print("Starting training pipeline...")

        # Make sure a dataset can be found before starting the pipeline
        dataset_path = find_dataset_path()
        if not dataset_path:
            print("\n❌ Cannot proceed without dataset. Please check your dataset location.")
        else:
            # Run training pipeline and report its outcome
            results = train_with_albumentations()
            print(
                "\n🎉 Training completed successfully!"
                if results
                else "\n❌ Training failed. Check the logs above."
            )

🔥 YOLO Smoke & Fire Detection with Albumentations (Azure ML) 🔥

Testing write permissions...
Testing write permissions in Azure ML...
✅ /tmp - WRITABLE
✅ /home/azureuser - WRITABLE
✅ /home/azureuser - WRITABLE
✅ . - WRITABLE
✅ ./temp - WRITABLE

Exploring directory structure...
🔍 Exploring current directory structure...
Current working directory: /mnt/batch/tasks/shared/LS_root/mounts/clusters/practica1/code/Users/practica/setdedate

Current directory contents:
  📄 .amlignore
  📄 .amlignore.amltmp
  📁 .ipynb_aml_checkpoints/
  📄 albumentations.ipynb
  📄 albumentations.ipynb.amltmp
  📁 dataset/
     → Potential dataset directory!
  📄 dataset.py
  📄 dataset.py.amltmp
  📄 dataset.zip
  📁 downloaded_dataset/
  📁 fire-smoke-data-1/
  📁 runs/
  📁 runs_best_no_aug/
  📄 runs_best_no_aug.zip
  📁 smoke_fire_detect_dataset1-1/
  📁 temp/
  📄 test.ipynb
  📄 test.ipynb.amltmp
  📄 test1.py
  📄 test1.py.amltmp
  📄 test2.py
  📄 test2.py.amltmp
  📄 unzip_dataset.py
  📄 unzip_dataset.py.amltmp
  📄 unzip_

        1/3         0G       1.12     0.7755      1.164          2        640: 100%|██████████| 896/896 [3:00:34<00:00, 12.09s/it]  
        2/3         0G      1.239      0.935      1.234          6        640: 100%|██████████| 896/896 [3:08:08<00:00, 12.60s/it]  s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):  99%|█████████▉| 96/97 [08:04<00:06,  6.60s/it]
        3/3         0G      1.206     0.8865      1.214          3        640: 100%|██████████| 896/896 [3:13:10<00:00, 12.94s/it]  
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):  99%|█████████▉| 96/97 [08:26<00:06,  6.89s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 97/97 [07:37<00:00,  4.72s/it]


In [16]:
# Copy the generated data.yaml out of /tmp to "augmented_dataset2" (relative to the working directory).
!cp -r /tmp/augmented_dataset/data.yaml augmented_dataset2

In [15]:
# Recursively copy /tmp/runs/albumentations_augmented5 to "runs_albumentations_augmented5" (relative to the working directory).
!cp -r /tmp/runs/albumentations_augmented5 runs_albumentations_augmented5

In [17]:
import yaml

yaml_path = "/tmp/augmented_dataset/data.yaml"

# Parse the generated data.yaml and print each top-level entry as "key: value".
# A missing file and any other error are reported instead of raised.
try:
    with open(yaml_path, "r") as file:
        data = yaml.safe_load(file)
        print("📄 YAML Contents:")
        for key, value in data.items():
            print(key, value, sep=": ")
except FileNotFoundError:
    print(f"❌ File not found: {yaml_path}")
except Exception as e:
    print(f"⚠️ Error reading file: {e}")


📄 YAML Contents:
names: ['smoke', 'fire']
nc: 2
path: /mnt/batch/tasks/shared/LS_root/mounts/clusters/computeextract/code/Users/practica/setdedate/dataset/data
test: /tmp/augmented_dataset/test/images
test_count: 4306
train: /tmp/augmented_dataset/train/images
train_count: 14122
val: /tmp/augmented_dataset/val/images
val_count: 3099
