In [None]:
# !pip install ultralytics
# !pip install roboflow
# !pip install ruamel.yaml

In [None]:
# import packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ultralytics
from ultralytics import YOLO
from roboflow import Roboflow
from IPython.display import Image 
from kaggle_secrets import UserSecretsClient
import yaml
from pprint import pprint
import cv2
import os 
import warnings 
# Silence every Python warning for the rest of the session. This keeps cell
# output short, but it also hides deprecation notices from the libraries above.
warnings.filterwarnings("ignore")
# Run Ultralytics' built-in check.
# NOTE(review): presumably prints library / hardware information — confirm.
ultralytics.checks()

# Data inspection and validation

In [None]:
# Load one training image to sanity-check the dataset's image dimensions.
sample_image_path = "/kaggle/input/smoke-fire-detection-yolo/data/train/images/AoF00005.jpg"
image_sample = cv2.imread(sample_image_path)

# cv2.imread signals a missing or unreadable file by returning None rather than
# raising, so fail here with a clear message instead of an AttributeError on `.shape`.
if image_sample is None:
    raise FileNotFoundError(f"Could not read sample image: {sample_image_path}")

# The array is indexed (rows, columns, channels): x holds the height and
# y holds the width, which is why the message prints y before x.
x , y , c = image_sample.shape
print(f"The dimension of image {y} x {x} and {c} channels")

In [None]:
# Root folder of the dataset; each split is read from
# <BASE_PATH>/<split>/images and <BASE_PATH>/<split>/labels.
BASE_PATH = "/kaggle/input/smoke-fire-detection-yolo/data"

# Names of the dataset splits (sub-folders of BASE_PATH) to validate.
splits = ["train", "val", "test"]

def check_split(split, base_path=None):
    """Check that every image in a split has a label file, and vice versa.

    Files are paired by name without extension: ``abc.jpg`` matches ``abc.txt``.
    A short summary is printed for the split.

    Args:
        split: Name of the split sub-folder (e.g. ``"train"``).
        base_path: Dataset root that contains ``<split>/images`` and
            ``<split>/labels``. Defaults to the notebook-level ``BASE_PATH``.

    Returns:
        A ``(missing_labels, missing_images)`` tuple of sets of file stems:
        images that have no label file, and label files that have no image.

    Raises:
        FileNotFoundError: If the images or labels folder does not exist
            (raised by ``os.listdir``).
    """
    # Resolve the global lazily so the function also works with an explicit root.
    root = BASE_PATH if base_path is None else base_path
    img_dir = os.path.join(root, split, "images")
    lbl_dir = os.path.join(root, split, "labels")

    images = [f for f in os.listdir(img_dir) if f.lower().endswith((".jpg", ".png", ".jpeg"))]
    # Label matching stays case-sensitive on purpose: only lowercase ".txt" is counted.
    labels = [f for f in os.listdir(lbl_dir) if f.endswith(".txt")]

    img_names = {os.path.splitext(f)[0] for f in images}
    lbl_names = {os.path.splitext(f)[0] for f in labels}

    missing_labels = img_names - lbl_names
    missing_images = lbl_names - img_names

    print(f"\n {split}")
    print(f"Images : {len(images)}")
    print(f"Labels : {len(labels)}")

    if not missing_labels and not missing_images:
        print("All images and labels are matched")
    else:
        print(f"Missing labels for {len(missing_labels)} images")
        print(f"Missing Images for {len(missing_images)} labels")

    return missing_labels, missing_images

# Print the image/label consistency summary for each split in turn.
for split in splits:
    check_split(split)

# YAML Configuration for YOLO
YOLO uses a YAML file to define the dataset paths and class names.

In [None]:
# Display the dataset config exactly as it ships with the dataset.
path_yaml_file = "/kaggle/input/smoke-fire-detection-yolo/data.yaml"

print("Content of Original yaml file")
with open(path_yaml_file, 'r') as yaml_file:
    data = yaml.safe_load(yaml_file)

# The parsed mapping is printed once the file handle has been released.
print(data)

In [None]:
# Write a copy of the dataset YAML whose path entries point at the dataset's
# absolute location; the copy is saved under /kaggle/working.
input_file_path = '/kaggle/input/smoke-fire-detection-yolo/data.yaml'
output_file_path = '/kaggle/working/data.yaml'
base_path = '/kaggle/input/smoke-fire-detection-yolo/data'

with open(input_file_path, 'r') as source_yaml:
    data = yaml.safe_load(source_yaml)

# Override the dataset root and the image folder of every split.
data['path'] = base_path
for split_name in ('train', 'val', 'test'):
    data[split_name] = f"{base_path}/{split_name}/images"

# sort_keys=False keeps the key order of the loaded file in the saved copy.
with open(output_file_path, 'w') as target_yaml:
    yaml.safe_dump(data, target_yaml, sort_keys=False)

print(f"Updated YAML saved to: {output_file_path}")
pprint(data)

# Checking Dataset Balance
To ensure the model learns effectively, we check the distribution of classes across the dataset splits (train, val, test).

In [None]:
# To check the Balance Data
from collections import defaultdict

def _labels_dir_for(images_dir):
    """Return the labels folder that mirrors a YOLO images folder.

    Only the LAST occurrence of ``images`` in the path is swapped for
    ``labels``. A blanket ``str.replace`` would also rewrite unrelated path
    components (e.g. ``/input/fire_images/train/images``) and point at a folder
    that does not exist. A path without ``images`` is returned unchanged.
    """
    head, found, tail = images_dir.rpartition('images')
    return f"{head}labels{tail}" if found else images_dir


def read_yolo(dataset_metadata):
    """Count, per split, how many label files contain fire, smoke, both or neither.

    Args:
        dataset_metadata: Mapping with ``'train'``, ``'val'`` and ``'test'``
            keys, each holding the path of that split's images folder. The
            matching labels folder is derived from it.

    Returns:
        ``{'train': counts, 'val': counts, 'test': counts}`` where each
        ``counts`` is a ``defaultdict(int)`` keyed by ``'both'``,
        ``'fire_only'``, ``'smoke_only'`` and ``'background'``. A key is only
        present once at least one file of that category was seen.

    Raises:
        KeyError: If a split key is missing from ``dataset_metadata``.
        FileNotFoundError: If a derived labels folder does not exist.
        ValueError: If a label line does not start with an integer class id.

    Note:
        Only ``.txt`` files inside the labels folders are counted, so an image
        that has no label file at all does not appear in any category.
    """
    # Class ids tested below; they line up with names == ['smoke', 'fire'].
    smoke_id, fire_id = 0, 1
    split_names = ('train', 'val', 'test')

    # Resolve every folder first so a missing key fails before any file is read.
    label_dirs = {name: _labels_dir_for(dataset_metadata[name]) for name in split_names}
    split_stats = {name: defaultdict(int) for name in split_names}

    for split_name in split_names:
        label_dir = label_dirs[split_name]
        if not os.path.exists(label_dir):
            raise FileNotFoundError(f"Labels folder not found: {label_dir}")

        counts = split_stats[split_name]
        for label_file in os.listdir(label_dir):
            if not label_file.endswith(".txt"):
                continue

            # The first token of every non-blank line is the class id.
            with open(os.path.join(label_dir, label_file), 'r') as f:
                present_classes = {int(line.split()[0]) for line in f if line.strip()}

            has_smoke = smoke_id in present_classes
            has_fire = fire_id in present_classes

            if has_smoke and has_fire:
                counts['both'] += 1
            elif has_fire:
                counts['fire_only'] += 1
            elif has_smoke:
                counts['smoke_only'] += 1
            else:
                # Empty label files (images without objects) and files that
                # contain only other class ids both land here.
                counts['background'] += 1

    return split_stats
            

In [None]:
# Dataset description passed to read_yolo: class names, class count, dataset
# root, and the images folder plus image count of every split.
dataset_metadata = dict(
    names=['smoke', 'fire'],
    nc=2,
    path='/kaggle/input/smoke-fire-detection-yolo/data',
    test='/kaggle/input/smoke-fire-detection-yolo/data/test/images',
    test_count=4306,
    train='/kaggle/input/smoke-fire-detection-yolo/data/train/images',
    train_count=14122,
    val='/kaggle/input/smoke-fire-detection-yolo/data/val/images',
    val_count=3099,
)

# Fire and Smoke Distribution Report
We summarize the class distribution in each dataset split to understand how balanced the data is.

In [None]:
# Per-split category counts gathered from the label files.
stats = read_yolo(dataset_metadata)

print("Fire / Smoke Distribution Report\n")

for split in ['train', 'val', 'test']:
    s = stats[split]
    total = sum(s.values())
    # A split without any label file has total == 0; dividing by 1 instead
    # reports 0.0% for every category rather than raising ZeroDivisionError.
    denom = total or 1

    print(f"{split.upper()} SET ({total} images)")
    print(f"- Fire only     : {s['fire_only']} ({s['fire_only']/denom:.1%})")
    print(f"- Smoke only    : {s['smoke_only']} ({s['smoke_only']/denom:.1%})")
    print(f"- Both present  : {s['both']} ({s['both']/denom:.1%})")
    print(f"- Background    : {s['background']} ({s['background']/denom:.1%})")
    print()

In [None]:
# Data Visualization: one bar chart of the category counts per split.
labels = ['Fire Only', 'Smoke Only', 'Both', 'Background']

for split in ['train', 'val', 'test']:
    split_counts = stats[split]
    # Listed in the same order as `labels` so bars and tick labels line up.
    values = [
        split_counts[category]
        for category in ('fire_only', 'smoke_only', 'both', 'background')
    ]

    plt.figure()
    plt.bar(labels, values)
    plt.title(f"{split.upper()} Dataset Distribution")
    plt.xlabel("Category")
    plt.ylabel("Number of Images")
    plt.xticks(rotation=15)
    plt.show()

# YOLOv8 Model Training
Train a YOLOv8 model for fire and smoke detection using the prepared dataset and YAML configuration.

In [None]:
#%%capture cell
# Launch training through the `yolo` command line: detection task, dataset
# config from /kaggle/working/data.yaml, starting weights yolov8s.pt,
# 150 epochs, 640-pixel images, batch size 32, patience 20.
# NOTE(review): later cells read artifacts from hard-coded run folders under
# /kaggle/working/runs/detect — confirm which folder this command writes to,
# since it may differ when the cell is run more than once.
!yolo task=detect mode=train \
data=/kaggle/working/data.yaml \
model=yolov8s.pt \
epochs=150 \
imgsz=640 \
batch=32 \
patience=20 \
verbose=False

# Confusion Matrix Visualization
After training, we visualize the confusion matrix to evaluate how well the model distinguishes between the fire and smoke classes.

In [None]:
# Display the confusion-matrix image saved in the training run folder.
Image("/kaggle/working/runs/detect/train/confusion_matrix.png", width=600)

In [None]:
# Display labels.jpg from the training run folder.
# NOTE(review): presumably the label-distribution overview — confirm.
Image("/kaggle/working/runs/detect/train/labels.jpg", width=600)

In [None]:
# Display results.png from the training run folder.
# NOTE(review): presumably the per-epoch loss/metric curves — confirm.
Image("/kaggle/working/runs/detect/train/results.png", width=600)

In [None]:
# Display train_batch0.jpg from the training run folder.
# NOTE(review): presumably a preview of the first training batch — confirm.
Image("/kaggle/working/runs/detect/train/train_batch0.jpg", width=600)


In [None]:
# Display val_batch0_pred.jpg from the training run folder.
# NOTE(review): presumably predictions on the first validation batch — confirm.
Image("/kaggle/working/runs/detect/train/val_batch0_pred.jpg", width=600)


In [None]:
import glob
import os
import shutil


def find_latest_weights(runs_root, run_prefix="train"):
    """Return the most recently modified ``best.pt`` among the training runs.

    Args:
        runs_root: Folder that holds the run folders (``train``, ``train2``, ...).
        run_prefix: Run folders whose name starts with this prefix are searched.

    Returns:
        Path of the newest ``<runs_root>/<run_prefix>*/weights/best.pt``,
        or ``None`` when no such file exists.
    """
    # glob.escape keeps special characters in runs_root from acting as wildcards.
    pattern = os.path.join(glob.escape(runs_root), f"{run_prefix}*", "weights", "best.pt")
    candidates = glob.glob(pattern)
    return max(candidates, key=os.path.getmtime) if candidates else None


# This script copies the best weights to a visible location.
# The run folder is looked up rather than hard-coded (previously "train3"),
# so the cell also works when training ran only once in this session.
source_path = find_latest_weights('/kaggle/working/runs/detect')
destination_path = '/kaggle/working/best_fire_smoke_model.pt'

if source_path is not None:
    shutil.copy(source_path, destination_path)
    print(f"Success! Model saved to: {destination_path}")
else:
    print("Error: Could not find the weight file. Check if training finished.")

In [None]:
import os

from ultralytics import YOLO

# Load your saved model
model = YOLO('/kaggle/working/best_fire_smoke_model.pt')

# Predict on an image from the test split instead of a placeholder path, so the
# cell runs as-is. Point `sample_image_path` at any other image to try it.
test_images_dir = '/kaggle/input/smoke-fire-detection-yolo/data/test/images'
test_image_names = sorted(
    name for name in os.listdir(test_images_dir)
    if name.lower().endswith(('.jpg', '.jpeg', '.png'))
)
if not test_image_names:
    raise FileNotFoundError(f"No images found in: {test_images_dir}")
sample_image_path = os.path.join(test_images_dir, test_image_names[0])

# Run prediction on the selected image
results = model.predict(source=sample_image_path, save=True)