In [None]:
# Step 1: Download the dataset from Kaggle.
# These are shell commands (IPython `!` syntax), so run this cell in a notebook or Colab environment.
# Prerequisite: a `kaggle.json` API token file must be present in the current working directory,
# because the `cp` below copies it from there into ~/.kaggle/.
# NOTE(review): later cells read from './forest-fire-smoke-and-non-fire-image-dataset/train';
# confirm that this is where `--unzip` places the files and adjust those paths if it is not.
!mkdir -p ~/.kaggle  # Ensure the directory exists
!cp kaggle.json ~/.kaggle/  # Copy the API key file to the correct location
!chmod 600 ~/.kaggle/kaggle.json  # Set permissions for the file
!kaggle datasets download amerzishminha/forest-fire-smoke-and-non-fire-image-dataset --unzip  # Download and unzip the dataset


In [None]:
import os
import pandas as pd

# Root of the extracted training images; adjust this if your unzip location differs
dataset_dir = './forest-fire-smoke-and-non-fire-image-dataset/train'  # Change this path to reflect your dataset structure after unzipping

# Tally of lower-cased file extension -> number of files carrying it
file_types = {}

# Visit every file below dataset_dir and bump the counter for its extension
for _folder, _subfolders, filenames in os.walk(dataset_dir):
    for filename in filenames:
        _stem, suffix = os.path.splitext(filename)
        suffix = suffix.lower()  # '.JPG' and '.jpg' count as the same type
        file_types[suffix] = file_types.get(suffix, 0) + 1

# Tabulate the tally with the most common extension first
file_types_df = pd.DataFrame(
    list(file_types.items()), columns=['File Extension', 'Count']
).sort_values(by='Count', ascending=False)

# Show the table
print(file_types_df)


In [None]:
import os
import matplotlib.pyplot as plt
import pandas as pd
from PIL import Image

# Directories for images
# Root of the training split; the helper functions in this cell treat each immediate
# subdirectory of this path as one class.
train_dir = './forest-fire-smoke-and-non-fire-image-dataset/train'

# Function to get image counts by class
def get_image_counts(directory):
    """Count the regular files inside each immediate subdirectory of ``directory``.

    Args:
        directory: Path whose immediate subdirectories are treated as classes.

    Returns:
        dict mapping each subdirectory name to the number of regular files
        found directly inside it. Nested folders are neither counted nor
        descended into, and loose files next to the class folders are ignored.
    """
    class_counts = {}
    for entry in os.listdir(directory):
        class_path = os.path.join(directory, entry)
        if not os.path.isdir(class_path):
            continue  # only folders represent classes
        n_files = 0
        for name in os.listdir(class_path):
            if os.path.isfile(os.path.join(class_path, name)):
                n_files += 1
        class_counts[entry] = n_files
    return class_counts

# Function to get image dimensions distribution
def get_image_dimensions(directory):
    """Collect the pixel size of every readable image one level below ``directory``.

    Args:
        directory: Path whose immediate subdirectories hold the image files.

    Returns:
        list with one ``img.size`` value per file that could be opened as an
        image, in directory-listing order. Files that fail to open are skipped
        without any message.
    """
    dimensions = []
    for entry in os.listdir(directory):
        class_path = os.path.join(directory, entry)
        if not os.path.isdir(class_path):
            continue
        for name in os.listdir(class_path):
            candidate = os.path.join(class_path, name)
            if not os.path.isfile(candidate):
                continue
            try:
                with Image.open(candidate) as img:
                    dimensions.append(img.size)
            except Exception:
                # Best effort: anything that cannot be opened as an image is skipped
                continue
    return dimensions

# Get class counts
# Maps each class folder under train_dir to the number of files it contains
class_counts = get_image_counts(train_dir)

# Get image dimensions
# One size entry per readable image; this opens every file, so it can take a while on large datasets
image_dimensions = get_image_dimensions(train_dir)

# Plot the class distribution
def plot_class_distribution(class_counts):
    """Show a bar chart with one bar per class.

    Args:
        class_counts: dict mapping class name to number of images.
    """
    labels = list(class_counts)
    heights = list(class_counts.values())

    _, ax = plt.subplots(figsize=(8, 6))
    ax.bar(labels, heights, color='skyblue')
    ax.set_title('Class Distribution')
    ax.set_xlabel('Class')
    ax.set_ylabel('Number of Images')
    plt.xticks(rotation=45)  # slanted labels so longer class names do not overlap
    plt.show()

# Plot the image dimensions distribution
def plot_image_dimensions(image_dimensions):
    """Show overlaid histograms of image widths and heights.

    Args:
        image_dimensions: sequence of two-element size entries; the first
            element is plotted as width and the second as height.
    """
    df = pd.DataFrame(image_dimensions, columns=['Width', 'Height'])

    _, ax = plt.subplots(figsize=(8, 6))
    # Same binning and transparency for both series so they can be compared directly
    for column, colour in (('Width', 'blue'), ('Height', 'green')):
        ax.hist(df[column], bins=30, alpha=0.5, label=column, color=colour)
    ax.set_title('Image Dimensions Distribution')
    ax.set_xlabel('Pixels')
    ax.set_ylabel('Frequency')
    ax.legend()
    plt.show()

# Plot class distribution and image dimensions distribution
# Both helpers end with plt.show(), so each call renders its own figure
plot_class_distribution(class_counts)
plot_image_dimensions(image_dimensions)


In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, BatchNormalization, Activation, Dropout, Flatten, Conv2D, MaxPooling2D, LSTM, GRU, TimeDistributed
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras import mixed_precision, regularizers
from tensorflow.keras import backend as K
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers.experimental import preprocessing
import keras_tuner as kt  # Import Keras Tuner for hyperparameter tuning
from PIL import Image

# Enable device placement logging to verify that operations are running on the GPU
# NOTE(review): placement logging can make cell output very noisy; consider turning it off once GPU use is confirmed
tf.debugging.set_log_device_placement(True)

# Set the device to GPU explicitly to ensure TensorFlow uses the GPU
# Restricts CUDA to device index 0.
# NOTE(review): this is set after `import tensorflow`; presumably it has to run before the
# first GPU query below to take effect — confirm on the target environment.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# Check if GPU is available and enable memory growth to avoid allocating all memory
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("Memory growth enabled for", gpus)
    except RuntimeError as e:
        # A RuntimeError here is reported and otherwise ignored, so the notebook keeps running
        print("Error setting memory growth on GPU:", e)

# Enable mixed precision policy
# Applied globally: layers created after this point use the 'mixed_float16' policy
# unless they pass an explicit dtype
policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_global_policy(policy)

# Set directories for training and validation
# train_dir and test_dir are the 'train' and 'test' subfolders of base_dir
base_dir = './forest-fire-smoke-and-non-fire-image-dataset/'
train_dir = os.path.join(base_dir, 'train')
test_dir = os.path.join(base_dir, 'test')


def count_images(root_dir):
    """Report and total the number of files in every folder below ``root_dir``.

    Every file is counted, whatever its extension. One line is printed for
    each folder that directly contains at least one file.

    Args:
        root_dir: Directory tree to walk.

    Returns:
        int: total number of files found in the whole tree.
    """
    folder_sizes = []
    for folder, _subdirs, filenames in os.walk(root_dir):
        if not filenames:
            continue  # folders holding only subfolders are not reported
        print(f"Folder {folder} contains {len(filenames)} images")
        folder_sizes.append(len(filenames))
    return sum(folder_sizes)

# Count the files in each split; count_images also prints one line per non-empty folder
train_images = count_images(train_dir)
test_images = count_images(test_dir)

# Totals include every file found, not only files with image extensions
print(f"Total images in train: {train_images}")
print(f"Total images in test: {test_images}")


def convert_images(root_dir):
    """Convert every image below ``root_dir`` to an RGB JPEG, in place.

    For each file that can be opened as an image:

    * a file that is already named ``*.jpg`` and stored as an RGB JPEG is left
      untouched (no lossy re-encode);
    * any other image is re-encoded as ``<stem>.jpg`` and the source file is
      then removed, so the dataset does not end up holding the same picture
      twice (once per extension);
    * if ``<stem>.jpg`` already exists as a different file, the converted
      image is written to ``<stem>_1.jpg``, ``<stem>_2.jpg``, ... instead of
      overwriting it.

    Files that cannot be opened or converted are reported and left as they are.

    Args:
        root_dir: Directory tree whose files are converted.

    Returns:
        None. A summary line with the number of processed images is printed.
    """
    count = 0
    for subdir, dirs, files in os.walk(root_dir):
        for file in files:
            file_path = os.path.join(subdir, file)
            stem = os.path.splitext(file_path)[0]
            new_file_path = stem + '.jpg'
            try:
                with Image.open(file_path) as img:
                    already_rgb_jpeg = (
                        file_path == new_file_path
                        and img.format == 'JPEG'
                        and img.mode == 'RGB'
                    )
                    # convert() returns a fully decoded copy, so the source
                    # handle can be closed before anything is written.
                    rgb_img = None if already_rgb_jpeg else img.convert('RGB')

                if rgb_img is not None:
                    # Never clobber a different image that already owns the target name
                    suffix = 0
                    while new_file_path != file_path and os.path.exists(new_file_path):
                        suffix += 1
                        new_file_path = f"{stem}_{suffix}.jpg"

                    rgb_img.save(new_file_path, 'JPEG')

                    # Drop the source only after the JPEG was written successfully
                    if new_file_path != file_path:
                        os.remove(file_path)
                count += 1
            except Exception as e:
                # Best effort: report the file and carry on with the rest
                print(f"Error converting {file_path}: {e}")

    print(f"Processed {count} images.")

# Convert images in train and test directories
# This writes to the dataset folders on disk; each call prints its own summary line
convert_images(train_dir)
convert_images(test_dir)

In [None]:
# Classes
# Expected class folder names. The order written here is NOT the label order:
# flow_from_directory numbers the class folders alphanumerically, so `classes`
# is re-derived from the generator further down and only then used for labels.
classes = ['non fire', 'fire', 'Smoke']

# Shared loader settings
IMAGE_SIZE = (128, 128)
BATCH_SIZE = 32
VALIDATION_SPLIT = 0.1  # 10% validation split

# Data augmentation and loading using ImageDataGenerator
train_datagen = ImageDataGenerator(
    rotation_range=30,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    brightness_range=[0.2, 1.0],  # Added brightness augmentation
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode="nearest",
    rescale=1.0 / 255.0,
    validation_split=VALIDATION_SPLIT
)

# Validation images must not be augmented, otherwise val_loss/val_accuracy are
# measured on randomly distorted pictures. Using the same validation_split keeps
# the held-out files identical to the ones excluded from the training subset.
val_datagen = ImageDataGenerator(
    rescale=1.0 / 255.0,
    validation_split=VALIDATION_SPLIT
)

test_datagen = ImageDataGenerator(rescale=1.0 / 255.0)

# Flow from directory to load images batch-wise
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='training'
)

validation_generator = val_datagen.flow_from_directory(
    train_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation'
)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False  # Do not shuffle the test set
)

# Put `classes` into the index order the generators actually use, so that
# classes[i] is the name of label i in predictions, reports and plots.
expected_classes = set(classes)
classes = sorted(train_generator.class_indices, key=train_generator.class_indices.get)
if set(classes) != expected_classes:
    print(f"[WARN] class folders found on disk {classes} differ from the expected {sorted(expected_classes)}")

# Train and test labels must share one mapping, or the evaluation is meaningless
if test_generator.class_indices != train_generator.class_indices:
    raise ValueError(
        f"Train/test class mapping mismatch: {train_generator.class_indices} "
        f"vs {test_generator.class_indices}"
    )

from tensorflow.keras.regularizers import l2

# Define the model building function for Keras Tuner
def build_model(hp):
    """Build and compile the MobileNetV2-based classifier for one tuner trial.

    The network is a frozen ImageNet MobileNetV2 backbone followed by global
    average pooling, a regularised 128-unit dense layer, batch normalisation,
    dropout and a softmax layer with one unit per entry in ``classes``.

    Args:
        hp: Keras Tuner hyperparameter container; the Adam learning rate is
            sampled from it on a log scale between 1e-5 and 1e-1.

    Returns:
        A compiled Sequential model that takes 128x128 RGB images scaled to
        [0, 1], as produced by the data generators' ``rescale=1/255``.
    """
    base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(128, 128, 3))
    base_model.trainable = False  # Freeze layers initially

    model = Sequential([
        # The ImageNet weights of MobileNetV2 were trained on inputs in [-1, 1];
        # the generators deliver [0, 1], so map x -> 2x - 1 inside the model.
        preprocessing.Rescaling(scale=2.0, offset=-1.0, input_shape=(128, 128, 3)),
        base_model,
        GlobalAveragePooling2D(),
        Dense(128, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(len(classes), activation='softmax', dtype='float32')  # Ensuring softmax layer is float32 for stability
    ])

    # Hyperparameter tuning for learning rate
    lr = hp.Float('learning_rate', min_value=1e-5, max_value=1e-1, sampling='log')

    # Optimizer setup
    opt = Adam(learning_rate=lr)

    # Compile the model
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

    return model

# Initialize the Keras Tuner
# NOTE(review): results are stored under kt_search/fire_detection_tuning; presumably an
# existing folder from an earlier run is reused rather than searched again — confirm,
# and delete it when the model definition changes.
tuner = kt.RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=3,  # Number of models to try
    executions_per_trial=1,  # Number of times to repeat each model
    directory='kt_search',  # Directory to store the tuner results
    project_name='fire_detection_tuning'
)

# Callbacks for early stopping and learning rate reduction
# restore_best_weights=True rolls the model back to the epoch with the lowest
# val_loss when training stops early; without it the model keeps the weights of
# the last epoch, i.e. `patience` epochs after the best one.
early_stop = EarlyStopping(
    monitor="val_loss",
    patience=5,
    mode="min",
    restore_best_weights=True,
    verbose=1
)
reduce_lr = ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=3, verbose=1)

# Search for the best hyperparameters
tuner.search(train_generator, validation_data=validation_generator, epochs=10, callbacks=[early_stop, reduce_lr])

# Retrieve the best model
best_model = tuner.get_best_models(num_models=1)[0]

# Steps are derived from the generators' own batch size instead of a repeated
# literal; max(1, ...) keeps a very small split from producing zero steps.
train_steps = max(1, train_generator.samples // train_generator.batch_size)
val_steps = max(1, validation_generator.samples // validation_generator.batch_size)

# Train the best model with early stopping and dynamic learning rate
H = best_model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=20,  # More epochs to let early stopping find the best one
    steps_per_epoch=train_steps,
    validation_steps=val_steps,
    callbacks=[early_stop, reduce_lr],
    verbose=1
)

# Saving the best model
best_model_output_path = 'output/fire_detection_best_model.h5'
# The output folder is not created anywhere else; without it both the model
# save and the figure save below fail on a fresh checkout.
os.makedirs(os.path.dirname(best_model_output_path), exist_ok=True)
print(f"[INFO] serializing best model to '{best_model_output_path}'...")
best_model.save(best_model_output_path)

# Plotting loss and accuracy
N = np.arange(0, len(H.history["loss"]))  # epoch indices actually run (early stopping may cut training short)
plt.figure(figsize=(12, 8))

plt.subplot(121)
plt.title("Losses")
plt.plot(N, H.history["loss"], label="train_loss")
plt.plot(N, H.history["val_loss"], label="val_loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()  # legend for the loss panel (a single trailing legend() only labels the last subplot)

plt.subplot(122)
plt.title("Accuracies")
plt.plot(N, H.history["accuracy"], label="train_acc")
plt.plot(N, H.history["val_accuracy"], label="val_acc")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()

plt.savefig("output/training_plot.png")

# Load the best model for testing
print("[INFO] loading best model...")
best_model = load_model(best_model_output_path)

# Label names in the index order the generator assigned (alphanumeric by folder
# name). Taking them from class_indices guarantees that name i belongs to label i
# in the predictions, the confusion matrix and the report.
class_names = sorted(test_generator.class_indices, key=test_generator.class_indices.get)
label_ids = list(range(len(class_names)))

# Test the model and evaluate
predictions = best_model.predict(test_generator)
y_true = test_generator.classes
y_pred = np.argmax(predictions, axis=1)

import seaborn as sns
# Confusion matrix
os.makedirs("output", exist_ok=True)
cm = confusion_matrix(y_true, y_pred, labels=label_ids)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=class_names, yticklabels=class_names)
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.savefig("output/confusion_matrix.png")
plt.show()

# Classification report
print(classification_report(y_true, y_pred, labels=label_ids, target_names=class_names))

# Test the model on sample images and save the results
# The test generator is not shuffled, so this takes the first image of each of
# the next 10 batches rather than a random draw.
os.makedirs('output/testing', exist_ok=True)  # cv2.imwrite does not create folders
for i in range(10):
    img, _ = next(test_generator)
    pred = best_model.predict(np.expand_dims(img[0], axis=0))[0]
    result = class_names[np.argmax(pred)]

    # Generator images are RGB floats in [0, 1]; OpenCV writes 8-bit BGR, so
    # convert both the dtype and the channel order before drawing and saving.
    org_img = np.clip(img[0] * 255.0, 0, 255).astype(np.uint8)
    org_img = cv2.cvtColor(org_img, cv2.COLOR_RGB2BGR)
    org_img = cv2.resize(org_img, (500, 500))
    cv2.putText(org_img, result, (35, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.25, (0, 255, 0), 3)
    if not cv2.imwrite(f'output/testing/{i}.png', org_img):
        print(f"[WARN] could not write output/testing/{i}.png")

# Clear session to free memory
K.clear_session()
# Load the best model for testing
print("[INFO] loading best model...")
model = load_model('output/fire_detection_best_model.h5')

# Prepare test data using ImageDataGenerator
test_datagen = ImageDataGenerator(rescale=1.0 / 255.0)
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(128, 128),
    batch_size=32,
    class_mode='categorical',
    shuffle=False  # Important for consistency in evaluation
)

# Label names in the index order the generator assigned (alphanumeric by folder
# name), so that name i really belongs to label i in the plots and the report.
class_names = sorted(test_generator.class_indices, key=test_generator.class_indices.get)
label_ids = list(range(len(class_names)))

# Predict using the test data
print("[INFO] evaluating model...")
test_loss, test_acc = model.evaluate(test_generator, verbose=1)
print(f"Test Accuracy: {test_acc:.2f}")
print(f"Test Loss: {test_loss:.2f}")

# Generate predictions for further analysis like confusion matrix
predictions = model.predict(test_generator)
y_true = test_generator.classes
y_pred = np.argmax(predictions, axis=1)

# Confusion matrix
cm = confusion_matrix(y_true, y_pred, labels=label_ids)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=class_names, yticklabels=class_names)
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.show()

# Classification report
print(classification_report(y_true, y_pred, labels=label_ids, target_names=class_names))
