In [1]:
import warnings
warnings.simplefilter(action='ignore')

# Import standard libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Import Keras libraries
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Activation, Conv2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization
# from keras.layers.convolutional import Conv2D
# from keras.layers.convolutional import MaxPooling2D
from keras.optimizers import SGD
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator

# Import sklearn packages
from sklearn.model_selection import train_test_split

# Import other utils
import glob
import os
from PIL import Image
from keras.preprocessing.image import img_to_array

Using TensorFlow backend.


# Final Project - Classifying Fire Images with CNN+MLP Model
## Andrey Novichkov - June 16, 2020

# What is the goal of this project?
- I want to take in input images, some with fire, and some without, and build a Neural Net that will be able, with high accuracy, classify the images as having fire or without.

## Inputs:
- A folder with images that contain fire
- A folder with images that do not contain fire
- Link to input data -> https://github.com/cair/Fire-Detection-Image-Dataset

## Let's go step by step:
1. Import the data into a dataframe
2. Split df into train and test
3. Create a data generator that will generate x_batch and y_batch, where x_batch is the image converted into a numpy array, and y_batch, which are the categorical labels: **1 for fire, 0 for no fire**
4. Define CNN+MLP model
5. Compile the model
6. Use fit_generator to train the model
7. Evaluate the model
8. Apply data augmentation and rebuild, retrain and re-evaluate model
9. Apply hyperparameter optimization and rebuild, retrain and re-evaluate model
10. Compare results

In [2]:
# Define constants
FIRE_IMAGE_DIR = 'fire_images'
FIRE_IMAGE_CLASS = '1'

NORMAL_IMAGE_DIR = 'normal_images'
NORMAL_IMAGE_CLASS = '0'

DF_COLS = ['folder', 'filename', 'label']

REDUCED_IMAGE_SIZE = 1024
NUM_CLASSES = 2

## Import data into Dataframe

In [3]:
# Import data into dataframes
fire_list = []
normal_list = []

for file in glob.glob(f'{FIRE_IMAGE_DIR}/*'):
    fire_list.append([FIRE_IMAGE_DIR, os.path.basename(file), FIRE_IMAGE_CLASS])

for file in glob.glob(f'{NORMAL_IMAGE_DIR}/*'):
    normal_list.append([NORMAL_IMAGE_DIR, os.path.basename(file), NORMAL_IMAGE_CLASS])
    
fire_df = pd.DataFrame(fire_list, columns=DF_COLS)
normal_df = pd.DataFrame(normal_list, columns=DF_COLS)

In [4]:
# Concatenate the two DF's
df = pd.concat([fire_df, normal_df]).reset_index()
df[:2]

Unnamed: 0,index,folder,filename,label
0,0,fire_images,dsc_01001.jpg,1
1,1,fire_images,burning-charcoal-briquettes.jpg,1


In [5]:
# Split df into train and test
df_train, df_test = train_test_split(df, test_size=.2, random_state=0)
df_train = df_train.reset_index().drop(['level_0', 'index'], axis=1)
df_test = df_test.reset_index().drop(['level_0', 'index'], axis=1)

In [7]:
# Define data generator
def data_generator(df, batch_size):
    while True:
        x_batch = np.zeros((batch_size, REDUCED_IMAGE_SIZE, REDUCED_IMAGE_SIZE, 3))
        y_batch = np.zeros((batch_size, 1))
        global_i = 0

        for batch_index in range(len(df)//batch_size):
            start_batch_index = batch_index*batch_size
            end_batch_index = (batch_index+1)*batch_size
            local_i = 0

            for filename, label in zip(df['filename'][start_batch_index:end_batch_index], df['label'][start_batch_index:end_batch_index]):
                folder = df['folder'][global_i]
                img = Image.open(os.path.join(folder, filename))
                img = img.resize((REDUCED_IMAGE_SIZE, REDUCED_IMAGE_SIZE))
                x_batch[local_i] = img_to_array(img)
                y_batch[local_i] = label
                global_i += 1
                local_i += 1
        
            yield x_batch, np_utils.to_categorical(y_batch, NUM_CLASSES)

In [14]:
# Define CNN+MLP model
model = Sequential()
model.add(Conv2D(4, kernel_size=(3,3), strides=(1,1), activation='relu', input_shape=(REDUCED_IMAGE_SIZE, REDUCED_IMAGE_SIZE, 3)))
model.add(Conv2D(4, kernel_size=(3,3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(.5))

model.add(Conv2D(4, kernel_size=(3,3), activation='relu'))
model.add(Conv2D(4, kernel_size=(3,3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(.5))

model.add(Flatten())
model.add(Dense(32, activation='relu'))
model.add(Dense(2, activation='sigmoid'))

In [15]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_7 (Conv2D)            (None, 1022, 1022, 4)     112       
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 1020, 1020, 4)     148       
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 510, 510, 4)       0         
_________________________________________________________________
dropout_5 (Dropout)          (None, 510, 510, 4)       0         
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 508, 508, 4)       148       
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 506, 506, 4)       148       
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 253, 253, 4)       0         
__________

In [16]:
# Compile the model
model.compile(loss='binary_crossentropy', optimizer='sgd', metrics=['accuracy'])

In [20]:
batch_size = 50
history = model.fit_generator(data_generator(df_train, batch_size), steps_per_epoch=len(df_train)//batch_size, epochs=2, verbose=1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [19]:
# Looks like model stopped learning after some time, still a good model tho

In [None]:
# Define data generator
def data_generator(df, batch_size):
    while True:
        x_batch = np.zeros((batch_size, REDUCED_IMAGE_SIZE, REDUCED_IMAGE_SIZE, 3))
        y_batch = np.zeros((batch_size, 1))
        global_i = 0

        for batch_index in range(len(df)//batch_size):
            start_batch_index = batch_index*batch_size
            end_batch_index = (batch_index+1)*batch_size
            local_i = 0

            for filename, label in zip(df['filename'][start_batch_index:end_batch_index], df['label'][start_batch_index:end_batch_index]):
                folder = df['folder'][global_i]
                img = Image.open(os.path.join(folder, filename))
                img = img.resize((REDUCED_IMAGE_SIZE, REDUCED_IMAGE_SIZE))
                x_batch[local_i] = img_to_array(img)
                y_batch[local_i] = label
                global_i += 1
                local_i += 1
        
            yield datagenx_batch, np_utils.to_categorical(y_batch, NUM_CLASSES)

In [None]:
def get_x_y_train_from_df(df):
     x_batch = np.zeros((batch_size, REDUCED_IMAGE_SIZE, REDUCED_IMAGE_SIZE, 3))
        y_batch = np.zeros((batch_size, 1))
        global_i = 0

        for index in range(len(df)):
            start_batch_index = index
            end_batch_index = index + 1
            local_i = 0

            for filename, label in zip(df['filename'][start_batch_index:end_batch_index], df['label'][start_batch_index:end_batch_index]):
                folder = df['folder'][global_i]
                img = Image.open(os.path.join(folder, filename))
                img = img.resize((REDUCED_IMAGE_SIZE, REDUCED_IMAGE_SIZE))
                x_batch[local_i] = img_to_array(img)
                y_batch[local_i] = label
                global_i += 1
                local_i += 1
        
            yield datagenx_batch, np_utils.to_categorical(y_batch, NUM_CLASSES)

In [None]:
datagen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True)

datagen.fit()