### Imports

In [27]:
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

import keras
from keras.models import Sequential
from keras.layers import Dense, Input, Conv2D, MaxPooling2D, Dropout, Flatten, BatchNormalization, Activation
from keras.optimizers import SGD
from keras.initializers import RandomNormal
from keras.models import load_model

In [28]:
keras.backend.clear_session()

### Data Preparation

In [29]:
files = glob.glob("./dataset/fire-images/*.*")
ls_fire = []
for i in files:
    ls_fire.append(['fire-images', i.split("/")[3], '1'])

In [30]:
df_fire = pd.DataFrame(ls_fire, columns=['folder', 'filename', 'label'])
df_fire.head()

Unnamed: 0,folder,filename,label
0,fire-images,92983122.jpg,1
1,fire-images,wildfire-coverage-521x308.jpg,1
2,fire-images,201307-restroom-fire-safety-fire1.jpg,1
3,fire-images,X-Files_Office_on_fire.jpg,1
4,fire-images,tree-fire.jpg,1


In [31]:
files = glob.glob("./dataset/normal-images-?/*.*")
ls_normal = []
for i in files:
    folder, filename = i.split("/")[2], i.split("/")[3]
    ls_normal.append([f'{folder}', filename, '0'])

In [32]:
df_normal = pd.DataFrame(ls_normal, columns=['folder', 'filename', 'label'])
df_normal.head()

Unnamed: 0,folder,filename,label
0,normal-images-5,rooms-hotel-tbilisi-suite-bed-and-bathtub-city...,0
1,normal-images-5,unnamed.jpg,0
2,normal-images-5,Tromso.-Photo-credits-Bard-Loken-Innovatioin-N...,0
3,normal-images-5,tumblr_inline_nj85l1oMaC1t5js1c.jpg,0
4,normal-images-5,visa_requirements_for_norway-56e03f4b96539.jpg,0


In [33]:
df = df_fire.append(df_normal, ignore_index=True)
X = df[["folder", "filename"]]
y = df["label"]

In [8]:
df.to_csv("./dataset/fires.csv")

In [34]:
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns=['label']), df['label'],
    test_size=0.25, random_state=0
)

X_train['label'] = pd.Series((int(y) for y in y_train), index=X_train.index)
X_test['label'] = pd.Series((int(y) for y in y_test), index=X_test.index)

print("X train\t", X_train.shape, "\tX test", X_test.shape)
print("y train\t", y_train.shape, "\ty test", y_test.shape)

X train	 (488, 3) 	X test (163, 3)
y train	 (488,) 	y test (163,)


#### Hyper-parameters initialization

In [10]:
img_size = 1024
batch_size = 10
epochs = 10

#### `fit_generator`

In [11]:
from PIL import Image
from keras.preprocessing.image import img_to_array

# Create data generator for training
def data_gen(df, batch_size):
    while True:
        x_batch = np.zeros((batch_size, 1024, 1024, 3))
        y_batch = np.zeros((batch_size, 1))
        for j in range(len(df)//batch_size):
            b = 0
            for m, k in zip(df['filename'].values[j*batch_size:(j+1)*batch_size], df['label'].values[j*batch_size:(j+1)*batch_size]):
                # print(k, 'dataset/{}/{}'.format(df[df['filename'] == m]['folder'].values[0], m))
                img = Image.open('dataset/{}/{}'.format(df[df['filename'] == m]['folder'].values[0], m)).convert('RGB')
                image_red = img.resize((img_size, img_size))
                x_batch[b] = img_to_array(image_red)
                y_batch[b] = k
                b += 1
            yield (x_batch, y_batch)

### CNN

#### Baseline model

In [12]:
baseline = Sequential()

baseline.add(Conv2D(32, kernel_size=(3,3), input_shape=(img_size, img_size, 3), activation="relu"))
baseline.add(Flatten())
baseline.add(Dense(1, activation='sigmoid'))

In [13]:
baseline.compile(
    optimizer="adam", 
    loss='binary_crossentropy', 
    metrics=["accuracy"]
)

baseline.fit_generator(
    generator=data_gen(X_train, batch_size=batch_size), 
    steps_per_epoch=len(X_train)//batch_size, 
    epochs=epochs,
    verbose=1
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x7fbf55485780>

In [14]:
loss, accuracy = baseline.evaluate_generator(
    generator=data_gen(X_test, batch_size), 
    steps=len(X_test)//batch_size, verbose=0
)
print("Accuracy = {:.5f}".format(accuracy))

Accuracy = 0.86250


#### Model 

In [15]:
model = Sequential()

# model.add(Conv2D(32, kernel_size=(3,3), input_shape=(1024, 1024, 3), activation="relu"))
model.add(Conv2D(32, kernel_size=(3,3), input_shape=(1024, 1024, 3)))
model.add(BatchNormalization())
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(3,3)))

model.add(Conv2D(32, activation="relu", kernel_size=(3,3)))
model.add(MaxPooling2D(pool_size=(2,2)))

# model.add(Conv2D(32, activation="relu", kernel_size=(3,3)))
# model.add(MaxPooling2D(pool_size=(3,3)))

model.add(Dropout(0.5))
model.add(Flatten())

model.add(Dense(64, activation="relu"))

model.add(Dense(1, activation='sigmoid'))

In [16]:
model.compile(
    optimizer="adam", 
    loss='binary_crossentropy', 
    metrics=["accuracy"]
)

model.fit_generator(
    generator=data_gen(X_train, batch_size=batch_size), 
    steps_per_epoch=len(X_train)//batch_size, 
    epochs=epochs,
    verbose=1
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x7fbec0eb0898>

In [17]:
loss, accuracy = model.evaluate_generator(
    generator=data_gen(X_test, batch_size), 
    steps=len(X_test)//batch_size, verbose=0
)
print("Accuracy = {:.5f}".format(accuracy))

Accuracy = 0.81250


Looks like our model doesn't perform any better after we add more layers. This may be due to how imbalanced our dataset is: we have nearly five times more images without fire than with fire.

In [18]:
print("# of images w/ fire\t", len(df[df["label"] == "1"]))
print("# of images w/o fire\t", len(df[df["label"] == "0"]))

# of images w/ fire	 110
# of images w/o fire	 541


### Balancing the dataset with Up-sampling
*Up-sampling is the process of randomly duplicating observations from the minority class in order to reinforce its signal.*

Source: https://elitedatascience.com/imbalanced-classes

In [19]:
from sklearn.utils import resample

# Upsample minority class
df_fire_upsampled = resample(
    df_fire, 
    replace=True,     # sample with replacement
    n_samples=541,    # to match majority class
    random_state=123  # reproducible results
) 

df_upsampled = df_normal.append(df_fire_upsampled)

df_upsampled["label"].value_counts()

0    541
1    541
Name: label, dtype: int64

In [21]:
up_X_train, up_X_test, up_y_train, up_y_test = train_test_split(
    df_upsampled.drop(columns=['label']), df_upsampled['label'],
    test_size=0.25, random_state=0
)

up_X_train['label'] = pd.Series((int(y) for y in up_y_train), index=up_X_train.index)
up_X_test['label'] = pd.Series((int(y) for y in up_y_test), index=up_X_test.index)

print("upsampled X train\t", up_X_train.shape, "\tupsampled X test", up_X_test.shape)
print("upsampled y train\t", up_y_train.shape, "\tupsampled y test", up_y_test.shape)

upsampled X train	 (811, 3) 	upsampled X test (271, 3)
upsampled y train	 (811,) 	upsampled y test (271,)


In [35]:
model_up = Sequential()

# model.add(Conv2D(32, kernel_size=(3,3), input_shape=(1024, 1024, 3), activation="relu"))
model_up.add(Conv2D(32, kernel_size=(3,3), input_shape=(1024, 1024, 3)))
model_up.add(BatchNormalization())
model_up.add(Activation("relu"))
model_up.add(MaxPooling2D(pool_size=(3,3)))

model_up.add(Conv2D(32, activation="relu", kernel_size=(3,3)))
model_up.add(MaxPooling2D(pool_size=(2,2)))

# model.add(Conv2D(32, activation="relu", kernel_size=(3,3)))
# model.add(MaxPooling2D(pool_size=(3,3)))

model_up.add(Dropout(0.5))
model_up.add(Flatten())

model_up.add(Dense(64, activation="relu"))

model_up.add(Dense(1, activation='sigmoid'))

In [36]:
model_up.compile(
    optimizer="adam", 
    loss='binary_crossentropy', 
    metrics=["accuracy"]
)

model_up.fit_generator(
    generator=data_gen(up_X_train, batch_size=batch_size), 
    steps_per_epoch=len(up_X_train)//batch_size, 
    epochs=epochs,
    verbose=1
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x7fbec36912b0>

In [38]:
loss, accuracy = model_up.evaluate_generator(
    generator=data_gen(up_X_test, batch_size), 
    steps=len(up_X_test)//batch_size, verbose=0
)
print("Accuracy = {:.5f}".format(accuracy))

Accuracy = 0.91111


---
### Scratch

#### not working w/ modified `fit_generator`

In [None]:
#@title
baseline.compile(
    optimizer="adam", 
    loss='binary_crossentropy', 
    metrics=["accuracy"]
)

baseline.fit_generator(
    generator=data_gen_2(X_train, y_train, batch_size=batch_size), 
    steps_per_epoch=len(X_train)//batch_size, 
    epochs=epochs,
    verbose=1
)

In [None]:
#@title
loss, accuracy = baseline.evaluate_generator(
    generator=data_gen(X_test, y_test, batch_size), 
    steps=len(X_test)//batch_size, verbose=0
)
print("Accuracy = {:.5f}".format(accuracy))

#### Modified `fit_generator`

In [None]:
#@title
from PIL import Image
from keras.preprocessing.image import img_to_array

def data_gen_2(X, y, batch_size):
    print(X)
    while True:
    # for i in range(5):
        x_batch = np.zeros((batch_size, img_size, img_size, 3))
        y_batch = np.zeros((batch_size, 1))
        for j in range(len(X)//batch_size):
            b = 0
            for m, k in zip(range(len(X[j*batch_size:(j+1)*batch_size])), y[j*batch_size:(j+1)*batch_size]):
                # print(k, 'dataset/{}/{}'.format(X["folder"].iloc[m], X["filename"].iloc[m]))
                img = Image.open('dataset/{}/{}'.format(X["folder"].iloc[m], X["filename"].iloc[m])).convert("RGB")
                image_red = img.resize((img_size, img_size))
                x_batch[b] = img_to_array(image_red)
                y_batch[b] = k
                b += 1
            yield (x_batch, y_batch)

#### git clone dataset

In [None]:
!git clone https://github.com/TasfiaAddrita/DS-2.2-Deep-Learning.git

In [None]:
!mv DS-2.2-Deep-Learning/final-project/dataset/ .

In [None]:
!rm -rf DS-2.2-Deep-Learning/