In [166]:
import pandas as pd 
import os

from sklearn.model_selection import train_test_split

import cv2
from PIL import Image

from tqdm import tqdm

### Read data in & create train/test splits

In [167]:
# get pathways 

dir_path = "../data/classification_images/"
success_uids = os.listdir(dir_path)

df = pd.read_csv('../data/building_data.csv')

In [168]:
df['label'] = df['classification'].map({'no-damage': 0, 'minor-damage': 1, 'major-damage': 2, 'destroyed': 3, 'un-classfied': 4})

# These are the class labels we expect the CNN to be able to recognize
df['classification'].value_counts()

no-damage        16068
minor-damage      2082
major-damage      1959
destroyed         1877
un-classified      598
Name: classification, dtype: int64

In [169]:
# create training/testing splits 
strat_train, strat_test = train_test_split(df, train_size=.8, stratify=df['classification'])
strat_train.reset_index(inplace=True)
strat_train['im'] = None
strat_test.reset_index(inplace=True)
strat_test['im'] = None

### Processing Data to correct shapes

In [170]:
import torch
import tensorflow.keras as keras
import numpy as np

# Checking if CUDA is available
flag_cuda = torch.cuda.is_available()

if not flag_cuda:
    print('Using CPU')
else:
    print('Using GPU')

Using CPU


In [171]:
for l in tqdm(range(0, len(strat_train))): 
    path_image = '../data/train/images/' + strat_train['image_name'][l].split(".json")[0] + ".png"
    img = cv2.imread(path_image)

    resize = cv2.resize(img, (34, 34), interpolation = cv2.INTER_AREA)
    # resize = [item for sublist in resize for item in sublist]
    img = resize / 255
    strat_train['im'][l] = img

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  strat_train['im'][l] = img
100%|██████████| 18067/18067 [03:50<00:00, 78.50it/s]


In [172]:
print(strat_train['im'][0].shape)
type(strat_train['im'][0])


(34, 34, 3)


numpy.ndarray

In [173]:
x_train = strat_train['im']
x_train = np.concatenate(x_train).reshape(len(strat_train), 34, 34, 3)
print("This is the training data shape: ")
x_train.shape

This is the training data shape: 


(18067, 34, 34, 3)

In [174]:
for l in tqdm(range(0, len(strat_test))): 
    path_image = '../data/train/images/' + strat_test['image_name'][l].split(".json")[0] + ".png"
    img = cv2.imread(path_image)

    resize = cv2.resize(img, (34, 34), interpolation = cv2.INTER_AREA)
    # resize = [item for sublist in resize for item in sublist]
    img = resize / 255
    strat_test['im'][l] = img

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  strat_test['im'][l] = img
100%|██████████| 4517/4517 [00:56<00:00, 79.27it/s]


In [175]:
x_test = strat_test['im']
x_test = np.concatenate(x_test).reshape(len(strat_test), 34, 34, 3)
print("This is the training data shape: ")
x_test.shape

This is the training data shape: 


(4517, 34, 34, 3)

In [176]:
# Turn our scalar targets into binary categories
num_classes = 5
y_train = keras.utils.to_categorical(strat_train['label'], num_classes)
y_test = keras.utils.to_categorical(strat_test['label'], num_classes)

  return np.asarray(self._values, dtype)


## Creating the CNN

In [177]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    Dense,
    Conv2D,
    MaxPool2D,
    Flatten,
    Dropout,
    BatchNormalization,
)

model = Sequential()
model.add(Conv2D(75, (3, 3), strides=1, padding="same", activation="relu", 
                 input_shape=(34, 34, 3)))
model.add(BatchNormalization())
model.add(MaxPool2D((2, 2), strides=2, padding="same"))
model.add(Conv2D(50, (3, 3), strides=1, padding="same", activation="relu"))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(MaxPool2D((2, 2), strides=2, padding="same"))
model.add(Conv2D(25, (3, 3), strides=1, padding="same", activation="relu"))
model.add(BatchNormalization())
model.add(MaxPool2D((2, 2), strides=2, padding="same"))
model.add(Flatten())
model.add(Dense(units=512, activation="relu"))
model.add(Dropout(0.3))
model.add(Dense(units=num_classes, activation="softmax"))

In [178]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_3 (Conv2D)           (None, 34, 34, 75)        2100      
                                                                 
 batch_normalization_3 (Batc  (None, 34, 34, 75)       300       
 hNormalization)                                                 
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 17, 17, 75)       0         
 2D)                                                             
                                                                 
 conv2d_4 (Conv2D)           (None, 17, 17, 50)        33800     
                                                                 
 dropout_2 (Dropout)         (None, 17, 17, 50)        0         
                                                                 
 batch_normalization_4 (Batc  (None, 17, 17, 50)      

In [184]:
model.compile(loss="categorical_crossentropy", optimizer='adam', metrics=["accuracy"])

In [185]:
history = model.fit(x_train, y_train, epochs=20, verbose=1, validation_data=(x_test, y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [191]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add traces
fig.add_trace(
    go.Scatter( y=history.history['val_loss'], name="val_loss"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter( y=history.history['loss'], name="loss"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter( y=history.history['val_accuracy'], name="val accuracy"),
    secondary_y=True,
)

fig.add_trace(
    go.Scatter( y=history.history['accuracy'], name="accuracy"),
    secondary_y=True,
)

# Add figure title
fig.update_layout(
    title_text="Loss/Accuracy of Sequential CNN Model"
)

# Set x-axis title
fig.update_xaxes(title_text="Epoch")

# Set y-axes titles
fig.update_yaxes(title_text="<b>primary</b> Loss", secondary_y=False)
fig.update_yaxes(title_text="<b>secondary</b> Accuracy", secondary_y=True)

fig.show()
