In [1]:
import matplotlib.pyplot as plt

from skimage import data
from skimage.color import rgb2gray
from skimage import io

import glob
import os
import pandas as pd
import numpy as np

from keras_preprocessing.image import ImageDataGenerator
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
#Import from keras_preprocessing not from keras.preprocessing
from keras_preprocessing.image import ImageDataGenerator
from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization
from keras.layers import Conv2D, MaxPooling2D
from keras import regularizers, optimizers
from tensorflow.keras import optimizers

%matplotlib inline

In [2]:
dataset_path=os.getcwd()+os.sep+"images"
imgs=glob.glob(dataset_path+os.sep+"*.jpg")
len(imgs)

637

## Goal #1 Create a pipeline to preprocesses all the images
1. convert images to grayscale
2. Normalize the images
3. Perform Data Augmentation
4. Image Standardization

## Create a image generator to apply augmentations to the imagery
- standardizes the image sizes 
- applies zooms, flips, rotations to the imagery

In [3]:

train_datagen=ImageDataGenerator(rescale=1./255.,
rotation_range=20,
zoom_range=0.05,
width_shift_range=0.05,
height_shift_range=0.05,
shear_range=0.05,
horizontal_flip=True,
fill_mode="nearest",
validation_split=0.20
)

test_datagen=ImageDataGenerator(rescale=1./255.)


# Read in the  csv file and transform in df
1. read in the csv
2. one hot encode the sorted labels
3. Update the one hot encoded labels to strings

In [9]:
dataset_path=os.getcwd()+os.sep+"images"
labels=["good","bad"]
df =  pd.read_csv("main.csv")
print(df.columns)
if "index" in df.columns:
    print("Dropping column index")
    df.drop(['index'],axis=1,inplace=True)
if "Unnamed: 0"in df.columns:
    print("Dropping column Unnamed: 0")
    df.drop(['Unnamed: 0'],axis=1,inplace=True)
df

Index(['Unnamed: 0', 'Filename', 'Sorted'], dtype='object')
Dropping column Unnamed: 0


Unnamed: 0,Filename,Sorted
0,2009-12-01-18-36-33_L5_rgb.jpg,good
1,2017-10-11-18-52-32_L8_rgb.jpg,bad
2,2014-02-05-18-53-15_L8_rgb.jpg,good
3,2003-10-14-18-23-57_L5_rgb.jpg,good
4,2013-12-12-18-47-38_L8_rgb.jpg,good
...,...,...
646,2021-09-21-19-04-08_S2.jpg,bad
647,2021-09-26-19-04-09_S2.jpg,bad
648,2021-10-01-19-04-08_S2.jpg,bad
649,2021-10-11-19-04-07_S2.jpg,bad


In [10]:
### map each label(good or bad) to an integer
mapping = {}
for x in range(len(labels)):
  mapping[labels[x]] = x
# outputs {'good': 0, 'bad': 1}
print(mapping)
# integer representation
for x in range(len(df['Sorted'])):
  df['Sorted'][x] = mapping[df['Sorted'][x]]
print("\n",df['Sorted'])

{'good': 0, 'bad': 1}

 0      0
1      1
2      0
3      0
4      0
      ..
646    1
647    1
648    1
649    1
650    1
Name: Sorted, Length: 651, dtype: object


In [12]:

# Converted the sorted column to string otherwises data generator will not work
df["Sorted"]=df["Sorted"].astype(str)
# Split the dataframe into a train and test set into a .75 and .25 training and test set respectively
traindf=df.iloc[:500,:] # get the first 500 rows
testdf=df.iloc[500:,:] # get the  remaining 150 rows

# Get the x and y column names from the csv file
x_col_name=df.columns[0]
y_col_name=df.columns[1]

print(traindf)
print(testdf)

                                              Filename Sorted
0                       2009-12-01-18-36-33_L5_rgb.jpg      0
1                       2017-10-11-18-52-32_L8_rgb.jpg      1
2                       2014-02-05-18-53-15_L8_rgb.jpg      0
3                       2003-10-14-18-23-57_L5_rgb.jpg      0
4                       2013-12-12-18-47-38_L8_rgb.jpg      0
..                                                 ...    ...
495  2018-12-10-15-27-23_L8_2022-03-31__09_hr_48_mi...      0
496  2018-12-10-15-27-23_L8_2022-03-31__09_hr_49_mi...      1
497                         2018-12-10-18-46-21_L8.jpg      1
498                         2018-12-11-15-41-39_S2.jpg      0
499  2018-12-11-15-41-39_S2_2022-03-31__09_hr_49_mi...      1

[500 rows x 2 columns]
                                              Filename Sorted
500  2018-12-11-15-41-39_S2_2022-03-31__09_hr_50_mi...      0
501  2018-12-11-15-41-39_S2_2022-03-31__09_hr_51_mi...      0
502                         2018-12-11-18-36-0

## Flow the images from the dataframe
- resize the images from 934 x 294 to 900x294
- shuffle the images
- divide the data into a training and validation subset
- set a random seed
- set the column names to check 

1. TASK: investigate class_mode
2. TASK: modify the images target size

In [13]:
train_generator=train_datagen.flow_from_dataframe(
dataframe=traindf,
directory=dataset_path,
x_col=x_col_name, #image filenames
y_col=y_col_name,   # class names in this case good/bad
subset="training",
batch_size=37,
seed=42,
shuffle=True,
class_mode="categorical",
target_size=(900,250))


Found 400 validated image filenames belonging to 2 classes.


In [14]:
valid_generator=train_datagen.flow_from_dataframe(
dataframe=traindf,
directory=dataset_path,
x_col=x_col_name, #image filenames
y_col=y_col_name,   # class names in this case good/bad
subset="validation", #only difference is this
batch_size=37, #296/8=37
seed=42,
shuffle=True,
class_mode="categorical",
target_size=(900,250))

Found 100 validated image filenames belonging to 2 classes.


In [15]:

test_generator=test_datagen.flow_from_dataframe(
dataframe=testdf,
directory="./images/",
x_col=x_col_name, #image filenames
y_col=None,
batch_size=14, #98/7=14
seed=42,
shuffle=False,  #shuffle must be false for validation dataset otherwise labels will not match
class_mode=None,
target_size=(900,250))

Found 151 validated image filenames.


## Create the Model
Create a simple Sequential Model

In [10]:
# def prepare_model():
#     model =  keras.Sequential()
#     model.add(Conv2D(32,kernel_size=(3,3), padding= 'same',activation='relu',input_shape=(900, 250, 3)))
#     model.add(Activation('relu'))
#     model.add(Conv2D(32, (3, 3)))
#     model.add(Activation('relu'))
#     model.add(MaxPooling2D(pool_size=(2, 2)))
#     model.add(Dropout(0.25))
#     model.add(Conv2D(64, (3, 3), padding='same'))
#     model.add(Activation('relu'))
#     model.add(Conv2D(64, (3, 3)))
#     model.add(Activation('relu'))
#     model.add(MaxPooling2D(pool_size=(2, 2)))
#     model.add(Dropout(0.25))
#     model.add(Flatten())
#     model.add(Dense(512))
#     model.add(Activation('relu'))
#     model.add(Dropout(0.5))
#     model.add(Dense(10, activation='softmax'))
#     model.compile(optimizers.RMSprop(learning_rate=0.0001, decay=1e-6),loss="categorical_crossentropy",metrics=["accuracy"])

In [16]:
def prepare_model():
    model = keras.Sequential()
    model.add(Conv2D(32,kernel_size=(3,3),activation='relu',input_shape=(900, 250, 3)))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(32, (3, 3),activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(32, (3, 3),activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(16, activation='relu'))
    model.add(Dense(2, activation='sigmoid'))
    model.compile(loss="binary_crossentropy",optimizer="adam",metrics=['accuracy'])
    return model

In [17]:
model = prepare_model()
model.fit(train_generator,
                    validation_data = train_generator,
                    steps_per_epoch = train_generator.n//train_generator.batch_size,
                    validation_steps = valid_generator.n//valid_generator.batch_size,
                    epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x2f2dbab93c8>

In [18]:
score = model.evaluate(valid_generator)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Test loss: 0.6200941205024719
Test accuracy: 0.8500000238418579


In [None]:
# Write the test scores to a file
import csv
csv_file_path=os.getcwd()+os.sep+"test_results"+os.sep+"modelScores.csv"
if not os.path.exists(csv_file_path):
    with open(csv_file_path, 'w', newline='') as outcsv:
        writer = csv.writer(outcsv)
        writer.writerow(["Accuracy", "Test Loss", "Model Description"])
elif os.path.exists(csv_file_path):
    with open(csv_file_path, 'a', newline='') as outcsv:
            writer = csv.writer(outcsv)
            writer.writerow([score[1], score[0], "redid entire model after research"])


In [None]:
# model =  keras.Sequential()
# model.add(Conv2D(32, (3, 3), padding='same',
#                  input_shape=(32,32,3)))
# model.add(Activation('relu'))
# model.add(Conv2D(32, (3, 3)))
# model.add(Activation('relu'))
# model.add(MaxPooling2D(pool_size=(2, 2)))
# model.add(Dropout(0.25))
# model.add(Conv2D(64, (3, 3), padding='same'))
# model.add(Activation('relu'))
# model.add(Conv2D(64, (3, 3)))
# model.add(Activation('relu'))
# model.add(MaxPooling2D(pool_size=(2, 2)))
# model.add(Dropout(0.25))
# model.add(Flatten())
# model.add(Dense(512))
# model.add(Activation('relu'))
# model.add(Dropout(0.5))
# model.add(Dense(10, activation='softmax'))
# model.compile(optimizers.RMSprop(learning_rate=0.0001, decay=1e-6),loss="categorical_crossentropy",metrics=["accuracy"])

In [None]:
# STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size
# STEP_SIZE_VALID=valid_generator.n//valid_generator.batch_size
# STEP_SIZE_TEST=test_generator.n//test_generator.batch_size
# model.fit(train_generator,
#                     steps_per_epoch=STEP_SIZE_TRAIN,
#                     validation_data=valid_generator,
#                     validation_steps=STEP_SIZE_VALID,
#                     epochs=10
# )