# First, set up the training images

## Choose train/test set

In [202]:
# choose set: balanced, imbalanced, all_tanks
# NOTE(review): this name shadows the builtin `set` for the rest of the
# notebook; it is read further down when the training-image paths are built.
set = "imbalanced"

## Resize, don't set to grayscale

In [None]:
from PIL import Image, ImageOps
import os
def resize_multiple_images(src_path, dst_path):
    """Resize every readable image in ``src_path`` to 64x64 and save it.

    Each resized image is written to ``dst_path`` under its original file
    name. Colour channels are left untouched (no grayscale conversion).

    Args:
        src_path: Directory containing the source images.
        dst_path: Directory that receives the resized copies; it is created
            (including parents) if it does not exist yet.

    Returns:
        None. Entries that are not regular files, cannot be decoded as an
        image, or cannot be saved are skipped.
    """
    # Create the destination once instead of re-checking it for every file.
    os.makedirs(dst_path, exist_ok=True)

    for filename in os.listdir(src_path):
        src_file = os.path.join(src_path, filename)
        # Sub-directories and other non-file entries cannot be images.
        if not os.path.isfile(src_file):
            continue
        try:
            # The context manager closes the underlying file handle.
            with Image.open(src_file) as img:
                new_img = img.resize((64, 64))
                new_img.save(os.path.join(dst_path, filename))
        except (OSError, ValueError):
            # Unreadable/unsupported image or unknown output extension:
            # keep the best-effort behaviour and move on to the next file.
            continue



# Resize both image folders: (source folder, destination folder) pairs,
# processed in the order listed.
for src_path, dst_path in (
    ("cnn_training_images/YES/", "cnn_training_images/FINAL/"),
    ("cnn_training_images/NO1/", "cnn_training_images/NO2/"),
):
    resize_multiple_images(src_path, dst_path)

# Now, load the training data into arrays

In [254]:
# import images into X and Y arrays
import cv2
from PIL import Image
import os
import numpy as np
import re
def get_data(path, lab):
    """Load the usable JPEG images in ``path`` and label each with ``lab``.

    A file is considered when its name contains "jpg" and does not contain
    "_194" or "_195". It is kept only if the mean and standard deviation of
    its red channel fall inside the fixed bounds below.

    Args:
        path: Directory holding the images.
        lab: Label value appended once for every image that is kept.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays with one entry per kept
        image. Both arrays are empty when nothing passes the filters.
    """
    all_images_as_array = []
    label = []
    for filename in os.listdir(path):
        if "jpg" not in filename:
            continue
        if "_194" in filename or "_195" in filename:
            continue

        # Decode once and reuse the array for both filtering and output.
        img = cv2.imread(os.path.join(path, filename))
        if img is None:
            # cv2.imread signals an unreadable file by returning None
            # rather than raising, so skip it explicitly.
            continue

        # OpenCV stores channels in BGR order, so index 2 is red.
        red = img[:, :, 2]
        mean = np.mean(red)
        std = np.std(red)

        # Alternative bounds recorded by the original author:
        #   2.5/97.5-percentile: 133.5 < mean < 236 and 9.6 < std < 34
        #   1/99-percentile:     127.57779687499999 < mean < 246.38062499999998
        #                        and 7.848734708324726 < std < 38.60616608163898
        # Active bounds (labelled "99.9 percentile"):
        if 120.5 < mean < 249 and 5.5 < std < 40:
            all_images_as_array.append(img)
            label.append(lab)
    return np.array(all_images_as_array), np.array(label)


# Folders for the positive and negative class of the chosen data set.
path_to_pos = f"training_images/{set}/YES2/"
path_to_neg = f"training_images/{set}/NO2/"

# Load each class with its label (1 = positive, 0 = negative).
X1, y1 = get_data(path_to_pos, 1)
X0, y0 = get_data(path_to_neg, 0)

# Stack positives first, then negatives, along the sample axis.
X = np.concatenate([X1, X0])
y = np.concatenate([y1, y0])

print(X.shape)
print(y.shape)

# Class balance: fraction and count of positive samples.
print("Mean of y: ", np.mean(y))
print("Y count_nonzero: ", np.count_nonzero(y))

# Cast to float32 before scaling / training.
X = X.astype(np.float32)
y = y.astype(np.float32)

print(X.dtype)

(2175, 64, 64, 3)
(2175,)
Mean of y:  0.12873563218390804
Y count_nonzero:  280
float32


In [257]:
# scale X
# Divides every value by 255. NOTE(review): re-running this cell divides
# again, because X is overwritten in place.
X = X/255

# Train the model

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [246]:
# Convolutional Neural Network https://machinelearningmastery.com/keras-functional-api-deep-learning/

# Convolutional Neural Network
from keras.utils import plot_model
from keras.models import Model
from keras.layers import Input
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers.convolutional import Conv2D
from keras.layers.pooling import MaxPooling2D
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten
from tensorflow.keras.models import Sequential

# Shape of one input image (height, width, channels) and number of outputs.
input_shape = (64, 64, 3)
nClasses = 2


def createModel():
    """Build the (uncompiled) CNN classifier.

    Three convolutional stages (32, 64 and 64 filters), each made of a
    'same'-padded 3x3 convolution, an unpadded 3x3 convolution, 2x2 max
    pooling and 25% dropout, followed by a 512-unit dense layer with 50%
    dropout and an ``nClasses``-unit sigmoid output. All hidden layers use
    tanh activations.

    NOTE(review): the output uses independent sigmoid units, not softmax,
    while the labels are one-hot encoded elsewhere in this notebook.

    Returns:
        A ``Sequential`` model expecting inputs of shape ``input_shape``.
    """
    layer_stack = [
        # Stage 1: 32 filters
        Conv2D(32, (3, 3), padding='same', activation='tanh', input_shape=input_shape),
        Conv2D(32, (3, 3), activation='tanh'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        # Stage 2: 64 filters
        Conv2D(64, (3, 3), padding='same', activation='tanh'),
        Conv2D(64, (3, 3), activation='tanh'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        # Stage 3: 64 filters
        Conv2D(64, (3, 3), padding='same', activation='tanh'),
        Conv2D(64, (3, 3), activation='tanh'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        # Classifier head
        Flatten(),
        Dense(512, activation='tanh'),
        Dropout(0.5),
        Dense(nClasses, activation='sigmoid'),
    ]
    return Sequential(layer_stack)

# Build a fresh, untrained network and print its layer-by-layer summary.
model = createModel()
model.summary()

# Optional: write an architecture diagram to disk (currently disabled).
# plot_model(model, to_file='convolutional_neural_network.png')


Model: "sequential_31"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_92 (Conv2D)           (None, 64, 64, 32)        896       
_________________________________________________________________
conv2d_93 (Conv2D)           (None, 62, 62, 32)        9248      
_________________________________________________________________
max_pooling2d_45 (MaxPooling (None, 31, 31, 32)        0         
_________________________________________________________________
dropout_16 (Dropout)         (None, 31, 31, 32)        0         
_________________________________________________________________
conv2d_94 (Conv2D)           (None, 31, 31, 64)        18496     
_________________________________________________________________
conv2d_95 (Conv2D)           (None, 29, 29, 64)        36928     
_________________________________________________________________
max_pooling2d_46 (MaxPooling (None, 14, 14, 64)      

In [258]:
# train test split
from keras.utils import to_categorical
# Imported here because this cell is the first to use train_test_split;
# the only other import sits in a later cell.
from sklearn.model_selection import train_test_split

# One-hot encode the labels: one column per class.
y_binary = to_categorical(y)

print(y_binary.shape)

# Hold out 10% of the samples, stratified on the label so both splits keep
# the same class ratio; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_binary, test_size=0.1, stratify=y_binary, random_state=0)
print(y_train.shape)
print(y_train)

(2175, 2)
(1957, 2)
[[1. 0.]
 [0. 1.]
 [1. 0.]
 ...
 [0. 1.]
 [1. 0.]
 [1. 0.]]


In [None]:
# compute class weights, define metrics
from sklearn.utils import class_weight

# compute_class_weight expects a 1-D vector of class labels. y_train is
# one-hot encoded by the split cell, so collapse it to label indices first
# (a 1-D y_train is used unchanged).
train_labels = np.argmax(y_train, axis=1) if np.ndim(y_train) > 1 else y_train

# `classes` and `y` are passed by keyword: the original call omitted
# `classes` entirely, and recent scikit-learn releases require keywords.
class_weights = class_weight.compute_class_weight(
    'balanced', classes=np.unique(train_labels), y=train_labels)
print('Class weights: ', class_weights)

In [None]:
# Set training process params
batch_size = 256
epochs = 50

# Set the training configurations: optimizer, loss function, accuracy metrics
model.compile(optimizer='rmsprop', loss=keras.losses.BinaryCrossentropy(), metrics=['accuracy'])

# Per-class loss weights (label index -> weight), up-weighting class 1.
# Kept under its own name so the `class_weight` module imported from
# sklearn.utils is not overwritten by a dict.
class_weight_by_label = {0: .12, 1: .8}

# NOTE(review): the held-out test split doubles as validation data here.
history = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(X_test, y_test),
    class_weight=class_weight_by_label,
)




In [None]:
# Plot accuracy

# NOTE(review): this import rebinds the name `keras`, which an earlier cell
# bound via `from tensorflow import keras`.
import keras
from matplotlib import pyplot as plt

# The history key for the 'accuracy' metric depends on the Keras version:
# older releases log 'acc'/'val_acc', newer ones 'accuracy'/'val_accuracy'.
acc_key = 'accuracy' if 'accuracy' in history.history else 'acc'

plt.plot(history.history[acc_key])
plt.plot(history.history['val_' + acc_key])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper left')
plt.show()

In [None]:
# Plot loss
# Training curve first, validation curve second, matching the legend order.
for series in ('loss', 'val_loss'):
    plt.plot(history.history[series])
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'val'], loc='upper left')
plt.show()

In [None]:
# Check the model results on the test set
# The trained network in this notebook is bound to `model`; `model1` is
# never defined, so the original call raised NameError.
model.evaluate(X_test, y_test)

In [201]:
# Short 2-epoch run. batch_size=5000 is larger than the training sets shown
# in this notebook, so each epoch is a single full-batch update.
history = model.fit(
    X_train,
    y_train,
    batch_size=5000,
    epochs=2,
    validation_data=(X_test, y_test),
    verbose=1,
)

Train on 360 samples, validate on 91 samples
Epoch 1/2
Epoch 2/2


In [None]:
# Debug aid: dump the training feature array.
print(X_train)


In [None]:
# compute class weights, define metrics
from sklearn.utils import class_weight

# 'balanced' weights each class inversely to its frequency in y.
# `classes` and `y` are passed by keyword because recent scikit-learn
# releases reject them as positional arguments; older ones accept both.
class_weights = class_weight.compute_class_weight(
    'balanced', classes=np.unique(y), y=y)
print('Class weights: ', class_weights)


import keras.backend as K

def f1_score(y_true, y_pred):
    """Compute the F1 score with Keras backend ops.

    Values are clipped to [0, 1] and rounded before counting, so a
    prediction of 0.5 or more counts as positive.

    Args:
        y_true: Ground-truth labels (presumably a Keras tensor of 0/1
            values -- confirm against the caller).
        y_pred: Predicted scores, same shape as ``y_true``.

    Returns:
        A scalar tensor holding the F1 score. It is 0 when there are no
        true positives, including the cases of no positive labels or no
        positive predictions.
    """
    # Count true positives, predicted positives and actual positives.
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    actual_positives = K.sum(K.round(K.clip(y_true, 0, 1)))

    # A Python `if count == 0` cannot branch on a symbolic tensor, and the
    # original divided by zero when nothing was predicted positive. Adding
    # K.epsilon() to each denominator keeps every division finite and makes
    # the score 0 in those cases.
    # How many selected items are relevant?
    precision = true_positives / (predicted_positives + K.epsilon())

    # How many relevant items are selected?
    recall = true_positives / (actual_positives + K.epsilon())

    # Harmonic mean of precision and recall.
    return 2 * (precision * recall) / (precision + recall + K.epsilon())

# MLP

# Flatten the image data into a 2-D training array

In [None]:
# flatten every image into one feature vector for the MLP

# X may be (samples, height, width) for grayscale or
# (samples, height, width, channels) for colour images. The original 3-way
# unpack of X.shape raised ValueError on the colour array built above.
nsamples = X.shape[0]
nx, ny = X.shape[1:3]
# -1 lets NumPy infer the flattened length for either rank.
X = X.reshape((nsamples, -1))
print(X.shape)

In [None]:
####### split data into train and test sets
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for testing; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

## Resample train array

In [None]:
################ resample train array ###########
from collections import Counter

from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler

# Class counts before resampling.
print("Y train array: ")
print(sorted(Counter(y_train).items()))

# Oversample with SMOTE (sampling_strategy .5), then randomly undersample
# (sampling_strategy 1). Only the training split is resampled.
over = SMOTE(sampling_strategy=.5)
under = RandomUnderSampler(sampling_strategy=1)
for sampler in (over, under):
    X_train, y_train = sampler.fit_resample(X_train, y_train)
print("Train data resampled.")

# Class counts after resampling.
print("Y train array: ")
print(sorted(Counter(y_train).items()))

# The test split is left untouched.
print("Y test array: ")
print(sorted(Counter(y_test).items()))

## Define model

In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline

# Feature standardisation step; it is fitted when the pipeline is fitted.
scaler = preprocessing.StandardScaler()

################## define and run model itself, after you've found good params ########
# Single hidden layer of 100 ReLU units trained with the L-BFGS solver.
mlp = MLPClassifier(
    hidden_layer_sizes=(100,),
    activation='relu',
    solver='lbfgs',
    alpha=1e-5,
    learning_rate='constant',
    random_state=0,
)

# Scaling and classification chained as one estimator.
pipe = Pipeline([('scaler', scaler), ('MLPClassifier', mlp)])



# Fit and test

In [None]:
# run it
# Probability threshold at or above which a sample is predicted positive.
cutoff = 0.5

pipe.fit(X_train, y_train)

# test model
# NOTE(review): this import rebinds `f1_score`, replacing the Keras-backend
# function of the same name defined earlier in the notebook.
from sklearn.metrics import recall_score, accuracy_score, precision_score, f1_score

# Column 1 of predict_proba is the probability of the positive class;
# compare it with the cutoff to get boolean predictions.
positive_proba = pipe.predict_proba(X_test)[:, 1]
y_true = y_test
y_pred = (positive_proba >= cutoff).astype(bool)

In [None]:
model = tf.keras.models.Sequential([
    
    # This is the first convolution
    
    tf.keras.layers.Conv2D(16, (3,3), activation='relu', input_shape=(80, 80)),
    
    tf.keras.layers.MaxPooling2D(2,2)
    
    # Second
    
    tf.keras.layers.Conv2D(32, (3,3), activation='relu'),
    
    tf.keras.layers.MaxPooling2D(2,2),
    
    
    # Third
    
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    
    tf.keras.layers.MaxPooling2D(2,2),
    
    # Fourth
    
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    
    tf.keras.layers.MaxPooling2D(2,2),
    
    # Fifth
    
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    
    tf.keras.layers.MaxPooling2D(2,2),
    
    # Flatten the results to feed into a DNN
    
    tf.keras.layers.Flatten(),
    
    # 512 neuron hidden layer
    
    tf.keras.layers.Dense(512, activation='relu'),
    
    
    # Only 1 output neuron. It will contain a value from 0-1.
    
    tf.keras.layers.Dense(1, activation='sigmoid')
    