## Making Keras Model

think about hyper parameters like Filter size, number of filters, which type of padding to use, which activatioon functions to use etc.

## Libraries

In [1]:
#                                                   LIBRARIES
import os

# Image preprocesssing- processing
import cv2
from PIL import Image
import numpy as np

# Neural Network libraries
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Conv2D,MaxPooling2D,Dense,Flatten,Dropout

In [2]:
CSV_INPUT_FOLDER = ".." + os.sep + ".." + os.sep + "csv" + os.sep
TRAIN_CSV = CSV_INPUT_FOLDER + "train_list.csv"
TEST_CSV = CSV_INPUT_FOLDER + "test_list.csv"
VALIDATION_CSV = CSV_INPUT_FOLDER + "validation_list.csv"

In [3]:
CSV_INPUT_FOLDER = os.path.abspath(CSV_INPUT_FOLDER)
TRAIN_CSV = os.path.abspath(TRAIN_CSV)
TEST_CSV = os.path.abspath(TEST_CSV)
VALIDATION_CSV = os.path.abspath(VALIDATION_CSV)

In [4]:
def read_csvs(train_csv = TRAIN_CSV, test_csv = TEST_CSV, validation_csv = VALIDATION_CSV):
    train_data = []
    test_data = []
    validation_data = []

    # convert back to numpy array

    train = np.loadtxt(train_csv,dtype=str,
                                     delimiter=',', usecols=(0, 1), unpack=True)

    # np.random.shuffle(train)

    train_d, train_labels = train

    for data in train_d:
        data = ".." + os.sep + ".." + data
        img=cv2.imread(data)

        # convert image to array
        img = np.array(img)

        img = Image.fromarray(img, 'RGB')

        train_data.append(np.array(img))

    assert len(train_data) == len(train_labels)


    test_d, test_labels = np.loadtxt(test_csv,dtype=str,
                                     delimiter=',', usecols=(0, 1), unpack=True)
    for data in test_d:
        data = ".." + os.sep + ".." + data
        img=cv2.imread(data)

        # convert image to array
        img = np.array(img)

        img = Image.fromarray(img, 'RGB')

        test_data.append(np.array(img))

    validation_d, validation_labels = np.loadtxt(validation_csv,dtype=str,
                                     delimiter=',', usecols=(0, 1), unpack=True)
    for data in validation_d:
        data = ".." + os.sep + ".." + data
        img=cv2.imread(data)

        # convert image to array
        img = np.array(img)

        img = Image.fromarray(img, 'RGB')

        validation_data.append(np.array(img))

    train_labels = np.array(train_labels)
    test_labels = np.array(test_labels)
    validation_labels = np.array(validation_labels)

    train_data=np.array(train_data)
    test_data=np.array(test_data)
    validation_data=np.array(validation_data)

    return train_data, train_labels, test_data, test_labels, validation_data, validation_labels

train_data, train_labels, test_data, test_labels, validation_data, val_labels = read_csvs(TRAIN_CSV, TEST_CSV, VALIDATION_CSV)

In [5]:
num_classes = 3
train_data, train_labels, test_data, test_labels, validation_data, val_labels = read_csvs(TRAIN_CSV, TEST_CSV, VALIDATION_CSV)

In [6]:
def transform_names_into_numbers(train_labels_name, test_labels_name, validation_labels_name):
    train_labels = test_labels = validation_labels = []
    # train_labels
    for idx,label in enumerate(train_labels_name):
        if label == 'Bear':
            train_labels.append(0)
        elif label == 'Chicken':
            train_labels.append(1)
        elif label == 'Turtle':
            train_labels.append(2)

    # test_labels
    for idx,label in enumerate(test_labels_name):
        if label == 'Bear':
            test_labels.append(0)
        elif label == 'Chicken':
            test_labels.append(1)
        elif label == 'Turtle':
            test_labels.append(2)

    # validation_labels
    for idx,label in enumerate(validation_labels_name):
        if label == 'Bear':
            validation_labels.append(0)
        elif label == 'Chicken':
            validation_labels.append(1)
        elif label == 'Turtle':
            validation_labels.append(2)

    train_labels = np.array(train_labels)
    test_labels = np.array(test_labels)
    validation_labels = np.array(validation_labels)

    return train_labels, test_labels, validation_labels

In [7]:
# One hot encoding for labels
train_labels = keras.utils.to_categorical(train_labels,num_classes)
test_labels = keras.utils.to_categorical(test_labels,num_classes)
validation_labels = keras.utils.to_categorical(val_labels,num_classes)

In [8]:
# import sequential model and all the required layers
def create_model():
    #make model
    model=Sequential()
    # Pairs of Conv2D layer and MaxPool2D Layer with increasing filter sizes ( 16,32 ,64). This helps to make image grow more in depthwise and become more flatten.
    # Maxpool: great as they optimize the training time

    # capas de calculo -> ir de menor a mayor, suelen ser potencias / multiplos de 2 (las layers)

    model.add(Conv2D(filters=64,kernel_size=3,padding="same",activation="relu",input_shape=(200,200,3))) # parte imp = relu
    model.add(MaxPooling2D(pool_size=2))

    model.add(Conv2D(filters=128,kernel_size=3,padding="same",activation="relu"))
    model.add(MaxPooling2D(pool_size=2))

    model.add(Conv2D(filters=256,kernel_size=3,padding="same",activation="relu"))
    model.add(MaxPooling2D(pool_size=4))

    model.add(Dropout(0.2))
    model.add(Flatten()) # without flatten output shape = (50, 50, 6) -> flatten = (None, 6) which we need to get layer output


    # capas de clasificacion

    model.add(Dense(200,activation="relu"))

    # Final dense layer with num_classes nodes = categories of animals we have in the set
    # Softmax activation is used to give scores to these categories which lie between 0 and 1.
    model.add(Dense(num_classes,activation="softmax"))
    model.summary()

    # compile the model
    # We use loss function as categorical_crossentropy and Adam optimizer

    # if binary data -> loss = Binary Cross Entropy and activation = sigmoid
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                      metrics=['accuracy'])

    return model

In [9]:
model = create_model()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 200, 200, 64)      1792      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 100, 100, 64)     0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 100, 100, 128)     73856     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 50, 50, 128)      0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 50, 50, 256)       295168    
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 12, 12, 256)      0

In [None]:
# training
# batch potencias de 2, a mayor bacths mayor estabilidad del grandiente
model.fit(train_data,train_labels,batch_size=32 ,epochs=15,verbose=1)

Epoch 1/15


In [None]:
# test the model
score = model.evaluate(test_data, test_labels, verbose=1)
print('\n', 'Test accuracy:', score[1])

In [None]:
# save the model

'''# serialize model to JSON
model_json = model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
model.save_weights("model.h5")
print("Saved model to disk")'''

model.save('../models/model_200x200.h5') # Saves the entire model to a single artifact


In [None]:
def convert_to_array(img):
    im = cv2.imread(img)
    img = Image.fromarray(im, 'RGB')
    return np.array(img)

def get_animal_name(label):
    if label==0:
        return "Bear"
    if label==1:
        return "Chicken"
    if label==2:
        return "Turtle"

def predict_animal(file):
    print("Predicting .................................")
    # ar = convert_to_array(file)
    file = file/255
    a = []
    a.append(file)
    a = np.array(a)
    score = model.predict(a,verbose=1)
    label_index=np.argmax(score)
    acc=np.max(score)
    animal=get_animal_name(label_index)
    print("The predicted Animal is a "+animal+" with accuracy =    "+str(acc))

    return label_index, animal

In [None]:
def validation(val_data = validation_data, val_lab = val_labels):
    # function not finished and not working
    good_predicted = 0
    bad_predicted = 0
    for idx, data in enumerate(val_data):
        label_index, animal = predict_animal(data)
        if label_index == val_lab[idx]:
            good_predicted += 1
        else:
            bad_predicted += 1

    print("Good predicted: " + str(good_predicted))
    print("Bad predicted: " + str(bad_predicted))

# validation(validation_data, val_labels)

In [109]:
# predict image
file = "../datasets/animals/data_resized/Chicken/60.jpg"
file = cv2.imread(file)
predict_animal(file)

Predicting .................................
The predicted Animal is a Bear with accuracy =    0.38710973


(0, 'Bear')

In [None]:
# tensor board -> api to see in real time evolution of the model (from tensorflow)