In [155]:
from os import listdir, remove
from os.path import isfile, join
from PIL import Image
import numpy as np
import pandas as pd
import os
import cv2
import collections
from tensorflow.keras import Sequential
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator, ImageDataGenerator
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Flatten
from sklearn.model_selection import train_test_split

In [143]:
# GENERAL VARS
img_shape = (150, 150)

In [144]:
def read_folders_color(path, start=0, end=100):
    elems = {}
    for file_name in listdir(path):
        file_path = join(path, file_name)
        if not isfile(file_path):
            print(f'... {file_name} ...')
            elems[file_name] = read_paths_cv2_color(file_path, start, end)
    return elems

def read_paths_cv2_color(path, start=0, end=100):
    animals = []
    limit = start
    for file_name in listdir(path):
        file_path = join(path, file_name)
        if limit >= end:
            break
        if not isfile(file_path):
            print(f'WARN - ignoring {file_name}')
            continue

        try:
            img = img = cv2.imread(file_path)
            img_resized = cv2.resize(img, img_shape, interpolation=cv2.INTER_AREA)
            if any('' in x for x in img_resized):
                print(f'ERROR - {file_path} - empty content')
            animals.append(img_resized)
        except:
            print(f'ERROR - {file_path}')
        limit += 1

    return animals

In [145]:
print('start reading imgs....')
elems = read_folders_color('data/img_train/', end=1000)
print('end reading imgs')
print('data transformed!')

start reading imgs....
... cat ...
ERROR - data/img_train/cat/.DS_Store
... butterfly ...
ERROR - data/img_train/butterfly/.DS_Store
... dog ...
ERROR - data/img_train/dog/.DS_Store
... sheep ...
ERROR - data/img_train/sheep/.DS_Store
... spider ...
ERROR - data/img_train/spider/.DS_Store
... chicken ...
ERROR - data/img_train/chicken/.DS_Store
... horse ...
ERROR - data/img_train/horse/.DS_Store
... squirrel ...
ERROR - data/img_train/squirrel/.DS_Store
... cow ...
ERROR - data/img_train/cow/.DS_Store
... elephant ...
ERROR - data/img_train/elephant/.DS_Store
end reading imgs
data transformed!


In [146]:
# define a dict with animal - number
animal_names = {
    'butterfly': 0,
    'cat': 1,
    'chicken': 2,
    'cow': 3,
    'dog': 4,
    'elephant': 5,
    'horse': 6,
    'sheep': 7,
    'spider': 8,
    'squirrel': 9
}

In [147]:
# create an array with all data to get the train - test
full_array = []
targets = []
for k,v in elems.items():
    subset = []
    for subv in v:
        subset.append(subv / 255)
    full_array = full_array + subset.copy()
    targets = targets + ([animal_names[k]] * len(v))
print('transformed to 0..1')
full_array = np.asarray(full_array)
targets = np.asarray(targets)
print('saved')

In [148]:
X_train, X_test, y_train, y_test = train_test_split(full_array, targets, test_size=0.2, random_state=73)

In [149]:
model = Sequential([
    Conv2D(16, 3, padding='same', activation='relu', input_shape=(img_shape[0], img_shape[1], 3)),
    MaxPooling2D(),
    Conv2D(32, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Conv2D(64, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Flatten(),
    Dense(512, activation='relu'),
    Dense(10, activation='sigmoid')
])

model.compile(optimizer='adam',
              loss=SparseCategoricalCrossentropy(),
              metrics=['accuracy'])

In [150]:
#model.fit(X_train, y_train, epochs=6)
model.load_weights('data/checkpoints/animals_model')

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x150de0b90>

In [None]:
# save the model
model.save_weights('data/checkpoints/animals_model')

In [196]:
##################
# STARTING TESTS #
##################
print('start reading imgs....')
test_elems = read_paths_cv2_color('data/img_test/cat', start=100, end=110)
print('end reading imgs')

test_array = []
for elem in test_elems:
    test_array.append(elem / 255)

test_array = np.asarray(test_array)
print('data transformed!')

start reading imgs....
end reading imgs
data transformed!


In [217]:
for pos in range(len(test_array)):
    #print(np.asarray(test).shape)
    counts = np.bincount(model.predict_classes(np.asarray([test_array[pos]])))
    mx = np.argmax(counts)
    print(f'seems to be... {mx} with a {counts[mx] / sum(counts) * 100}%')

seems to be... 1 with a 100.0%
seems to be... 1 with a 100.0%
seems to be... 2 with a 100.0%
seems to be... 9 with a 100.0%
seems to be... 7 with a 100.0%
seems to be... 8 with a 100.0%
seems to be... 1 with a 100.0%
seems to be... 1 with a 100.0%
seems to be... 1 with a 100.0%
seems to be... 8 with a 100.0%


In [215]:
#collections.Counter(model.predict_classes(np.asarray(test_array[0])))
counts = np.bincount(model.predict_classes(np.asarray([test_array[7]])))
mx = np.argmax(counts)
print(f'seems to be... {mx} with a {counts[mx] / sum(counts) * 100}%')

seems to be... 1 with a 100.0%


In [189]:
counts[mx] / sum(counts)

0.8948948948948949

# OPTION 2

In [None]:
##################
# OPTION 2
# rebase pixels from 0 - 255 to 0 - 1 vals

In [None]:
train_image_generator = ImageDataGenerator(rescale=1./255)
validation_image_generator = ImageDataGenerator(rescale=1./255)

In [None]:
train_dir = 'data/img_train/'
validation_dir = 'data/img_test'
train_data_gen = train_image_generator.flow_from_directory(batch_size=10000,
                                                           directory=train_dir,
                                                           shuffle=True,
                                                           target_size=(150, 150),
                                                           class_mode='binary')

val_data_gen = validation_image_generator.flow_from_directory(batch_size=1000,
                                                              directory=validation_dir,
                                                              target_size=(150, 150),
                                                              class_mode='binary')

In [None]:
# create model
# Conv2D 
# test 2
model = Sequential([
    Conv2D(16, 3, padding='same', activation='relu', input_shape=(150, 150, 3)),
    MaxPooling2D(),
    Conv2D(32, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Conv2D(64, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Flatten(),
    Dense(512, activation='relu'),
    Dense(10, activation='sigmoid')
])

model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])

In [None]:
history = model.fit_generator(
    train_data_gen,
    steps_per_epoch=10000,
    epochs=15,
    validation_data=val_data_gen,
    validation_steps=1000
)