In [None]:
import os
import random
import tensorflow as tf
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import plot_model
import pandas as pd
import tqdm
import numpy as np
import json
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import warnings
from PIL import Image
from plotly import tools
%matplotlib inline 
import plotly.graph_objs as go
import plotly.figure_factory as ff
import plotly
import datetime
import timeit
import time
from plotly import tools
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
warnings.filterwarnings('ignore')
from progressbar import ProgressBar
from tensorflow.keras.preprocessing.image import ImageDataGenerator
%pylab inline

In [None]:
# Dataset locations. Paths are joined with '+' throughout the notebook, so the
# directory constants must keep their trailing '/'.
DIR_TEST = 'D:/Cours/4_IABD/Data/herbarium-2020_data/nybg2020/test/'
DIR_TRAIN = 'D:/Cours/4_IABD/Data/herbarium-2020_data/nybg2020/train/'
META_DATA_TRAIN = DIR_TRAIN+'metadata.json'
# The test metadata lives next to the test images; it was previously built from
# DIR_TRAIN, which made it a duplicate of META_DATA_TRAIN.
META_DATA_TEST = DIR_TEST+'metadata.json'

In [None]:
def get_run_time():
    """Return the current reading of ``timeit.default_timer()``.

    The value is only meaningful as a difference between two readings
    (see ``convert_run_time``).
    """
    timestamp = timeit.default_timer()
    return timestamp

def convert_run_time(start, stop):
    """Format the time elapsed between two timer readings.

    Parameters
    ----------
    start, stop : float
        Timer readings in seconds (e.g. from ``get_run_time``).

    Returns
    -------
    str
        ``str(datetime.timedelta)`` of ``stop - start``, e.g. ``'0:01:01'``.
    """
    elapsed = datetime.timedelta(seconds=stop - start)
    return str(elapsed)

In [None]:
# Parse the training metadata JSON; the file is only needed while parsing.
with open(META_DATA_TRAIN, 'r') as json_file:
    data = json.load(json_file)

# One line per top-level section: its name and how many entries it holds.
for key in data:
    print("-", key, len(data[key]))

In [None]:
# One DataFrame per metadata section, built in the order images -> categories -> annotations.
df_images, df_categories, df_annotations = (
    pd.DataFrame(data[section])
    for section in ('images', 'categories', 'annotations')
)

In [None]:
# Sanity check: the join below expects as many annotation rows as image rows.
assert len(df_annotations) == len(df_images)

# Attach image information to every annotation (right join keeps all annotations),
# drop the now-redundant join key and order the rows by category.
df_images_annotations = (
    df_images
    .merge(df_annotations, left_on='id', right_on='image_id', how='right')
    .drop(columns='image_id')
    .sort_values('category_id')
)

In [None]:
# Preview the first four rows of the merged, category-sorted frame.
df_images_annotations.head(4)

In [None]:
def plot_samples(df, size=(22,22), nb_samples=20, cmap=False):
    """Display a grid of randomly drawn training images labelled with their category id.

    Parameters
    ----------
    df : pd.DataFrame
        Frame providing the ``file_name`` and ``category_id`` columns;
        ``file_name`` is appended to ``DIR_TRAIN`` to locate the image.
    size : tuple
        Figure size passed to ``plt.figure(figsize=...)``.
    nb_samples : int
        Number of images to draw (sampled with replacement).
    cmap : bool-like
        Any truthy value renders the images in grayscale; otherwise they are
        shown as read from disk.

    Raises
    ------
    ValueError
        If ``df`` has no rows.
    """
    if len(df) == 0:
        raise ValueError("df is empty: there is nothing to sample")

    n_cols = 5
    # Keep the historical 5x5 layout for up to 25 samples and add rows beyond
    # that instead of failing on an out-of-range subplot index.
    n_rows = max(5, -(-nb_samples // n_cols))

    plt.figure(figsize=size)
    for i in range(nb_samples):
        # Uniform draw over all rows. The previous call used the loop index as
        # the lower bound (biasing the draw away from the first rows) and
        # relied on the deprecated np.random.random_integers.
        random_sample = df.iloc[np.random.randint(0, len(df))]
        img_path = DIR_TRAIN + random_sample['file_name']

        plt.subplot(n_rows, n_cols, i + 1)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)

        if cmap:
            # Context manager closes the file once the pixels are materialised.
            with Image.open(img_path) as src:
                arr = np.asarray(src.convert("L"))
            plt.imshow(arr, cmap='gray', vmin=0, vmax=255)
        else:
            plt.imshow(mpimg.imread(img_path), cmap=plt.cm.binary)

        plt.xlabel("CTG_ID : " + str(random_sample['category_id']))
    plt.show()
    
# Preview 20 random training images; the truthy `cmap` value selects grayscale rendering.
plot_samples(
    df_images_annotations,
    size=(24,24),
    nb_samples=20,
    cmap="gray",
)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20 % of the annotated images for validation (fixed seed for repeatability).
df_train, df_test = train_test_split(
    df_images_annotations,
    test_size=0.2,
    random_state=0,
)
# Number of distinct categories present in each split (train, then test).
print(*(len(split.category_id.unique()) for split in (df_train, df_test)))

In [None]:
def read_image(sample_path:str, resize_to=False) -> np.ndarray:
    """Load an image from disk as a float32 array scaled to [0, 1].

    Parameters
    ----------
    sample_path : str
        Path of the image file.
    resize_to : tuple or False
        ``(width, height)`` forwarded to ``PIL.Image.resize``; any falsy value
        keeps the original size.

    Returns
    -------
    np.ndarray
        Pixel values divided by 255.0, as ``float32``. The channel layout is
        whatever PIL decodes from the file (no mode conversion is applied).
    """
    # The context manager releases the file handle as soon as the pixels are in
    # memory; this function is called for every sample of an endless generator,
    # so a bare Image.open() would leave handles to the garbage collector.
    with Image.open(sample_path) as src:
        img = src.resize(resize_to) if resize_to else src
        pixels = np.array(img)

    return (pixels / 255.0).astype("float32")


In [None]:
def reshape_input(img: np.ndarray, flatten: bool = False) -> np.ndarray:
    """Validate an image array and optionally flatten it to 1-D.

    Historically this function computed ``img.reshape(h * w * c)`` but threw
    the result away, so it always returned ``img`` unchanged. That pass-through
    remains the default because the model built in ``create_model`` starts with
    a ``Flatten`` layer fed with 3-D images; flattening is available on request.

    Parameters
    ----------
    img : np.ndarray
        Image array with exactly three dimensions.
    flatten : bool
        When True, return a 1-D array of length ``img.size``; when False
        (default), return ``img`` itself.

    Raises
    ------
    ValueError
        If ``img`` is not 3-dimensional.
    """
    if img.ndim != 3:
        raise ValueError(
            "expected a 3-dimensional image array, got shape {}".format(img.shape)
        )
    if flatten:
        return img.reshape(-1)
    return img

In [None]:
def images_datagen(df: pd.DataFrame, categorys:int,  DIR=False, batch_size=64, resize_to=False) -> (np.ndarray, np.ndarray):
    """Endlessly yield random ``(images, one_hot_labels)`` batches.

    This is a generator: the argument checks below run on the first ``next()``.

    Parameters
    ----------
    df : pd.DataFrame
        Frame providing the ``file_name`` and ``category_id`` columns.
    categorys : int
        Number of classes used for the one-hot encoding.
        NOTE(review): ``to_categorical`` needs every ``category_id`` to be an
        integer in ``[0, categorys)`` - confirm the ids are contiguous.
    DIR : str
        Directory prefix prepended (with ``+``) to each ``file_name``. Required.
    batch_size : int
        Number of samples per yielded batch (drawn with replacement).
    resize_to : tuple or False
        Forwarded to ``read_image``.

    Yields
    ------
    (np.ndarray, np.ndarray)
        A stacked image batch and the matching one-hot label matrix.

    Raises
    ------
    ValueError
        If ``DIR`` is not provided or ``df`` has no rows.
    """
    if DIR is False or DIR is None:
        raise ValueError("DIR (the image root directory) must be provided")
    if len(df) == 0:
        raise ValueError("df is empty: there are no samples to draw from")

    # Extract the two columns once; indexing plain arrays avoids building a
    # pandas Series for every sample of every batch.
    file_names = df['file_name'].to_numpy()
    category_ids = df['category_id'].to_numpy()

    while True:
        # Fresh, uniformly drawn indices for each batch. Previously the batch
        # lists were created once outside this loop and never cleared, so every
        # yielded batch was `batch_size` samples larger than the last.
        picks = np.random.randint(0, len(file_names), size=batch_size)
        batch_imgs = [
            reshape_input(read_image(DIR + file_names[k], resize_to=resize_to))
            for k in picks
        ]
        batch_labels = category_ids[picks]

        yield np.array(batch_imgs), tf.keras.utils.to_categorical(batch_labels, num_classes=categorys)
        

In [None]:
def hyper_params(df):
    """Build the hyper-parameter dictionary for the single-layer softmax model.

    Parameters
    ----------
    df : pd.DataFrame
        Frame with a ``category_id`` column; the number of distinct values
        sets the width of the output layer.

    Returns
    -------
    dict
        Layer specifications (``"Layers"``) plus loss, optimizer description,
        metric name, learning rate, epochs, ``"batchSize"`` and a test name.
    """
    nb_classes = len(df.category_id.unique())

    # A single softmax output layer; further layer specs (same keys) can be
    # placed before it in the "Layers" list.
    output_layer = {"neurons": nb_classes, "activation": "softmax", "dropOut": False}

    return {
        "Layers": [output_layer],
        "loss": "categorical_crossentropy",
        "optimizer": "SGD(lr=0.001)",
        "metrics": "accuracy",
        "learningRate": .001,
        "epochs": 1,
        "batchSize": 2000,
        "testName": "Simple Linear Model",
    }

In [None]:
def create_model(params, input_shape=(64, 64, 3)):
    """Build and compile a dense Keras classifier from a hyper-parameter dict.

    Parameters
    ----------
    params : dict
        Expected keys:
        - ``'Layers'``: list of dicts with ``'neurons'``, ``'activation'`` and
          ``'dropOut'`` (a dropout rate, or a falsy value for no dropout);
        - ``'loss'``: loss passed to ``model.compile``;
        - ``'learningRate'`` (optional): SGD learning rate, 0.1 when absent;
        - ``'metrics'`` (optional): metric name or list of names,
          ``'accuracy'`` when absent.
    input_shape : tuple
        Shape of a single input image; it is flattened by the first layer.

    Returns
    -------
    The compiled ``Sequential`` model (its summary is printed as a side effect).
    """
    model = Sequential()
    model.add(Flatten(input_shape=input_shape))

    for layer in params['Layers']:
        model.add(Dense(layer['neurons'], activation=layer['activation']))
        drop_rate = layer.get('dropOut', False)
        if drop_rate:
            model.add(Dropout(drop_rate))

    # Honour the hyper-parameters instead of hard-coded values: the learning
    # rate used to be fixed at 0.1 regardless of params['learningRate'].
    learning_rate = params.get('learningRate', 0.1)
    metrics = params.get('metrics', 'accuracy')
    if isinstance(metrics, str):
        metrics = [metrics]

    opt = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    model.compile(loss=params['loss'],
                  optimizer=opt,
                  metrics=list(metrics))

    model.summary()
    return model

In [None]:
def fit_model_gen(model, params, nb_classes,  DIR_NAME, df_train, df_test):
    """Train ``model`` on batches produced by ``images_datagen``.

    Parameters
    ----------
    model : Keras model exposing ``fit``.
    params : dict
        Uses ``'epochs'`` and ``'batchSize'``. Note that ``'batchSize'`` is
        passed as ``steps_per_epoch`` (number of generator batches per epoch);
        the size of each batch is ``images_datagen``'s own default.
    nb_classes : int
        Number of classes for the one-hot labels.
    DIR_NAME : str
        Directory prefix of the image files.
    df_train, df_test : pd.DataFrame
        Frames feeding the training and validation generators.

    Returns
    -------
    (model, history)
        The fitted model and the object returned by ``model.fit``.
    """
    target_size = (64, 64)

    # Two independent endless generators: one for training, one for validation.
    train_batches = images_datagen(df_train, nb_classes, DIR=DIR_NAME, resize_to=target_size)
    valid_batches = images_datagen(df_test, nb_classes, DIR=DIR_NAME, resize_to=target_size)

    fit_options = dict(
        epochs=params['epochs'],
        steps_per_epoch=params['batchSize'],
        verbose=1,
        validation_data=valid_batches,
        validation_steps=40,
    )
    history = model.fit(train_batches, **fit_options)

    return model, history

In [None]:
# Derive the hyper-parameters from the full annotated frame (the output layer
# gets one neuron per distinct category_id), then build and compile the model.
params = hyper_params(df_images_annotations)
model = create_model(params)

In [None]:
# Train the model and report the elapsed wall-clock time of the whole run.
start = get_run_time()
model, history = fit_model_gen(
    model,
    params,
    len(df_images_annotations.category_id.unique()),
    DIR_TRAIN,
    df_train,
    df_test,
)
stop = get_run_time()
print(convert_run_time(start, stop))