In [None]:
# Import needed packages

import datetime
import pathlib
import yaml

from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dense, Flatten, Dropout

from PIL import Image

# Load constants

In [None]:
# Parse the YAML configuration first, then expose every setting as a constant.
with open("config.yaml", 'r') as config_file:
    config = yaml.safe_load(config_file)

# Geometry of the images fed to the network
IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_DEPTH = (
    config[key] for key in ("image_width", "image_height", "image_depth")
)

# Folders are wrapped in Path objects so they can be joined with "/"
DATA_DIR = pathlib.Path(config["data_dir"])
MODELS_DIR = pathlib.Path(config["models_dir"])

# Name of the label column and of the files listing the train / test images
TARGET_NAME = config["target_name"]
DATA_TRAIN_FILE = config["data_train_file"]
DATA_TEST_FILE = config["data_test_file"]

# Functions

In [None]:
def build_image_database(path, target, images_dir=None):
    """Build a pandas dataframe with the target class and the access path of each image.

    Every line of the input file is read as one string and cut at its first
    space: the part before it is the image id, everything after it is the
    label (so labels may themselves contain spaces).

    Parameters:
        - path (Path): Path of the text file listing the images
        - target (str): Name given to the label column
        - images_dir (Path, optional): Folder containing the ``<image_id>.jpg``
          files. Defaults to ``DATA_DIR / "images"``.

    Return:
        A pandas dataframe with the columns ``image_id``, ``<target>`` and ``path``.
    """
    if images_dir is None:
        images_dir = DATA_DIR / "images"
    images_dir = pathlib.Path(images_dir)

    # dtype=str keeps every line textual: a file made only of numeric ids would
    # otherwise be parsed as integers and could not be split below.
    _df = pd.read_csv(path, names=["all"], dtype=str)

    # (image_id, separator, label) for each line, cut at the first space only
    pieces = [line.partition(' ') for line in _df["all"]]
    image_ids = [head for head, _, _ in pieces]
    labels = [tail for _, _, tail in pieces]

    return pd.DataFrame(
        {
            "image_id": image_ids,
            target: labels,
            "path": [images_dir / (image_id + '.jpg') for image_id in image_ids],
        },
        index=_df.index,
    )

In [None]:
def build_classification_model(df: pd.DataFrame,target: str, images: str):
    """Assemble and compile a small convolutional classifier sized from a dataframe.

    Parameters
    ----------
        df (pandas.dataFrame): dataframe holding the target and images columns
        target (str): column name of the target variable
        images (str): column name of the image arrays
    Returns
    ------
    tensorflow model built & compiled (not trained)
    """
    # One output unit per distinct label found in the target column
    n_outputs = df[target].nunique()

    # The input layer is sized from the first image of the column
    input_shape = df[images].iloc[0].shape

    # Two convolutional stages (32 then 64 filters), each followed by pooling
    # and dropout, then a dense classification head.
    layers = [
        Conv2D(filters=32, kernel_size=(5,5), activation='relu', input_shape=input_shape),
        Conv2D(filters=32, kernel_size=(5,5), activation='relu'),
        MaxPool2D(pool_size=(2, 2)),
        Dropout(rate=0.25),
        Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
        Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
        MaxPool2D(pool_size=(2, 2)),
        Dropout(rate=0.25),
        Flatten(),
        Dense(256, activation='relu'),
        Dropout(rate=0.5),
        Dense(n_outputs, activation='softmax'),
    ]
    network = Sequential(layers)

    # Categorical cross-entropy matches the one-hot targets and softmax output
    network.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    return network

In [None]:
def show_image(df,row,target):
    """Show the image stored at a given row, with its label as the plot title.

    Args:
        df (pandas.dataFrame): the dataframe of images; must contain a 'path'
            column and the ``target`` column
        row (int): the index label of the row to display
        target (string): the column name of the associated label
    Return
    ------
    None
    """

    assert target in df.columns, f"Column {target} not found in dataframe"
    assert 'path' in df.columns, "Column path doesn't exist in dataframe"
    _img = plt.imread(df.loc[row,'path'])
    plt.imshow(_img)
    # The label was validated but never displayed before; show it with the image
    plt.title(f"{target}: {df.loc[row, target]}")
    return

In [None]:
def load_resize_image(path,height,width):
    """Load an image and resize it to the target size

    Parameters:
        - path (Path): path to the file to load and resize
        - height (int): the height of the final resized image
        - width(int): the width of the resized image
    Return
    ------
    numpy.array containing resized image
    """
    # The context manager releases the file handle as soon as the pixels have
    # been read, instead of leaving it to garbage collection.
    with Image.open(path) as source_image:
        # Pillow expects the size as (width, height)
        return np.array(source_image.resize((width,height)))

In [None]:
def build_x_and_y(df: pd.DataFrame, target: str, images: str, classes=None):
    """Build the x tensor and the one-hot y tensor used for model fitting.

    Parameters
    ----------
    df (pd.DataFrame): dataframe
    target (str): name of target column
    images (str): name of resized images column
    classes (sequence, optional): ordered list of every class name. When given,
        labels are encoded by their position in this list and y has one column
        per listed class, so several dataframes can share the same encoding.
        When omitted, the classes are the sorted labels found in ``df`` itself.

    Returns
    -------
    x (numpy.array): tensor of x values
    y (numpy.array): tensor of y values (one-hot encoded)

    Raises
    ------
    ValueError: if a label is missing or does not belong to ``classes``
    """

    x = np.array(df[images].to_list())

    if classes is None:
        codes = df[target].astype('category').cat.codes
        num_classes = None  # inferred from the highest code, as before
    else:
        encoded = pd.Categorical(df[target], categories=list(classes))
        codes = encoded.codes
        num_classes = len(encoded.categories)

    # pandas marks missing / unknown labels with the code -1, which one-hot
    # encoding would silently turn into the last class.
    if (np.asarray(codes) == -1).any():
        raise ValueError(
            f"Column {target} contains missing labels or labels outside the known classes"
        )

    y = tf.keras.utils.to_categorical(codes, num_classes=num_classes)
    return x,y

In [None]:
def classify_images(images,model,classes_names=None):
    """Classify images through a tensorflow model.

    Parameters:
    -----------
    images(np.array): set of images to classify
    model (tensorflow.keras.Model): tensorflow/keras model
    classes_names (sequence, optional): class names ordered like the model
        outputs (list, numpy array or pandas Index). When given, names are
        returned instead of integer class indices.

    Returns
    -------
    numpy array of predicted classes (indices, or names if classes_names is given)

    """
    results = model.predict(images)
    # The most probable class of each image is the arg-max of its output row
    classes = np.argmax(results,axis=1)
    if classes_names is not None:
        # np.asarray makes fancy indexing work for plain Python lists too
        classes = np.asarray(classes_names)[classes]
    return classes

In [None]:
def save_model(model ,saving_dir=MODELS_DIR,basename=TARGET_NAME,append_time=False):
    """Write a tf/Keras model as an .h5 file under ``<saving_dir>/neural_networks``.

    Parameters
    ----------
    model (tf/Keras model): model to be saved
    saving_dir (path): root folder receiving the model file
    basename (str): the basename of the model
    append_time (bool): when True, the current date and time are appended to the basename

    Returns
    -------
    The model name used for the file (without folder nor extension)
    """
    if append_time:
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
        model_name = f"{basename}_{timestamp}"
    else:
        model_name = f"{basename}"

    destination = f"{saving_dir}/neural_networks/{model_name}.h5"
    model.save(destination)
    return model_name

# Read train & test file

In [None]:
# Index the train and test image lists with the same helper
train_df = build_image_database(path=DATA_DIR / DATA_TRAIN_FILE, target=TARGET_NAME)
test_df = build_image_database(path=DATA_DIR / DATA_TEST_FILE, target=TARGET_NAME)

In [None]:
# Preview the first rows of the training dataframe
train_df.head()

In [None]:
# Preview the first rows of the test dataframe
test_df.head()

# View some images

In [None]:
# Display one training image picked at random
show_image(train_df, np.random.randint(len(train_df)), TARGET_NAME)

In [None]:
# Display one test image picked at random
show_image(test_df, np.random.randint(len(test_df)), TARGET_NAME)

# Resize Images

In [None]:
# Load every training image from disk and keep its resized pixel array
train_df['resized_image'] = train_df.apply(
    lambda row: load_resize_image(path=row['path'], height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
    axis="columns",
)
# Same treatment for the test images
test_df['resized_image'] = test_df.apply(
    lambda row: load_resize_image(path=row['path'], height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
    axis="columns",
)

# Split dataset into x and y

In [None]:
# Labels are one-hot encoded separately for each dataframe, from the sorted
# labels it contains. The two encodings only line up (same column per class,
# same number of columns) when both splits hold exactly the same label set.
assert set(train_df[TARGET_NAME]) == set(test_df[TARGET_NAME]), \
    "Train and test sets do not contain the same classes"

X_train,y_train = build_x_and_y(train_df,TARGET_NAME,'resized_image')
X_test,y_test = build_x_and_y(test_df,TARGET_NAME,'resized_image')

# Build & train the model

In [None]:
# Size the network (input shape, number of classes) from the training data
model = build_classification_model(df=train_df, target=TARGET_NAME, images="resized_image")

In [None]:
# Enable the TensorBoard notebook extension
%load_ext tensorboard

# Shell command: deletes the whole ./logs folder, i.e. the logs of every previous run
!rm -rf ./logs
# Each run logs into its own timestamped sub-folder of logs/fit
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

In [None]:
%%time 
# Train the model and report the wall time of the whole cell.
# NOTE(review): the test split is passed as validation_data, so the validation
# metrics shown during training are computed on the test set.
epochs = 5
history = model.fit(X_train,y_train,batch_size = 32,epochs = epochs , validation_data = (X_test,y_test),
                   callbacks=[tensorboard_callback]
                   )

In [None]:
# Open the TensorBoard UI inside the notebook, reading the runs stored under logs/fit
%tensorboard  --logdir  logs/fit 

# Predict from the model

In [None]:
# Distinct training labels as pandas categories; position i is the name of the
# class encoded with the integer code i by `.astype('category').cat.codes`.
classes_names = train_df[TARGET_NAME].astype('category').cat.categories

In [None]:
# Predict the class names of ten test images
classify_images(images=X_test[10:20], model=model, classes_names=classes_names)

# Save the model

In [None]:
# Persist the trained network; the returned name is reused for the classes file
model_name = save_model(model, saving_dir=MODELS_DIR)

In [None]:
# Store the ordered class names next to the model so predictions can be decoded later.
classes_dir = MODELS_DIR/"classes"
# open() does not create missing folders, so make sure the target one exists
classes_dir.mkdir(parents=True, exist_ok=True)
with open(classes_dir/f"{model_name}.yaml","w") as classes_file:
    yaml.dump(list(classes_names),classes_file)