In [None]:
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import cv2
print(tf.__version__)
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())


## Folders Setup

In [None]:
# Project root: the directory the notebook is launched from.
path = os.getcwd()
# Root of the per-class image folders (<root>/data/<class>/...).
data = os.path.join(path, "data")

def folder_setup():
    """Sort the training images into ``data/<label>/`` folders.

    Reads ``train.csv`` from the current working directory. Class names come
    from its ``label`` column; for each row the image path (relative to
    ``path``) is taken from the second column and the class from the third
    column, matching the positional access used previously.

    The function is safe to re-run (existing folders are reused, files are
    overwritten) and no longer shells out to ``cp``, so it works on any OS
    and with paths that contain spaces.
    """
    import shutil  # local import: only this one-off setup helper needs it

    df = pd.read_csv("train.csv")
    classes = df["label"].drop_duplicates().values.tolist()

    # create the classes folders (makedirs also creates data/ itself)
    os.makedirs(os.path.join(path, 'data'), exist_ok=True)
    for classe in classes:
        os.makedirs(os.path.join(path, 'data', classe), exist_ok=True)

    # copy each image into the folder of its class
    for row in df.itertuples(index=False, name=None):
        # plain tuples give explicit positional access without relying on
        # the deprecated integer-key fallback of pandas Series
        rel_path, label = row[1], row[2]
        shutil.copy(
            os.path.join(path, rel_path),
            os.path.join(path, 'data', label, os.path.basename(rel_path)),
        )

In [None]:
#folder_setup()

## Classes and weights

In [None]:
# Class names are the sub-folders of data/, sorted so that position i matches
# the label index i that Keras infers from the directory.
# Only directories are kept: a stray file in data/ (e.g. .DS_Store) would
# otherwise crash the os.listdir below and shift every class index.
classes = sorted(
    entry for entry in os.listdir(data)
    if os.path.isdir(os.path.join(data, entry))
)

# Number of images per class index.
class_counts = {
    index: len(os.listdir(os.path.join(data, c)))
    for index, c in enumerate(classes)
}
total = sum(class_counts.values())

# weight = 1 - class frequency, so frequent classes weigh less in the loss.
class_weight = {
    index: 1 - count / total
    for index, count in class_counts.items()
}
classes

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Create the ImageDataGenerator objects.
# NOTE: featurewise_center / featurewise_std_normalization only take effect
# once `<generator>.fit(sample_images)` has computed the dataset mean and std;
# without that call Keras warns and skips the normalization.
# NOTE(review): neither generator is consumed by the datasets built further
# down in this notebook — confirm whether they are still needed.
train_datagen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
)

# Validation images get the same normalization but NO random augmentation,
# so validation metrics are deterministic and comparable between epochs.
val_datagen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True,
)


## Create the dataset for training and validation

In [None]:
img_size = (256,256)   # (height, width) every image is resized to
batch_size = 16

# Both subsets MUST use the same fraction and the same seed: Keras shuffles
# the file list with `seed`, gives the last `validation_split` share to
# "validation" and the rest to "training". The previous 0.15 / 0.85 mismatch
# made the validation subset overlap most of the training subset.
validation_split = 0.15
split_seed = 3777


def _load_subset(subset):
    """Load one side ("training" or "validation") of the train/validation split."""
    return keras.preprocessing.image_dataset_from_directory(
        data,
        labels="inferred",
        label_mode="categorical",
        color_mode="rgb",
        image_size=img_size,
        batch_size=batch_size,
        validation_split=validation_split,
        seed=split_seed,
        subset=subset,
    ).prefetch(buffer_size=batch_size)


train_ds = _load_subset("training")
val_ds = _load_subset("validation")


## Data augmentation

In [None]:
from tensorflow.keras import layers
#data_augmentation = keras.Sequential(
#    [
#        layers.RandomFlip("horizontal"),
#        layers.RandomRotation(0.1),
#    ]
#)

## Display some images

In [None]:
# Preview up to 9 images of the first training batch, titled with their class.
plt.figure(figsize=(10, 10))
for images, labels in train_ds.take(1):
    # a batch can hold fewer than 9 images (tiny dataset / small batch_size)
    for i in range(min(9, images.shape[0])):
        ax = plt.subplot(3, 3, i + 1)
        plt.imshow(images[i].numpy().astype("uint8"))
        # labels are one-hot (label_mode="categorical"): argmax -> class index
        plt.title(classes[labels[i].numpy().argmax()])
        plt.axis("off")

## Model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D , Flatten, Dropout, BatchNormalization, GlobalAveragePooling2D

### Use the channels-last image format everywhere

In [None]:
# Use the (height, width, channels) layout for the models built below,
# which matches the input shapes ending in 3 used by both architectures.
keras.backend.set_image_data_format('channels_last')

### Transfer learning with EfficientNetB4 

In [None]:
from tensorflow.keras.applications import EfficientNetB4
# Keras expects (height, width, channels); img_size is (height, width).
input_shape = (img_size[0], img_size[1], 3)

# One output unit per class folder instead of a hard-coded count.
num_classes = len(classes)

# Pre-trained EfficientNetB4 backbone, without its ImageNet classification head
efficientNetB4 = EfficientNetB4(weights='imagenet', input_shape=input_shape, include_top=False)

# We don't want to change the pre-trained weights of EfficientNetB4
efficientNetB4.trainable = False


# Frozen backbone -> global average pooling -> softmax classifier
model = Sequential([
    efficientNetB4,
    GlobalAveragePooling2D(),
    Dense(num_classes, activation="softmax")
])

### Simple model without any transfer learning

In [None]:
# Alternative architecture trained from scratch. It is bound to its own name
# so that it is actually usable (it used to be built and thrown away) without
# overwriting the transfer-learning `model`; to train it instead, run
# `model = simple_model` before the compilation cell.
simple_model = Sequential([
    # Keras expects (height, width, channels); img_size is (height, width)
    Conv2D(32, (3,3), padding='same', input_shape=(img_size[0],img_size[1],3), activation="relu"),
    Conv2D(32, (3,3), activation="relu"),
    BatchNormalization(),
    MaxPool2D(pool_size=(2,2)),

    Conv2D(64, (3,3), padding='same', activation="relu"),
    Conv2D(64, (3,3), activation="relu"),
    BatchNormalization(),
    MaxPool2D(pool_size=(2,2)),

    Conv2D(128, (3,3), padding='same', activation="relu"),
    Conv2D(128, (3,3), activation="relu"),
    BatchNormalization(),
    MaxPool2D(pool_size=(2,2)),

    Conv2D(256, (3,3), padding='same', activation="relu"),
    Conv2D(256, (3,3), activation="relu"),
    MaxPool2D(pool_size=(2,2)),

    Conv2D(512, (3,3), padding='same', activation="relu"),
    Conv2D(512, (3,3), activation="relu"),

    Flatten(),
    Dense(256,activation="relu"),
    Dropout(0.2),
    # one output unit per class folder instead of a hard-coded count
    Dense(len(classes), activation="softmax")
])

### Display model summary

In [None]:
# Print the layers, output shapes and parameter counts of the current `model`.
model.summary()

### Callbacks

#### Learning rates

In [None]:
from tensorflow.keras.callbacks import ReduceLROnPlateau
# Multiply the learning rate by sqrt(0.1) whenever the validation accuracy
# has not improved for 10 epochs, never going below 0.5e-6.
lr_decay_factor = np.sqrt(0.1)
reduce_lr = ReduceLROnPlateau(
    monitor="val_accuracy",
    factor=lr_decay_factor,
    patience=10,
    min_lr=0.5e-6,
)

#### Checkpoint and early stopping

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
# Save the best model seen so far to model.hdf5. It monitors the same metric
# as early stopping and the LR schedule (the default would be val_loss), so
# the file on disk holds the weights the other callbacks consider "best".
checkpointer = ModelCheckpoint(
    filepath='model.hdf5',
    monitor="val_accuracy",
    mode="max",
    verbose=1,
    save_best_only=True,
)

# Stop after 15 epochs without a validation-accuracy improvement and roll the
# in-memory model back to its best epoch.
early_stoping = EarlyStopping(
    monitor="val_accuracy",
    mode="max",
    patience=15,
    restore_best_weights=True,
)


### Model compilation and training

In [None]:
# Labels are one-hot encoded, hence categorical cross-entropy;
# accuracy is tracked so the callbacks can monitor "val_accuracy".
model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

In [None]:
# Train for at most 150 epochs; the callbacks adapt the learning rate,
# checkpoint the model and stop early.
training_callbacks = [reduce_lr, checkpointer, early_stoping]
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=150,
    class_weight=class_weight,
    callbacks=training_callbacks,
)

## Data visualisation

### Some graphs of the history of training

In [None]:
def plot_history(history):
    """
    Plot the training and validation curves for accuracy and for loss.

    One figure is drawn per metric. ``history`` is the object returned by
    ``model.fit``; its ``history`` dict must hold the keys ``accuracy``,
    ``val_accuracy``, ``loss`` and ``val_loss``.
    """
    for metric in ('accuracy', 'loss'):
        plt.figure(figsize=[10,10])
        # training curve first, validation second, matching the legend order
        # (the accuracy figure used to draw only the validation curve, which
        # the legend then mislabelled as 'train')
        plt.plot(history.history[metric])
        plt.plot(history.history['val_' + metric])
        plt.title('model ' + metric)
        plt.ylabel(metric)
        plt.xlabel('epoch')
        plt.legend(['train', 'val'], loc='upper left')
        plt.show()

# Draw the curves for the training run stored in `history`.
plot_history(history)

### Make a random prediction

In [None]:
def do_prediction():
    """Classify one randomly chosen image from the ``test`` folder.

    Loads the checkpointed weights from ``model.hdf5`` into the global
    ``model``, then prints the file name with its predicted class and score,
    followed by the score of every class.

    Raises:
        IndexError: if the ``test`` folder is empty.
        ValueError: if the chosen file cannot be decoded as an image.
    """
    import random  # local import, as in the original cell

    # prediction
    model.load_weights('model.hdf5')

    test_folder = os.path.join(path, 'test')
    # random.choice adapts to the real number of files; the previous
    # randint(0, 387) assumed the folder held exactly 388 entries
    name_img = random.choice(os.listdir(test_folder))
    random_path_img = os.path.join(test_folder, name_img)

    image_array = cv2.imread(random_path_img, cv2.IMREAD_COLOR) # Loading the image
    if image_array is None:
        # cv2.imread signals failure by returning None instead of raising
        raise ValueError(f"Could not read image: {random_path_img}")
    # OpenCV decodes to BGR, but the training datasets were loaded with
    # color_mode="rgb", so the channels are reordered before inference
    image_array = cv2.cvtColor(image_array, cv2.COLOR_BGR2RGB)
    # cv2.resize takes (width, height) whereas img_size is (height, width)
    image_array = cv2.resize(image_array, (img_size[1], img_size[0])) # Resize the image

    scores = model.predict(np.array([image_array]))
    # class folders sorted to line up with the model's output indices
    class_names = sorted(
        entry for entry in os.listdir(data)
        if os.path.isdir(os.path.join(data, entry))
    )
    print(f'{name_img} -> {class_names[scores.argmax(axis=-1)[0]]} {scores.max()}\n\n')
    for c,p in zip(class_names,scores[0]):
        print(f'{c} prediction : {p}')





### Write output file test_predictions.csv

In [None]:
def test_predictions():
    """Predict the class of every image in ``test`` and write ``test_predictions.csv``.

    The model is loaded from ``model.hdf5``. The CSV has the header
    ``index,path,pred`` and one row per test file: the file name up to its
    first dot, ``train/<file name>`` and the predicted class name.

    Raises:
        ValueError: if a file in the ``test`` folder cannot be decoded as an image.
    """
    model = keras.models.load_model('model.hdf5')

    test_folder = os.path.join(path, 'test')
    test_dir = os.listdir(test_folder)

    images = []
    for img in test_dir:
        img_path = os.path.join(test_folder, img)
        image_array = cv2.imread(img_path, cv2.IMREAD_COLOR) # Loading the image
        if image_array is None:
            # cv2.imread signals failure by returning None instead of raising
            raise ValueError(f"Could not read image: {img_path}")
        # OpenCV decodes to BGR, but the training datasets were loaded with
        # color_mode="rgb", so the channels are reordered before inference
        image_array = cv2.cvtColor(image_array, cv2.COLOR_BGR2RGB)
        # cv2.resize takes (width, height) whereas img_size is (height, width)
        images.append(cv2.resize(image_array, (img_size[1], img_size[0]))) # Resize the image
    scores = model.predict(np.array(images))

    # The file is opened only once every prediction exists, so a failure
    # above cannot leave a truncated CSV behind.
    with open("test_predictions.csv","w") as file :
        file.write("index,path,pred\n")
        for s,img in zip(scores.argmax(axis=-1),test_dir) :
            classe = classes[s]
            # NOTE(review): the path column is written as train/<file> although
            # the images come from the test folder — confirm the expected format.
            file.write(f"{img.split('.')[0]},train/{img},{classe}\n")

        
# Generate test_predictions.csv from the checkpoint saved in model.hdf5.
test_predictions()
