In [None]:
import numpy as np
import pandas as pd
import glob
import os
import matplotlib.pyplot as plt
from sklearn.utils import class_weight

import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D, GlobalAveragePooling2D
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, Callback
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from classification_models.tfkeras import Classifiers
import efficientnet.tfkeras as efn

In [None]:
import os
import numpy as np 
import pandas as pd 
from tensorflow.keras import backend as K
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import StratifiedKFold

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras import optimizers
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.callbacks import EarlyStopping
import time
from sklearn.utils import class_weight
from tensorflow.keras.metrics import AUC

from efficientnet.tfkeras import EfficientNetB3
from efficientnet.tfkeras import EfficientNetB2
#from tensorflow.python.keras.applications import InceptionV3

In [None]:
# Fail fast when no GPU is visible, then switch the first GPU to on-demand
# memory allocation.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
assert physical_devices, "Not enough GPU hardware devices available"
# NOTE(review): set_memory_growth is documented to return nothing, so `config`
# presumably ends up as None - confirm nothing relies on it.
config = tf.config.experimental.set_memory_growth(physical_devices[0], True)

In [None]:
# Keep TensorFlow from reserving all GPU memory up front: enable on-demand
# memory growth on every visible GPU.
physical_devices = tf.config.list_physical_devices('GPU')
try:
    for gpu in physical_devices:
        tf.config.experimental.set_memory_growth(gpu, True)
except (RuntimeError, ValueError) as err:
    # Best effort: report the failure and continue with TensorFlow's default
    # allocation. The previous handler called an undefined `logger`, so the
    # error path itself raised NameError; the bare `except` also hid
    # unrelated errors such as KeyboardInterrupt.
    print(f'error accessing gpu devices... ({err})')

### Collecting information to use in models

In [None]:
# Root folder of the training images. The next cell walks it and treats every
# sub-folder name as the class label of the files inside it.
root_train = '../data/raw/vehicle/train/train/'
# Accumulates (class, file_path) tuples that later become the training DataFrame.
data_train = []

In [None]:
# One (class, file_path) tuple per image: classes are the sub-folders of
# root_train, visited in sorted order, and so are the files inside each class.
data_train.extend(
    (category, os.path.join(root_train, category, file))
    for category in sorted(os.listdir(root_train))
    for file in sorted(os.listdir(os.path.join(root_train, category)))
)

In [None]:
# Build the training table and shuffle it once. The shuffle is seeded so the
# row order - and therefore the fold files derived from it - is the same on
# every Restart & Run All.
train_df = pd.DataFrame(data_train, columns=['class','file_path']).sample(frac=1.0, random_state=42)
train_df.head()

In [None]:
# Report how many training images were found.
print('There are {} images on train folder'.format(len(train_df)))

In [None]:
# Folder holding the unlabeled test images (a flat folder: the next cell lists
# its entries directly, without class sub-folders).
root_test = '../data/raw/vehicle/test/testset/'
# Accumulates the test file names that later become the test DataFrame.
data_test = []

In [None]:
# Record the bare file names (not joined with root_test) in sorted order.
data_test.extend(sorted(os.listdir(root_test)))

In [None]:
# Single-column table of test file names; preview the first rows.
test_df = pd.DataFrame({'file_path': data_test}, columns=['file_path'])
test_df.head()

In [None]:
# Report how many test images were found.
print('There are {} images on test folder'.format(len(test_df)))

In [None]:
# AUC metric instance named 'auc'; with that name Keras logs it under the
# 'auc' / 'val_auc' history keys that the training loop reads.
auc = AUC(name='auc')

In [None]:
# Stratified K-fold splitter with 2 folds; the fold loops further down iterate
# over the same number of folds.
skf = StratifiedKFold(n_splits = 2)

In [None]:
# Inputs for skf.split: X holds the image paths, y the class labels to
# stratify on.
X = train_df['file_path']
y = train_df['class']

In [None]:
# Empty list; no cell visible in this notebook fills or reads it.
# NOTE(review): possibly a leftover - confirm before removing.
networks=[]

In [None]:
# Wall-clock timestamp taken before the fold processing starts; nothing in the
# cells shown here reads it back.
totalTime = time.time()

In [None]:
# Per-fold results: the training loop appends each fold's best validation
# accuracy and best validation AUC to these lists.
best_acc = []
best_auc = []

In [None]:
# Mini-batch size intended for the image generators.
batch_size = 50

In [None]:
# Integer initialised to 0; no cell visible in this notebook reads it.
# NOTE(review): possibly a leftover - confirm before removing.
place = 0

In [None]:
#%% K-folds to files so that they can be loaded for different networks
# Each stratified fold is pickled as two DataFrames (training rows and
# validation rows) so later cells can reload exactly the same split.
processed_dir = "../data/processed"
os.makedirs(processed_dir, exist_ok=True)  # to_pickle does not create missing folders

for i, (train_index, test_index) in enumerate(skf.split(X, y)):

    trainName = f"{processed_dir}/data_train_fold{i}"
    valName = f"{processed_dir}/data_val_fold{i}"

    # Write the slices directly instead of rebinding data_train / data_test:
    # those names hold the raw file lists built earlier, and overwriting them
    # with DataFrames made the collection cells fail when re-run.
    train_df.iloc[train_index].to_pickle(trainName)
    train_df.iloc[test_index].to_pickle(valName)

In [None]:
# Build the augmented training generator and the plain validation generator
# for each saved fold. Nothing is trained here; after the loop the generator
# variables refer to the last fold.
for i in range(2):

    separator = "##################################"
    print(separator)
    print(f"FOLD {i}")
    print(separator)

    trainName = f"../data/processed/data_train_fold{i}"
    valName = f"../data/processed/data_val_fold{i}"

    print(f"Loading: {trainName}")
    data_train = pd.read_pickle(trainName)

    print(f"Loading: {valName}")
    data_test = pd.read_pickle(valName)

    Teaching_time = time.time()

    # Validation images are only rescaled; training images are also augmented.
    valdatagen = ImageDataGenerator(rescale=1./255)

    augmentation = dict(
        rescale=1./255,
        horizontal_flip=True,
        rotation_range=25,
        height_shift_range=0.2,
        width_shift_range=0.2,
        zoom_range=0.2,
        shear_range=0.2,
        brightness_range=(0.9, 1.1),
    )
    traindatagen = ImageDataGenerator(**augmentation)

    # Both generators read the same columns at the same resolution and batch size.
    flow_options = dict(
        x_col='file_path',
        y_col='class',
        target_size=(224, 224),
        batch_size=50,
    )
    train_generator = traindatagen.flow_from_dataframe(dataframe=data_train, **flow_options)
    val_generator = valdatagen.flow_from_dataframe(dataframe=data_test, **flow_options)

In [None]:
#%% Change parameters as needed, trying to keep things simple
# Train one EfficientNetB3 classifier per saved fold. For every fold the loop
# reloads the pickled split, builds the generators, trains with checkpointing /
# LR scheduling / early stopping, records the best validation accuracy and AUC,
# and saves two models: one restored from the best-AUC weights and one from the
# best-accuracy weights.

for i in range(skf.get_n_splits()):  # one iteration per fold file written by the K-fold cell

    # Reset Keras' global state before building this fold's model.
    K.clear_session()

    print("##################################")
    print(f"FOLD {i}")
    print("##################################")

    trainName = f"../data/processed/data_train_fold{i}"
    valName = f"../data/processed/data_val_fold{i}"

    print(f"Loading: {trainName}")
    data_train = pd.read_pickle(trainName)

    print(f"Loading: {valName}")
    data_test = pd.read_pickle(valName)

    Teaching_time = time.time()

    # Validation images are only rescaled; training images are also augmented.
    valdatagen = ImageDataGenerator(rescale=1./255)

    traindatagen = ImageDataGenerator(
            rescale = 1./255,
            horizontal_flip = True,
            rotation_range = 25,
            height_shift_range = 0.2,
            width_shift_range = 0.2,
            zoom_range = 0.2,
            shear_range = 0.2,
            brightness_range = (0.9, 1.1)
            )

    train_generator = traindatagen.flow_from_dataframe(
            dataframe = data_train,
            x_col = 'file_path',
            y_col = 'class',
            target_size = (224,224),
            batch_size = batch_size
            )

    val_generator = valdatagen.flow_from_dataframe(
            dataframe = data_test,
            x_col = 'file_path',
            y_col = 'class',
            target_size = (224,224),
            batch_size = batch_size
            )

    # ImageNet-pretrained backbone plus a small 17-way classification head.
    pre_network = EfficientNetB3(include_top = False,
                             weights = 'imagenet',
                             input_shape = (224,224,3))
    network = models.Sequential()
    network.add(pre_network)
    network.add(layers.GlobalMaxPooling2D())
    network.add(layers.Dropout(rate=0.2))
    network.add(layers.Dense(17, activation = 'softmax',kernel_initializer = 'he_uniform'))

    # A fresh AUC metric per fold avoids reusing an object created before
    # clear_session(); the explicit name keeps the 'val_auc' history key.
    network.compile(loss = 'categorical_crossentropy',
                optimizer = optimizers.RMSprop(learning_rate=1e-5),
                metrics = ['acc', AUC(name='auc')])

    network.summary()

    # len(generator) is ceil(samples / batch_size), counting only the files the
    # generator actually accepted.
    train_steps = len(train_generator)
    val_steps = len(val_generator)

    # Weight files for the two "best so far" checkpoints. The paths keep their
    # own names: previously the AUC path was overwritten by its callback object,
    # which was then passed to load_weights().
    storage_location = "best_acc.hdf5"
    auc_storage_location = "best_auc.hdf5"

    recording = ModelCheckpoint(monitor = 'val_acc',
                            mode = 'max',
                            filepath = storage_location,
                            verbose = 1,
                            save_weights_only = True,
                            save_best_only=True)

    auc_recording = ModelCheckpoint(monitor = 'val_auc',
                                mode = 'max',
                                filepath = auc_storage_location,
                                verbose = 1,
                                save_weights_only = True,
                                save_best_only=True)

    intermediary =  ReduceLROnPlateau(monitor='val_acc',
                               factor=0.5,
                               patience=3,
                               verbose=1)

    stop = EarlyStopping(monitor = 'val_acc',
                     patience = 10,
                     verbose = 1)

    # Easy way to fight the class imbalance using sklearn.
    # Keyword arguments are required by current scikit-learn releases and are
    # accepted by older ones as well.
    fold_classes = np.unique(train_generator.classes)
    class_weights_lst = class_weight.compute_class_weight(class_weight='balanced',
                                                      classes=fold_classes,
                                                      y=train_generator.classes)
    class_weights = dict(zip(fold_classes, class_weights_lst))

    history = network.fit(train_generator,
                      steps_per_epoch = train_steps,
                      epochs = 100,
                      validation_data = val_generator,
                      validation_steps = val_steps,
                      verbose = 1,
                      callbacks = [recording, intermediary, auc_recording, stop],
                      workers = 6,
                      max_queue_size = 64,
                      class_weight = class_weights
                      )

    bestauc = max(history.history['val_auc'])
    best_auc.append(bestauc)

    # Save the model restored from the best-AUC weights ...
    auc_name = f"01_EffiNetB3_auc_fold_{i}"
    print(f"saving: {auc_name}")

    network.load_weights(auc_storage_location)
    network.save(auc_name)

    # ... and the model restored from the best-accuracy weights.
    network.load_weights(storage_location)

    bestacc = max(history.history['val_acc'])
    best_acc.append(bestacc)

    name = f"01_EffiNetB3_acc_foldi_{i}.hdf5"
    print(f"saving: {name}")
    network.save(name)

## Testing some models

### Simple model: Sequential

In [None]:
# Simple baseline: three conv/pool stages with light dropout, then a dense
# classifier over the 17 classes.
model = Sequential()
for layer in (
    Conv2D(16, 3, padding='same', activation='relu', input_shape=(224, 224, 3)),
    MaxPooling2D(),
    Dropout(0.2),
    Conv2D(32, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Conv2D(64, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Dropout(0.2),
    Flatten(),
    Dense(512, activation='relu'),
    Dense(17, activation='softmax'),
):
    model.add(layer)

In [None]:
# Compile the model
# `learning_rate` is the supported argument name; the `lr` alias is deprecated
# in TF 2.x and rejected by newer Keras releases.
optimizer = Adam(learning_rate=0.0001)
model.compile(optimizer=optimizer,
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()

In [None]:
# Class weights
# 'balanced' weights are inversely proportional to class frequency. Keyword
# arguments are required by current scikit-learn releases and accepted by
# older ones as well.
class_weights_lst = class_weight.compute_class_weight(class_weight='balanced',
                                                  classes=np.unique(train_generator.classes),
                                                  y=train_generator.classes)

In [None]:
# Display the computed per-class weights.
class_weights_lst

In [None]:
# Map each class index to its weight, in the form Keras' `class_weight` expects.
class_weights = {
    class_index: weight
    for class_index, weight in zip(np.unique(train_generator.classes), class_weights_lst)
}

In [None]:
# Number of training images per class (shows the class imbalance).
train_df['class'].value_counts()

In [None]:
# Class-name -> integer-index mapping used by the training generator.
train_generator.class_indices

In [None]:
# Display the class-index -> weight mapping.
class_weights

In [None]:
# Early stopping: end training once val_accuracy has gone 5 epochs without
# improving by at least 1e-4.
earlyStopping = EarlyStopping(
    monitor='val_accuracy',
    min_delta=1e-4,
    patience=5,
    verbose=1,
)

In [None]:
# Learning-rate schedule: multiply the learning rate by 0.1 whenever
# val_accuracy has gone 4 epochs without improving by at least 1e-4.
reduce_lr = ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.1,
    min_delta=1e-4,
    patience=4,
    verbose=1,
)

In [None]:
# Checkpoint: keep only the weights of the epoch with the best val_accuracy,
# in a file named after the model.
file_path = '../data/results/{}.h5'.format(model.name)
best_model = ModelCheckpoint(
    filepath=file_path,
    monitor='val_accuracy',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)

In [None]:
# Train model
# Validation uses `val_generator` (the name `validation_generator` is never
# defined in this notebook). Step counts come from the generators themselves -
# len(generator) is ceil(samples / batch_size) - instead of hard-coded totals
# that assumed a different batch size and dataset size.
history = model.fit(train_generator,
                    steps_per_epoch=len(train_generator),
                    epochs=100,
                    validation_data=val_generator,
                    validation_steps=len(val_generator),
                    class_weight=class_weights,
                    callbacks=[earlyStopping, reduce_lr, best_model])

In [None]:
# Per-epoch training and validation accuracy of the simple model.
acc, val_acc = (history.history[key] for key in ('accuracy', 'val_accuracy'))

In [None]:
# Per-epoch training and validation loss of the simple model.
loss, val_loss = (history.history[key] for key in ('loss', 'val_loss'))

In [None]:
# Plot training
# Early stopping usually ends training before epoch 100, so the x axis is
# sized from the recorded history; a fixed range(100) makes plt.plot fail on
# mismatched lengths.
epochs_range = range(len(acc))
plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.grid()
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.grid()
plt.title('Training and Validation Loss')

### ResNet50 Model

In [None]:
# ImageNet-pretrained ResNet50 backbone without its classification head; the
# feature map is reduced with global max pooling.
resnet_options = dict(include_top=False, weights='imagenet', pooling='max')
base_model = tf.keras.applications.resnet50.ResNet50(**resnet_options)

In [None]:
# ResNet50 features -> 1024-unit hidden layer -> 17-way softmax.
model2 = Sequential([
    base_model,
    Dense(1024, activation='relu'),
    Dense(17, activation='softmax'),
])

In [None]:
# Compile the model
# The generators are created without `class_mode`, so they yield one-hot
# labels (the default is 'categorical'). That requires categorical_crossentropy;
# the sparse variant expects integer class ids and fails on one-hot targets.
model2.compile(optimizer='adam',
               loss='categorical_crossentropy',
               metrics=['accuracy'])
model2.summary()

In [None]:
# Checkpoint after every epoch (save_best_only is off); epoch number, loss and
# accuracy are embedded in each file name.
file_path = model2.name + '.{epoch:02d}-{loss:.2f}-{accuracy:.2f}.hdf5'
best_model2 = ModelCheckpoint(filepath=file_path,
                              monitor='loss',
                              save_best_only=False)

In [None]:
# Train model
# Validation uses `val_generator` (the name `validation_generator` is never
# defined in this notebook). Step counts come from the generators themselves -
# len(generator) is ceil(samples / batch_size) - instead of hard-coded totals.
history2 = model2.fit(train_generator,
                    steps_per_epoch=len(train_generator),
                    epochs=100,
                    validation_data=val_generator,
                    validation_steps=len(val_generator),
                    class_weight=class_weights,
                    callbacks=[earlyStopping, best_model2])

In [None]:
# accuracy & validation accuracy
# A Keras History object stores its per-epoch logs in `.history`; the
# attribute `.history2` does not exist.
acc2 = history2.history['accuracy']
val_acc2 = history2.history['val_accuracy']

In [None]:
# loss values & validation loss values
# A Keras History object stores its per-epoch logs in `.history`; the
# attribute `.history2` does not exist.
loss2 = history2.history['loss']
val_loss2 = history2.history['val_loss']

In [None]:
# Plot training
# Early stopping usually ends training before epoch 100, so the x axis is
# sized from the recorded history rather than a fixed range(100).
epochs_range = range(len(acc2))
plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc2, label='Training Accuracy')
plt.plot(epochs_range, val_acc2, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.grid()
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss2, label='Training Loss')
plt.plot(epochs_range, val_loss2, label='Validation Loss')
plt.legend(loc='upper right')
plt.grid()
plt.title('Training and Validation Loss')

### MobileNetV2 model

In [None]:
# Look up the MobileNetV2 constructor from classification_models; the second
# item returned by Classifiers.get is not used here.
basemodel, _ = Classifiers.get('mobilenetv2')

# ImageNet-pretrained backbone without its head, for 224x224 RGB inputs.
base_model3 = basemodel(include_top=False,
                        weights='imagenet',
                        input_shape=(224, 224, 3))

# Functional head: global average pooling followed by a 17-way softmax.
x = GlobalAveragePooling2D()(base_model3.output)
output = Dense(17, activation='softmax')(x)
model3 = Model(inputs=[base_model3.input], outputs=[output])

In [None]:
# Compile the model
# The generators are created without `class_mode`, so they yield one-hot
# labels (the default is 'categorical'). That requires categorical_crossentropy;
# the sparse variant expects integer class ids and fails on one-hot targets.
model3.compile(optimizer='adam',
               loss='categorical_crossentropy',
               metrics=['accuracy'])
model3.summary()

In [None]:
# Checkpoint after every epoch (save_best_only is off); epoch number, loss and
# accuracy are embedded in each file name.
file_path = model3.name + '.{epoch:02d}-{loss:.2f}-{accuracy:.2f}.hdf5'
best_model3 = ModelCheckpoint(filepath=file_path,
                              monitor='loss',
                              save_best_only=False)

In [None]:
# Train model
# Validation uses `val_generator` (the name `validation_generator` is never
# defined in this notebook). Step counts come from the generators themselves -
# len(generator) is ceil(samples / batch_size) - instead of hard-coded totals.
history3 = model3.fit(train_generator,
                    steps_per_epoch=len(train_generator),
                    epochs=100,
                    validation_data=val_generator,
                    validation_steps=len(val_generator),
                    class_weight=class_weights,
                    callbacks=[earlyStopping, best_model3])

In [None]:
# accuracy & validation accuracy
# A Keras History object stores its per-epoch logs in `.history`; the
# attribute `.history3` does not exist.
acc3 = history3.history['accuracy']
val_acc3 = history3.history['val_accuracy']

In [None]:
# loss values & validation loss values
# A Keras History object stores its per-epoch logs in `.history`; the
# attribute `.history3` does not exist.
loss3 = history3.history['loss']
val_loss3 = history3.history['val_loss']

In [None]:
# Plot training
# Early stopping usually ends training before epoch 100, so the x axis is
# sized from the recorded history rather than a fixed range(100).
epochs_range = range(len(acc3))
plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc3, label='Training Accuracy')
plt.plot(epochs_range, val_acc3, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.grid()
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss3, label='Training Loss')
plt.plot(epochs_range, val_loss3, label='Validation Loss')
plt.legend(loc='upper right')
plt.grid()
plt.title('Training and Validation Loss')

### NASNetLarge model (tf.keras.applications)

In [None]:
# ImageNet-pretrained NASNetLarge backbone from tf.keras.applications, without
# its head, for 331x331 RGB inputs; features are reduced with global max pooling.
nasnet_options = dict(
    include_top=False,
    weights='imagenet',
    input_shape=(331, 331, 3),
    pooling='max',
)
base_model4 = tf.keras.applications.nasnet.NASNetLarge(**nasnet_options)

In [None]:
# NASNetLarge features -> 127-unit hidden layer -> 17-way softmax.
model4 = Sequential([
    base_model4,
    Dense(127, activation='relu'),
    Dense(17, activation='softmax'),
])

In [None]:
# Compile the model
# The generators are created without `class_mode`, so they yield one-hot
# labels (the default is 'categorical'). That requires categorical_crossentropy;
# the sparse variant expects integer class ids and fails on one-hot targets.
model4.compile(optimizer='adam',
               loss='categorical_crossentropy',
               metrics=['accuracy'])
model4.summary()

In [None]:
# Checkpoint after every epoch (save_best_only is off); epoch number, loss and
# accuracy are embedded in each file name.
file_path = model4.name + '.{epoch:02d}-{loss:.2f}-{accuracy:.2f}.hdf5'
best_model4 = ModelCheckpoint(filepath=file_path,
                              monitor='loss',
                              save_best_only=False)

In [None]:
# Train model
# model4 takes 331x331 images while the shared generators yield 224x224, so
# this cell builds generators at the model's own input resolution from the
# fold data and augmentation settings already in memory.
input_size4 = model4.input_shape[1:3]
train_generator4 = traindatagen.flow_from_dataframe(dataframe=data_train,
                                                    x_col='file_path',
                                                    y_col='class',
                                                    target_size=input_size4,
                                                    batch_size=batch_size)
val_generator4 = valdatagen.flow_from_dataframe(dataframe=data_test,
                                                x_col='file_path',
                                                y_col='class',
                                                target_size=input_size4,
                                                batch_size=batch_size)

# `validation_generator` is never defined in this notebook; step counts come
# from the generators (ceil(samples / batch_size)) instead of hard-coded totals.
history4 = model4.fit(train_generator4,
                    steps_per_epoch=len(train_generator4),
                    epochs=100,
                    validation_data=val_generator4,
                    validation_steps=len(val_generator4),
                    class_weight=class_weights,
                    callbacks=[earlyStopping, best_model4])

In [None]:
# accuracy & validation accuracy
# A Keras History object stores its per-epoch logs in `.history`; the
# attribute `.history4` does not exist.
acc4 = history4.history['accuracy']
val_acc4 = history4.history['val_accuracy']

In [None]:
# loss values & validation loss values
# A Keras History object stores its per-epoch logs in `.history`; the
# attribute `.history4` does not exist.
loss4 = history4.history['loss']
val_loss4 = history4.history['val_loss']

In [None]:
# Plot training
# Early stopping usually ends training before epoch 100, so the x axis is
# sized from the recorded history rather than a fixed range(100).
epochs_range = range(len(acc4))
plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc4, label='Training Accuracy')
plt.plot(epochs_range, val_acc4, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.grid()
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss4, label='Training Loss')
plt.plot(epochs_range, val_loss4, label='Validation Loss')
plt.legend(loc='upper right')
plt.grid()
plt.title('Training and Validation Loss')

### NASNetLarge model (classification_models)

In [None]:
# Look up the NASNetLarge constructor from classification_models; the second
# item returned by Classifiers.get is discarded.
NASNetLarge, _ = Classifiers.get('nasnetlarge')

In [None]:
# ImageNet-pretrained NASNetLarge backbone without its head, for 331x331 RGB inputs.
base_model5 = NASNetLarge(weights='imagenet',
                          include_top=False,
                          input_shape=(331, 331, 3))

# Functional head: global average pooling followed by a 17-way softmax.
x = GlobalAveragePooling2D()(base_model5.output)
output = Dense(17, activation='softmax')(x)
model5 = Model(inputs=[base_model5.input], outputs=[output])

In [None]:
# Compile the model
# The generators are created without `class_mode`, so they yield one-hot
# labels (the default is 'categorical'). That requires categorical_crossentropy;
# the sparse variant expects integer class ids and fails on one-hot targets.
model5.compile(optimizer='adam',
               loss='categorical_crossentropy',
               metrics=['accuracy'])
model5.summary()

In [None]:
# Checkpoint after every epoch (save_best_only is off); epoch number, loss and
# accuracy are embedded in each file name.
file_path = model5.name + '.{epoch:02d}-{loss:.2f}-{accuracy:.2f}.hdf5'
best_model5 = ModelCheckpoint(filepath=file_path,
                              monitor='loss',
                              save_best_only=False)

In [None]:
# Train model
# model5 takes 331x331 images while the shared generators yield 224x224, so
# this cell builds generators at the model's own input resolution from the
# fold data and augmentation settings already in memory.
input_size5 = model5.input_shape[1:3]
train_generator5 = traindatagen.flow_from_dataframe(dataframe=data_train,
                                                    x_col='file_path',
                                                    y_col='class',
                                                    target_size=input_size5,
                                                    batch_size=batch_size)
val_generator5 = valdatagen.flow_from_dataframe(dataframe=data_test,
                                                x_col='file_path',
                                                y_col='class',
                                                target_size=input_size5,
                                                batch_size=batch_size)

# `validation_generator` is never defined in this notebook; step counts come
# from the generators (ceil(samples / batch_size)) instead of hard-coded totals.
history5 = model5.fit(train_generator5,
                    steps_per_epoch=len(train_generator5),
                    epochs=100,
                    validation_data=val_generator5,
                    validation_steps=len(val_generator5),
                    class_weight=class_weights,
                    callbacks=[earlyStopping, best_model5])

In [None]:
# accuracy & validation accuracy
# A Keras History object stores its per-epoch logs in `.history`; the
# attribute `.history5` does not exist.
acc5 = history5.history['accuracy']
val_acc5 = history5.history['val_accuracy']

In [None]:
# loss values & validation loss values
# A Keras History object stores its per-epoch logs in `.history`; the
# attribute `.history5` does not exist.
loss5 = history5.history['loss']
val_loss5 = history5.history['val_loss']

In [None]:
# Plot training
# Early stopping usually ends training before epoch 100, so the x axis is
# sized from the recorded history rather than a fixed range(100).
epochs_range = range(len(acc5))
plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc5, label='Training Accuracy')
plt.plot(epochs_range, val_acc5, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.grid()
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss5, label='Training Loss')
plt.plot(epochs_range, val_loss5, label='Validation Loss')
plt.legend(loc='upper right')
plt.grid()
plt.title('Training and Validation Loss')

### EfficientNetB7 model

In [None]:
# ImageNet-pretrained EfficientNetB7 backbone without its head, for 224x224
# RGB inputs; features are reduced with global max pooling.
efficientnet_options = dict(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
    pooling='max',
)
base_model6 = efn.EfficientNetB7(**efficientnet_options)

In [None]:
# EfficientNetB7 features -> 512-unit hidden layer -> 17-way softmax.
model6 = Sequential([
    base_model6,
    Dense(512, activation='relu'),
    Dense(17, activation='softmax'),
])

In [None]:
# Compile the model
# The generators are created without `class_mode`, so they yield one-hot
# labels (the default is 'categorical'). That requires categorical_crossentropy;
# the sparse variant expects integer class ids and fails on one-hot targets.
model6.compile(optimizer='adam',
               loss='categorical_crossentropy',
               metrics=['accuracy'])
model6.summary()

In [None]:
# Checkpoint after every epoch (save_best_only is off); epoch number, loss and
# accuracy are embedded in each file name.
file_path = model6.name + '.{epoch:02d}-{loss:.2f}-{accuracy:.2f}.hdf5'
best_model6 = ModelCheckpoint(filepath=file_path,
                              monitor='loss',
                              save_best_only=False)

In [None]:
# Train model
# Validation uses `val_generator` (the name `validation_generator` is never
# defined in this notebook). Step counts come from the generators themselves -
# len(generator) is ceil(samples / batch_size) - instead of hard-coded totals.
history6 = model6.fit(train_generator,
                    steps_per_epoch=len(train_generator),
                    epochs=100,
                    validation_data=val_generator,
                    validation_steps=len(val_generator),
                    class_weight=class_weights,
                    callbacks=[earlyStopping, best_model6])

In [None]:
# accuracy & validation accuracy
# A Keras History object stores its per-epoch logs in `.history`; the
# attribute `.history6` does not exist.
acc6 = history6.history['accuracy']
val_acc6 = history6.history['val_accuracy']

In [None]:
# loss values & validation loss values
# A Keras History object stores its per-epoch logs in `.history`; the
# attribute `.history6` does not exist.
loss6 = history6.history['loss']
val_loss6 = history6.history['val_loss']

In [None]:
# Plot training
# Plots the EfficientNetB7 curves (acc6 / loss6); this cell previously
# re-plotted the NASNetLarge series acc5 / loss5. The x axis is sized from the
# recorded history because early stopping usually ends training before epoch 100.
epochs_range = range(len(acc6))
plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc6, label='Training Accuracy')
plt.plot(epochs_range, val_acc6, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.grid()
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss6, label='Training Loss')
plt.plot(epochs_range, val_loss6, label='Validation Loss')
plt.legend(loc='upper right')
plt.grid()
plt.title('Training and Validation Loss')