In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn import metrics
from sklearn import preprocessing
from sklearn.ensemble import ExtraTreesClassifier, AdaBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
import seaborn
import joblib
from utils import load_data

In [2]:
# Reduce TensorFlow's native log output by setting TF_CPP_MIN_LOG_LEVEL to '2'.
# NOTE(review): tensorflow is already imported by the first cell, and this
# variable is normally consulted when the native library loads — confirm that
# setting it here still has an effect, or move it above that import.
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

In [3]:
# Relative locations of the cached datasets

# Folders holding the raw image arrays
training_set_path = "sets/training_set/"
validation_set_path = "sets/validation_set/"

# Folders holding the features extracted by the CNN
extracted_training_set_path = "sets/extracted_training_set/"
extracted_validation_set_path = "sets/extracted_validation_set/"

# Array files: inputs (X) and one-hot labels (y) for each raw set
X_training_set_path = f"{training_set_path}X_training.npy"
y_training_set_path = f"{training_set_path}y_training.npy"
X_validation_set_path = f"{validation_set_path}X_validation.npy"
y_validation_set_path = f"{validation_set_path}y_validation.npy"

# Array files: extracted features (labels are shared with the raw sets)
X_extracted_training_set_path = f"{extracted_training_set_path}X_extracted_training.npy"
X_extracted_validation_set_path = f"{extracted_validation_set_path}X_extracted_validation.npy"

**Deep learning part**

Note: See [here](https://www.tensorflow.org/api_docs/python/tf/keras/utils/image_dataset_from_directory) for dataset directory structure.

In [None]:
# Model settings
model_name = 'retrain_mobilenet'  # used for the saved model file name and the plot titles
retrain_convolution = True        # when False, the pretrained layers are frozen

# Data settings
num_classes = 50                  # width of the final softmax layer
validation_size = 0.2             # fraction passed to train_test_split as test_size
image_size = 192                  # image side length, in pixels
input_shape = (image_size, image_size, 3)

In [None]:
# Training parameters
# The optimizer is created with a learning rate of 1e-4; the original note
# gave 0.001 as the library default, which is NOT the value used here.
optimizer = keras.optimizers.Adam(1e-4) # learning_rate=1e-4 (overrides the 0.001 default)
epochs = 50
batch_size = 16

**Load data from dataset images**

In [None]:
# Load the images and normalize them to the range [-1, 1]
# (assumes load_data returns pixel values in [0, 255] — TODO confirm).
X, y = load_data((image_size, image_size))

# In-place true division raises on integer arrays (e.g. uint8 pixels), so
# make sure we operate on floats before scaling. Float inputs are left as-is.
X = np.asarray(X)
if not np.issubdtype(X.dtype, np.floating):
    X = X.astype(np.float32)
X /= 127.5
X -= 1

# Hold out `validation_size` of the samples as the validation set.
# The split is cached to disk and reused by later cells, so a fixed seed keeps
# a re-run of this cell consistent with previously saved arrays.
split_seed = 42
X, X_validation, y, y_validation = train_test_split(
    X, y, test_size=validation_size, random_state=split_seed)

**Load data from already saved numpy array**

In [None]:
# Restore the training and validation arrays from their .npy files
X, y = np.load(X_training_set_path), np.load(y_training_set_path)
X_validation, y_validation = np.load(X_validation_set_path), np.load(y_validation_set_path)

**Save dataset for next time**

In [None]:
# Create the target folders when missing. exist_ok=True replaces the separate
# exists() check, so there is no window between checking and creating.
os.makedirs(training_set_path, exist_ok=True)
os.makedirs(validation_set_path, exist_ok=True)

# Persist the current split so later runs can skip image loading
np.save(X_training_set_path, X)
np.save(y_training_set_path, y)
np.save(X_validation_set_path, X_validation)
np.save(y_validation_set_path, y_validation)

In [None]:
# Callbacks (early stopping is currently disabled)
#early_callback = keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

In [None]:
# Keras data augmentation: random rotations, shifts and horizontal flips.
# validation_split reserves 20% of the samples for flow(..., subset='validation').
datagen = ImageDataGenerator(
    #featurewise_center=True,
    #featurewise_std_normalization=True,
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    validation_split=0.2)

# fit() only computes dataset statistics for the feature-wise options
# (centering, std normalization, ZCA whitening). With all of them disabled it
# would just copy the whole dataset for nothing, so call it only when one of
# them is re-enabled above.
if datagen.featurewise_center or datagen.featurewise_std_normalization or datagen.zca_whitening:
    datagen.fit(X)

**Select a model**

In [None]:
# MobileNetV2 backbone with ImageNet weights, created without its top layers
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2

pretrain = MobileNetV2(
    input_shape=input_shape,
    alpha=0.5,
    include_top=False,
    weights="imagenet",
)

In [None]:
# Optionally freeze every pretrained layer so that only the new head is trained.
# (The loop already covers layer 0, so no separate statement is needed for it.)
if not retrain_convolution:
    for layer in pretrain.layers:
        layer.trainable = False

pretrain_out = pretrain.output

# New classification head: pool the backbone output, flatten it, and map it
# to one softmax probability per class.
M = layers.MaxPooling2D()(pretrain_out)
M = layers.Flatten()(M)
M = layers.Dense(num_classes, activation="softmax")(M)

# Assemble the full model (it is compiled later, in the training cells)
model = keras.Model(inputs=pretrain.input, outputs=M)
model.summary()

In [None]:
# Fit directly on the in-memory arrays (no augmentation); 20% of the
# training samples are set aside by Keras for per-epoch validation.
model.compile(
    optimizer=optimizer,
    loss="categorical_crossentropy",
    metrics=["accuracy", "top_k_categorical_accuracy"],
)
history = model.fit(
    X,
    y,
    epochs=epochs,
    batch_size=batch_size,
    validation_split=0.2,
    verbose=1,
)

In [None]:
# Training model with tensorflow data augmentation
model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=["accuracy", "top_k_categorical_accuracy"])

# Both generators use the configured `batch_size` (the validation flow used to
# hard-code 16, which would silently diverge if the parameter were changed).
# NOTE(review): the validation subset is drawn from the same generator, so it
# presumably receives the same random transforms — confirm this is intended.
history = model.fit(
         datagen.flow(X, y, batch_size=batch_size, subset='training'),
         validation_data=datagen.flow(X, y, batch_size=batch_size, subset='validation'),
         epochs=epochs,
         verbose=1)

In [None]:
# Evaluate the trained network on the held-out validation arrays.
# `scores` holds the values returned by evaluate() for the loss and the
# metrics given at compile time; it is not displayed by this cell.
scores = model.evaluate(X_validation, y_validation, verbose=1)

In [None]:
# Predicted class index for every validation sample
prediction = np.argmax(model.predict(X_validation), axis=-1)
# NOTE: despite its name, y_prediction holds the TRUE class indices
# (argmax over the one-hot validation labels).
y_prediction = np.argmax(y_validation, axis=-1)

# Confusion matrix of this validation: first argument is the true labels,
# second is the predictions, so rows are true classes and columns predicted.
cm = metrics.confusion_matrix(y_prediction, prediction)
plt.figure(figsize = (10,7))
# fmt="d" prints the integer counts verbatim; the default '.2g' format would
# render counts of 100 or more in scientific notation.
seaborn.heatmap(cm, annot=True, fmt="d", linewidths=1)
plt.xlabel("Predicted class")
plt.ylabel("True class")
plt.savefig("dl_confusion_matrix")

In [None]:
# Top-1 accuracy per epoch, training curve first and validation curve second
for curve in ('accuracy', 'val_accuracy'):
    plt.plot(history.history[curve])
plt.title(f"Retrain {model_name} top 1 accuracy")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend(["Train", "Validation"], loc="upper left")
plt.show()

In [None]:
# Top-k categorical accuracy per epoch, training curve first and validation second
for curve in ('top_k_categorical_accuracy', 'val_top_k_categorical_accuracy'):
    plt.plot(history.history[curve])
plt.title(f"Retrain {model_name} top 5 accuracy")
plt.xlabel("Epoch")
plt.ylabel("Top 5 categorical accuracy")
plt.legend(["Train", "Validation"], loc="upper left")
plt.show()

In [None]:
# Loss per epoch, training curve first and validation curve second
for curve in ('loss', 'val_loss'):
    plt.plot(history.history[curve])
plt.title(f"Retrain {model_name} loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend(["Train", "Validation"], loc="upper left")
plt.show()

In [None]:
# Save the model under ./models/<model_name>.keras
model_path = './models/' + model_name + '.keras'
# Make sure the target folder exists first, as the dataset save cells do for
# their own folders; nothing else in the notebook creates ./models/.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
model.save(model_path)

**Set up the feature extractor for Extremely Randomized Trees (ET) and Support Vector Machine (SVM)**

In [4]:
# Name of the trained network that is reloaded as feature extractor
dl_model_name = model_name = 'retrain_mobilenet'

# Offset from the end of model.layers of the layer whose output is used as
# the feature vector (used as model.layers[-layers_to_remove]), so that only
# the convolutional part of the network is kept.
layers_to_remove = 2

In [5]:
# Load the deep learning model saved under ./models/<dl_model_name>.keras
model = keras.models.load_model('./models/' + dl_model_name + '.keras')

In [6]:
# Output of the CNN (without the dense part): a model that shares the loaded
# network's input and stops at model.layers[-layers_to_remove].
feature_extractor = keras.Model(model.input, model.layers[-layers_to_remove].output)

**Extremely Randomized Trees & Support Vector Machine model** <br>
This part adds a classical model on top of the features produced by our convolutional network:

- 1. We train our model on the same training set
- 2. Test accuracy on the same validation set

In [8]:
# Number of trees in the Extra Trees ensemble
n_estimators = 300

# Files in which the fitted classical models are stored with joblib
et_analyzer_path = "./et_analyzer_model.joblib"
svc_analyzer_path = "./svc_analyzer_model.joblib"

def transform_one_hot_labels(one_hot_labels):
    """Convert one-hot encoded labels to class indices.

    Args:
        one_hot_labels: iterable of one-hot label rows.

    Returns:
        A list with, for each row, the position of its largest entry.
    """
    return [np.argmax(row) for row in one_hot_labels]

def print_accuracy(predicted_prob, y_validation):
    """Print the top-1 accuracy of a set of class-probability predictions.

    Args:
        predicted_prob: iterable of per-class probability rows, one per sample.
        y_validation: true class indices, in the same order as the rows.
    """
    # The predicted class of a sample is the index of its highest probability
    top_classes = [np.argmax(row) for row in predicted_prob]
    score = metrics.accuracy_score(y_validation, top_classes)
    print("Top 1 accuracy: ", score)

**1. Training part**

In [5]:
# Reload the cached training inputs
X = np.load(X_training_set_path)

# The classifiers below are trained on class indices rather than one-hot rows,
# so the stored labels are converted right after loading.
y = transform_one_hot_labels(np.load(y_training_set_path))

**Extract training features**

In [10]:
# Extract features: run the training images through the truncated network.
# NOTE: this overwrites X with its own features, so the cell is not
# idempotent — reload the training set before running it a second time.
X = feature_extractor.predict(X)

**Load extracted training features**

In [6]:
# Alternative to extracting: reload previously saved training features into X
X = np.load(X_extracted_training_set_path)

**Save extracted training features**

In [11]:
# Create the target folder when missing (exist_ok=True replaces the separate
# exists() check), then cache the extracted training features.
os.makedirs(extracted_training_set_path, exist_ok=True)

np.save(X_extracted_training_set_path, X)

**Extremely Randomized Trees**

In [None]:
# Init Extra Trees model with n_estimators trees.
# NOTE(review): no random_state is passed, so results presumably vary between runs.
et_model = ExtraTreesClassifier(n_estimators = n_estimators)

In [None]:
# Train the Extra Trees model on the features in X and the class indices in y
et_model.fit(X, y)

In [None]:
# Save the fitted Extra Trees model to et_analyzer_path
joblib.dump(et_model, et_analyzer_path)

**Support Vector Machines**

In [12]:
# Init the SVM model. probability=True is needed because the validation cells
# call predict_proba on it.
svc_model = SVC(probability=True)

In [None]:
# Train the SVM model on the features in X and the class indices in y
svc_model.fit(X, y)

In [None]:
# Save the fitted SVM model to svc_analyzer_path
joblib.dump(svc_model, svc_analyzer_path)

**2. Validation part**

In [5]:
# Reload the cached validation inputs
X_validation = np.load(X_validation_set_path)

# Accuracy is computed against class indices, so the stored one-hot labels
# are converted right after loading.
y_validation = transform_one_hot_labels(np.load(y_validation_set_path))

**Load a model**

In [None]:
# Load the saved Extra Trees model.
# NOTE: this rebinds `model`, which held the Keras network up to here;
# feature_extractor was built before and keeps its own reference to it.
# Run either this cell or the SVC one — the last one executed wins.
model = joblib.load(et_analyzer_path)

In [9]:
# Load the saved SVC model.
# NOTE: this rebinds `model` (see the Extra Trees cell); run only one of the two.
model = joblib.load(svc_analyzer_path)

**Extract validation features**

In [None]:
# Extract features: run the validation images through the truncated network
X_validation_features = feature_extractor.predict(X_validation)

**Load extracted validation features**

In [10]:
# Alternative to extracting: reload previously saved validation features
X_validation_features = np.load(X_extracted_validation_set_path)

**Save extracted validation features**

In [11]:
# Create the target folder when missing (exist_ok=True replaces the separate
# exists() check), then cache the extracted validation features.
os.makedirs(extracted_validation_set_path, exist_ok=True)

np.save(X_extracted_validation_set_path, X_validation_features)

In [11]:
# Predict class probabilities for the validation set with the loaded
# classifier (whichever of ET / SVC was last assigned to `model`)
predicted_prob = model.predict_proba(X_validation_features)

In [None]:
# Top-1 accuracy of the selected classifier on the validation set
print_accuracy(predicted_prob, y_validation)

**Combine Extra Trees and Support Vector Classifier prediction**

In [13]:
# Reload the cached validation features
X_validation_features = np.load(X_extracted_validation_set_path)

# Accuracy is computed against class indices, so the stored one-hot labels
# are converted right after loading.
y_validation = transform_one_hot_labels(np.load(y_validation_set_path))

In [None]:
# Reload both fitted classifiers from their joblib files
et_model = joblib.load(et_analyzer_path)
svc_model = joblib.load(svc_analyzer_path)

# Per-class probabilities of each classifier on the validation features
et_predicted_prob = et_model.predict_proba(X_validation_features)
svc_predicted_prob = svc_model.predict_proba(X_validation_features)

In [None]:
# Combine both classifiers by averaging their probabilities element-wise
predicted_prob = (et_predicted_prob + svc_predicted_prob) / 2

In [None]:
# Top-1 accuracy of the averaged ET + SVC prediction on the validation set
print_accuracy(predicted_prob, y_validation)