# Modèle pré-entraîné VGG16 : classification binaire COVID / SAIN (NORMAL) par extraction de features

In [1]:
# Chargement des modules necessaire :

import os

import numpy as np

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D
from tensorflow.keras import optimizers
from tensorflow.keras import applications
from tensorflow.keras.models import Model


In [2]:
# Locations of the training and validation image folders:
trainDir = 'Data/TRAIN'
validationDir = 'Data/TEST'

# Target image dimensions (224 x 224):
img_width = img_height = 224

# List and keep the file names found in each class sub-folder
# (os.listdir returns bare names, not full paths):
ImageTRAINCOVID = os.listdir(f'{trainDir}/COVID')
ImageTRAINNORMAL = os.listdir(f'{trainDir}/NORMAL')

ImageTESTCOVID = os.listdir(f'{validationDir}/COVID')
ImageTESTNORMAL = os.listdir(f'{validationDir}/NORMAL')

# Report how many entries were found in each folder:
print(f"Il y a {len(ImageTRAINCOVID)} images d'entrainement de patient covid.")
print(f"Il y a {len(ImageTRAINNORMAL)} images d'entrainement de patient non-covid.")
print(f"Il y a {len(ImageTESTCOVID)} images test de patient covid.")
print(f"Il y a {len(ImageTESTNORMAL)} images test de patient non-covid.")


Il y a 460 images d'entrainement de patient covid.
Il y a 1266 images d'entrainement de patient non-covid.
Il y a 116 images test de patient covid.
Il y a 317 images test de patient non-covid.


In [3]:
# Visualisation de 6 exemples en 2 lignes et 3 colonnes pour chaque classe :

    # Chourouk

In [4]:
# Preprocessing:
# every pixel value is multiplied by 1/255.
datagen = ImageDataGenerator(rescale=1.0 / 255)

# Batch size used by the directory iterators (and reused later for training):
batch_size = 32

# Build the image iterators from the class folders.
# class_mode=None -> the iterators yield images only, no labels.
# shuffle=False   -> images are yielded in a fixed order, so labels can be
#                    rebuilt afterwards from that order.
train_generator_bottleneck = datagen.flow_from_directory(
    directory=trainDir,
    shuffle=False,
    class_mode=None,
    batch_size=batch_size,
    target_size=(img_width, img_height),
)

validation_generator_bottleneck = datagen.flow_from_directory(
    directory=validationDir,
    shuffle=False,
    class_mode=None,
    batch_size=batch_size,
    target_size=(img_width, img_height),
)



Found 1726 images belonging to 2 classes.
Found 433 images belonging to 2 classes.


In [5]:
# Load VGG16 with its ImageNet-trained convolutional weights, leaving out the
# fully-connected top so that only the convolutional base is kept.
model_vgg = applications.VGG16(
    weights='imagenet',
    include_top=False,
)

In [6]:
# Use the VGG16 convolutional base to extract features from our images
# (the output of the convolutional network is what gets stored).

# Number of prediction steps, derived from the iterators rather than from
# hard-coded image counts.
# NOTE(review): floor division keeps whole batches only, so the trailing
# partial batch of each directory is not extracted; the label arrays used for
# training must be cut to the same number of rows.
train_steps = train_generator_bottleneck.samples // batch_size
validation_steps = validation_generator_bottleneck.samples // batch_size

# Rewind the iterators so that re-running this cell starts again from the
# first image of each directory.
train_generator_bottleneck.reset()
validation_generator_bottleneck.reset()

train_features = model_vgg.predict_generator(train_generator_bottleneck, train_steps)

validation_features = model_vgg.predict_generator(validation_generator_bottleneck, validation_steps)

# The extraction is slow, so the resulting features are written to disk.
# The output folder is created first so the computation is not lost on a
# missing directory, and np.save is given the path so that NumPy opens and
# closes the file itself (the previous open(...) handles were never closed).
os.makedirs('models', exist_ok=True)
np.save('models/trainFeatures_VGG16_AllData.npy', train_features)
np.save('models/validationFeatures_VGG16_AllData.npy', validation_features)

In [7]:
# If the extraction has already been done, reload the saved features instead
# of recomputing them.
# np.load is given the path directly so that NumPy opens and closes the file
# itself; the previous open(..., 'rb') handles were never closed.
train_features = np.load('models/trainFeatures_VGG16_AllData.npy')

validation_features = np.load('models/validationFeatures_VGG16_AllData.npy')

In [8]:
# Build the labels from the directory iterators instead of hand-written counts.
#
# Both iterators were created with shuffle=False, so `classes` holds the class
# index of every image in the same order in which the features were extracted
# (Keras numbers the class folders in alphanumeric order: COVID -> 0,
# NORMAL -> 1, which matches the convention used here before).
#
# Only whole batches were extracted, so each label vector is cut to the number
# of feature rows actually available.
#
# The previous hard-coded [0] * 448 + [1] * 1248 was wrong for the training
# set: the first 1696 images in directory order are 460 COVID followed by
# 1236 NORMAL, so 12 COVID images were being labelled as NORMAL.
train_labels = np.asarray(train_generator_bottleneck.classes[:len(train_features)])

validation_labels = np.asarray(validation_generator_bottleneck.classes[:len(validation_features)])

# Fail early if the (possibly cached) features do not match the directories.
assert len(train_labels) == len(train_features), \
    'train features and labels have different lengths'
assert len(validation_labels) == len(validation_features), \
    'validation features and labels have different lengths'

# Several fully-connected heads can now be tried on top of these features:

In [9]:
# First head: flatten the extracted features, one 64-unit ReLU layer,
# dropout at 0.5, then a single sigmoid output unit.
model_top1 = Sequential([
    Flatten(input_shape=train_features.shape[1:]),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

# Compile with binary cross-entropy, RMSprop and accuracy tracking:
model_top1.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

# Print the layer summary:
model_top1.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 25088)             0         
_________________________________________________________________
dense (Dense)                (None, 64)                1605696   
_________________________________________________________________
dropout (Dropout)            (None, 64)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 65        
Total params: 1,605,761
Trainable params: 1,605,761
Non-trainable params: 0
_________________________________________________________________


In [10]:
# Second head: flatten the extracted features, one 128-unit ReLU layer,
# dropout at 0.5, then a single sigmoid output unit.
model_top2 = Sequential([
    Flatten(input_shape=train_features.shape[1:]),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

# Compile with binary cross-entropy, RMSprop and accuracy tracking:
model_top2.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

# Print the layer summary:
model_top2.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 25088)             0         
_________________________________________________________________
dense_2 (Dense)              (None, 128)               3211392   
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 129       
Total params: 3,211,521
Trainable params: 3,211,521
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Third head: flatten the extracted features, one 256-unit ReLU layer,
# dropout at 0.5, then a single sigmoid output unit.
model_top3 = Sequential([
    Flatten(input_shape=train_features.shape[1:]),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

# Compile with binary cross-entropy, RMSprop and accuracy tracking:
model_top3.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

# Print the layer summary:
model_top3.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_2 (Flatten)          (None, 25088)             0         
_________________________________________________________________
dense_4 (Dense)              (None, 256)               6422784   
_________________________________________________________________
dropout_2 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 257       
Total params: 6,423,041
Trainable params: 6,423,041
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Training parameter: number of epochs passed to each model's fit() call.
epochs = 10

In [13]:
# Train the first model on the extracted features and keep the returned
# history object; the validation features are evaluated after each epoch.
historique1 = model_top1.fit(
    x=train_features,
    y=train_labels,
    validation_data=(validation_features, validation_labels),
    batch_size=batch_size,
    epochs=epochs,
)

Train on 1696 samples, validate on 416 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [14]:
# Train the second model on the extracted features and keep the returned
# history object; the validation features are evaluated after each epoch.
historique2 = model_top2.fit(
    x=train_features,
    y=train_labels,
    validation_data=(validation_features, validation_labels),
    batch_size=batch_size,
    epochs=epochs,
)

Train on 1696 samples, validate on 416 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [15]:
# Train the third model on the extracted features and keep the returned
# history object; the validation features are evaluated after each epoch.
historique3 = model_top3.fit(
    x=train_features,
    y=train_labels,
    validation_data=(validation_features, validation_labels),
    batch_size=batch_size,
    epochs=epochs,
)

Train on 1696 samples, validate on 416 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Visualisation de l'entrainement :


In [18]:
# Evaluation.

# On the validation data: the three heads are evaluated in order and each
# result is kept under its own name.
eval1, eval2, eval3 = [
    candidate.evaluate(validation_features, validation_labels, verbose=2)
    for candidate in (model_top1, model_top2, model_top3)
]

# On the test data:

    # TODO: to be added

# Take a few random examples:

    # visualise, predict and display them:

    # => Chourouk

416/1 - 0s - loss: 0.0026 - accuracy: 0.9976
416/1 - 0s - loss: 0.0052 - accuracy: 0.9952
416/1 - 0s - loss: 0.0016 - accuracy: 0.9976
[0.004949446385168699, 0.99759614]
[0.010339963535211338, 0.9951923]
[0.0031635455436091176, 0.99759614]


In [16]:
# Optionally persist the trained heads. Only the weights are written
# (save_weights), one HDF5 file per head.
model_top1.save_weights(filepath='models/vgg16_lightTop_224_224_10G.h5')
model_top2.save_weights(filepath='models/vgg16_mediumTop_224_224_10G.h5')
model_top3.save_weights(filepath='models/vgg16_heavyTop_224_224_10G.h5')

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: FullModels/vgg16_lightTop_224_224_10G/assets
INFO:tensorflow:Assets written to: FullModels/vgg16_mediumTop_224_224_10G/assets
INFO:tensorflow:Assets written to: FullModels/vgg16_heavyTop_224_224_10G/assets
