# Modèle pre-entrainer XCEPTION : classification binaire COVID / SAIN (NORMAL) par extraction de features

In [7]:
# Chargement des modules necessaire :

import os

import numpy as np

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D
from tensorflow.keras import optimizers
from tensorflow.keras import applications
from tensorflow.keras.models import Model

In [11]:
# On declare les chemins vers les donnees :

trainDir = 'Data/TRAIN'
validationDir = 'Data/TEST'
        
# On declare les dimensions pour les images (224,224) :

img_width, img_height = 224, 224

# On liste et stocke les chemins des images :

ImageTRAINCOVID = os.listdir(trainDir + '/COVID')
ImageTRAINNORMAL = os.listdir(trainDir + '/NORMAL')

ImageTESTCOVID = os.listdir(validationDir + '/COVID')
ImageTESTNORMAL = os.listdir(validationDir + '/NORMAL')

# On affiche le nombre d'image trouve :

print('Il y a ' + str(len(ImageTRAINCOVID)) + ' images d\'entrainement de patient covid.') 
print('Il y a ' + str(len(ImageTRAINNORMAL)) + ' images d\'entrainement de patient non-covid.') 
print('Il y a ' + str(len(ImageTESTCOVID)) + ' images test de patient covid.') 
print('Il y a ' + str(len(ImageTESTNORMAL)) + ' images test de patient non-covid.') 

Il y a 401 images d'entrainement de patient covid.
Il y a 401 images d'entrainement de patient non-covid.
Il y a 102 images test de patient covid.
Il y a 102 images test de patient non-covid.


In [12]:
# Visualisation de 6 exemples en 2 lignes et 3 colonnes pour chaque classe :




In [14]:
# Preprocessing :

# On rescale les images :
datagen = ImageDataGenerator(rescale=1./255)

# On definit la batch size :
batch_size = 32

# On prepare les tableaux de donnees depuis les images :

train_generator_bottleneck = datagen.flow_from_directory(
        trainDir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode=None,
        shuffle=False)

validation_generator_bottleneck = datagen.flow_from_directory(
        validationDir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode=None,
        shuffle=False)


Found 802 images belonging to 2 classes.
Found 204 images belonging to 2 classes.


In [16]:
# Chargement de XCEPTION sans la partie fully-connected avec le reseau convolutif :

model_XCEPTION = applications.Xception(include_top=False, weights='imagenet')

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.4/xception_weights_tf_dim_ordering_tf_kernels_notop.h5


In [17]:
# On utilse le model XCEPTION pour extraire les features de nos images 
# (on fait recupere la sortie du reseau convolutionnel) :

train_features = model_XCEPTION.predict_generator(train_generator_bottleneck, 802 // batch_size)

np.save(open('models/trainFeatures.npy', 'wb'), train_features)

validation_features = model_XCEPTION.predict_generator(validation_generator_bottleneck, 204 // batch_size)

# L'opération étant longue on enregistre les features obtenus :

np.save(open('models/validationFeatures.npy', 'wb'), validation_features)

In [None]:
# Si l'operation à deja été effectuer on charge les features :

train_features = np.load(open('models/trainFeatures.npy', 'rb'))

validation_features = np.load(open('models/validationFeatures.npy', 'rb'))

In [34]:
# On definit les labels :

train_labels = np.array([0] * 400 + [1] * 400)

validation_labels = np.array([0] * 96 + [1] * 96)

# On peut maintenant tester plusieurs couche fully-connected à partir de ce modele :

In [19]:
# Premier modele : 

model_top1 = Sequential()
model_top1.add(Flatten(input_shape=train_features.shape[1:]))
model_top1.add(Dense(64, activation='relu'))
model_top1.add(Dropout(0.5))
model_top1.add(Dense(1, activation='sigmoid'))


# On compile :

model_top1.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])

# On affiche :

model_top1.summary()

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 100352)            0         
_________________________________________________________________
dense (Dense)                (None, 64)                6422592   
_________________________________________________________________
dropout (Dropout)            (None, 64)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 65        
Total params: 6,422,657
Trainable params: 6,422,657
Non-trainable params: 0
_________________________________________________________________


In [20]:
# Second modele :

model_top2 = Sequential()
model_top2.add(Flatten(input_shape=train_features.shape[1:]))
model_top2.add(Dense(128, activation='relu'))
model_top2.add(Dropout(0.5))
model_top2.add(Dense(1, activation='sigmoid'))


# On compile :

model_top2.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])

# On affiche :

model_top2.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 100352)            0         
_________________________________________________________________
dense_2 (Dense)              (None, 128)               12845184  
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 129       
Total params: 12,845,313
Trainable params: 12,845,313
Non-trainable params: 0
_________________________________________________________________


In [21]:
# Troisieme modele :

model_top3 = Sequential()
model_top3.add(Flatten(input_shape=train_features.shape[1:]))
model_top3.add(Dense(256, activation='relu'))
model_top3.add(Dropout(0.5))
model_top3.add(Dense(1, activation='sigmoid'))


# On compile :

model_top3.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])

# On affiche :

model_top3.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_2 (Flatten)          (None, 100352)            0         
_________________________________________________________________
dense_4 (Dense)              (None, 256)               25690368  
_________________________________________________________________
dropout_2 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 257       
Total params: 25,690,625
Trainable params: 25,690,625
Non-trainable params: 0
_________________________________________________________________


In [35]:
# On definit les parametres pour l'entrainement :

epochs = 10
train_samples = 802
validation_samples = 204

In [36]:
# On entraine le modèle 1:

hystory1 = model_top1.fit(train_features, train_labels,
        epochs=epochs, 
        batch_size=batch_size,
        validation_data=(validation_features, validation_labels))

Train on 800 samples, validate on 192 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [37]:
# On entraine le modèle 2:

hystory2 = model_top2.fit(train_features, train_labels,
        epochs=epochs, 
        batch_size=batch_size,
        validation_data=(validation_features, validation_labels))

Train on 800 samples, validate on 192 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [38]:
# On entraine le modèle 3:

hystory3 = model_top3.fit(train_features, train_labels,
        epochs=epochs, 
        batch_size=batch_size,
        validation_data=(validation_features, validation_labels))

Train on 800 samples, validate on 192 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Visualisation de l'entrainement :



In [39]:
# On evalue le modèle 1:

model_top1.evaluate(validation_features, validation_labels,verbose=2)

# Avec les donnes de validation :

# Avec les donnes de test :

# On prend quelques exemples aleatoires :

    # on visualise, on test , on affiche :

192/192 - 0s - loss: 2.3446 - acc: 0.9688


[2.344556995813266, 0.96875]

In [40]:
# On evalue le modèle 2:

model_top2.evaluate(validation_features, validation_labels,verbose=2)

192/192 - 0s - loss: 4.1675 - acc: 0.9635


[4.167477752688397, 0.9635417]

In [41]:
# On evalue le modèle 3:

model_top3.evaluate(validation_features, validation_labels,verbose=2)

192/192 - 0s - loss: 6.8676 - acc: 0.9479


[6.867551585038503, 0.9479167]

In [None]:
# Data augmentation :

# On reproduit le processus puis on compare :