# Import Classification model

Import libraries

In [1]:
import tensorflow as tf
import pandas as pd, numpy as np, matplotlib.pyplot as plt, os, json

2023-10-26 09:29:25.781657: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-10-26 09:29:25.781715: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-10-26 09:29:25.781776: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-10-26 09:29:25.790248: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Import "classifier.keras" and "denoiser.h5" from the "models" directory

In [2]:
# Input images and saved models are both located relative to the current
# working directory of the kernel.
data_dir, model_dir = (
    os.path.join(os.getcwd(), relative) for relative in ("./photo/Photo", "models")
)
# Files of the two pre-trained models inside model_dir.
classifier_path, denoiser_path = (
    os.path.join(model_dir, filename) for filename in ("classifier.keras", "denoiser.h5")
)

In [3]:
# Load the two saved Keras models from disk: the image classifier first,
# then the denoiser.
classifier = tf.keras.models.load_model(filepath=classifier_path)
denoiser = tf.keras.models.load_model(filepath=denoiser_path)

2023-10-26 09:29:28.344809: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-10-26 09:29:28.349133: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-10-26 09:29:28.349172: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-10-26 09:29:28.349686: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-10-26 09:29:28.349717: I tensorflow/compile



# Load data

In [4]:
def load_image(image_path):
    """Read an image file and prepare it for InceptionV3.

    Steps performed:
        1. Read the file found at ``image_path``.
        2. Decode it as a 3-channel (RGB) image.
        3. Resize it to (299, 299).
        4. Apply the InceptionV3 preprocessing (pixels scaled between -1 and 1).

    Args:
        image_path: path of the image file to load.

    Returns:
        A pair ``(image, image_path)``: the preprocessed image tensor and the
        path it was read from.
    """
    raw_bytes = tf.io.read_file(image_path)
    rgb_image = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized_image = tf.image.resize(rgb_image, (299, 299))
    return tf.keras.applications.inception_v3.preprocess_input(resized_image), image_path

In [5]:
# --- Captioning model hyperparameters ---

# Vocabulary: number of words kept, and the resulting size of the embedding
# table / output layer (one extra index on top of top_k).
top_k = 5000
vocab_size = 1 + top_k

# Layer sizes.
embedding_dim = 256
units = 512  # size of the hidden layer in the RNN

# Caption generation limits.
max_length = 47  # maximum number of words in a caption
attention_features_shape = 64  # size of the attention layer (weights stored per generated word)

In [6]:
class CNN_Encoder(tf.keras.Model):
    """Encoder applied to image features already extracted by InceptionV3.

    Because the images arrive in a compact, pre-extracted representation, the
    encoder only passes those features through one dense layer followed by a
    ReLU.
    """

    def __init__(self, embedding_dim):
        super().__init__()
        # Shape after fc == (batch_size, 64, embedding_dim)
        self.fc = tf.keras.layers.Dense(embedding_dim)

    def call(self, x):
        """Project the features with the dense layer and apply ReLU."""
        return tf.nn.relu(self.fc(x))

In [7]:
class BahdanauAttention(tf.keras.Model):
    """Attention over image features, conditioned on the decoder hidden state.

    Scores are computed as ``V(tanh(W1(features) + W2(hidden)))`` and
    normalised with a softmax over axis 1.
    """

    def __init__(self, units):
        super().__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, features, hidden):
        """Return ``(context_vector, attention_weights)`` for one decoding step.

        Args:
            features: CNN_Encoder output, shape (batch_size, 64, embedding_dim).
            hidden: decoder hidden state, shape (batch_size, hidden_size).
        """
        # Insert an axis at position 1 so the hidden state broadcasts against
        # every feature location.
        query = tf.expand_dims(hidden, axis=1)

        projected = self.W1(features) + self.W2(query)

        # Unnormalised score for each image feature.
        raw_scores = self.V(tf.nn.tanh(projected))

        weights = tf.nn.softmax(raw_scores, axis=1)

        # Weighted sum of the features along axis 1.
        context = tf.reduce_sum(weights * features, axis=1)

        return context, weights


class RNN_Decoder(tf.keras.Model):
    """GRU decoder that predicts the next caption word using attention.

    Args:
        embedding_dim: size of the word embedding vectors.
        units: size of the GRU state and of the first dense layer.
        vocab_size: number of entries in the embedding table and in the
            output layer.
    """

    def __init__(self, embedding_dim, units, vocab_size):
        super(RNN_Decoder, self).__init__()
        self.units = units

        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(self.units,
                                       return_sequences=True,
                                       return_state=True,
                                       recurrent_initializer='glorot_uniform')
        # Dense layer fed with the GRU output
        self.fc1 = tf.keras.layers.Dense(self.units)
        # Final dense layer, one output per vocabulary entry
        self.fc2 = tf.keras.layers.Dense(vocab_size)

        self.attention = BahdanauAttention(self.units)

    def call(self, x, features, hidden):
        """Run one decoding step.

        Args:
            x: current word id(s) fed to the embedding layer.
            features: encoder output attended over.
            hidden: previous decoder state; it is used by the attention
                module only (the GRU is called without an initial state).

        Returns:
            ``(y, state, attention_weights)``: the output of the final dense
            layer, the GRU state and the attention weights.
        """
        # Attention is defined as a separate model
        context_vector, attention_weights = self.attention(features, hidden)
        # Embed the current word
        x = self.embedding(x)
        # Concatenate the context vector with the embedded word
        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)

        # Feed the concatenated vector to the GRU
        output, state = self.gru(x)

        # Dense layer
        y = self.fc1(output)

        # Collapse the first two axes. The width must come from y itself:
        # taking it from x (the GRU input) is only correct when the width of
        # the concatenated input happens to equal `units`.
        y = tf.reshape(y, (-1, y.shape[2]))

        # Dense layer
        y = self.fc2(y)

        return y, state, attention_weights

    def reset_state(self, batch_size):
        """Return an all-zero state of shape (batch_size, units)."""
        return tf.zeros((batch_size, self.units))

In [8]:
# Instantiate the encoder and the decoder of the captioning model from the
# hyperparameters defined earlier.
encoder = CNN_Encoder(embedding_dim=embedding_dim)
decoder = RNN_Decoder(embedding_dim=embedding_dim, units=units, vocab_size=vocab_size)

In [9]:
# Directory holding the captioning checkpoints (change this path if needed).
checkpoint_path = os.path.abspath("./models/cp/")

# The checkpoint tracks the encoder, the decoder and an optimizer; make sure
# the optimizer uses the same parameters as the original one.
ckpt = tf.train.Checkpoint(
    encoder=encoder,
    decoder=decoder,
    optimizer=tf.keras.optimizers.Adam(),
)
ckpt_manager = tf.train.CheckpointManager(
    checkpoint=ckpt,
    directory=checkpoint_path,
    max_to_keep=5,
)

In [10]:
# Restore the most recent captioning checkpoint. Without one the encoder and
# decoder would keep their initial weights, so fail loudly instead of
# silently continuing.
latest_checkpoint = ckpt_manager.latest_checkpoint
if not latest_checkpoint:
    raise FileNotFoundError(
        f"No captioning checkpoint found in {ckpt_manager.directory!r}"
    )
# expect_partial(): this notebook never trains, so the optimizer state stored
# in the checkpoint is not consumed; this silences the related warnings.
ckpt.restore(latest_checkpoint).expect_partial()
print("Modèle de captioning chargé !")

Modèle de captioning chargé !


In [11]:
# NOTE(review): this redefines load_image; an identical definition already
# appears earlier in this notebook and is shadowed by this one.
def load_image(image_path):
    """Read an image file and prepare it for InceptionV3.

    Steps performed:
        1. Read the file found at ``image_path``.
        2. Decode it as a 3-channel (RGB) image.
        3. Resize it to (299, 299).
        4. Apply the InceptionV3 preprocessing (pixels scaled between -1 and 1).

    Args:
        image_path: path of the image file to load.

    Returns:
        A pair ``(image, image_path)``: the preprocessed image tensor and the
        path it was read from.
    """
    raw_bytes = tf.io.read_file(image_path)
    rgb_image = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized_image = tf.image.resize(rgb_image, (299, 299))
    return tf.keras.applications.inception_v3.preprocess_input(resized_image), image_path

In [12]:
# InceptionV3 with ImageNet weights and without its top classification layers.
image_model = tf.keras.applications.InceptionV3(weights='imagenet',
                                                include_top=False)

# Input of the new image pre-processing model.
new_input = image_model.input
# Output of the last layer, i.e. the compact representation of the image.
hidden_layer = image_model.layers[-1].output

# Model mapping an image to its dense InceptionV3 representation.
image_features_extract_model = tf.keras.Model(inputs=new_input, outputs=hidden_layer)

In [13]:
def evaluate(image):
    """Generate a caption for one image by sampling words from the decoder.

    Relies on the module-level ``tokenizer``, ``encoder``, ``decoder`` and
    ``image_features_extract_model`` being defined before the call.

    Args:
        image: path of the image file, passed to ``load_image``.

    Returns:
        A pair ``(result, attention_plot)``. ``result`` is the list of
        generated words; it ends with ``'<end>'`` when the decoder produced
        that token before ``max_length`` words. ``attention_plot`` is a numpy
        array with one row of attention weights per word of ``result``.
    """
    attention_plot = np.zeros((max_length, attention_features_shape))

    hidden = decoder.reset_state(batch_size=1)

    # Add a batch axis, extract the InceptionV3 features and flatten the
    # spatial axes into a single one before encoding.
    temp_input = tf.expand_dims(load_image(image)[0], 0)
    img_tensor_val = image_features_extract_model(temp_input)
    img_tensor_val = tf.reshape(img_tensor_val, (img_tensor_val.shape[0], -1, img_tensor_val.shape[3]))

    features = encoder(img_tensor_val)

    dec_input = tf.expand_dims([tokenizer.word_index['<start>']], 0)
    result = []

    for i in range(max_length):
        predictions, hidden, attention_weights = decoder(dec_input, features, hidden)

        attention_plot[i] = tf.reshape(attention_weights, (-1, )).numpy()

        # The next word is sampled from the predictions, not taken as argmax.
        predicted_id = tf.random.categorical(predictions, 1)[0][0].numpy()
        word = tokenizer.index_word[predicted_id]
        result.append(word)

        if word == '<end>':
            break

        # The sampled word becomes the decoder input of the next step.
        dec_input = tf.expand_dims([predicted_id], 0)

    # Keep only the rows that were filled, whether or not '<end>' was reached.
    return result, attention_plot[:len(result), :]


In [14]:
from tensorflow.keras.preprocessing.text import tokenizer_from_json

# Read the serialized tokenizer (a JSON string) back from disk.
with open('tokenizer.json', mode='r', encoding='utf-8') as tokenizer_file:
    tokenizer_json = json.load(tokenizer_file)

# Rebuild the tokenizer object from that JSON string.
tokenizer = tokenizer_from_json(tokenizer_json)


In [15]:
def plot_attention(image, result, attention_plot):
    """Display the attention weights of each caption word over the image.

    One subplot is drawn per word of ``result``: the image, with that word's
    attention weights overlaid as a semi-transparent 8x8 grayscale map.

    Args:
        image: path of the image file to display.
        result: list of caption words.
        attention_plot: array indexed by word position; each row holds the
            attention weights of the corresponding word.
    """
    # plt.imread is used because PIL's Image is not imported in this notebook.
    temp_image = plt.imread(image)

    fig = plt.figure(figsize=(10, 10))

    len_result = len(result)
    # Square grid with enough cells for every word. A plain len_result // 2
    # grid has too few cells for captions of 1, 2, 3 or 5 words.
    grid_size = max(int(np.ceil(len_result / 2)), 2)
    for l in range(len_result):
        # Lay the attention weights of this word out as an 8x8 map.
        temp_att = np.resize(attention_plot[l], (8, 8))
        ax = fig.add_subplot(grid_size, grid_size, l + 1)
        ax.set_title(result[l])
        img = ax.imshow(temp_image)
        # Stretch the attention map over the full extent of the image.
        ax.imshow(temp_att, cmap='gray', alpha=0.6, extent=img.get_extent())

    plt.tight_layout()
    plt.show()

In [53]:
# Pipeline: run the classifier on every image; when the image is classified as
# a photo, run the denoiser and then the captioning model.
# Paths, captions and attention maps of the captioned images:
list_of_images = []
list_of_captions = []
list_of_attention = []

# Full paths of the JPEG files in data_dir. The extension check is
# case-insensitive and the list is sorted so the processing order is stable.
image_file = sorted(
    os.path.join(data_dir, filename)
    for filename in os.listdir(data_dir)
    if filename.lower().endswith(('.jpg', '.jpeg'))
)

for image_path in image_file:
    # Entries of image_file are already full paths; no second join is needed.
    tf_img = tf.keras.preprocessing.image.load_img(image_path, target_size=(128, 128))
    pred = classifier.predict(tf.expand_dims(tf_img, 0))
    score = tf.nn.softmax(pred[0])
    # NOTE(review): assumes index 0 is the "photo" class - confirm against the
    # classifier's training labels.
    if score[0] > 0.5:
        # Denoise the image
        image = denoiser.predict(tf.expand_dims(tf.keras.preprocessing.image.load_img(image_path, target_size=(256, 256)), 0))
        # TODO(Djayan): the denoised array above is not used yet. evaluate()
        # still reads the original file from image_path; the denoised result
        # has to be handed to evaluate() as an image (for example through
        # tf.keras.preprocessing.image.array_to_img).
        result, attention_plot = evaluate(image_path)
        # evaluate() never emits '<start>' and only ends with '<end>' when the
        # decoder produced it, so drop the marker tokens by value rather than
        # slicing off the first and last words.
        result = [word for word in result if word not in ('<start>', '<end>')]
        # Convert the caption from a list of words to a string
        caption = ' '.join(result)
        # Record the image path, its caption and its attention map
        list_of_images.append(image_path)
        list_of_captions.append(caption)
        list_of_attention.append(attention_plot)



KeyboardInterrupt: 