<a href="https://colab.research.google.com/github/ChtibaH4R/scikit-learn/blob/main/fireDetection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [4]:
import numpy as np
import pandas as pd
import os
import pickle
import urllib.request
import time
import keras
import tensorflow as tf
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
from tensorflow.keras.layers import (Dense, Dropout, LayerNormalization, Rescaling,
                                     Conv2D, MaxPooling2D, Flatten, RandomFlip,
                                     RandomRotation, RandomZoom)
from tensorflow.keras.models import Model

# Définition des variables

In [5]:
AUTOTUNE = tf.data.experimental.AUTOTUNE
IMAGE_SIZE = 32
PATCH_SIZE = 7
NUM_LAYERS = 8
NUM_HEADS = 16
MLP_DIM = 128
lr = 1e-3
WEIGHT_DECAY = 1e-4
BATCH_SIZE = 64
epochs = 2
num_classes = 2

# Définition des classes pour le modèle Vision Transformer

In [6]:
class TransformerBlock(tf.keras.layers.Layer):
    def __init__(self, embed_dim, num_heads, feedforward_dim, dropout=0.1):
        super(TransformerBlock, self).__init__()
        self.multiheadselfattention = MultiHeadAttention(embed_dim, num_heads)
        self.ffn = tf.keras.Sequential([
            Dense(feedforward_dim, activation="relu"),
            Dense(embed_dim),
        ])
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(dropout)
        self.dropout2 = Dropout(dropout)

    def call(self, inputs, training):
        out1 = self.layernorm1(inputs)
        attention_output = self.multiheadselfattention(out1)
        attention_output = self.dropout1(attention_output, training=training)
        out2 = self.layernorm1(inputs + attention_output)
        ffn_output = self.ffn(out2)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out2 + ffn_output)

class MultiHeadAttention(tf.keras.layers.Layer):
    def __init__(self, embed_dim, num_heads):
        super(MultiHeadAttention, self).__init__()
        self.num_heads = num_heads
        self.embed_dim = embed_dim
        assert self.embed_dim % self.num_heads == 0
        self.projection_dim = self.embed_dim // self.num_heads
        self.query_dense = Dense(self.embed_dim)
        self.key_dense = Dense(self.embed_dim)
        self.value_dense = Dense(self.embed_dim)
        self.combine_heads = Dense(self.embed_dim)

    def call(self, inputs):
        batch_size = tf.shape(inputs)[0]
        query = self.query_dense(inputs)
        key = self.key_dense(inputs)
        value = self.value_dense(inputs)
        attention_output = tf.matmul(query, key, transpose_b=True) / tf.sqrt(float(self.embed_dim))
        attention_output = tf.nn.softmax(attention_output, axis=-1)
        output = tf.matmul(attention_output, value)
        return self.combine_heads(output)

class VisionTransformer(Model):
    def __init__(self, image_size, patch_size, num_layers, num_classes, d_model, num_heads, mlp_dim, channels=3, dropout=0.1):
        super(VisionTransformer, self).__init__()
        self.patch_size = patch_size
        self.num_layers = num_layers
        self.d_model = d_model
        self.rescale = Rescaling(1./255)
        self.patch_proj = Dense(d_model)
        self.enc_layers = [
            TransformerBlock(d_model, num_heads, mlp_dim, dropout) for _ in range(num_layers)
        ]
        self.mlp_head = tf.keras.Sequential([
            Dense(mlp_dim), Dropout(dropout), Dense(num_classes)
        ])

    def extract_patches(self, images):
        batch_size = tf.shape(images)[0]
        patches = tf.image.extract_patches(
            images=images,
            sizes=[1, self.patch_size, self.patch_size, 1],
            strides=[1, self.patch_size, self.patch_size, 1],
            rates=[1, 1, 1, 1],
            padding="VALID",
        )
        return tf.reshape(patches, [batch_size, -1, self.patch_size ** 2 * 3])

    def call(self, x, training):
        x = self.rescale(x)
        patches = self.extract_patches(x)
        x = self.patch_proj(patches)
        for layer in self.enc_layers:
            x = layer(x, training=training)
        return self.mlp_head(x[:, 0])


# Définition d'un modèle pré-entraîné Vision Transformer (ViT) de Google

In [7]:
# Télécharger les poids du modèle ViT
def download_vit_weights():
    url = 'https://storage.googleapis.com/vit_models/imagenet21k/ViT-B_16.npz'
    file_path = 'ViT-B_16.npz'

    if not os.path.exists(file_path):
        print(f"Downloading ViT weights from {url}...")
        urllib.request.urlretrieve(url, file_path)
        print("Download complete!")
    else:
        print("ViT weights already downloaded.")

    return file_path

# Charger les poids téléchargés
def load_vit_weights(model, weights):
    """Charge les poids pré-entraînés dans le modèle ViT"""
    layer_names = [layer.name for layer in model.layers]
    for key in weights.files:
        if key.startswith('embedding') or key.startswith('transformer'):
            # Sélectionner les bonnes couches et appliquer les poids
            layer_name = key.split('/')[0]
            if layer_name in layer_names:
                layer = model.get_layer(layer_name)
                layer.set_weights(weights[key])
    return model

# Définir le modèle ViT
class VisionTransformerGoogle(tf.keras.Model):
    def __init__(self, num_classes, image_size=32, patch_size=16, embed_dim=768, num_heads=12, num_layers=12, dropout=0.1):
        super(VisionTransformerGoogle, self).__init__()

        self.patch_size = patch_size
        self.num_layers = num_layers
        self.d_model = embed_dim
        self.num_classes = num_classes

        # Définition de la projection des patches
        self.rescale = tf.keras.layers.Rescaling(1./255)
        self.patch_proj = tf.keras.layers.Dense(embed_dim)

        # Transformer layers
        self.enc_layers = [
            TransformerBlock(embed_dim, num_heads, embed_dim * 4, dropout) for _ in range(num_layers)
        ]

        # MLP Head
        self.mlp_head = tf.keras.Sequential([
            tf.keras.layers.Dense(embed_dim), tf.keras.layers.Dropout(dropout), tf.keras.layers.Dense(num_classes)
        ])

    def extract_patches(self, images):
        batch_size = tf.shape(images)[0]
        patches = tf.image.extract_patches(
            images=images,
            sizes=[1, self.patch_size, self.patch_size, 1],
            strides=[1, self.patch_size, self.patch_size, 1],
            rates=[1, 1, 1, 1],
            padding="VALID",
        )
        return tf.reshape(patches, [batch_size, -1, self.patch_size ** 2 * 3])

    def call(self, x, training=False):
        x = self.rescale(x)
        patches = self.extract_patches(x)
        x = self.patch_proj(patches)
        for layer in self.enc_layers:
            x = layer(x, training=training)
        return self.mlp_head(x[:, 0])

# Charger les poids
def download_and_load_weights(model):
    weights_path = download_vit_weights()
    weights = np.load(weights_path, allow_pickle=True)
    model = load_vit_weights(model, weights)
    return model

# Définition des classes pour le modèle CNN

In [8]:
class CNNModel(Model):
    def __init__(self, num_classes):
        super().__init__()
        self.model = tf.keras.Sequential([
            Rescaling(1./255),
            Conv2D(32, (3,3), activation='relu', padding='same'),
            MaxPooling2D(),
            Conv2D(64, (3,3), activation='relu', padding='same'),
            MaxPooling2D(),
            Flatten(),
            Dense(128, activation='relu'),
            Dense(num_classes)
        ])

    def call(self, x):
        return self.model(x)

# Charger le dataset depuis mon Drive

In [9]:

training_path = "/content/drive/MyDrive/data/Training/Training"
test_path = "/content/drive/MyDrive/data/Test"

ds_train = tf.keras.preprocessing.image_dataset_from_directory(
    training_path,
    seed=123,
    image_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE
)

ds_test = tf.keras.preprocessing.image_dataset_from_directory(
    test_path,
    seed=123,
    image_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE
)

Found 39385 files belonging to 2 classes.
Found 8617 files belonging to 2 classes.


# Augmentation des données

In [10]:
data_augmentation = tf.keras.Sequential([
    RandomFlip("horizontal"),
    RandomRotation(0.1),
    RandomZoom(0.1)
])

ds_train = ds_train.map(lambda x, y: (data_augmentation(x, training=True), y)).prefetch(tf.data.experimental.AUTOTUNE)
ds_test = ds_test.prefetch(tf.data.experimental.AUTOTUNE)

# Initialisation des modèles

In [11]:
# Initialisation du modèle ViT
vit_model = VisionTransformer(
    image_size=IMAGE_SIZE,
    patch_size=PATCH_SIZE,
    num_layers=NUM_LAYERS,
    num_classes=num_classes,
    d_model=64,
    num_heads=NUM_HEADS,
    mlp_dim=MLP_DIM,
    channels=3,
    dropout=0.1
)
vit_model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.AdamW(learning_rate=lr, weight_decay=WEIGHT_DECAY),
    metrics=["accuracy"],
)

# Initialisation du modèle CNN
cnn_model = CNNModel(num_classes)
cnn_model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    metrics=['accuracy']
)

# Initialisation du modèle Google ViT
vit_google_model = VisionTransformerGoogle(num_classes=num_classes)
vit_google_model = download_and_load_weights(vit_google_model)
vit_google_model.compile(optimizer=tf.keras.optimizers.Adam(),
                         loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                         metrics=['accuracy'])

Downloading ViT weights from https://storage.googleapis.com/vit_models/imagenet21k/ViT-B_16.npz...
Download complete!


# Entraînement des modèles

In [None]:
histories = {}
models_path = "/content/drive/MyDrive/models"
# Entraînement des modèles et suivi de l'historique
for model_name, model in zip(['ViT', 'GoogleViT', 'CNN' ], [vit_model, vit_google_model, cnn_model]):
    start = time.time()
    print(f"Training {model_name}...")
    history = model.fit(ds_train, validation_data=ds_test, epochs=epochs)
    model.save(f"{models_path}/{model_name}.keras")
    histories[model_name] = history
    end = time.time()
    print(f"{model_name} trained in {end - start} seconds")
    f = open(f"histories_{model_name}.pkl","wb")
    pickle.dump(histories,f)
    f.close()

Training ViT...
Epoch 1/2
[1m101/616[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m1:33:10[0m 11s/step - accuracy: 0.5758 - loss: 0.8421

Training CNN...
Epoch 1/5
616/616 ━━━━━━━━━━━━━━━━━━━━ 8538s 14s/step - accuracy: 0.9196 - loss: 0.2096 - val_accuracy: 0.5903 - val_loss: 1.5432
Epoch 2/5
616/616 ━━━━━━━━━━━━━━━━━━━━ 236s 373ms/step - accuracy: 0.9682 - loss: 0.0937 - val_accuracy: 0.5768 - val_loss: 1.3102
Epoch 3/5
616/616 ━━━━━━━━━━━━━━━━━━━━ 249s 352ms/step - accuracy: 0.9806 - loss: 0.0560 - val_accuracy: 0.5751 - val_loss: 1.6303
Epoch 4/5
616/616 ━━━━━━━━━━━━━━━━━━━━ 213s 346ms/step - accuracy: 0.9843 - loss: 0.0442 - val_accuracy: 0.5646 - val_loss: 1.8229
Epoch 5/5
616/616 ━━━━━━━━━━━━━━━━━━━━ 264s 350ms/step - accuracy: 0.9865 - loss: 0.0406 - val_accuracy: 0.5737 - val_loss: 2.1958
CNN trained in 9501.716970205307 seconds

Training ViT...
Epoch 1/5
167/616 ━━━━━━━━━━━━━━━━━━━━ 2:23 319ms/step - accuracy: 0.6261 - loss: 0.6861

# On Test data
Training Google ViT...
108/108 ━━━━━━━━━━━━━━━━━━━━ 2176s 19s/step - accuracy: 0.5029 - loss: 3.2528 - val_accuracy: 0.6123 - val_loss: 0.6729
Google ViT trained in 2186.0178921222687 seconds

Training CNN...
108/108 ━━━━━━━━━━━━━━━━━━━━ 40s 337ms/step - accuracy: 0.6094 - loss: 0.6609 - val_accuracy: 0.8299 - val_loss: 0.4377
CNN trained in 42.477810859680176 seconds

Training ViT...
108/108 ━━━━━━━━━━━━━━━━━━━━ 98s 435ms/step - accuracy: 0.5381 - loss: 0.7901 - val_accuracy: 0.6135 - val_loss: 0.6815
ViT trained in 98.84237217903137 seconds

# Affichage des courbes de performance

In [None]:
plt.figure(figsize=(12, 6))
for model_name, history in histories.items():
    plt.plot(history.history['accuracy'], label=f'{model_name} - Train')
    plt.plot(history.history['val_accuracy'], label=f'{model_name} - Val')

plt.legend()
plt.title('Comparison of Model Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.show()

for model_name, history in histories.items():
    plt.plot(history.history['loss'], label=f'{model_name} - Train')
    plt.plot(history.history['val_loss'], label=f'{model_name} - Val')

plt.legend()
plt.title('Comparison of Model Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.show()