<a href="https://colab.research.google.com/github/Oleonn/DataMining/blob/main/INat_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Setup (should be executed on startup)

##Setup and connection to Google Drive

In [1]:
import os
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D

In [2]:
# Attach Google Drive to the Colab runtime (forcing a fresh mount) so the files under /content/drive are reachable
from google.colab import drive
drive.mount(mountpoint='/content/drive', force_remount=True)

Mounted at /content/drive


##Directories

In [3]:
# Root folder holding the training, validation and test data
data_dir = "/content/drive/MyDrive/Projet_mellifere/Modeles/INat_resized-cropped"

# One sub-folder of data_dir per data split
train_path, valid_path, test_path = (
    os.path.join(data_dir, split_folder)
    for split_folder in ("training_data", "validation_data", "test_data")
)

# List of classes; its length sets the number of outputs of the models built below
sp_classes = [
    "Asclepias_syriaca",
    "Daucus_carota",
    "Eutrochium_maculatum",
    "Leucanthemum_vulgare",
    "Solidago_canadensis",
]

#ResNet50V2 CNN

##All base model's layers are frozen, and only the new layers are trainable

In [8]:
from tensorflow.keras.applications import ResNet50V2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

In [None]:
# ResNet50V2 base with ImageNet weights and no classification top,
# built for 128x128-pixel inputs over 3 channels (RGB)
base_model = ResNet50V2(include_top=False, weights="imagenet", input_shape=(128, 128, 3))

# Freeze every layer of the base model so that only the layers added below are trained
for base_layer in base_model.layers:
  base_layer.trainable = False

# Custom layers stacked on top of the base model
model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(64, activation="relu"),
    Dense(len(sp_classes), activation="softmax"),  # one output per class in sp_classes
])

# Compile with the Adam optimizer, categorical cross-entropy loss and accuracy as the metric
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

Preparing training, validation and test data

In [None]:
train_batch_size = 32
valid_batch_size = 32
test_batch_size = 32

# Size every image is resized to when loaded. It must match the model's
# input_shape=(128, 128, 3); when target_size is omitted, flow_from_directory
# resizes to its default of (256, 256) instead.
image_size = (128, 128)

# NOTE(review): pixels are rescaled to [0, 1] below; the Keras documentation for the
# pre-trained ResNetV2 models describes resnet_v2.preprocess_input instead - confirm
# which scaling is intended with the ImageNet weights.

#Training data
print("For training data...")
train_datagen = ImageDataGenerator(
    rescale=1./255, # Rescale pixel values between 0 and 1
    horizontal_flip=True, # Randomly flip images horizontally
    vertical_flip=True, #Randomly flip images vertically
)
train_generator = train_datagen.flow_from_directory(
    train_path,
    target_size=image_size, # Resize images to the model's input size
    batch_size=train_batch_size, # Set batch size
    class_mode='categorical' # Use categorical labels for multi-class classification
)

#Validation data
print("For validation data...")
valid_datagen = ImageDataGenerator(rescale=1./255) # Only rescale for validation data
valid_generator = valid_datagen.flow_from_directory(
    valid_path,
    target_size=image_size, # Resize images to the model's input size
    batch_size=valid_batch_size, # Set batch size
    class_mode='categorical' # Use categorical labels for multi-class classification
)

#Test data
print("For test data...")
test_datagen = ImageDataGenerator(rescale=1./255) # Only rescale for test data
test_generator = test_datagen.flow_from_directory(
    test_path,
    target_size=image_size, # Resize images to the model's input size
    batch_size=test_batch_size, # Set batch size
    class_mode='categorical' # Use categorical labels for multi-class classification
)

In [None]:
model_path = os.path.join(data_dir, "ResNet50V2_base-model-frzn.keras")
# The model is saved to model_path every time an epoch reaches a new maximum of val_accuracy
checkpoint = ModelCheckpoint(model_path, monitor = "val_accuracy", verbose=2, save_best_only=True, mode="max")
# Stop after 4 epochs without a val_accuracy improvement and put the best epoch's weights back
# into `model`. Without restore_best_weights the in-memory model keeps the weights of the last
# epoch run, so a later model.evaluate() would not score the model that was checkpointed.
early_stopping = EarlyStopping(monitor="val_accuracy", patience=4, verbose=2, mode="max", restore_best_weights=True)

#Fit the model to the training data; the returned History object keeps the metrics of every epoch
history = model.fit(
    train_generator,
    epochs=60,
    validation_data = valid_generator,
    callbacks=[checkpoint, early_stopping]
)

Don't forget to copy the previous training log to monitor the evolution of the accuracy across all epochs

In [None]:
# Measure the model on the test generator; the result unpacks as loss first, then accuracy
test_metrics = model.evaluate(test_generator)
test_loss, test_accuracy = test_metrics
print(f"The tested accuracy is {test_accuracy} and the tested loss is {test_loss}")

##Only the base model's first 2 layers are frozen, and the rest is trainable

In [10]:
from tensorflow.keras.applications import ResNet50V2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

In [None]:
# ResNet50V2 base with ImageNet weights and no classification top,
# built for 128x128-pixel inputs over 3 channels (RGB)
base_model = ResNet50V2(include_top=False, weights="imagenet", input_shape=(128, 128, 3))

# Freeze only the first 2 layers of the base model; every other layer stays trainable.
# NOTE(review): in Keras' ResNetV2 these two entries look like the input layer and a
# zero-padding layer, which presumably hold no weights - confirm the freeze has the intended effect.
for frozen_layer in base_model.layers[:2]:
  frozen_layer.trainable = False

# Custom layers stacked on top of the base model
model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(64, activation="relu"),
    Dense(len(sp_classes), activation="softmax"),  # one output per class in sp_classes
])

# Compile with the Adam optimizer, categorical cross-entropy loss and accuracy as the metric
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

Preparing training, validation and test data

In [None]:
train_batch_size = 32
valid_batch_size = 32
test_batch_size = 32

# Size every image is resized to when loaded. It must match the model's
# input_shape=(128, 128, 3); when target_size is omitted, flow_from_directory
# resizes to its default of (256, 256) instead.
image_size = (128, 128)

# NOTE(review): pixels are rescaled to [0, 1] below; the Keras documentation for the
# pre-trained ResNetV2 models describes resnet_v2.preprocess_input instead - confirm
# which scaling is intended with the ImageNet weights.

#Training data
print("For training data...")
train_datagen = ImageDataGenerator(
    rescale=1./255, # Rescale pixel values between 0 and 1
    horizontal_flip=True, # Randomly flip images horizontally
    vertical_flip=True, #Randomly flip images vertically
)
train_generator = train_datagen.flow_from_directory(
    train_path,
    target_size=image_size, # Resize images to the model's input size
    batch_size=train_batch_size, # Set batch size
    class_mode='categorical' # Use categorical labels for multi-class classification
)

#Validation data
print("For validation data...")
valid_datagen = ImageDataGenerator(rescale=1./255) # Only rescale for validation data
valid_generator = valid_datagen.flow_from_directory(
    valid_path,
    target_size=image_size, # Resize images to the model's input size
    batch_size=valid_batch_size, # Set batch size
    class_mode='categorical' # Use categorical labels for multi-class classification
)

#Test data
print("For test data...")
test_datagen = ImageDataGenerator(rescale=1./255) # Only rescale for test data
test_generator = test_datagen.flow_from_directory(
    test_path,
    target_size=image_size, # Resize images to the model's input size
    batch_size=test_batch_size, # Set batch size
    class_mode='categorical' # Use categorical labels for multi-class classification
)

For training data...
Found 6000 images belonging to 5 classes.
For validation data...
Found 2000 images belonging to 5 classes.
For test data...
Found 2000 images belonging to 5 classes.


In [None]:
model_path = os.path.join(data_dir, "ResNet50V2_2-bottom-layers-frzn.keras")
# The model is saved to model_path every time an epoch reaches a new maximum of val_accuracy
checkpoint = ModelCheckpoint(model_path, monitor = "val_accuracy", verbose=2, save_best_only=True, mode="max")
# Stop after 4 epochs without a val_accuracy improvement and put the best epoch's weights back
# into `model`. Without restore_best_weights the in-memory model keeps the weights of the last
# epoch run, so a later model.evaluate() would not score the model that was checkpointed.
early_stopping = EarlyStopping(monitor="val_accuracy", patience=4, verbose=2, mode="max", restore_best_weights=True)

#Fit the model to the training data; the returned History object keeps the metrics of every epoch
history = model.fit(
    train_generator,
    epochs=60,
    validation_data = valid_generator,
    callbacks=[checkpoint, early_stopping]
)

Epoch 1/60
Epoch 1: val_accuracy improved from -inf to 0.31500, saving model to best_model.keras
Epoch 2/60
Epoch 2: val_accuracy did not improve from 0.31500
Epoch 3/60
Epoch 3: val_accuracy improved from 0.31500 to 0.32800, saving model to best_model.keras
Epoch 4/60
Epoch 4: val_accuracy improved from 0.32800 to 0.36250, saving model to best_model.keras
Epoch 5/60

Don't forget to copy the previous training log to monitor the evolution of the accuracy across all epochs

In [None]:
# Measure the model on the test generator; the result unpacks as loss first, then accuracy
test_metrics = model.evaluate(test_generator)
test_loss, test_accuracy = test_metrics
print(f"The tested accuracy is {test_accuracy} and the tested loss is {test_loss}")

#ResNet101V2 CNN

##All base model's layers are frozen, and only the new layers are trainable

In [None]:
from tensorflow.keras.applications import ResNet101V2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

In [None]:
# ResNet101V2 base with ImageNet weights and no classification top,
# built for 128x128-pixel inputs over 3 channels (RGB)
base_model = ResNet101V2(include_top=False, weights="imagenet", input_shape=(128, 128, 3))

# Freeze every layer of the base model so that only the layers added below are trained
for base_layer in base_model.layers:
  base_layer.trainable = False

# Custom layers stacked on top of the base model
model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(64, activation="relu"),
    Dense(len(sp_classes), activation="softmax"),  # one output per class in sp_classes
])

# Compile with the Adam optimizer, categorical cross-entropy loss and accuracy as the metric
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

Preparing training, validation and test data

In [None]:
train_batch_size = 32
valid_batch_size = 32
test_batch_size = 32

# Size every image is resized to when loaded. It must match the model's
# input_shape=(128, 128, 3); when target_size is omitted, flow_from_directory
# resizes to its default of (256, 256) instead.
image_size = (128, 128)

# NOTE(review): pixels are rescaled to [0, 1] below; the Keras documentation for the
# pre-trained ResNetV2 models describes resnet_v2.preprocess_input instead - confirm
# which scaling is intended with the ImageNet weights.

#Training data
print("For training data...")
train_datagen = ImageDataGenerator(
    rescale=1./255, # Rescale pixel values between 0 and 1
    horizontal_flip=True, # Randomly flip images horizontally
    vertical_flip=True, #Randomly flip images vertically
)
train_generator = train_datagen.flow_from_directory(
    train_path,
    target_size=image_size, # Resize images to the model's input size
    batch_size=train_batch_size, # Set batch size
    class_mode='categorical' # Use categorical labels for multi-class classification
)

#Validation data
print("For validation data...")
valid_datagen = ImageDataGenerator(rescale=1./255) # Only rescale for validation data
valid_generator = valid_datagen.flow_from_directory(
    valid_path,
    target_size=image_size, # Resize images to the model's input size
    batch_size=valid_batch_size, # Set batch size
    class_mode='categorical' # Use categorical labels for multi-class classification
)

#Test data
print("For test data...")
test_datagen = ImageDataGenerator(rescale=1./255) # Only rescale for test data
test_generator = test_datagen.flow_from_directory(
    test_path,
    target_size=image_size, # Resize images to the model's input size
    batch_size=test_batch_size, # Set batch size
    class_mode='categorical' # Use categorical labels for multi-class classification
)

For training data...
Found 6000 images belonging to 5 classes.
For validation data...
Found 2000 images belonging to 5 classes.
For test data...
Found 2000 images belonging to 5 classes.


In [None]:
model_path = os.path.join(data_dir, "ResNet101V2_base-model-frzn.keras")
# The model is saved to model_path every time an epoch reaches a new maximum of val_accuracy
checkpoint = ModelCheckpoint(model_path, monitor = "val_accuracy", verbose=2, save_best_only=True, mode="max")
# Stop after 4 epochs without a val_accuracy improvement and put the best epoch's weights back
# into `model`. Without restore_best_weights the in-memory model keeps the weights of the last
# epoch run, so a later model.evaluate() would not score the model that was checkpointed.
early_stopping = EarlyStopping(monitor="val_accuracy", patience=4, verbose=2, mode="max", restore_best_weights=True)

#Fit the model to the training data; the returned History object keeps the metrics of every epoch
history = model.fit(
    train_generator,
    epochs=60,
    validation_data = valid_generator,
    callbacks=[checkpoint, early_stopping]
)

Epoch 1/60
Epoch 1: val_accuracy improved from -inf to 0.31500, saving model to best_model.keras
Epoch 2/60
Epoch 2: val_accuracy did not improve from 0.31500
Epoch 3/60
Epoch 3: val_accuracy improved from 0.31500 to 0.32800, saving model to best_model.keras
Epoch 4/60
Epoch 4: val_accuracy improved from 0.32800 to 0.36250, saving model to best_model.keras
Epoch 5/60

Don't forget to copy the previous training log to monitor the evolution of the accuracy across all epochs

In [None]:
# Measure the model on the test generator; the result unpacks as loss first, then accuracy
test_metrics = model.evaluate(test_generator)
test_loss, test_accuracy = test_metrics
print(f"The tested accuracy is {test_accuracy} and the tested loss is {test_loss}")

##Only the base model's first 2 layers are frozen, and the rest is trainable

In [None]:
from tensorflow.keras.applications import ResNet101V2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

In [None]:
# ResNet101V2 base with ImageNet weights and no classification top,
# built for 128x128-pixel inputs over 3 channels (RGB)
base_model = ResNet101V2(include_top=False, weights="imagenet", input_shape=(128, 128, 3))

# Freeze only the first 2 layers of the base model; every other layer stays trainable.
# NOTE(review): in Keras' ResNetV2 these two entries look like the input layer and a
# zero-padding layer, which presumably hold no weights - confirm the freeze has the intended effect.
for frozen_layer in base_model.layers[:2]:
  frozen_layer.trainable = False

# Custom layers stacked on top of the base model
model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(64, activation="relu"),
    Dense(len(sp_classes), activation="softmax"),  # one output per class in sp_classes
])

# Compile with the Adam optimizer, categorical cross-entropy loss and accuracy as the metric
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

Preparing training, validation and test data

In [None]:
train_batch_size = 32
valid_batch_size = 32
test_batch_size = 32

# Size every image is resized to when loaded. It must match the model's
# input_shape=(128, 128, 3); when target_size is omitted, flow_from_directory
# resizes to its default of (256, 256) instead.
image_size = (128, 128)

# NOTE(review): pixels are rescaled to [0, 1] below; the Keras documentation for the
# pre-trained ResNetV2 models describes resnet_v2.preprocess_input instead - confirm
# which scaling is intended with the ImageNet weights.

#Training data
print("For training data...")
train_datagen = ImageDataGenerator(
    rescale=1./255, # Rescale pixel values between 0 and 1
    horizontal_flip=True, # Randomly flip images horizontally
    vertical_flip=True, #Randomly flip images vertically
)
train_generator = train_datagen.flow_from_directory(
    train_path,
    target_size=image_size, # Resize images to the model's input size
    batch_size=train_batch_size, # Set batch size
    class_mode='categorical' # Use categorical labels for multi-class classification
)

#Validation data
print("For validation data...")
valid_datagen = ImageDataGenerator(rescale=1./255) # Only rescale for validation data
valid_generator = valid_datagen.flow_from_directory(
    valid_path,
    target_size=image_size, # Resize images to the model's input size
    batch_size=valid_batch_size, # Set batch size
    class_mode='categorical' # Use categorical labels for multi-class classification
)

#Test data
print("For test data...")
test_datagen = ImageDataGenerator(rescale=1./255) # Only rescale for test data
test_generator = test_datagen.flow_from_directory(
    test_path,
    target_size=image_size, # Resize images to the model's input size
    batch_size=test_batch_size, # Set batch size
    class_mode='categorical' # Use categorical labels for multi-class classification
)

In [None]:
model_path = os.path.join(data_dir, "ResNet101V2_2-bottom-layers-frzn.keras")
# The model is saved to model_path every time an epoch reaches a new maximum of val_accuracy
checkpoint = ModelCheckpoint(model_path, monitor = "val_accuracy", verbose=2, save_best_only=True, mode="max")
# Stop after 4 epochs without a val_accuracy improvement and put the best epoch's weights back
# into `model`. Without restore_best_weights the in-memory model keeps the weights of the last
# epoch run, so a later model.evaluate() would not score the model that was checkpointed.
early_stopping = EarlyStopping(monitor="val_accuracy", patience=4, verbose=2, mode="max", restore_best_weights=True)

#Fit the model to the training data; the returned History object keeps the metrics of every epoch
history = model.fit(
    train_generator,
    epochs=60,
    validation_data = valid_generator,
    callbacks=[checkpoint, early_stopping]
)

Don't forget to copy the previous training log to monitor the evolution of the accuracy across all epochs

In [None]:
# Measure the model on the test generator; the result unpacks as loss first, then accuracy
test_metrics = model.evaluate(test_generator)
test_loss, test_accuracy = test_metrics
print(f"The tested accuracy is {test_accuracy} and the tested loss is {test_loss}")