<a href="https://colab.research.google.com/github/Tanguyvans/StGeorge/blob/main/StGeorge_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports 

In [9]:
!/opt/bin/nvidia-smi
!rm -rf sample_data

/bin/bash: /opt/bin/nvidia-smi: No such file or directory


In [10]:
import sys
import urllib
import os
from matplotlib import pyplot as plt
import math

import zipfile
import pandas as pd
import csv
from google.colab import drive

# Deep Learning
import tensorflow as tf
from tensorflow import keras
from keras.preprocessing import image
from keras.models import Model, load_model
from keras import backend as K
from keras.applications.vgg16 import VGG16, preprocess_input # 224*224
from keras.applications.xception import Xception, preprocess_input, decode_predictions # 299*299
from keras.applications.mobilenet import MobileNet, preprocess_input, decode_predictions # 224*224
from keras.applications.densenet import DenseNet121 # 224*224

from keras.preprocessing.image import ImageDataGenerator
from keras.losses import categorical_crossentropy
from keras.layers import Dense, GlobalAveragePooling2D, Activation, Flatten, Dropout
from keras.callbacks import ModelCheckpoint, EarlyStopping

# Loading the dataset

In [11]:
get_images = True

source_path = '/content/gdrive/MyDrive/test_assignment_cv/'
dataset_path = '/content/gdrive/MyDrive/george_ds/'

drive.mount('/content/gdrive')
# if unzip: 
#   with zipfile.ZipFile('/content/gdrive/MyDrive/test_assignment_cv/george_test_task.zip', 'r') as zip_ref:
#     zip_ref.extractall('/content/gdrive/MyDrive/george_dataset/')

if get_images: 
  if os.path.exists(dataset_path) == False:
    os.mkdir(dataset_path+'george')
    os.mkdir(dataset_path+'no_george')

  george = pd.read_csv(source_path+'georges.csv', header = None)
  non_george = pd.read_csv(source_path+'non_georges.csv', header = None)

  for index, row in george.iterrows(): 
    urllib.request.urlretrieve(row[0], dataset_path+'george/'+str(index)+'.jpg')

  for index, row in non_george.iterrows():
    urllib.request.urlretrieve(row[0], dataset_path+'no_george/'+str(index)+'.jpg')

KeyboardInterrupt: ignored

In [None]:
!printf '%s\n' 'george' 'no_george'> classes.txt

In [None]:
configs = dict(
    nb_classes = 2,
    batch_size = 64,
    input_dim = 224, 
    epochs = 5, 
    dataset_name = '/content/gdrive/MyDrive/george_dataset/one/',
    classifier = "Xception",
    pretrain_weights = 'imagenet',
    init_learning_rate = 0.001,
    lr_decay_rate = 0.1, 
    optimizer = 'adam',
    loss_fn = 'categorical_crossentropy',
    metrics = ['acc'],
    seed = 42, 
    validation_split = 0.2
)

classes_path = 'classes.txt'
csv_path = 'result.csv'
log_path='logs'
result_path = 'results/' + configs['classifier']

In [None]:
with open(classes_path, 'r') as f:
    classes = f.readlines()
    classes = list(map(lambda x: x.strip(), classes))
num_classes = len(classes)

print(f'Classes : {classes}')
print(f'Number of classes : {num_classes}')

Classes : ['george', 'no_george']
Number of classes : 2


In [None]:
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
	configs['dataset_name'],          																		  # Path of the dataset
	validation_split = configs['validation_split'],             						# Data division : validation (20%), train (80%)
	subset = 'training',                																		# Selection of training data
	seed = configs['seed'],                          												# Initialization of random generator (for permutations)
	image_size = (configs['input_dim'], configs['input_dim']),    					# Input size of images
	batch_size = configs['batch_size'],																			# Batch_size
  label_mode = 'categorical'     																					# Conversion to One-Hot format
)

val_ds = tf.keras.preprocessing.image_dataset_from_directory(
	configs['dataset_name'],          																		  # Path of the dataset
	validation_split = configs['validation_split'],             						# Data division : validation (20%), train (80%)
	subset = 'validation',              																		# Selection of validation data
	seed = configs['seed'],                         												# Initialization of random generator (for permutations)
	image_size = (configs['input_dim'], configs['input_dim']),    					# Input size of images
	batch_size = configs['batch_size'], 																		# Batch_size
  label_mode = 'categorical'     																					# Conversion to One-Hot format
)

Found 6046 files belonging to 2 classes.
Using 4837 files for training.
Found 6046 files belonging to 2 classes.
Using 1209 files for validation.


In [None]:
data_gen_args = dict(              
    featurewise_center=False,
    featurewise_std_normalization=False,
    rotation_range=20, 
    width_shift_range=0.3, 
    height_shift_range=0.3,
    validation_split = 0.2,
    horizontal_flip = True,
    dtype = 'uint8'
)

color_datagen = ImageDataGenerator(**data_gen_args)

train_generator = color_datagen.flow_from_directory(
  configs['dataset_name'],          																		  # Path of the dataset             			
	subset = 'training',                																		# Selection of training data
	seed = configs['seed'],                         												# Initialization of random generator (for permutations)
	target_size = (configs['input_dim'], configs['input_dim']),    					# Input size of images
	batch_size = configs['batch_size'], 																		# Batch_size
  class_mode = 'categorical',
  shuffle = True
  )

val_generator = color_datagen.flow_from_directory(
  configs['dataset_name'],          																		  # Path of the dataset             			
	subset = 'validation',                																		# Selection of training data
	seed = configs['seed'],                         												# Initialization of random generator (for permutations)
	target_size = (configs['input_dim'], configs['input_dim']),    					# Input size of images
	batch_size = configs['batch_size'], 																		# Batch_size
  class_mode = 'categorical'
  )


Found 4837 images belonging to 2 classes.
Found 1209 images belonging to 2 classes.


In [None]:
def build_model():

  base_model = Xception(
      include_top = False, 
      weights = 'imagenet', 
      input_shape = (configs['input_dim'], configs['input_dim'],3))

  model = base_model.output
  model = Flatten()(model)
  model = Dense(128, activation='relu')(model)
  model = Dropout(0.4)(model)
  model = Dense(32, activation = 'relu')(model)
  model = Dropout(0.4)(model)
  predictions = Dense(2, activation = 'softmax')(model)

  model = Model(inputs=base_model.inputs, outputs = predictions)

  return model

In [None]:
model = build_model()


In [None]:
def train(config: dict, callbacks: list, verbose: int=0):
  
  # https://stackoverflow.com/questions/57731214/what-tf-keras-backend-clear-session-actually-do
  tf.keras.backend.clear_session()                  
  
  model = build_model()

  # Select layers to be trained
  for layer in model.layers:
      layer.trainable = False       
  
  for layer in model.layers[:-6]:
      layer.trainable = True


  opt = keras.optimizers.SGD(learning_rate = config['init_learning_rate'])
  opt2 = keras.optimizers.Adam(learning_rate = config['init_learning_rate'])
  opt3 = keras.optimizers.RMSprop(learning_rate = config['init_learning_rate'])

  model.compile(loss = config['loss_fn'],
                optimizer = opt2,
                metrics = config['metrics'])  


  # Création du dossier pour sauvegrader le model
  if os.path.exists(result_path) == False:
      os.makedirs(result_path)

  history = model.fit_generator(
      train_generator,
      steps_per_epoch=math.ceil(len(train_generator)),
      epochs=config['epochs'],
      validation_data = val_ds,
      validation_steps=math.ceil(len(val_ds)),
      callbacks = callbacks
  )
  
  return model, history

In [None]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir = log_path,
                                                      histogram_freq = 1)

# https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/EarlyStopping
stopper_callback = EarlyStopping(monitor = 'val_loss',
                                 patience = 5,
                                 mode='auto',
                                 restore_best_weights=True)
# https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/ModelCheckpoint
ckpt_save = os.path.join('.', 'model_fine_ep_{epoch}_val_acc_{val_acc:.3f}.h5')
ckpt_callbak = ModelCheckpoint(ckpt_save,
                               monitor = 'val_acc',
                               verbose = 1,
                               save_best_only = True,
                               mode = 'auto')



In [None]:
callbacks = [tensorboard_callback,
             stopper_callback,
             ckpt_callbak]

# Start training
model, history = train(configs, callbacks, 1)

loss, acc = model.evaluate(val_ds)
print(f'Validation loss: {loss}, validation accuracy : {acc}')

  history = model.fit_generator(


Epoch 1/5
