In [2]:
%load_ext autoreload

In [4]:
# autoreload chaque module spécifié par %aimport
%autoreload 1

from collections import OrderedDict
from IPython.display import display
import json
import os
import time

import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.metrics import Precision, Recall
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import RMSprop

import src.constants as constants
import src.filenameGenerator as fileNameGen
import src.imageLoader as iLoader
import src.tuning as tuning
import src.runUtil as runUtil

In [7]:
%aimport src.imageLoader

# Load the images and their labels, then print a few summary statistics.

gala_imgs, gala_labels = iLoader.load_processed_imgs(max_load=4227)

# Convert whatever the loader returned into numpy arrays so that shapes can be
# inspected and the data can be handed to Keras.
gala_imgs = np.array(gala_imgs)
gala_labels = np.array(gala_labels)

# Shape of a single image; create_cnn_model reads this as the network input shape.
INPUT_SHAPE = gala_imgs[0].shape

print('input shape : ' + str(INPUT_SHAPE))

print('gala_imgs shape : ' + str(gala_imgs.shape))
print('gala_labels shape : ' + str(gala_labels.shape))

# Vectorised count of the 'spiral' labels (replaces an element-by-element
# np.nditer loop that produced the same number).
nb_spiral = int(np.count_nonzero(gala_labels == 'spiral'))
print('nb_spiral : ' + str(nb_spiral))
# Every label that is not 'spiral' is counted as smooth.
nb_smooth = len(gala_labels) - nb_spiral
print('nb_smooth : ' + str(nb_smooth))

input shape : (140, 140, 3)
gala_imgs shape : (4227, 140, 140, 3)
gala_labels shape : (4227,)
nb_spiral : 2185
nb_smooth : 2042


In [32]:
# One-hot encode the labels.
# The 1-D label vector is reshaped into a single-column 2-D array, which is the
# (n_samples, n_features) layout OneHotEncoder works on.
gala_labels_2d = gala_labels.reshape(-1, 1)

# Request a dense array from the encoder. The keyword was renamed from `sparse`
# to `sparse_output` in scikit-learn 1.2 and the old name was later removed, so
# try the current spelling first and fall back to the legacy one on releases
# that do not know it yet.
try:
    oh_encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    oh_encoder = OneHotEncoder(sparse=False)
gala_labels_oh = oh_encoder.fit_transform(gala_labels_2d)

In [33]:
%aimport src.constants
%aimport src.filenameGenerator
%aimport src.tuning
%aimport src.runUtil

def create_cnn_model(config=None):
    """
    Build and compile the CNN classifier.

    The network is a stack of convolutional blocks, each made of two 3x3
    Conv2D layers with ReLU (the first one with 'same' padding) followed by a
    2x2 max-pooling. Blocks with 3, 6 and 12 filters are always present. The
    stack is followed by a Flatten layer and a 2-unit softmax output layer.
    The input shape is read from the module-level INPUT_SHAPE.

    config (optional dict):
        layers
            key absent                    -> one extra block with 24 filters
            'more'                        -> two extra blocks (24 and 48 filters)
            any other value (e.g. 'less') -> no extra block
        learning_rate
            learning rate given to RMSprop; RMSprop's own default is used
            when the key is absent.

    Returns the compiled model (RMSprop optimizer, categorical cross-entropy
    loss, accuracy / precision / recall metrics).
    """
    # None and {} behave the same way below, so normalise once.
    settings = config if config is not None else {}

    # Work out how many convolutional blocks to stack.
    has_depth_setting = 'layers' in settings
    wants_more = has_depth_setting and settings['layers'] == 'more'

    filter_counts = [3, 6, 12]
    if not has_depth_setting or wants_more:
        filter_counts.append(24)
    if wants_more:
        filter_counts.append(48)

    model = Sequential()

    for block_index, nb_filters in enumerate(filter_counts):
        # Only the very first layer of the model declares the input shape.
        first_layer_kwargs = {'input_shape': INPUT_SHAPE} if block_index == 0 else {}

        model.add(Conv2D(nb_filters, (3, 3), padding='same', activation='relu', **first_layer_kwargs))
        model.add(Conv2D(nb_filters, (3, 3), activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    model.add(Dense(2, activation='softmax'))

    # `learning_rate` is the documented keyword; `lr` is a deprecated alias
    # that newer Keras releases no longer accept.
    if 'learning_rate' in settings:
        optimizer = RMSprop(learning_rate=settings['learning_rate'])
    else:
        optimizer = RMSprop()

    # NOTE(review): without a class_id, Precision and Recall are aggregated
    # over both softmax outputs. With one-hot targets that appears to make
    # them equal to accuracy (the recorded run shows best_f1 == best_accuracy).
    # Consider class_id=<positive class index> if per-class values are wanted.
    model.compile(
        optimizer=optimizer,
        loss='categorical_crossentropy',
        metrics=[
            'accuracy',
            Precision(name='precision'),
            Recall(name='recall')
        ]
    )

    return model

def cnn_train(run_name, config=None):
    """
    Train a freshly built CNN on the module-level gala_imgs / gala_labels_oh
    arrays and persist the artefacts of the run.

    run_name:
        Identifier of the run. It is printed as a banner, used as the file
        name of the saved model (<run_name>.h5) and of the TensorBoard log
        directory, and passed to the runUtil save helpers.
    config (optional dict):
        batch_size
            batch size passed to model.fit (Keras default when absent)
        epochs
            maximum number of epochs (50 when absent); early stopping may end
            training sooner
        layers, learning_rate
            forwarded to create_cnn_model

    Returns an OrderedDict holding 'run_name', 'config',
    'train_time (seconds)' and the entries returned by
    runUtil.resume_history.
    """
    # Lookup view of the config; the original object (possibly None) is still
    # what gets forwarded and stored below.
    settings = config if config is not None else {}

    print('==================================')
    print(run_name)
    print('==================================')

    model_save_path = os.path.join(
        constants.PROJECT_ROOT_PATH,
        constants.CNN_MODELS_PATH,
        run_name + '.h5'
    )
    # Joined rather than concatenated, so the result is correct whether or not
    # CNN_LOGS_PATH ends with a path separator.
    log_dir = os.path.join(constants.CNN_LOGS_PATH, run_name)

    model = create_cnn_model(config)
    model.summary()

    callbacks = [
        TensorBoard(log_dir=log_dir),
        # Stop after 2 epochs without val_loss improvement and roll back to
        # the best weights seen.
        EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True, verbose=1),
        # Keep on disk only the model with the lowest val_loss.
        ModelCheckpoint(filepath=model_save_path, monitor='val_loss', save_best_only=True, verbose=1)
    ]

    # NOTE(review): Keras takes the validation_split samples from the end of
    # x / y before shuffling, so the split is only representative if the
    # loader does not return the images grouped by label. TODO confirm.
    fit_kwargs = {
        'x': gala_imgs,
        'y': gala_labels_oh,
        'epochs': settings.get('epochs', 50),
        'shuffle': True,
        'validation_split': 0.2,
        'verbose': 2,
        'callbacks': callbacks
    }
    # Only override the batch size when asked to, otherwise keep Keras' default.
    if 'batch_size' in settings:
        fit_kwargs['batch_size'] = settings['batch_size']

    # Fit the model, timing only the training itself.
    start_train_time = time.perf_counter()
    history = model.fit(**fit_kwargs)
    train_time = time.perf_counter() - start_train_time

    runUtil.save_history(history.history, run_name)

    # Aggregate the results of the run.
    run_results = OrderedDict([
        ('run_name', run_name),
        ('config', config),
        ('train_time (seconds)', train_time)
    ])
    run_results.update(runUtil.resume_history(history.history))

    runUtil.save_run_results(run_results, run_name)

    if config is not None:
        runUtil.save_config(config, run_name)

    return run_results


# Catalogue of the experiments: run name -> config handed to cnn_train
# (None means "use cnn_train's defaults"). Keeping them as data replaces a
# block of call/append pairs that used to sit, disabled, inside a string
# literal.
EXPERIMENTS = OrderedDict([
    ('baseline_25percent', None),
    # Baseline
    ('baseline', None),
    # Higher batch size
    ('higher_batch_size', {'batch_size': 150}),
    # Lower batch size
    ('lower_batch_size', {'batch_size': 10}),
    # Less convolutional layers
    ('less_conv_layers', {'layers': 'less'}),
    # More convolutional layers
    ('more_conv_layers', {'layers': 'more'}),
    # Lower learning rate
    ('lower_learning_rate6', {'learning_rate': 0.0001}),
    # Higher learning rate
    ('higher_learning_rate', {'learning_rate': 0.01}),
    # 100 epochs
    # NOTE(review): no config is passed for this run, so it trains with
    # cnn_train's default epoch count rather than 100. TODO confirm intent.
    ('baseline_100epoch', None),
    # Way higher batch size
    ('way_higher_batch_size', {'batch_size': 250}),
    # Way way higher batch size
    ('way_way_higher_batch_size', {'batch_size': 350}),
])

# Names of the experiments that actually run when this cell is executed.
# Add names from EXPERIMENTS here to re-run them.
RUNS_TO_EXECUTE = ['baseline_25percent']

runs_results = [
    cnn_train(run_name=name, config=EXPERIMENTS[name])
    for name in RUNS_TO_EXECUTE
]

# Results of the first executed run, kept under its original name for any
# later cell that reads it.
run_results_baseline = runs_results[0] if runs_results else None

# One column per run, one row per recorded value.
display(pd.DataFrame(runs_results).transpose())


baseline_25percent
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_200 (Conv2D)          (None, 140, 140, 3)       84        
_________________________________________________________________
conv2d_201 (Conv2D)          (None, 138, 138, 3)       84        
_________________________________________________________________
max_pooling2d_100 (MaxPoolin (None, 69, 69, 3)         0         
_________________________________________________________________
conv2d_202 (Conv2D)          (None, 69, 69, 6)         168       
_________________________________________________________________
conv2d_203 (Conv2D)          (None, 67, 67, 6)         330       
_________________________________________________________________
max_pooling2d_101 (MaxPoolin (None, 33, 33, 6)         0         
_________________________________________________________________
conv2d_204 (Conv2D)          (None, 33, 33, 12)        66

Unnamed: 0,0
run_name,baseline_25percent
config,
train_time (seconds),68.0494
best_epoch,7
best_loss,0.131413
best_val_loss,0.193495
best_accuracy,0.949719
best_val_accuracy,0.92435
best_f1,0.949719
best_val_f1,0.92435
