In [1]:
import os
import tensorflow as tf
import numpy as np

# Single seed shared by every source of randomness in the notebook
# (NumPy, TensorFlow and, further down, the Keras directory iterators).
SEED = 1234
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [2]:
# Run this cell only if you are using Colab with Drive.
# Mounts Google Drive at /content/drive; later cells read data.zip from it
# and write the logs, the saved model and the results CSV back to it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
%%capture
# Extract the dataset archive stored on Drive; %%capture hides unzip's output.
# No -d option is given, so files are extracted relative to the current
# working directory (later cells look for a `data` folder there).
!unzip "/content/drive/My Drive/data.zip"

In [4]:
# ImageDataGenerator
# ------------------

from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Toggle random augmentation of the training images.
apply_data_augmentation = True

# NOTE: no `rescale=1./255` here on purpose. The Keras EfficientNet models
# embed their own Rescaling/Normalization layers (see the backbone summary)
# and expect raw pixel values in the [0, 255] range; rescaling in the
# generator as well would shrink the inputs twice.

# Create training ImageDataGenerator object
if apply_data_augmentation:
    train_data_gen = ImageDataGenerator(rotation_range=10,       # degrees
                                        width_shift_range=10,    # integer => pixels
                                        height_shift_range=10,   # integer => pixels
                                        zoom_range=0.3,
                                        horizontal_flip=True,
                                        vertical_flip=True,
                                        fill_mode='constant',    # pad uncovered areas...
                                        cval=0)                  # ...with black
else:
    train_data_gen = ImageDataGenerator()

# Create validation and test ImageDataGenerator object.
# It never augments, so evaluation and inference see the images unchanged.
valid_data_gen = ImageDataGenerator()


In [5]:
# Create generators to read images from dataset directory
# -------------------------------------------------------
# The dataset is expected under <current working directory>/data, with one
# sub-folder per split and, inside each split, one sub-folder per class
# (the layout `flow_from_directory` reads).
cwd = os.getcwd()
dataset_dir = os.path.join(cwd, 'data')

# Batch size
bs = 16

# Image shape fed to the network
img_h = 256
img_w = 256

num_classes = 3

# Training: shuffled, one-hot labels.
# `target_size` is passed explicitly so the generators always agree with
# img_h / img_w instead of silently relying on the Keras default of (256, 256).
training_dir = os.path.join(dataset_dir, 'training')
train_gen = train_data_gen.flow_from_directory(training_dir,
                                               target_size=(img_h, img_w),
                                               batch_size=bs,
                                               class_mode='categorical',
                                               shuffle=True,
                                               seed=SEED)

# Validation: fixed order, one-hot labels.
validation_dir = os.path.join(dataset_dir, 'validation')
valid_gen = valid_data_gen.flow_from_directory(validation_dir,
                                               target_size=(img_h, img_w),
                                               batch_size=bs,
                                               class_mode='categorical',
                                               shuffle=False,
                                               seed=SEED)



Found 5052 images belonging to 3 classes.
Found 562 images belonging to 3 classes.


In [6]:
# Wrap the Keras iterators in tf.data pipelines
# ---------------------------------------------
size = (256, 256)

# Every batch is a pair (images, one-hot labels), both float32.
batch_types = (tf.float32, tf.float32)
batch_shapes = ([None, img_h, img_w, 3], [None, num_classes])


def _resize_batch(batch_images, batch_labels):
    """Resize the images of a batch to `size`; labels pass through untouched."""
    return tf.image.resize(batch_images, size), batch_labels


# Training pipeline: generator -> resize -> repeat indefinitely
train_dataset = (
    tf.data.Dataset.from_generator(lambda: train_gen,
                                   output_types=batch_types,
                                   output_shapes=batch_shapes)
    .map(_resize_batch)
    .repeat()
)

# Validation pipeline: same steps, fed by the validation iterator
valid_dataset = (
    tf.data.Dataset.from_generator(lambda: valid_gen,
                                   output_types=batch_types,
                                   output_shapes=batch_shapes)
    .map(_resize_batch)
    .repeat()
)



In [7]:
# Pre-trained backbone: EfficientNetB5 with ImageNet weights and without its
# classification head (include_top=False), built for (img_h, img_w, 3) inputs.
EfficientNetB5 = tf.keras.applications.EfficientNetB5(weights='imagenet', include_top=False, input_shape=(img_h, img_w, 3))

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb5_notop.h5


In [None]:
# Print the layer-by-layer table of the backbone (layer types, output shapes, parameter counts).
EfficientNetB5.summary()

Model: "efficientnetb5"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 256, 256, 3) 0                                            
__________________________________________________________________________________________________
rescaling (Rescaling)           (None, 256, 256, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
normalization (Normalization)   (None, 256, 256, 3)  7           rescaling[0][0]                  
__________________________________________________________________________________________________
stem_conv_pad (ZeroPadding2D)   (None, 257, 257, 3)  0           normalization[0][0]              
_____________________________________________________________________________________

In [8]:
# Create Model
# ------------
# EfficientNetB5 backbone followed by a small classification head.

# True  -> freeze only the first `freeze_until` backbone layers, train the rest
# False -> freeze the whole backbone and train only the head
finetuning = True

if finetuning:
    freeze_until = 18 # layer from which we want to fine-tune

    for layer in EfficientNetB5.layers[:freeze_until]:
        layer.trainable = False
else:
    # Freeze the backbone that is actually used by this notebook
    # (the previous reference to `xception` raised a NameError).
    EfficientNetB5.trainable = False

model = tf.keras.Sequential()
model.add(EfficientNetB5)
model.add(tf.keras.layers.SeparableConv2D(filters=3,kernel_size=(3,3)))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.ReLU())
model.add(tf.keras.layers.AveragePooling2D())
# NOTE(review): this Dense layer comes before Flatten, so it is applied to the
# last axis of the pooled feature map (output (None, 3, 3, 512) in the summary).
# Confirm this ordering is intended.
model.add(tf.keras.layers.Dense(units=512, activation=tf.keras.activations.relu, kernel_regularizer=tf.keras.regularizers.l2(0.001)))
model.add(tf.keras.layers.Dropout(0.05))
model.add(tf.keras.layers.Flatten())
# One softmax output per class; `num_classes` is defined with the generators.
model.add(tf.keras.layers.Dense(units=num_classes, activation='softmax'))

# Visualize created model as a table
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
efficientnetb5 (Functional)  (None, 8, 8, 2048)        28513527  
_________________________________________________________________
separable_conv2d (SeparableC (None, 6, 6, 3)           24579     
_________________________________________________________________
batch_normalization (BatchNo (None, 6, 6, 3)           12        
_________________________________________________________________
re_lu (ReLU)                 (None, 6, 6, 3)           0         
_________________________________________________________________
average_pooling2d (AveragePo (None, 3, 3, 3)           0         
_________________________________________________________________
dense (Dense)                (None, 3, 3, 512)         2048      
_________________________________________________________________
dropout (Dropout)            (None, 3, 3, 512)         0

In [9]:
# Training configuration
# ----------------------

# Optimizer: Adam with a fixed initial learning rate
lr = 5e-4
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

# Loss: categorical cross-entropy (the generators yield one-hot labels)
loss = tf.keras.losses.CategoricalCrossentropy()

# Metrics reported for training and validation
metrics = ['accuracy']

# Compile Model
model.compile(loss=loss, metrics=metrics, optimizer=optimizer)

In [10]:
import os
from datetime import datetime



# Experiment folders and training callbacks
# -----------------------------------------
# Layout: <logs>/EfficientNetB5/EfficientNetB5_<timestamp>/{ckpts,tb_logs}

# Root folder for the logs on the mounted Drive.
# NOTE: this rebinds `cwd`, which an earlier cell set to the working directory.
cwd = os.path.join(os.getcwd(),"drive/My Drive/logs")

model_name = 'EfficientNetB5'
exps_dir = os.path.join(cwd, 'EfficientNetB5')

# Timestamp so that every run gets its own experiment folder
now = datetime.now().strftime('%b%d_%H-%M-%S')
exp_dir = os.path.join(exps_dir, model_name + '_' + str(now))

ckpt_dir = os.path.join(exp_dir, 'ckpts')
tb_dir = os.path.join(exp_dir, 'tb_logs')

# makedirs creates the missing parents (exps_dir, exp_dir) as well, and
# exist_ok=True makes the cell safe to re-run without a check-then-create race.
for directory in (ckpt_dir, tb_dir):
    os.makedirs(directory, exist_ok=True)

callbacks = []

# Model checkpoint
# ----------------
ckpt_callback = tf.keras.callbacks.ModelCheckpoint(filepath=os.path.join(ckpt_dir, 'cp.ckpt'),
                                                   save_weights_only=True)  # False to save the model directly
callbacks.append(ckpt_callback)

# Visualize Learning on Tensorboard
# ---------------------------------
# By default shows losses and metrics for both training and validation
tb_callback = tf.keras.callbacks.TensorBoard(log_dir=tb_dir,
                                             profile_batch=0,
                                             histogram_freq=1)  # if 1 shows weights histograms
callbacks.append(tb_callback)

# Early Stopping
# --------------
# Both callbacks watch val_loss: the learning rate is reduced after 5 epochs
# without improvement, training stops after 7 and the best weights are restored.
early_stop = True
if early_stop:
    es_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=7, restore_best_weights=True)
    callbacks.append(es_callback)
    reduce_rl_callback = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=5)
    callbacks.append(reduce_rl_callback)



In [11]:
# Upper bound on the number of epochs; early stopping may end training sooner.
epochs = 100

# The datasets repeat forever, so the length of one epoch is given explicitly
# as the number of batches in each directory iterator.
train_steps = len(train_gen)
valid_steps = len(valid_gen)

model.fit(train_dataset,
          epochs=epochs,
          steps_per_epoch=train_steps,
          validation_data=valid_dataset,
          validation_steps=valid_steps,
          callbacks=callbacks)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100


<tensorflow.python.keras.callbacks.History at 0x7f04f5a9c748>

In [13]:
# Save the trained model to the mounted Drive (the .h5 extension selects Keras' HDF5 format).
model.save('/content/drive/My Drive/efficientNetB5_new2.h5')

In [12]:
import pandas as pd
from datetime import datetime

def create_csv(results, results_dir='/content/drive/My Drive/'):
    """Write the predictions to a timestamped CSV file.

    The file is named ``results_EffNetB5_new2<timestamp>.csv`` and is created
    inside ``results_dir``. It holds an ``Id,Category`` header followed by one
    ``<key>,<value>`` line per entry of ``results``.

    Args:
        results: mapping from image id (str) to predicted category; the
            category is converted with ``str()`` before being written.
        results_dir: existing directory in which the CSV is created.
    """
    timestamp = datetime.now().strftime('%b%d_%H-%M-%S')
    out_path = os.path.join(results_dir, 'results_EffNetB5_new2' + timestamp + '.csv')

    with open(out_path, 'w') as out_file:
        out_file.write('Id,Category\n')
        out_file.writelines(
            image_id + ',' + str(category) + '\n'
            for image_id, category in results.items()
        )


# Predict the class of every test image and export the results
# -------------------------------------------------------------
import ntpath

test_dir = os.path.join(dataset_dir, 'test')

# One row per file found in the test folder (sorted for a stable order).
test_df = pd.DataFrame({'filename': sorted(os.listdir(test_dir))})

# Use the non-augmenting generator: inference must see the images unchanged,
# not randomly rotated/shifted/flipped as `train_data_gen` would do.
# class_mode=None makes the iterator yield images only, so no dummy label
# column is needed.
test_gen = valid_data_gen.flow_from_dataframe(test_df,
                                              test_dir,
                                              x_col='filename',
                                              class_mode=None,
                                              target_size=(img_h, img_w),
                                              batch_size=bs,
                                              shuffle=False)  # keep predictions aligned with filenames

test_gen.reset()

# `steps` must be passed by keyword: the second positional parameter of
# Model.predict is `batch_size`, not the number of steps.
predictions = model.predict(test_gen, steps=len(test_gen), verbose=1)

# Map each image file name to the index of its most probable class.
images = test_gen.filenames
results = {
    ntpath.basename(image_path): str(np.argmax(class_probs))
    for image_path, class_probs in zip(images, predictions)
}

create_csv(results)

Found 450 validated image filenames belonging to 1 classes.
