In [None]:
import datetime
import tensorflow as tf
import keras
import pandas as pd
from pathlib import Path

from keras_preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.losses import CategoricalCrossentropy
from tensorflow.keras.optimizers import Adam
from keras.layers import Input, Conv2D, MaxPool2D, Flatten, Dense, Dropout

# Raise the threshold of TensorFlow's Python logger so that only warnings and above are emitted.
# TODO Find a way to turn off that red debugging spam from tensorflow, this does not work
# NOTE(review): the remaining output presumably comes from TensorFlow's native (C++) logging,
# which is controlled by the TF_CPP_MIN_LOG_LEVEL environment variable and has to be set
# before `import tensorflow` runs -- confirm.
tf.get_logger().setLevel('WARN')

In [None]:
'''
Global parameters (image size matching the 600x600 EfficientNetB7 input used below),
data-augmentation settings, batch generators and the prediction helper.
'''

# Global params and constants
WIDTH = 600
HEIGHT = 600
BATCH_SIZE = 50
EPOCHS = 2
TRAIN_IMAGES_PATH = r'./dataset/train_set_labelled'
TEST_IMAGES_PATH = r'./dataset/test_set'
TRAIN_LABELS_PATH = r'./dataset/train_labels.csv'
PREDICTIONS_PATH = r'predictions.csv'
# Number of training images: every *.jpg found anywhere below the training directory.
NUM_EXAMPLES = sum(1 for _ in Path(TRAIN_IMAGES_PATH).rglob('*.jpg'))
# Number of classes: one sub-directory per class. Only directories are counted so that
# stray files next to the class folders (e.g. .DS_Store, Thumbs.db) do not inflate the count.
NUM_CLASSES = sum(1 for entry in Path(TRAIN_IMAGES_PATH).iterdir() if entry.is_dir())
print(f'Num classes: {NUM_CLASSES}  num samples: {NUM_EXAMPLES}')

# Generators stream the images from disk in batches, so the whole dataset never has to
# fit in memory at once.
#
# No `rescale=1./255` here: the EfficientNet models in tf.keras.applications contain their
# own rescaling layer and expect raw pixel values in the [0, 255] range, so dividing by 255
# in the generator would scale the input twice.
#
# `validation_split` is kept identical in both generators so that the 'training' and
# 'validation' subsets requested from them refer to the same split of the directory.
train_generator = ImageDataGenerator(
    validation_split=0.2,
    # Random augmentation, applied to the training images only.
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest",
    # featurewise_center / featurewise_std_normalization are not enabled.
)
# Validation and test images are fed through unmodified (no augmentation).
val_generator = ImageDataGenerator(
    validation_split=0.2,
)

# `target_size` is documented by Keras as (height, width); the original passed
# (WIDTH, HEIGHT), which only worked because both constants are currently equal.
IMAGE_SIZE = (HEIGHT, WIDTH)

# Augmented, shuffled batches from the training part of the labelled directory.
train_gen = train_generator.flow_from_directory(
    directory=TRAIN_IMAGES_PATH,
    class_mode='categorical',
    batch_size=BATCH_SIZE,
    target_size=IMAGE_SIZE,
    shuffle=True,
    subset='training'
)
# Non-augmented batches from the validation part of the same directory.
validation_gen = val_generator.flow_from_directory(
    directory=TRAIN_IMAGES_PATH,
    class_mode='categorical',
    batch_size=BATCH_SIZE,
    target_size=IMAGE_SIZE,
    shuffle=True,
    subset='validation'
)
# Unlabelled test images (class_mode=None yields images only). No `subset` is requested,
# and shuffle=False keeps the batches in the same order as `test_gen.filenames`, which
# make_predictions() relies on to pair file names with predictions.
test_gen = val_generator.flow_from_directory(
    directory=TEST_IMAGES_PATH,
    class_mode=None,
    batch_size=BATCH_SIZE,
    target_size=IMAGE_SIZE,
    shuffle=False
)

def make_predictions(model: "keras.Model", test_gen: "keras_preprocessing.image.DirectoryIterator"):
    """
    Output a CSV with model's predictions on test set that is ready to be submitted to Kaggle.
    The file will be created in the current working directory, named 'predictions <current_time>.csv'.

    Args:
        model: Trained model; only its ``predict`` method is used. It must return one row of
            class scores per test image.
        test_gen: Batch iterator over the test images. It must expose ``filenames`` listing
            the images in the same order in which they are yielded (i.e. created with
            ``shuffle=False``).

    Returns:
        None. The result is written to disk as a CSV with the columns ``img_name`` (index)
        and ``label``.
    """
    # The iterator already yields complete batches, so no `batch_size` is passed to predict().
    predictions = model.predict(test_gen, verbose=True)
    # Get names of test files in the same order they were used for predictions. The paths are
    # relative to the test directory ("<subdir><sep><file>"); keep only the file name and
    # accept both '\\' (Windows) and '/' (POSIX) as separator.
    file_names = [name.replace('\\', '/').rsplit('/', 1)[-1] for name in test_gen.filenames]
    # Obtain final labels for predictions, add one since classes start from one
    # NOTE(review): this assumes model output index i corresponds to class label i + 1. Keras
    # orders class folders alphanumerically, so with folders named '1', '10', '11', ... the
    # mapping would differ -- verify against the training iterator's `class_indices`.
    predictions = predictions.argmax(axis=1) + 1
    result = pd.DataFrame({'img_name': file_names, 'label': predictions})
    result = result.set_index('img_name')
    # Save the CSV file to the current working directory
    result.to_csv(f'predictions {datetime.datetime.now().strftime("%d-%m-%Y %Hh %Mm %Ss")}.csv')


In [None]:
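'''
Load the pre-trained EfficientNetB7 backbone (without its classification head), freeze it,
add our own softmax classifier on top, then train the model and write the test predictions.
'''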
from tensorflow.keras.applications.efficientnet import EfficientNetB7
from keras.models import Model

# Pre-trained EfficientNetB7 used as a feature extractor: ImageNet weights, without the
# original classification head. The input shape is (height, width, channels) and is taken
# from the same constants the data generators use, so the two cannot drift apart.
effB7 = EfficientNetB7(input_shape=(HEIGHT, WIDTH, 3), include_top=False, weights="imagenet")

# to not train existing weights
for layer in effB7.layers:
    layer.trainable = False

# our layers - you can add more if you want
# The layers and the Model are taken from tf.keras, the same package the backbone comes from;
# NOTE(review): mixing standalone `keras` objects with a `tensorflow.keras` model is known to
# fail on some installations -- confirm this is no longer a concern for the pinned versions.
x = tf.keras.layers.Flatten()(effB7.output)

# One output unit per class folder found by the training iterator (instead of a hard-coded 80),
# so the head always matches the one-hot labels produced by `train_gen`.
pred = tf.keras.layers.Dense(train_gen.num_classes, activation='softmax')(x)

model = tf.keras.Model(inputs=effB7.input, outputs=pred)
model.summary()

# Multi-class, one-hot labels (class_mode='categorical') -> categorical cross-entropy.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the classification head. `Model.fit` accepts batch iterators directly;
# `fit_generator` is deprecated in favour of it.
# One epoch is a full pass over each iterator (len() of an iterator is its number of batches).
model.fit(
    train_gen,
    validation_data=validation_gen,
    steps_per_epoch=len(train_gen),
    validation_steps=len(validation_gen),
    epochs=EPOCHS,
    verbose=1,
)

# Write the Kaggle submission CSV for the test set.
make_predictions(model=model, test_gen=test_gen)