In [None]:
# Basic python packages
import os
from os import listdir
from os.path import isfile, join
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
from collections import defaultdict
import glob

# General machine learning packages
from sklearn.model_selection import train_test_split

# Packages related to images
from PIL import Image
import PIL

# Packages for neural networks
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from keras.layers import Activation, Dense, Dropout, Conv2D, MaxPooling2D, Flatten, Embedding
from keras.layers import Dense, GlobalAveragePooling2D, Convolution2D, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from tensorflow.keras.models import Model

In [None]:
# Locations of the image folders and the label file.
image_dir = "../input/amldata/train_set/train_set"
test_image_dir = "../input/amldata/test_set"
labels_file = "../input/amldata/train_labels.csv"
training_path = '../input/amldata/training_data/'
validation_path = '../input/amldata/validation_data/'

# Input geometry and batching shared by the generators and the networks.
img_size = (250, 250)       # passed as target_size to every generator
IMG_SHAPE = (*img_size, 3)  # img_size with a trailing axis of size 3
batch_size = 32
n_labels = 80               # number of outputs of the final softmax layer

# Label table read from labels_file.
labels = pd.read_csv(labels_file)

In [None]:
def normalize(image):
    """
    Linearly rescales pixel values so that 0 maps to -1 and 255 maps to 1.

    This function is plugged into ImageDataGenerator as `preprocessing_function`.
    Keras documents that hook as receiving one image as a NumPy array and
    returning a NumPy array of the same shape, so the arithmetic is done in
    NumPy instead of returning a TensorFlow tensor.

    :image: Array-like holding the pixel values of one image
    :return: float32 NumPy array with the same shape, equal to image / 127.5 - 1
    """
    # asarray makes a float32 copy for other dtypes; the input is never modified.
    image = np.asarray(image, dtype=np.float32)
    # float32 constants keep the result float32 for every input rank.
    return image / np.float32(127.5) - np.float32(1)

# Training images are randomly augmented; `normalize` is the preprocessing hook.
train_datagen = ImageDataGenerator(
    preprocessing_function=normalize,
    # Dataset-/sample-level centering, std scaling and ZCA whitening all stay off.
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    # Geometric augmentation.
    rotation_range=5,        # random rotation range, in degrees
    shear_range=2,           # random shear intensity
    zoom_range=[.8, 1],      # random zoom range given as [lower, upper]
    width_shift_range=0.2,   # random horizontal shift (fraction of total width)
    height_shift_range=0.2,  # random vertical shift (fraction of total height)
    horizontal_flip=True,    # random horizontal flips
    vertical_flip=True,      # random vertical flips
    fill_mode='reflect',     # how points outside the input boundaries are filled
    # Colour augmentation.
    brightness_range=[0.6,1.3],
    channel_shift_range=30,
)

# Validation and test images only go through `normalize`, with no augmentation.
test_datagen = ImageDataGenerator(preprocessing_function=normalize)

# One-hot labelled batches for fitting.
train_generator = train_datagen.flow_from_directory(
    training_path,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
)

# One-hot labelled, shuffled batches for validation.
validation_generator = test_datagen.flow_from_directory(
    validation_path,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=True,
)

# Unshuffled batches, so predictions line up with test_generator.filenames.
test_generator = test_datagen.flow_from_directory(
    test_image_dir,
    target_size=img_size,
    batch_size=batch_size,
    shuffle=False,
)

In [None]:
def train_model(model, steps_per_epoch=150, epochs=3, validation_steps=20, workers=7, checkpoint_loc=""):
    """
    Trains a given model on `train_generator`, validating on `validation_generator`.

    A ModelCheckpoint callback saves the model's weights to `checkpoint_loc`, and
    an EarlyStopping callback stops training once the training loss has not
    improved for 5 epochs. The elapsed wall-clock time is printed after fitting.

    :model: Compiled Keras model to fit
    :steps_per_epoch: Amount of batches uploaded per epoch. Can't be higher than +- 200
    :epochs: Maximum amount of times the model trains on the data
    :validation_steps: Amount of batches used for validation. Can't be higher than +- 50
    :workers: Amount of processes used to load the data
    :checkpoint_loc: Place for the model checkpoints to be saved
    :return: Tuple (model, history) where history is the object returned by `model.fit`
    """
    # Weights-only checkpoints. The monitored name matches the 'accuracy' metric
    # the model is compiled with ('acc' is not a key Keras logs for it), so the
    # callback stays valid if save_best_only is ever switched on.
    checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_loc,
                                                       monitor='accuracy',
                                                       save_weights_only=True)
    # Note: this watches the training loss, not the validation loss.
    early_stopping_cb = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=5)

    begin_time = datetime.datetime.now()
    history = model.fit(train_generator,
                        steps_per_epoch=steps_per_epoch,
                        epochs=epochs,
                        validation_data=validation_generator,
                        validation_steps=validation_steps,
                        workers=workers,
                        callbacks=[checkpoint_cb, early_stopping_cb],
                        verbose=1)
    print(datetime.datetime.now() - begin_time)
    return (model, history)

In [None]:
# Poster-sized plotting context for every figure in the notebook.
sns.set_context("poster")
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and later
# releases removed the old names, so use whichever spelling this install provides.
plt.style.use('seaborn-poster' if 'seaborn-poster' in plt.style.available
              else 'seaborn-v0_8-poster')
    
def plot_history(history, file_name):
    """
    Draws training and validation accuracy per epoch, saves the figure to
    `file_name` and then displays it.

    :history: Object whose `history` dict holds 'accuracy' and 'val_accuracy'
    :file_name: Path handed to `plt.savefig`
    """
    for metric in ('accuracy', 'val_accuracy'):
        plt.plot(history.history[metric], label=metric, linewidth=10)

    plt.xlabel('Epoch', fontsize=50)
    plt.ylabel('Accuracy', fontsize=50)
    plt.xticks(fontsize=50)
    plt.yticks(fontsize=50)
    plt.legend(loc='lower right', prop={'size': 40})

    plt.savefig(file_name)
    plt.show()
     
def plot_multiple_histories(histories, labels, file_name):
    """
    Plots metrics from several training runs as continuous curves.

    For each key in `labels`, the values stored under that key in every
    history are joined end to end, in the order given, and drawn as one line.
    Keys starting with 'val' appear in the legend as "validation <rest>".
    The figure is saved to `file_name` and then displayed.

    :histories: Sequence of objects whose `history` dict holds the keys in `labels`
    :labels: Keys to plot, e.g. 'accuracy' or 'val_accuracy'
    :file_name: Path handed to `plt.savefig`
    """
    for key in labels:
        # Concatenate the per-run lists with `+`, starting from an empty list.
        series = sum((run.history[key] for run in histories), [])
        legend_name = "validation " + key[4:] if key[:3] == 'val' else key
        plt.plot(series, label=legend_name, linewidth=10)

    plt.xlabel('Epoch', fontsize=50)
    plt.ylabel('Accuracy', fontsize=50)
    plt.xticks(fontsize=50)
    plt.yticks(fontsize=50)
    plt.legend(loc='lower right', prop={'size': 40})

    plt.savefig(file_name)
    plt.show()

In [None]:
def plot_loss(history, file_name):
    """
    Draws training and validation loss per epoch, saves the figure to
    `file_name` and then displays it.

    :history: Object whose `history` dict holds 'loss' and 'val_loss'
    :file_name: Path handed to `plt.savefig`
    """
    for metric in ('loss', 'val_loss'):
        plt.plot(history.history[metric], label=metric, linewidth=10)

    plt.xlabel('Epoch', fontsize=50)
    plt.ylabel('Loss', fontsize=50)
    plt.xticks(fontsize=50)
    plt.yticks(fontsize=50)
    plt.legend(loc='upper right', prop={'size': 40})

    plt.savefig(file_name)
    plt.show()

In [None]:
def make_model(n_labels):
    """
    Builds and compiles an InceptionV3-based classifier with `n_labels` outputs.

    The ImageNet-pretrained InceptionV3 network (without its top) is used as
    the backbone: its first 249 layers are frozen and the remaining ones are
    left trainable. A new head of global average pooling, batch normalisation,
    dropout and dense layers ends in a softmax over `n_labels` classes.

    :n_labels: Number of output classes
    :return: The compiled Keras model
    """
    backbone = tf.keras.applications.InceptionV3(input_shape=IMG_SHAPE, weights='imagenet', include_top=False)

    # Freeze everything below layer index 249 and fine-tune the layers above it.
    for index, layer in enumerate(backbone.layers):
        layer.trainable = index >= 249

    # Classification head.
    x = backbone.output
    x = GlobalAveragePooling2D()(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    x = Dense(1024, activation='relu')(x)
    x = Dense(512, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    pred = Dense(n_labels, activation='softmax')(x)
    model = Model(inputs=backbone.input, outputs=pred)

    # The eight layers added above form the head; make sure they are trainable.
    for layer in model.layers[-8:]:
        layer.trainable = True

    # The head is a softmax trained on one-hot targets (the generators use
    # class_mode='categorical'), so the matching loss is categorical
    # cross-entropy. Binary cross-entropy would instead score each of the
    # n_labels outputs as an independent yes/no decision, and on older Keras
    # versions it also makes the 'accuracy' metric resolve to binary accuracy.
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
                  loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1),
                  metrics=['accuracy'])

    return model

# Build the network, then restore weights from "./Model_weights/" so training
# continues from a previous checkpoint.
# NOTE(review): load_weights presumably fails when no checkpoint has been
# written to "./Model_weights/" yet (e.g. on a first run) — confirm.
model = make_model(n_labels)
model.load_weights("./Model_weights/")
# model.summary()

In [None]:
# Train for up to 30 epochs of 150 batches each, validating on 50 batches and
# checkpointing the weights to ./Model_weights/.
model, history = train_model(model, steps_per_epoch=150, epochs=30, validation_steps=50, checkpoint_loc="./Model_weights/")

In [None]:
# Plot the run returned by train_model; `history60` is never defined in this
# notebook, so referencing it fails on a fresh kernel.
plot_history(history, 'InceptionV3_accuracy.png')

In [None]:
# Plot the run returned by train_model; `history60` is never defined in this
# notebook, so referencing it fails on a fresh kernel.
plot_loss(history, 'InceptionV3_loss.png')

In [None]:
def predict_test(model):
    """
    Predicts a class for every image yielded by `test_generator`.

    The highest-scoring output index of each prediction is translated back to
    a class name through `train_generator.class_indices`.

    :model: Trained model whose `predict` returns one score per class for each image
    :return: DataFrame with columns 'img_name' (file name without its directories)
             and 'label' (predicted class name)
    """
    preds = model.predict(test_generator)
    preds_cls_idx = preds.argmax(axis=-1)

    # class_indices maps class name -> output index; invert it to decode predictions.
    idx_to_cls = {v: k for k, v in train_generator.class_indices.items()}

    # test_generator is created with shuffle=False, so filenames and predictions
    # are paired positionally. basename strips the directory part using the
    # platform's own path separator instead of assuming "/".
    rows = [
        (os.path.basename(file_name), idx_to_cls.get(int(cls_idx)))
        for file_name, cls_idx in zip(test_generator.filenames, preds_cls_idx)
    ]
    return pd.DataFrame(rows, columns=['img_name', 'label'])

# Optionally restore the checkpointed weights before predicting:
# model.load_weights("./Model_weights/")
# Predict the test set and write the result to the submission CSV (no index column).
res = predict_test(model)
res.to_csv("submission2.csv", index=False)