In [None]:
import os
import pandas as pd
import numpy as np
import cv2
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import warnings
from tqdm import tqdm
import pickle
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

from process.utils import load_history_graph

warnings.filterwarnings("ignore")

In [None]:
# Directory layout, resolved against the working directory at the time this
# cell runs: data/ (with train/ and test/ inside it) and models/.
PATHS = {
    key: os.path.join(os.getcwd(), *parts)
    for key, parts in (
        ('data', ('data',)),
        ('models', ('models',)),
        ('train', ('data', 'train')),
        ('test', ('data', 'test')),
    )
}


def load_datafile_path(file: str) -> str:
    """Return the path of `file` inside the data directory (nothing is read)."""
    return os.path.join(PATHS['data'], file)


def load_modelfile_path(file: str) -> str:
    """Return the path of `file` inside the models directory (nothing is read)."""
    return os.path.join(PATHS['models'], file)


def load_train_image_path(file: str) -> str:
    """Return the path of `file` inside the train image directory (nothing is read)."""
    return os.path.join(PATHS['train'], file)


def load_test_image_path(file: str) -> str:
    """Return the path of `file` inside the test image directory (nothing is read)."""
    return os.path.join(PATHS['test'], file)

In [None]:
# Fixed seed for the row shuffle so that a fresh "Restart & Run All" produces
# the same row order every time.
SHUFFLE_SEED = 42

train_info = pd.read_feather(load_datafile_path('train.ftr'))
# Keep only rows whose 'year' is below 2012.
train_info = train_info[train_info['year'] < 2012]
# frac=1 draws every row exactly once, i.e. a full shuffle of the frame.
train_info = (
    train_info
    .sample(frac=1, random_state=SHUFFLE_SEED)
    .reset_index(drop=True)
)

# Plain Python list (not a Series) so that more labels can be appended later.
labels = train_info['label'].to_list()

# Load images

In [None]:
# Only the file name of each 'example_path' entry is used; the image itself is
# read from the local train directory.
images_paths = train_info['example_path']
images_names = [p.split('/')[-1] for p in images_paths]

images = []
for name in tqdm(images_names):
    img = cv2.imread(load_train_image_path(name))
    # cv2.imread reports a missing or undecodable file by returning None rather
    # than raising, so fail here and name the offending file.
    assert img is not None, f"could not read image: {load_train_image_path(name)}"
    assert img.shape == (332, 332, 3), f"unexpected shape {img.shape} for {name}"
    images.append(img)

In [None]:
# NOTE: pickle.load can execute arbitrary code embedded in the file; only load
# an 'augmented_data' file that comes from a trusted source.
with open(load_datafile_path('augmented_data'), 'rb') as file:
    data_aug = pickle.load(file)

aug_images = data_aug['images']
aug_labels = data_aug['labels']

# Images and labels are paired by position; a length mismatch would leave
# `images` and `labels` out of step, so check before touching either list.
assert len(aug_images) == len(aug_labels), (
    f"augmented data mismatch: {len(aug_images)} images vs {len(aug_labels)} labels"
)
for img in aug_images:
    assert img.shape == (332, 332, 3), f"unexpected augmented image shape {img.shape}"

# Appends in place: re-running this cell without re-running the loading cells
# adds the augmented samples a second time.
images.extend(aug_images)
labels.extend(aug_labels)

In [None]:
# Number of distinct labels among the filtered training rows; labels that only
# occur in the augmented data are not counted here.
# NOTE(review): a later cell rewrites label 2 to 1 in `labels`, but this count
# is taken from `train_info` before that merge — confirm the output size is intended.
class_num = train_info['label'].nunique()

# Every image was asserted to have the same shape, so the first is representative.
img_height, img_width = images[0].shape[:2]

# Model's data

In [None]:
total_count = len(images)
# Number of samples, taken from the end of the list, held out for validation.
val_count = 500

# Fold label 2 into label 1, editing the list in place.
for idx, label in enumerate(labels):
    if label == 2:
        labels[idx] = 1

In [None]:
# NOTE(review): the validation slice is the tail of `images`, which is where the
# augmentation cell appended its samples — confirm that this split is intended.
split_index = total_count - val_count
train_images = np.array(images[:split_index])
val_images = np.array(images[split_index:])
assert len(train_images) + len(val_images) == total_count

In [None]:
# Split the labels at the same position as the images so the pairs stay aligned.
split_index = total_count - val_count
train_labels = np.array(labels[:split_index])
val_labels = np.array(labels[split_index:])
assert len(train_labels) + len(val_labels) == total_count

# Model

In [None]:
# Small CNN: pixel rescaling, two conv/pool stages, then a dense head.
# The final Dense layer has no activation, so the model outputs raw logits.
model_layers = [
    layers.Rescaling(scale=1./255, input_shape=(img_height, img_width, 3)),
    layers.Conv2D(filters=8, kernel_size=3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(filters=16, kernel_size=3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Flatten(),
    layers.Dense(units=32, activation='relu'),
    layers.Dense(units=class_num),
]
model = Sequential(model_layers)

In [None]:
# Stop training once val_loss has failed to improve by at least `min_delta`
# for `patience` consecutive epochs. Improvement is not monitored before
# `start_from_epoch`, and the best weights seen are restored on stop.
stopping_options = dict(
    monitor='val_loss',
    mode='auto',
    baseline=None,
    min_delta=0.1,
    patience=5,
    start_from_epoch=10,
    restore_best_weights=True,
    verbose=1,
)
early_stopping_callback = tf.keras.callbacks.EarlyStopping(**stopping_options)

In [None]:
# from_logits=True matches the model's final Dense layer, which applies no
# softmax. The sparse variant takes integer class ids as targets, not one-hot.
logit_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=logit_loss, metrics=['accuracy'])

In [None]:
# Upper bound on training length; the early-stopping callback may end sooner.
epochs = 50
batch_size = 32

history = model.fit(
    x=train_images,
    y=train_labels,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(val_images, val_labels),
    callbacks=[early_stopping_callback],
)

In [None]:
# Per-epoch metric curves recorded by model.fit.
curves = history.history
acc, val_acc = curves['accuracy'], curves['val_accuracy']
loss, val_loss = curves['loss'], curves['val_loss']
# All four curves must cover the same number of epochs.
assert len({len(acc), len(val_acc), len(loss), len(val_loss)}) == 1

# Build the training-history figure with the project helper and display it.
fig = load_history_graph(acc, val_acc, loss, val_loss)
fig.show()

In [None]:
# Persist the trained model as 'augmented_3k' inside the models directory.
# NOTE(review): the target has no file extension; whether that is accepted (and
# which format is written) depends on the installed Keras version — confirm.
model_target = load_modelfile_path('augmented_3k')
model.save(model_target)