In [None]:
import os
import pandas as pd
import numpy as np
import cv2
import matplotlib.pyplot as plt
import warnings
from tqdm import tqdm

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

# Install a blanket "ignore" filter so no Python warnings are shown from here on.
# NOTE(review): this also hides deprecation warnings raised by the imported
# libraries — confirm that is intended.
warnings.filterwarnings("ignore")

In [None]:
# Dataset folders, resolved against the current working directory.
PATHS = {
    name: os.path.join(os.getcwd(), *segments)
    for name, segments in {
        'data': ('data',),
        'train': ('data', 'train'),
        'test': ('data', 'test'),
    }.items()
}


def load_datafile_path(file: str) -> str:
    """Return the path of `file` inside the top-level data folder."""
    return os.path.join(PATHS['data'], file)


def load_train_image_path(file: str) -> str:
    """Return the path of `file` inside the training-images folder."""
    return os.path.join(PATHS['train'], file)


def load_test_image_path(file: str) -> str:
    """Return the path of `file` inside the test-images folder."""
    return os.path.join(PATHS['test'], file)

In [None]:
# Load the training metadata, keep only rows with year < 2012, and shuffle them.
# The fixed random_state makes the shuffle reproducible across kernel restarts;
# this matters because the train/validation split below is taken from row order.
train_info = (
    pd.read_feather(load_datafile_path('train.ftr'))
    .loc[lambda df: df['year'] < 2012]
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)  # restore a 0..n-1 index after shuffling
)


# Load images

In [None]:
# Peek at the last '/'-separated component of the first example path.
train_info.loc[0, 'example_path'].rsplit('/', 1)[-1]

In [None]:
# Keep only the final '/'-separated component (the file name) of every example path.
images_paths = train_info['example_path']
images_names = [path.rsplit('/', 1)[-1] for path in images_paths]

In [None]:
# Read every training image from disk, in the same order as `images_names`.
images = [cv2.imread(load_train_image_path(name)) for name in tqdm(images_names)]

# cv2.imread reports a missing or unreadable file by returning None instead of
# raising, so fail fast here rather than with an obscure error further down.
unreadable = [name for name, img in zip(images_names, images) if img is None]
if unreadable:
    raise FileNotFoundError(
        f"cv2.imread could not read {len(unreadable)} image(s), e.g. {unreadable[:5]}"
    )

In [None]:
# Pick a random image to preview. np.random.randint's upper bound is exclusive,
# so `len(images)` already covers the valid indices 0 .. len(images) - 1
# (the previous `+ 1` could produce an out-of-range index).
img_i = np.random.randint(0, len(images))

print(f"Label = {train_info['label'][img_i]}")
# OpenCV loads colour images in BGR order while matplotlib expects RGB;
# convert for display only, leaving `images` untouched.
plt.imshow(cv2.cvtColor(images[img_i], cv2.COLOR_BGR2RGB))

In [None]:
# Number of distinct labels, and the height/width taken from the first image
# (the first two entries of its shape).
class_num = train_info['label'].nunique()
img_height, img_width = images[0].shape[:2]

# Model's data

In [None]:
# Dataset size and the number of trailing samples held out for validation.
total_count = len(images)
val_count = 100
# Collapse each label to 1 when it is truthy and 0 otherwise.
labels = train_info['label'].apply(lambda flag: int(bool(flag))).to_numpy()

In [None]:
# The last `val_count` images form the validation set; everything before them
# is used for training.
train_images, val_images = (
    np.array(part)
    for part in (images[:total_count - val_count], images[total_count - val_count:])
)
assert total_count == len(val_images) + len(train_images)

In [None]:
# Split the labels at the same position as the images so the pairs stay aligned.
train_labels, val_labels = (
    np.array(chunk)
    for chunk in (labels[:total_count - val_count], labels[total_count - val_count:])
)
assert total_count == len(val_labels) + len(train_labels)

# Model

In [None]:
# Small CNN classifier: pixel rescaling, three conv + max-pool stages, then a
# dense head that outputs one raw logit per class (no softmax applied here).
model = Sequential([
  # Declare the input shape with an explicit Input layer; recent Keras versions
  # warn when `input_shape` is passed to a regular layer instead.
  layers.Input(shape=(img_height, img_width, 3)),
  layers.Rescaling(1./255),  # scale pixel values by 1/255
  layers.Conv2D(16, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(32, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(64, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Flatten(),
  layers.Dense(128, activation='relu'),
  layers.Dense(class_num)  # logits; the loss must be configured with from_logits=True
])

In [None]:
# Configure training: Adam optimizer, sparse (integer-label) categorical
# cross-entropy computed on raw logits, and accuracy as the reported metric.
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Training hyper-parameters.
epochs = 1000
batch_size = 32

# Fit on the training split, with the held-out split passed as validation data.
# `batch_size` is passed through so the setting above actually drives training;
# previously a hard-coded 10 silently overrode it.
history = model.fit(
  train_images,
  train_labels,
  validation_data=(val_images, val_labels),
  epochs=epochs,
  batch_size=batch_size
)

In [None]:
# Per-epoch metrics recorded by model.fit().
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Take the x-axis from the recorded history rather than the `epochs` setting,
# so the plot still lines up if `epochs` was edited after the fit cell ran or
# training produced fewer entries than requested.
epochs_range = range(len(acc))

fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(10, 5))

acc_ax.plot(epochs_range, acc, label='Training Accuracy')
acc_ax.plot(epochs_range, val_acc, label='Validation Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend(loc='lower right')
acc_ax.set_title('Training and Validation Accuracy')

loss_ax.plot(epochs_range, loss, label='Training Loss')
loss_ax.plot(epochs_range, val_loss, label='Validation Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend(loc='upper right')
loss_ax.set_title('Training and Validation Loss')
plt.show()
