## Instructions

This notebook is meant to be run in Google Colab.

Steps to get this working:
1. Go to your Kaggle account settings and create a "New API Token", which downloads a `kaggle.json` file.
2. Upload `kaggle.json` into Colab under `/root/.kaggle` (create the directory if it does not exist; you need to toggle visibility of hidden directories in the file browser to see it).

In [None]:
# Restrict the API token to owner read/write only.
! chmod 600 /root/.kaggle/kaggle.json
# Download the competition archive; --force is passed so the download is not
# skipped when a local copy already exists.
! kaggle competitions download state-farm-distracted-driver-detection --force
# Extract into the working directory. The cells below expect this to produce
# ./imgs/train and ./imgs/test.
! unzip state-farm-distracted-driver-detection.zip

In [50]:
import os, csv

import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers 
from tensorflow.keras.applications.resnet50 import ResNet50

# Locations of the extracted competition images (relative to the working dir).
TRAIN_DIR = './imgs/train'
TEST_DIR = './imgs/test'

# Number of output classes; also the number of c0..c{N-1} columns in the CSV.
NUM_CLASSES = 10
IMG_SIZE = 224  # ResNet50's default ImageNet input shape is (224, 224, 3)
BATCH_SIZE = 64
NUM_EPOCH = 10
# Fraction of the training directory reserved for validation.
VALIDATION_SPLIT = 0.2
# Used as the Keras model name and as the base name of the output CSV.
MODEL_NAME = 'ResNet50'

In [58]:
def build_dataset():
    """Load the training subset and the unlabeled test set from disk.

    Returns:
        A ``(train_data, test_data)`` tuple of batched ``tf.data.Dataset``
        objects. ``train_data`` yields ``(images, one_hot_labels)`` for the
        'training' part of the ``VALIDATION_SPLIT`` split; ``test_data``
        yields images only, in a deterministic (unshuffled) order.
    """
    train_data = tf.keras.utils.image_dataset_from_directory(
        TRAIN_DIR,
        label_mode='categorical',
        image_size=(IMG_SIZE, IMG_SIZE),
        # Use the shared constant rather than a second hard-coded copy.
        batch_size=BATCH_SIZE,
        seed=0,  # Needed when using validation split
        validation_split=VALIDATION_SPLIT,
        subset='training',
    )
    test_data = tf.keras.utils.image_dataset_from_directory(
        TEST_DIR,
        labels=None,
        image_size=(IMG_SIZE, IMG_SIZE),
        batch_size=BATCH_SIZE,
        shuffle=False,  # Sorts alphabetically by filename
    )

    return (train_data, test_data)

In [54]:
# Random augmentation stage that build_model() places in front of the network.
img_augmentation = Sequential(name='img_augmentation')
img_augmentation.add(layers.RandomRotation(factor=0.15))
img_augmentation.add(layers.RandomContrast(factor=0.1))

def build_model(num_classes, weights=None):
    """Build and compile a ResNet50-based image classifier.

    Args:
        num_classes: Number of output classes.
        weights: Forwarded to ``ResNet50(weights=...)``. With ``None`` a
            complete ResNet50 (including its classification top) is created
            with random initialisation and trained end to end. With any other
            value the backbone is loaded with those weights, frozen, and a new
            trainable classification head is stacked on top.

    Returns:
        A compiled Keras model that takes ``(IMG_SIZE, IMG_SIZE, 3)`` images
        and outputs ``num_classes`` softmax probabilities.
    """
    inputs = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
    x = img_augmentation(inputs)

    if weights is None:
        # Training from scratch: let ResNet50 supply its own classifier top.
        model = ResNet50(include_top=True, input_tensor=x, weights=None,
                         classes=num_classes)
    else:
        # Using pretrained weights, so only the new top layers are trained.
        # NOTE(review): pretrained ImageNet weights are documented to expect
        # inputs passed through resnet50.preprocess_input, which is not
        # applied anywhere in this pipeline - confirm and add if needed.
        backbone = ResNet50(include_top=False, input_tensor=x, weights=weights)
        backbone.trainable = False  # Freeze layers

        # Add top layers (not frozen)
        x = layers.MaxPooling2D(pool_size=(2, 2))(backbone.output)
        x = layers.BatchNormalization()(x)
        x = layers.Dropout(0.2, name='top_dropout')(x)
        # Collapse the spatial dimensions; without this the Dense layer would
        # emit one prediction per spatial location, which does not match the
        # (batch, num_classes) one-hot labels.
        x = layers.Flatten()(x)
        outputs = layers.Dense(num_classes, activation='softmax',
                               name='prediction')(x)

        model = tf.keras.Model(inputs, outputs, name=MODEL_NAME)

    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-2)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    return model

In [60]:
from google.colab import files

def save_output_csv(filename, y_pred):
    """Write per-image class probabilities to ``<filename>.csv`` and download it.

    Rows are paired with the sorted entries of ``TEST_DIR``, so ``y_pred``
    must be in that same order.

    Args:
        filename: Output file name without the ``.csv`` extension.
        y_pred: Sequence of per-image score sequences, one per test image.

    Raises:
        ValueError: If the number of predictions differs from the number of
            entries in ``TEST_DIR``.
    """
    output_path = f'{filename}.csv'
    image_names = sorted(os.listdir(TEST_DIR))

    # Fail before writing anything instead of emitting a truncated or
    # misaligned file.
    if len(image_names) != len(y_pred):
        raise ValueError(
            f'Got {len(y_pred)} predictions for {len(image_names)} '
            f'entries in {TEST_DIR}'
        )

    with open(output_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['img'] + [f'c{i}' for i in range(NUM_CLASSES)])
        for name, scores in zip(image_names, y_pred):
            writer.writerow([name] + list(map(str, scores)))

    # Trigger a browser download of the CSV from the Colab runtime.
    files.download(output_path)

In [None]:
train_data, test_data = build_dataset()

# build_dataset() only returns the 'training' subset. Load the complementary
# 'validation' subset here so that fit() records val_accuracy / val_loss,
# which the plotting cell reads. The seed and validation_split must match the
# values used in build_dataset() so the two subsets do not overlap.
val_data = tf.keras.utils.image_dataset_from_directory(
    TRAIN_DIR,
    label_mode='categorical',
    image_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    seed=0,
    validation_split=VALIDATION_SPLIT,
    subset='validation',
)

model = build_model(NUM_CLASSES, weights=None)

# The datasets are already batched, so batch_size is not passed to
# fit()/predict().
history = model.fit(train_data,
                    validation_data=val_data,
                    epochs=NUM_EPOCH)
y_pred = model.predict(test_data, verbose=1)

save_output_csv(MODEL_NAME, y_pred)

In [None]:
import matplotlib.pyplot as plt

def plot_accuracy(hist):
    """Plot per-epoch training accuracy and, if recorded, validation accuracy.

    Args:
        hist: Object with a ``history`` dict, as returned by ``model.fit()``.
    """
    # Read from the argument rather than the notebook-global `history`.
    metrics = hist.history
    legend = ['train']

    plt.plot(metrics['accuracy'])
    # Validation metrics only exist when fit() was given validation data.
    if 'val_accuracy' in metrics:
        plt.plot(metrics['val_accuracy'])
        legend.append('test')

    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(legend, loc='upper left')
    plt.show()


def plot_loss(hist):
    """Plot per-epoch training loss and, if recorded, validation loss.

    Args:
        hist: Object with a ``history`` dict, as returned by ``model.fit()``.
    """
    # Read from the argument rather than the notebook-global `history`.
    metrics = hist.history
    legend = ['train']

    plt.plot(metrics['loss'])
    # Validation metrics only exist when fit() was given validation data.
    if 'val_loss' in metrics:
        plt.plot(metrics['val_loss'])
        legend.append('test')

    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(legend, loc='upper left')
    plt.show()


# Show the accuracy curves for the `history` produced by the training cell.
plot_accuracy(history)