## Basic setup

In [None]:
import os
import tensorflow as tf
import numpy as np

# One seed for the whole notebook: applied to TensorFlow's global RNG here and
# reused wherever a `seed=` argument is accepted further down.
SEED = 1234
tf.random.set_seed(seed=SEED)

# Directory layout, all rooted at the current working directory:
#   <cwd>/drive/My Drive/AN2DL-Project/{data/{training,test}, logs}
cwd = os.getcwd()
drive_path = os.path.join(cwd, 'drive')
my_drive_path = os.path.join(cwd, 'drive', 'My Drive')

project_path = os.path.join(my_drive_path, 'AN2DL-Project')
logs_path = os.path.join(project_path, 'logs')
data_path = os.path.join(project_path, 'data')
train_path, test_path = (os.path.join(data_path, split)
                         for split in ('training', 'test'))

In [None]:
from google.colab import drive
# Mount Google Drive at drive_path; every project path in this notebook is
# built underneath that directory, so this must run before any file access.
drive.mount(drive_path)

## Images and labels

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

apply_data_augmentation = True
validation_split = 0.175

# Options used regardless of augmentation: multiply pixel values by 1/255 and
# pass the validation fraction through to the generator.
_generator_kwargs = dict(rescale=1./255,
                         validation_split=validation_split)

if apply_data_augmentation:
    # Random rotations, shifts, zooms and horizontal flips; pixels exposed by a
    # transform are filled with the constant value 0.
    # NOTE(review): Keras treats shift ranges >= 1 as pixels, not as a fraction
    # of the image size, so 1.5 is a very small shift -- confirm this is intended.
    _generator_kwargs.update(rotation_range=10,
                             width_shift_range=1.5,
                             height_shift_range=1.5,
                             zoom_range=0.2,
                             horizontal_flip=True,
                             fill_mode='constant',
                             cval=0)

train_data_gen = ImageDataGenerator(**_generator_kwargs)

In [None]:
import pandas as pd
import json

# Read the ground-truth JSON with its keys as the index, then flatten it into a
# two-column frame: 'file' (the JSON key) and 'class' (column 0 cast to str,
# which is what class_mode='categorical' expects for y_col).
gt_file = os.path.join(data_path,'train_gt.json')
raw_labels = pd.read_json(gt_file, orient='index')

records = [(name, str(label))
           for name, label in zip(raw_labels.index, raw_labels[0])]
labels_df = pd.DataFrame(records).rename(columns={0:'file',1:'class'})
print(labels_df)

## Dataset

In [None]:
# Size every image is resized to, and the number of images per batch.
img_h, img_w = 299, 299
bs = 8

# Iterator over the labelled images listed in labels_df, read from train_path.
# Labels are one-hot encoded; order is shuffled reproducibly via SEED.
_flow_kwargs = {
    'dataframe': labels_df,
    'directory': train_path,
    'x_col': 'file',
    'y_col': 'class',
    'target_size': (img_h, img_w),
    'batch_size': bs,
    'class_mode': 'categorical',
    'shuffle': True,
    'seed': SEED,
}
train_gen = train_data_gen.flow_from_dataframe(**_flow_kwargs)

In [None]:
num_channels = 3
num_classes = 3

# Batch dimension is left as None so batches of any size are accepted.
_image_batch_shape = [None, img_h, img_w, num_channels]
_label_batch_shape = [None, num_classes]


def _train_gen_source():
    """Return the Keras iterator that tf.data pulls batches from."""
    return train_gen


# Wrap the Keras iterator as a tf.data.Dataset of (images, one-hot labels)
# float32 batches, repeated indefinitely.
train_dataset = tf.data.Dataset.from_generator(
    _train_gen_source,
    output_types=(tf.float32, tf.float32),
    output_shapes=(_image_batch_shape, _label_batch_shape),
).repeat()

print(train_dataset)

## Model

In [None]:
# ImageNet-pretrained InceptionResNetV2 without its classification head;
# pooling='avg' makes it output one feature vector per image.
basic_model = tf.keras.applications.InceptionResNetV2(
    include_top=False,
    weights='imagenet',
    pooling='avg',
    input_shape=(img_h, img_w, num_channels),
)

# Classification head stacked on the backbone. The architecture has to match
# the checkpoint whose weights are loaded into `model` afterwards.
model = tf.keras.Sequential([
    basic_model,
    tf.keras.layers.Dropout(0.6),
    tf.keras.layers.Dense(1500, activation='linear'),
    tf.keras.layers.Dense(units=num_classes, activation='softmax'),
])

model.summary()

In [None]:
# Number of feature columns written per image by the export cells.
# Presumably the width of the backbone's pooled output -- verify against
# the encoder summary.
encoding_size = 1536

# Restore the trained weights from the saved checkpoint.
model_path = os.path.join(logs_path, 'Trial/ckpts/cp_06.ckpt')
model.load_weights(filepath=model_path)

In [None]:
# Keep only the first layer of the trained model (the backbone) as a standalone
# feature extractor, then drop the reference to the full classifier.
# Note: because `model` is deleted, this cell cannot be re-run without first
# re-running the cells that build the model and load its weights.
_backbone = model.layers[0]
encoder = tf.keras.Sequential([_backbone])
del model

encoder.summary()

# Training data

In [None]:
# Number of training images to export.
# NOTE(review): hard-coded; it should equal the number of images the training
# iterator actually found (train_gen.n) -- confirm, otherwise rows are missed
# or duplicated because train_dataset repeats forever.
dataset_size = 5606

iterator = iter(train_dataset)

file_name = 'training_deep_new.csv'
output_path = os.path.join(data_path, file_name)

with open(output_path, 'w') as f:
  # Header row: C1..C<encoding_size>, then the class label column.
  header = ['C{}'.format(k + 1) for k in range(encoding_size)] + ['Class']
  f.write(','.join(header) + '\n')

  # Ceiling division: the last batch of an epoch may be smaller than `bs`, and
  # flooring here would silently drop those samples from the CSV.
  batch_count = (dataset_size + bs - 1) // bs
  progress_bins = 20
  # At least 1 so the modulo below never divides by zero on small datasets.
  progress_period = max(1, batch_count // progress_bins)

  for i in range(batch_count):
    imgs, targets = next(iterator)
    img_encoded = encoder.predict(imgs)
    # One-hot targets -> integer class index per sample.
    class_ids = np.argmax(targets, axis=1)

    for curr_encoded, curr_target in zip(img_encoded, class_ids):
      fields = [str(value) for value in curr_encoded[:encoding_size]]
      fields.append(str(curr_target))
      f.write(','.join(fields) + '\n')

    if i % progress_period == progress_period - 1:
      # Report the share of batches completed so far.
      print('Completed {:.1f}%'.format((i + 1) / batch_count * 100))

print('Saved {}'.format(file_name))

# Test data

In [None]:
from PIL import Image

# Names of the regular files directly inside test_path (sub-directories are
# not descended into).
image_filenames = next(os.walk(test_path))[2]

file_name = 'test_deep_new.csv'
output_path = os.path.join(data_path, file_name)

with open(output_path, 'w') as f:
  # Header row: the image id followed by C1..C<encoding_size>.
  header = ['Id'] + ['C{}'.format(k + 1) for k in range(encoding_size)]
  f.write(','.join(header) + '\n')

  image_count = len(image_filenames)
  progress_bins = 20
  # At least 1 so the modulo below never divides by zero for small folders.
  progress_period = max(1, image_count // progress_bins)

  for i, image_name in enumerate(image_filenames):
    # Load as RGB, resize to the network input size, add a batch axis and scale
    # pixel values by 1/255. The context manager closes the file handle promptly.
    with Image.open(os.path.join(test_path, image_name)) as raw_img:
      img = raw_img.convert('RGB')
    img = img.resize((img_w, img_h))
    img_array = np.array(img)
    img_array = np.expand_dims(img_array, 0)
    img_array = img_array * 1./255
    img_encoded = encoder.predict(img_array, batch_size=1)[0]

    # A distinct name for the feature values keeps the loop index `i` intact;
    # reusing `i` here would break the progress check below.
    row = [image_name] + [str(value) for value in img_encoded]
    f.write(','.join(row) + '\n')

    if i % progress_period == progress_period - 1:
      # Report the share of images processed so far.
      print('Completed {:.1f}%'.format((i + 1) / image_count * 100))

# Printed after the `with` block so the file is closed before reporting success.
print('Saved {}'.format(file_name))