In [None]:
#Run this cell only if executing in Colab
!pip install git+https://github.com/tensorflow/examples.git

In [None]:
import os
import glob
import zipfile
from pathlib import Path
import time

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

import tensorflow as tf
from tensorflow import keras
from keras.callbacks import ReduceLROnPlateau
from keras.callbacks import ModelCheckpoint
from tensorflow.keras import layers, models, Model, optimizers
from tensorflow.keras import regularizers
from keras import backend as K
from tensorflow_examples.models.pix2pix import pix2pix
from keras_preprocessing.image import ImageDataGenerator

In [None]:
# Mount Google Drive at /content/drive (only works inside Colab, where
# google.colab is available).
from google.colab import drive
drive.mount('/content/drive')

## Getting the dataset from Kaggle

In [None]:
# Tell the Kaggle CLI where to find the credentials, then switch to the working
# directory. Both <...> placeholders must be replaced with real (quoted) paths
# before this cell can run.
# NOTE(review): the variable is named KAGGLE_CONFIG_DIR, so it presumably wants
# the folder that contains kaggle.json rather than the file itself — confirm.
os.environ['KAGGLE_CONFIG_DIR'] = <...path to kaggle.json file>
%cd <...path to working directory>

In [None]:
# Download the fish dataset from Kaggle; the extraction cell below expects the
# archive a-large-scale-fish-dataset.zip in the current directory.
!kaggle datasets download -d crowww/a-large-scale-fish-dataset

In [None]:
# List the current directory (e.g. to check that the archive is there).
%ls

## Data extraction

In [None]:
# Extract the downloaded archive once; later runs reuse the extracted folder.
# zipfile is used instead of the Colab-only `%shell unzip` magic so the cell
# also runs outside Colab and does not print one line per extracted file.
if not os.path.exists('Fish_Dataset'):
  with zipfile.ZipFile('a-large-scale-fish-dataset.zip') as archive:
    archive.extractall('./')
else:
  print('The dataset is already available')

In [None]:
# Root folder of the extracted dataset.
image_dir = Path('./Fish_Dataset/Fish_Dataset')

# Every PNG located in a folder whose name contains "GT", at any depth below
# the root; these are used as the segmentation labels.
labels = [mask_path for mask_path in image_dir.glob('**/*GT*/*.png')]

In [None]:
# Collect the files of every second-level folder that is not a "GT" folder.
# glob is called without recursive=True, so '**' behaves like a plain '*'.
images = []

for folder in glob.glob('./Fish_Dataset/Fish_Dataset/*/**'):
  if folder.endswith('GT'):
    continue  # ground-truth folders are handled through `labels`
  path = folder + '/**'
  images.extend(glob.glob(path))

In [None]:
# glob returns files in arbitrary, filesystem-dependent order, and the image and
# mask lists were gathered by two independent traversals. Sort both lists so
# that row i of the frame pairs an image with the mask of the same file.
image_paths = sorted(str(p) for p in images)
label_paths = sorted(str(p) for p in labels)

if len(image_paths) != len(label_paths):
  raise ValueError(
      'Found %d images but %d labels; cannot pair them row by row'
      % (len(image_paths), len(label_paths))
  )

images = pd.Series(image_paths, name='image', dtype=str)
labels = pd.Series(label_paths, name='label', dtype=str)

# Positional pairing is only meaningful if image and mask share a file name.
name_mismatch = images.map(lambda p: Path(p).name) != labels.map(lambda p: Path(p).name)
if name_mismatch.any():
  raise ValueError(
      '%d image/label rows do not share the same file name' % int(name_mismatch.sum())
  )

df = pd.concat((images, labels), axis=1)
# The class name is the folder two levels above the mask file; using Path
# avoids depending on '/' being the path separator.
df['fish_type'] = df['label'].map(lambda p: Path(p).parent.parent.name)

In [None]:
# Number of rows (image/label pairs) per fish class.
df['fish_type'].value_counts()
# There are 1000 images and 1000 labels for each class

In [None]:
%pylab inline
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
img = mpimg.imread(images[0])
imgplot = plt.imshow(img)
plt.show()

## Image preprocessing

In [None]:
# Shuffle once with a fixed seed so the split is the same on every run, then
# cut the shuffled frame at 70% and 90%: 70% train, 20% test, 10% validation.
# Plain iloc slices are used instead of np.split, which relies on a deprecated
# DataFrame code path.
shuffled = df.sample(frac=1, random_state=42)
train_end = int(.7*len(df))
test_end = int(.9*len(df))

train = shuffled.iloc[:train_end]
test = shuffled.iloc[train_end:test_end]
val = shuffled.iloc[test_end:]
print('There are %d samples of training, %d of test and %d of validation' %(train.shape[0], test.shape[0], val.shape[0]))

In [None]:
# Batch size of the data generators.
BATCH_SIZE = 32
# Size every image is resized to; also the spatial size of the model input.
IMG_HEIGHT = IMG_WIDTH = 256
# Number of channels of the model input.
CHANNELS = 3

In [None]:
# Generator shared by images and masks; every pixel value is multiplied by 1/255.
datagen = ImageDataGenerator(rescale=1.0 / 255.0)

In [None]:
#! IMPORTANT: These generators yield batches of (images, one-hot encoded fish_type).
# We want the images; the class labels are NOT IMPORTANT.

def build_generator(frame, path_column):
  """Create a batch generator that reads the files listed in one dataframe column.

  Args:
    frame: Dataframe (train, test or val) with the file paths and `fish_type`.
    path_column: Name of the column holding the paths to load
      ("image" for the inputs, "label" for the masks).

  Returns:
    The iterator returned by `datagen.flow_from_dataframe`.
  """
  return datagen.flow_from_dataframe(
    dataframe=frame,
    x_col=path_column,
    y_col="fish_type",
    batch_size=BATCH_SIZE,
    seed=42,
    # No shuffling: the image and the mask generator built from the same frame
    # must walk its rows in the same order.
    shuffle=False,
    target_size=(IMG_HEIGHT,IMG_WIDTH)
  )

print('----- X IMAGES -----')
x_train_generator = build_generator(train, "image")
x_test_generator = build_generator(test, "image")
x_val_generator = build_generator(val, "image")

print()
print('----- Y IMAGES -----')
y_train_generator = build_generator(train, "label")
y_test_generator = build_generator(test, "label")
y_val_generator = build_generator(val, "label")

In [None]:
# Look at the first batch of the validation mask generator only and print the
# shape of its image array and of its label array.
for image_batch, labels_batch in y_val_generator:
  print(image_batch.shape, labels_batch.shape, sep='\n')
  break

## Model

In [None]:
# Encoder: MobileNetV2 without its classification head (include_top=False).
# NOTE(review): the Keras MobileNetV2 preprocessing scales inputs to [-1, 1],
# while `datagen` rescales to [0, 1] — confirm this is intended.
base_model = tf.keras.applications.MobileNetV2(input_shape=[IMG_HEIGHT, IMG_WIDTH, CHANNELS], include_top=False)

#Use the activations of these layers as encoder outputs / skip connections.
#Sizes are for the 256x256 input used in this notebook (1/2 ... 1/32 of the input).
layer_names = [
    'block_1_expand_relu',  #128x128
    'block_3_expand_relu',  #64x64
    'block_6_expand_relu',  #32x32
    'block_13_expand_relu', #16x16
    'block_16_project',     #8x8
]

base_model_outputs = [base_model.get_layer(name).output for name in layer_names]

#Create a feature extraction model that returns all five activations at once
down_stack = tf.keras.Model(inputs=base_model.input, outputs=base_model_outputs)
#Freeze the encoder: its weights are not updated during training
down_stack.trainable = False

In [None]:
# Decoder: four upsampling blocks from the pix2pix example, applied deepest
# first. Sizes are for the 256x256 input used in this notebook.
up_stack = [
    pix2pix.upsample(512, 3), #8x8 -> 16x16
    pix2pix.upsample(256, 3), #16x16 -> 32x32
    pix2pix.upsample(128, 3), #32x32 -> 64x64
    pix2pix.upsample(64, 3),  #64x64 -> 128x128
]

In [None]:
def unet_model(output_channels):
  """Assemble the segmentation network from `down_stack` and `up_stack`.

  The input goes through `down_stack`; its last output seeds the decoder and
  the remaining outputs serve, deepest first, as skip connections. After each
  `up_stack` block the result is concatenated with the matching skip tensor.
  A final stride-2 transposed convolution produces the model output.

  Args:
    output_channels: Number of filters of the final transposed convolution,
      i.e. the number of channels of the model output.

  Returns:
    A `tf.keras.Model` taking an (IMG_HEIGHT, IMG_WIDTH, CHANNELS) input and
    returning the output of the final layer (no activation is applied).
  """
  inputs = tf.keras.layers.Input(shape=[IMG_HEIGHT, IMG_WIDTH, CHANNELS])

  #Downsampling: one activation per tapped encoder layer, deepest last
  encoder_outputs = down_stack(inputs)
  x = encoder_outputs[-1]

  #Upsampling and establishing the skip connections (all but the deepest
  #activation, walked from deep to shallow)
  for up, skip in zip(up_stack, encoder_outputs[-2::-1]):
    x = tf.keras.layers.Concatenate()([up(x), skip])

  #Last layer of the model: 128x128 -> 256x256 for the 256x256 input
  x = tf.keras.layers.Conv2DTranspose(
      filters=output_channels,
      kernel_size=3,
      strides=2,
      padding='same'
  )(x)

  return tf.keras.Model(inputs=inputs, outputs=x)

In [None]:
# Number of output channels of the network (one logit per class and pixel).
OUTPUT_CHANNELS = 3

model = unet_model(output_channels=OUTPUT_CHANNELS)

# from_logits=True: the last layer of unet_model has no activation.
# NOTE(review): SparseCategoricalCrossentropy expects integer class indices as
# targets, while the mask generators yield rescaled float images — confirm the
# masks are converted before training.
model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
    optimizer='adam'
)

In [None]:
# Draw the model graph, including the tensor shapes (show_shapes=True).
tf.keras.utils.plot_model(model, show_shapes=True)