In [1]:
# Importing modules 
import numpy as np 
import pandas as pd 
import os
import matplotlib.pyplot as plt
import cv2

import tensorflow as tf

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dense,Conv2D,Flatten,MaxPool2D,Dropout, BatchNormalization
from tensorflow.keras.models import Sequential
import tensorflow_hub as hub
import tensorflow_addons as tfa

from PIL import Image

from sklearn.model_selection import train_test_split

np.random.seed(1)

 The versions of TensorFlow you are currently using is 2.6.0 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


In [2]:
df_train = pd.read_csv("train.csv")
df_test = pd.read_csv("test.csv")

shape = (332,332)

# Process training data.
train_images = []
train_path = df_train["example_path"].to_numpy()

for filename in train_path:
    img = cv2.imread(filename)
    img = cv2.resize(img, shape)

    sigma = 0.33
    v = np.median(img)
    # apply automatic Canny edge detection using the computed median
    lower = int(max(0, (1.0 - sigma) * v))
    upper = int(min(255, (1.0 + sigma) * v))
    canny_edge = cv2.Canny(img, lower, upper)

    img = cv2.applyColorMap(canny_edge, cv2.COLORMAP_PINK)

    train_images.append(img)

train_images = np.array(train_images)



# Process test data.
test_images = []
test_path = df_test["example_path"].to_numpy()

for filename in test_path:
    img = cv2.imread(filename)
    img = cv2.resize(img, shape)

    sigma = 0.33
    v = np.median(img)
    # apply automatic Canny edge detection using the computed median
    lower = int(max(0, (1.0 - sigma) * v))
    upper = int(min(255, (1.0 + sigma) * v))
    canny_edge = cv2.Canny(img, lower, upper)

    img = cv2.applyColorMap(canny_edge, cv2.COLORMAP_PINK)

    test_images.append(img)

test_images = np.array(test_images)

display(train_images.shape)
display(test_images.shape)

(1714, 332, 332, 3)

(635, 332, 332, 3)

In [3]:
num_classes = 3

df_train_label = df_train.pop("label")
df_train_label = pd.get_dummies(df_train_label).values

In [4]:
X_train, X_val, y_train, y_val = train_test_split(train_images, df_train_label, random_state=1234, test_size=0.1)

In [5]:
num_examples_train = X_train.shape[0]
num_examples_val = X_val.shape[0]
display(num_examples_train)
display(num_examples_val)

1542

172

In [6]:
train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train))
valid_ds = tf.data.Dataset.from_tensor_slices((X_val, y_val))

In [7]:
def prepare_for_training(ds, cache=True, batch_size=64, shuffle_buffer_size=1000):
  if cache:
    if isinstance(cache, str):
      ds = ds.cache(cache)
    else:
      ds = ds.cache()
  #ds = ds.map(lambda d: (d["image"], tf.one_hot(d["label"], num_classes)))
  # shuffle the dataset
  ds = ds.shuffle(buffer_size=shuffle_buffer_size)
  # Repeat forever
  ds = ds.repeat()
  # split to batches
  ds = ds.batch(batch_size)
  # `prefetch` lets the dataset fetch batches in the background while the model
  # is training.
  ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
  return ds

In [8]:
batch_size = 1

# preprocess training & validation sets
train_ds = prepare_for_training(train_ds, batch_size=batch_size)
valid_ds = prepare_for_training(valid_ds, batch_size=batch_size)

In [9]:
# validating shapes
for el in valid_ds.take(1):
  print(el[0].shape, el[1].shape)
for el in train_ds.take(1):
  print(el[0].shape, el[1].shape)

(1, 332, 332, 3) (1, 3)
(1, 332, 332, 3) (1, 3)


In [10]:
# take the first batch of the training set
batch = next(iter(train_ds))

In [11]:
def show_batch(batch):
  plt.figure(figsize=(16, 16))
  for n in range(min(32, batch_size)):
      ax = plt.subplot(batch_size//8, 8, n + 1)
      # show the image
      plt.imshow(batch[0][n])
      # and put the corresponding label as title upper to the image
      #plt.title(class_names[tf.argmax(batch[1][n].numpy())])
      plt.axis('off')
      plt.savefig("sample-images.png")

# showing a batch of images along with labels
#show_batch(batch)

In [12]:
model_url = "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_l/feature_vector/2"

# download & load the layer as a feature vector
output_size = batch_size*3*2
keras_layer = hub.KerasLayer(model_url, output_shape=[output_size], trainable=True)

In [13]:
m = tf.keras.Sequential([
  keras_layer,
  tf.keras.layers.Dense(num_classes, activation="softmax")
])
# build the model with input image shape as (64, 64, 3)
m.build([None, 332, 332, 3])
m.compile(
    loss="categorical_crossentropy", 
    optimizer="adam", 
    metrics=["accuracy", tfa.metrics.F1Score(num_classes)]
)

In [14]:
m.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
keras_layer (KerasLayer)     (None, 1280)              117746848 
_________________________________________________________________
dense (Dense)                (None, 3)                 3843      
Total params: 117,750,691
Trainable params: 117,238,115
Non-trainable params: 512,576
_________________________________________________________________


In [15]:
model_name = "zero-deforestation-classification"
model_path = os.path.join("results", model_name + ".h5")
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(model_path, save_best_only=True, verbose=1)

In [16]:
# set the training & validation steps since we're using .repeat() on our dataset
# number of training steps
n_training_steps   = int(num_examples_train) // (batch_size)
# number of validation steps
n_validation_steps = int(num_examples_val) // (batch_size)

In [17]:
history = m.fit(
    train_ds, validation_data=valid_ds,
    steps_per_epoch=n_training_steps,
    validation_steps=n_validation_steps,
    verbose=1, epochs=5, 
    callbacks=[model_checkpoint]
)

Epoch 1/5

Epoch 00001: val_loss improved from inf to 30.45578, saving model to results\zero-deforestation-classification.h5
Epoch 2/5

Epoch 00002: val_loss improved from 30.45578 to 1.12121, saving model to results\zero-deforestation-classification.h5
Epoch 3/5

Epoch 00003: val_loss did not improve from 1.12121
Epoch 4/5

Epoch 00004: val_loss did not improve from 1.12121
Epoch 5/5

Epoch 00005: val_loss did not improve from 1.12121
