# 0 - Test for GPU
Execute the code below for printing the TF version and testing for GPU availability.

In [1]:
#@title Print TF version and GPU stats
import tensorflow as tf
import sys
print('TensorFlow version:', tf.__version__)

device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
   raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name), '', sep='\n')
!nvidia-smi

TensorFlow version: 2.7.0
Found GPU at: /device:GPU:0

Wed Jan 26 13:10:02 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.46       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   37C    P0    59W / 149W |    145MiB / 11441MiB |      1%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+--------------------------------

# 1 - Explore and preprocess the data
## 1.1 - Download and plot the data
We'll start by downloading our data set contained in a zip archive. Execute the code below for downloading and extracting the data to /tmp/bee-vs-wasp.zip.

In [2]:
#@title Dataset download and preprocessing

import requests, os, zipfile
import numpy as np

def download_file_from_google_drive(id, destination):
    URL = "https://docs.google.com/uc?export=download"
    session = requests.Session()
    response = session.get(URL, params = { 'id' : id }, stream = True)
    token = get_confirm_token(response)
    if token:
        params = { 'id' : id, 'confirm' : token }
        response = session.get(URL, params = params, stream = True)
    save_response_content(response, destination)    

def get_confirm_token(response):
    for key, value in response.cookies.items():
        if key.startswith('download_warning'):
            return value
    return None

def save_response_content(response, destination):
    CHUNK_SIZE = 32768
    with open(destination, "wb") as f:
        for chunk in response.iter_content(CHUNK_SIZE):
            if chunk: # filter out keep-alive new chunks
                f.write(chunk)
def unzip(file):
    zip_ref = zipfile.ZipFile(file, 'r')
    zip_ref.extractall('/tmp')
    zip_ref.close()

dataset_file = '/tmp/kaggle_bee_vs_wasp.zip'
download_file_from_google_drive('1-aqpfT73D7HCQGoQEFg-phd3Mtsb9PiP',dataset_file)
unzip('/tmp/kaggle_bee_vs_wasp.zip')

The base directory `/tmp/bee-vs-wasp` contains the subdirectories. Let's define the base directory:

In [3]:
import pandas as pd
import numpy as np
import cv2
from tqdm import tqdm

base_dir = '/tmp/kaggle_bee_vs_wasp/'

Get the `labels.csv` of the data set:

In [4]:
df = pd.read_csv(base_dir+'/labels.csv')
df = df.set_index('id')

# You can perform dataset subsampling by adding frac=training_subsample as argument to the sample function, 
# with training_subsample within [0,1].
# For development, you should use a small fraction of the entire dataset rater than full dataset, e.g. 0.1.
# For now lets use the whole Dataset
training_subsample = 1 
df = df.sample(frac=training_subsample,axis=0)


# Replace \\ in path with / to load images properly
for index in tqdm(df.index):    
    df.loc[index,'path']=df.loc[index,'path'].replace('\\', '/') 
df['path'] = base_dir + df['path']

100%|██████████| 11420/11420 [00:12<00:00, 908.72it/s]



Now we can create the training, validation and test data set by filtering the dataframe:

In [5]:
# Firstly, we create our training set from the data frame
train_df = df.query("(is_validation==0) and (is_final_validation==0)")
train_classes = train_df["label"]

# Now we can do the same for the valdation set
validation_df = df.query("(is_validation==1) and (is_final_validation==0)")
validation_classes = validation_df["label"]

# Now we can do the same for the test set
test_df = df.query("(is_validation==0) and (is_final_validation==1)")

Now we create our data set generators as learned in the lecture:

In [6]:
#@title Prepare data generators

from tensorflow.keras.preprocessing.image import ImageDataGenerator

batch_size = 64

# All images will be rescaled by 1./255
train_datagen = ImageDataGenerator(rescale=1./255,
                                   rotation_range=20,
                                   width_shift_range=0.1,
                                   height_shift_range=0.1,
                                   zoom_range=0.4,
                                   brightness_range=(.5, 1.5),
                                   horizontal_flip=True,
                                   vertical_flip=True,
                                   fill_mode='nearest')
val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Flow training images in batches using train_datagen generator
train_generator = train_datagen.flow_from_dataframe(
                    train_df,
                    x_col="path",
                    y_col="label",
                    target_size=(224,224),
                    batch_size=batch_size,
                    class_mode='categorical',
                    shuffle=True,
                    seed=42)

num_classes = 4

# Flow validation images in batches using val_datagen generator
validation_generator = val_datagen.flow_from_dataframe(
                    validation_df,
                    x_col="path",
                    y_col="label",
                    target_size=(224,224),
                    batch_size=batch_size,
                    class_mode='categorical',
                    shuffle=False)

# Flow test images using test_datagen generator
test_generator = test_datagen.flow_from_dataframe(
                    test_df,
                    x_col="path",
                    y_col="label",
                    target_size=(224,224),
                    class_mode='categorical',
                    shuffle=False)

train_steps = np.ceil(train_generator.samples / train_generator.batch_size)
val_steps = np.ceil(validation_generator.samples / validation_generator.batch_size)

print(train_steps)
print(val_steps)

Found 7938 validated image filenames belonging to 4 classes.
Found 1719 validated image filenames belonging to 4 classes.
Found 1763 validated image filenames belonging to 4 classes.
125.0
27.0


We now build and train the model. For now lets use a fresh `EfficientNetB0` model trained with transfer learning:

In [7]:
#@title Build the EfficientNetB0 model

from tensorflow.keras import layers, Model
from tensorflow.keras.applications.efficientnet import EfficientNetB0
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers


def build_model(input_shape, num_classes, summary=True):

  pre_trained_model = EfficientNetB0(input_shape=input_shape, weights='imagenet', include_top=False)

  x = layers.GlobalAveragePooling2D()(pre_trained_model.output)
  x = layers.Dropout(0.2, name="top_dropout")(x)
  output = layers.Dense(num_classes, activation = 'softmax')(x)

  # Define the model
  model = Model(pre_trained_model.input, output, name='efn_model')

  return model, pre_trained_model

model, pre_trained_model = build_model((224,224,3), 4)
pre_trained_model.trainable = False

print(model.summary())

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
Model: "efn_model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 rescaling (Rescaling)          (None, 224, 224, 3)  0           ['input_1[0][0]']                
                                                                                                  
 normalization (Normalization)  (None, 224, 224, 3)  7           ['rescaling[0][0]']              
                                                                                             

Now we define our initial learning rate for the training and define our model:

In [None]:
#@title Compile the model

INITIAL_LEARNING_RATE = 1e-3
model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=INITIAL_LEARNING_RATE), metrics=['accuracy'])

  super(Adam, self).__init__(name, **kwargs)


----------------------------------------- THIS SECTION IS FOR TRAINING THE EFFICIENTNET (SKIP IF YOU ALREADY TRAINED WEIGHTS) ------------------------------

In this section we train our Network with the methods learned in the lecture:

In [8]:
#@title Plot the training history

from matplotlib import pyplot as plt

def plot_history(history):
  fig, (ax1, ax2) = plt.subplots(2,1, sharex=True, dpi=150)
  ax1.plot(history.history['loss'], label='training')
  ax1.plot(history.history['val_loss'], label='validation')
  ax1.set_ylabel('Cross-Entropy Loss')
  ax1.set_yscale('log')
  if history.history.__contains__('lr'):
    ax1b = ax1.twinx()
    ax1b.plot(history.history['lr'], 'g-', linewidth=1)
    ax1b.set_yscale('log')
    ax1b.set_ylabel('Learning Rate', color='g')

  ax2.plot(history.history['accuracy'], label='training')
  ax2.plot(history.history['val_accuracy'], label='validation')
  ax2.set_ylabel('Accuracy')
  ax2.set_xlabel('Epochs')
  ax2.legend()
  plt.show() 

In [9]:
#@title LRD and early stopping

def lr_step_decay(epoch, lr, drop=.9, drop_epochs=10):
  if epoch < 10:
    return INITIAL_LEARNING_RATE
  else:
    return INITIAL_LEARNING_RATE * np.power(drop, np.floor(epoch/drop_epochs))

LRDecayCallback = tf.keras.callbacks.LearningRateScheduler(lr_step_decay, verbose=1)
StopCallback = tf.keras.callbacks.EarlyStopping(monitor='val_loss',patience=10,restore_best_weights=True,verbose=1)

In [None]:
#@title Train the model (frozen)

history = model.fit(train_generator,
                    steps_per_epoch=train_steps,
                    epochs=200,
                    validation_data=validation_generator,
                    validation_steps=val_steps,
                    callbacks=[LRDecayCallback,StopCallback],
                    verbose=2)

plot_history(history)


Epoch 00001: LearningRateScheduler setting learning rate to 0.001.
Epoch 1/200
125/125 - 163s - loss: 1.2599 - accuracy: 0.4316 - val_loss: 1.2405 - val_accuracy: 0.4363 - lr: 0.0010 - 163s/epoch - 1s/step

Epoch 00002: LearningRateScheduler setting learning rate to 0.001.
Epoch 2/200
125/125 - 146s - loss: 1.2502 - accuracy: 0.4313 - val_loss: 1.2396 - val_accuracy: 0.4363 - lr: 0.0010 - 146s/epoch - 1s/step

Epoch 00003: LearningRateScheduler setting learning rate to 0.001.
Epoch 3/200


KeyboardInterrupt: ignored

Here we encountered the problem of the constant validation accuracy. We did not manage to fix that problem, so we did not proceed with this network configuration.