In [1]:
import os, cv2, random
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss
from sklearn.preprocessing import LabelEncoder

import matplotlib.pyplot as plt
from matplotlib import ticker
import matplotlib as mpl

from keras.models import Sequential
from keras.layers import Dropout, Flatten, Convolution2D, MaxPooling2D, ZeroPadding2D, Dense, Activation
from keras.optimizers import RMSprop, Adam
from keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical  
from keras import backend as K

# Dataset locations, relative to the notebook's working directory.
TRAIN_DIR = 'data/train/'
TEST_DIR = 'data/test_stg1/'
TEST_DIR2 = 'data/test_stg2/'
# Class labels; load_data reads one sub-folder of TRAIN_DIR per entry.
FISH_CLASSES = ['ALB', 'BET', 'DOL', 'LAG', 'NoF', 'OTHER', 'SHARK', 'YFT']
# Target image size, used both when resizing and when allocating the image arrays.
WIDTH = 244  
HEIGHT = 244 
CHANNELS = 3

2023-12-06 00:22:01.353771: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-06 00:22:01.353798: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-06 00:22:01.353816: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-06 00:22:01.359108: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
"""
Resize every image to 244 x 244 x 3 and standardize its pixel values.
Each file path is prefixed with the fish class (the name of its folder).
"""
def get_images_in_folder(folder, fish):
    """Return the image paths of one fish class, relative to ``folder``.

    Args:
        folder: Directory holding one sub-folder per fish class.
        fish: Name of the class sub-folder (ALB, BET, DOL, etc.).

    Returns:
        A list of ``fish/filename`` paths, sorted by file name.
    """
    fish_folder = os.path.join(folder, fish)
    # os.listdir returns entries in arbitrary order; sorting makes the file
    # order (and therefore the seeded train/validation split) reproducible.
    return [os.path.join(fish, im) for im in sorted(os.listdir(fish_folder))]

def resizeImage(src):
    """Read an image, resize it and standardize its pixel values.

    Args:
        src: Path of the image file.

    Returns:
        A float array holding the resized image, shifted and scaled so that
        its mean is 0 and its standard deviation is 1 over all pixels and
        channels (a constant image is only mean-centred).

    Raises:
        OSError: If OpenCV cannot read ``src``.
    """
    image = cv2.imread(src, cv2.IMREAD_COLOR)
    if image is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # rather than raising, which otherwise surfaces as an obscure
        # cv2.resize error.
        raise OSError("Could not read image: {}".format(src))
    # dsize is given as (columns, rows), so the result has WIDTH rows and
    # HEIGHT columns -- the same layout as the (WIDTH, HEIGHT, CHANNELS)
    # arrays the callers allocate.
    image = cv2.resize(image, (HEIGHT, WIDTH), interpolation=cv2.INTER_CUBIC)
    std = np.std(image)
    if std == 0:
        # A constant image has no spread; avoid dividing by zero (NaN/inf).
        std = 1.0
    return (image - np.mean(image)) / std

def load_data(classes, train_dir):
    """Collect the image paths and class labels of the training set.

    Args:
        classes: Iterable of class names; each names a sub-folder of
            ``train_dir``.
        train_dir: Directory containing one sub-folder per class.

    Returns:
        A tuple ``(files, labels)``: the list of paths returned by
        get_images_in_folder for every class, and a NumPy array holding the
        class name repeated once per file, in the same order.
    """
    all_paths, all_labels = [], []

    for label in classes:
        class_paths = get_images_in_folder(train_dir, label)
        count = len(class_paths)

        all_paths += class_paths
        # One label entry per file of this class.
        all_labels += list(np.tile(label, count))
        print("{0} photos of {1}".format(count, label))

    return all_paths, np.array(all_labels)

# Gather every training image path and its class label; the pixel data is
# loaded from these paths in the next cell.
files, y_all = load_data(FISH_CLASSES, TRAIN_DIR)




1375 photos of ALB
160 photos of BET
93 photos of DOL
53 photos of LAG
372 photos of NoF
239 photos of OTHER
140 photos of SHARK
587 photos of YFT


In [3]:
# resizeImage returns standardized floats (zero mean, unit variance). Storing
# them in a uint8 buffer wraps the negative values and truncates the
# fractions, destroying the image content, so the buffer must be floating
# point. float32 costs 4x the memory of uint8.
X_all = np.empty((len(files), WIDTH, HEIGHT, CHANNELS), dtype=np.float32)

for i, im in enumerate(files):
    X_all[i] = resizeImage(os.path.join(TRAIN_DIR, im))
    if i % 1000 == 0:
        print('Processed {} of {}'.format(i, len(files)))

print(X_all.shape)

# Class names -> integer ids -> one-hot rows for categorical cross-entropy.
# NOTE: y_all is overwritten here, so this cell cannot be re-run on its own
# without first re-running the cell that calls load_data.
y_all = LabelEncoder().fit_transform(y_all)
y_all = to_categorical(y_all)

# Stratified 80/20 split so every class keeps its share in both parts.
X_train, X_valid, y_train, y_valid = train_test_split(X_all, y_all,
                                                    test_size=0.2, random_state=23,
                                                    stratify=y_all)

Processed 0 of 3019
Processed 1000 of 3019
Processed 2000 of 3019
Processed 3000 of 3019
(3019, 244, 244, 3)


In [4]:
from keras.layers import LeakyReLU
from keras.initializers import he_normal
# Adam optimizer with a starting learning rate of 1e-4.
optimizer = Adam(learning_rate=1e-4)
# Loss for one-hot encoded multi-class targets.
objective = 'categorical_crossentropy'

# Empty sequential model; the layers are appended below.
model = Sequential()

def add_layers(model, filters, kernel_size):
    """Append one convolutional stage to ``model``.

    The stage is two identically configured convolution layers (unpadded,
    LeakyReLU with slope 0.1, He-normal initialisation, channels-last)
    followed by a 2x2 max-pooling layer.

    Args:
        model: Model to extend in place via ``model.add``.
        filters: Number of filters for both convolution layers.
        kernel_size: Kernel size for both convolution layers.
    """
    conv_count = 2
    for _ in range(conv_count):
        model.add(Convolution2D(
            filters,
            kernel_size,
            padding='valid',
            activation=LeakyReLU(alpha=0.1),
            data_format='channels_last',
            kernel_initializer=he_normal(),
        ))
    model.add(MaxPooling2D(pool_size=(2, 2), data_format='channels_last'))

# Four convolutional stages; the filter count doubles from one stage to the next.
for stage_filters in (32, 64, 128, 256):
    add_layers(model, stage_filters, (3, 3))

# Classifier head: flatten, then two LeakyReLU dense layers, each followed
# by 50% dropout.
model.add(Flatten())
for units in (2056, 1028):
    model.add(Dense(units, activation=LeakyReLU(alpha=0.1)))
    model.add(Dropout(0.5))

# One output per fish class, turned into probabilities by softmax.
model.add(Dense(len(FISH_CLASSES)))
model.add(Activation('softmax'))

model.compile(optimizer=optimizer, loss=objective)




In [9]:
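# Alternative model (currently disabled): EfficientNetB2 trained from scratch
# on the same 244x244x3 inputs.
# NOTE: the training log saved in this notebook mentions "efficientnetb2", so
# the recorded outputs appear to come from this model rather than from the
# Sequential CNN defined above. Restart the kernel and run all cells in order
# to get outputs that match the code.
# from keras.applications import EfficientNetB2
# from keras.optimizers import Adam
# optimizer = Adam(learning_rate=1e-4)


# model = EfficientNetB2(
#     weights=None, 
#     input_shape=(244, 244, 3),
#     classes=8)
# model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])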



In [6]:
from keras.preprocessing.image import ImageDataGenerator

# Random augmentation applied to the training batches only.
datagen = ImageDataGenerator(
    horizontal_flip=True,
    vertical_flip=True,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    fill_mode='nearest',
    channel_shift_range=0.2,
    brightness_range=[0.2, 1.0]
)

# Validation batches are passed through without augmentation.
val_datagen = ImageDataGenerator()

# No ImageDataGenerator.fit() calls are needed: fit() only computes dataset
# statistics for featurewise_center, featurewise_std_normalization and
# zca_whitening, none of which is enabled on either generator. Calling it
# merely made extra in-memory copies of the image arrays.

In [7]:
import math
from keras.callbacks import LearningRateScheduler

def lr_schedule(epoch):
    """Step-decay learning-rate schedule.

    Starts at 1e-4 and multiplies the rate by 0.2 once for every full block
    of 10 counted in ``epoch + 1``.

    Args:
        epoch: Zero-based epoch index.

    Returns:
        The learning rate to use for that epoch.
    """
    base_rate = 0.0001
    decay_factor = 0.2   # multiplier applied at each drop
    step_length = 10     # number of epochs between two drops
    drops_so_far = math.floor((1 + epoch) / step_length)
    return base_rate * math.pow(decay_factor, drops_so_far)

# Wrap lr_schedule in a Keras callback so it can be handed to model.fit
# through its `callbacks` argument.
lr_scheduler = LearningRateScheduler(lr_schedule)

In [10]:

batch_size = 32
# steps_per_epoch has to be a whole number of batches; the plain division
# len(X_train) / 32 produced a float. Round up (integer ceiling) so the last,
# partial batch is still part of every epoch.
steps_per_epoch = (len(X_train) + batch_size - 1) // batch_size

history = model.fit(datagen.flow(X_train, y_train, batch_size=batch_size),
          validation_data=val_datagen.flow(X_valid, y_valid, batch_size=batch_size),
          epochs=20,
          verbose=1,
          shuffle=True,
          callbacks=[lr_scheduler],
          steps_per_epoch=steps_per_epoch)

# Training vs. validation loss per epoch.
plt.figure(figsize=(12, 6))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Loss Graph')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

Epoch 1/20


2023-12-06 00:24:57.638482: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inefficientnetb2/block1b_drop/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20

KeyboardInterrupt: 

In [None]:
# Score the held-out split: predicted class probabilities against the
# one-hot validation targets.
preds = model.predict(X_valid, verbose=1)
valid_log_loss = log_loss(y_valid, preds)
print("Validation Log Loss: {}".format(valid_log_loss))


Validation Log Loss: 3.7214369442198576




In [None]:
# Stage-1 entries are bare file names; stage-2 entries keep a 'test_stg2'
# prefix, which is also what ends up in the submission's image column.
test_files = list(os.listdir(TEST_DIR))
test2_files = [os.path.join('test_stg2', im) for im in os.listdir(TEST_DIR2)]
test_files.extend(test2_files)
print(test_files[-20:])

# resizeImage returns standardized floats; a uint8 buffer would wrap the
# negative values and truncate the fractions, so the model would be fed
# garbage. float32 needs 4x the memory of uint8 -- if the full test set does
# not fit in RAM, load and predict it in chunks instead.
test = np.empty((len(test_files), WIDTH, HEIGHT, CHANNELS), dtype=np.float32)

for i, im in enumerate(test_files):
    if 'test_stg2' in im:
        # Stage-2 names already contain their folder, relative to 'data'.
        test[i] = resizeImage(os.path.join('data', im))
    else:
        test[i] = resizeImage(os.path.join(TEST_DIR, im))

test_preds = model.predict(test, verbose=1)

['test_stg2/image_06257.jpg', 'test_stg2/image_12125.jpg', 'test_stg2/image_00900.jpg', 'test_stg2/image_05029.jpg', 'test_stg2/image_04708.jpg', 'test_stg2/image_11323.jpg', 'test_stg2/image_06756.jpg', 'test_stg2/image_04798.jpg', 'test_stg2/image_07244.jpg', 'test_stg2/image_12145.jpg', 'test_stg2/image_03109.jpg', 'test_stg2/image_06982.jpg', 'test_stg2/image_02670.jpg', 'test_stg2/image_11529.jpg', 'test_stg2/image_00747.jpg', 'test_stg2/image_02959.jpg', 'test_stg2/image_04513.jpg', 'test_stg2/image_08260.jpg', 'test_stg2/image_00338.jpg', 'test_stg2/image_10729.jpg']


KeyboardInterrupt: 

In [None]:
# Save our Submission File: one probability column per class plus the image
# name as the first column.
# NOTE: assumes FISH_CLASSES is listed in the same (sorted) order in which
# LabelEncoder numbered the classes -- verify if the class list changes.
submission = pd.DataFrame(test_preds, columns=FISH_CLASSES)
submission.insert(0, 'image', test_files)
submission.to_csv('submission.csv', index=False)

# Kept as the last expression of the cell so the preview is actually
# displayed; in the middle of the cell its result was silently discarded.
submission.head()