In [1]:
import numpy as np 
import pandas as pd 
import tensorflow as tf
import keras
from tensorflow.keras.optimizers import Adam, SGD
from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.layers import (Conv2D, MaxPooling2D, Flatten, Dense, 
                          Dropout, Rescaling, RandomFlip, RandomRotation, BatchNormalization)
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from tensorflow import keras
from tensorflow.keras.preprocessing import image_dataset_from_directory

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline 

import cv2
import zipfile
import os
import glob
import shutil

## Unzip train files

In [2]:
with zipfile.ZipFile('/kaggle/input/the-nature-conservancy-fisheries-monitoring/train.zip', 'r') as train_zip:
    train_zip.extractall('')

base_dir = '/kaggle/working'
train_dir = os.path.join(base_dir, 'train')

In [3]:
train_dir = '/kaggle/working/train'
fishes = os.listdir(train_dir)
fishes

['NoF', 'LAG', 'OTHER', 'SHARK', 'YFT', '.DS_Store', 'ALB', 'DOL', 'BET']

In [4]:
del fishes[4]

## Check disbalanse classes

In [5]:
dict_ = {}
for i in fishes:
    print(i, ': ', len(os.listdir(train_dir + '/' + i)), sep='')
    dict_[i] = len(os.listdir(train_dir + '/' + i))

NoF: 465
LAG: 67
OTHER: 299
SHARK: 176


NotADirectoryError: [Errno 20] Not a directory: '/kaggle/working/train/.DS_Store'

In [None]:
plt.figure(figsize=(12,4))
sns.barplot(x=list(dict_.keys()), y=list(dict_.values()), alpha=0.8)
plt.xlabel('Fish', fontsize=12)
plt.ylabel('Number of Images', fontsize=12)
plt.show()

## Let’s visualize few data from trainingshutil.rmtree('/kaggle/working/train/')

In [None]:
def drow_fish(fish):
    fig = plt.figure(figsize=(16, 8))
    train_dir = f'/kaggle/working/train/{fish}'
    train_list = glob.glob(os.path.join(train_dir,'*.jpg'))
    for i, path in enumerate(train_list[:5], 1):
        subplot = fig.add_subplot(2, 5, i)
        subplot.set_title('%s' %path.split('/')[-2])
        img = cv2.imread(path)[...,::-1]
        img = cv2.resize(img, (224,224))
        plt.imshow(img)

In [None]:
for i in fishes:
    drow_fish(i)

## Prepare directory of training images

In [None]:
shutil.rmtree('/kaggle/working/__MACOSX')
os.makedirs('/kaggle/working/images')
shutil.move('/kaggle/working/train/DOL', '/kaggle/working/images/')
shutil.move('/kaggle/working/train/LAG', '/kaggle/working/images/')
shutil.move('/kaggle/working/train/NoF', '/kaggle/working/images/')
shutil.move('/kaggle/working/train/SHARK', '/kaggle/working/images/')
shutil.move('/kaggle/working/train/YFT', '/kaggle/working/images/')
shutil.move('/kaggle/working/train/OTHER', '/kaggle/working/images/')
shutil.move('/kaggle/working/train/BET', '/kaggle/working/images/')
shutil.move('/kaggle/working/train/ALB', '/kaggle/working/images/')
shutil.rmtree('/kaggle/working/train')

In [None]:
img_height, img_width = 500, 500
batch_size = 5

In [None]:
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
  '/kaggle/working/images/',
  validation_split=0.2,
  subset="training",
  seed=42,
  image_size=(img_height, img_width),
  batch_size=batch_size,
  shuffle=True,
  label_mode='categorical')


val_ds = tf.keras.preprocessing.image_dataset_from_directory(
  '/kaggle/working/images/',
  validation_split=0.2,
  subset="validation",
  seed=42,
  image_size=(img_height, img_width),
  batch_size=batch_size,
  shuffle=True,
  label_mode='categorical')

In [None]:
train_ds.class_names

In [None]:
rescale = Sequential([
  Rescaling(1./255)
])

data_augmentation = Sequential([
  RandomFlip("horizontal_and_vertical"),
  RandomRotation(0.2),
])

In [None]:
aug_ds = train_ds.map(
  lambda x, y: (data_augmentation(x, training=True), y))

## ResNet50 - first base model

In [None]:
base_model= tf.keras.applications.ResNet50(include_top=False,
                   input_shape=(img_height, img_width, 3),
                   pooling='max',
                   weights='imagenet', classes=8)
for layer in base_model.layers:
        layer.trainable=False

## VGG16 - second base model

In [None]:
base_model_2 = tf.keras.applications.VGG16(include_top=False,
                   input_shape=(img_height, img_width, 3),
                   pooling='max',
                   weights='imagenet', classes=8)
for layer in base_model_2.layers:
        layer.trainable=False

## Checkpoints

In [None]:
early_stop = EarlyStopping(monitor="precision",min_delta=0, patience=5,
                           verbose=0, mode="min", baseline=None, restore_best_weights=True)
check = ModelCheckpoint(filepath='convnet.keras', save_best_only=True, monitor='precision')
learning_rate_reduction = ReduceLROnPlateau(monitor='precision', 
                                            patience=3, 
                                            verbose=1, 
                                            factor=0.5, 
                                            min_lr=0.00001)

## Build model on Resnet

In [None]:
model_1 = Sequential()
model_1.add(base_model)
model_1.add(Flatten())
model_1.add(Dense(256, activation='relu'))
model_1.add(Dropout(0.5))
model_1.add(Dense(8, activation='softmax'))

model_1.summary()

## Metrics

In [None]:
metrics = [
    tf.keras.metrics.FalseNegatives(name="fn"),
    tf.keras.metrics.FalsePositives(name="fp"),
    tf.keras.metrics.TrueNegatives(name="tn"),
    tf.keras.metrics.TruePositives(name="tp"),
    tf.keras.metrics.Precision(name="precision"),
    tf.keras.metrics.Recall(name="recall"),
]

model_1.compile(loss=tf.keras.losses.categorical_crossentropy,
                optimizer=SGD(learning_rate=1e-3, decay=1e-6, momentum=0.9, nesterov=True),
                metrics=metrics)

## Classes weights

In [None]:
weight_alb = 1. / dict_['ALB']
weight_bet = 1. / dict_['BET']
weight_dol = 1. / dict_['DOL']
weight_lag = 1. / dict_['LAG']
weight_nof = 1. / dict_['NoF']
weight_other = 1. / dict_['OTHER']
weight_shark = 1. / dict_['SHARK']
weight_yft = 1. / dict_['YFT']

class_weight = {0: weight_alb, 
                1: weight_bet,
                2: weight_dol, 
                3: weight_lag,
                4: weight_nof, 
                5: weight_other,
                6: weight_shark, 
                7: weight_yft}

In [None]:
model_1.fit(
    aug_ds,
    epochs=10,
    verbose=1,
    callbacks=[early_stop, check, learning_rate_reduction],
    validation_data=val_ds,
    class_weight=class_weight,
)

## Build model on VGG

In [None]:
model_2 = Sequential()
model_2.add(base_model)
model_2.add(Flatten())
model_2.add(Dense(256, activation='relu'))
model_2.add(Dropout(0.5))
model_2.add(Dense(8, activation='softmax'))

model_2.summary()

model_2.compile(loss=tf.keras.losses.categorical_crossentropy,
                optimizer=SGD(learning_rate=1e-2, decay=1e-6, momentum=0.9, nesterov=True),
                metrics=metrics)

model_2.fit(
    train_ds,
    epochs=10,
    verbose=1,
    callbacks=[early_stop, check, learning_rate_reduction],
    validation_data=val_ds,
    class_weight=class_weight,
)

## Test directory

In [None]:
from py7zr import py7zr

with py7zr.SevenZipFile('/kaggle/input/the-nature-conservancy-fisheries-monitoring/test_stg2.7z', mode='r') as z:
    z.extractall('test')

In [None]:
os.listdir('/kaggle/working/test')

In [None]:
with zipfile.ZipFile('/kaggle/input/the-nature-conservancy-fisheries-monitoring/test_stg1.zip', 'r') as train_zip:
    train_zip.extractall('')

base_dir = '/kaggle/working'
test_dir = os.path.join(base_dir, 'test1')

In [None]:
file_names = os.listdir('/kaggle/working/test_stg1')
    
for file_name in file_names:
    shutil.move(os.path.join('/kaggle/working/test_stg1', file_name), '/kaggle/working/test')

In [None]:
file_names = os.listdir('/kaggle/working/test/test_stg2')
    
for file_name in file_names:
    shutil.move(os.path.join('/kaggle/working/test/test_stg2', file_name), '/kaggle/working/test')

In [None]:
shutil.rmtree('/kaggle/working/test/test_stg2')
shutil.rmtree('/kaggle/working/test_stg1')

## Sample sub

In [None]:
test_files_st1 = glob.glob('/kaggle/working/test_stg1/*.jpg')
test_files_st2 = glob.glob('/kaggle/working/test/test_stg2/*.jpg')



IMG_SIZE = (500,500)


def load_image_vgg(path, target_size=IMG_SIZE):
    img = cv2.imread(path)[...,::-1]
    img = cv2.resize(img, target_size)
    return vgg16.preprocess_input(img)


def load_image_resnet(path, target_size=IMG_SIZE):
    img = cv2.imread(path)[...,::-1]
    img = cv2.resize(img, target_size)
    return resnet50.preprocess_input(img)


def predict_generator_vgg(files):
    while True:
        for path in files:
            yield np.array([load_image_vgg(path)])
            

def predict_generator_resnet(files):
    while True:
        for path in files:
            yield np.array([load_image_resnet(path)])

In [None]:
test_pred_1 = model_1.predict(
    predict_generator_resnet(test_files_st1), steps=len(test_files_st1))

test_pred_11 = model_1.predict(
    predict_generator_resnet(test_files_st2), steps=len(test_files_st2))

test_pred_21 = model_2.predict(
    predict_generator_vgg(test_files_st1), steps=len(test_files_st1))

test_pred_22 = model_2.predict(
    predict_generator_vgg(test_files_st2), steps=len(test_files_st2))

## PLT result

In [None]:
fish = ['ALB', 'BET', 'DOL', 'LAG', 'NoF', 'OTHER', 'SHARK','YFT']
f, ax = plt.subplots(5, 5, figsize = (15, 15))

for i in range(0,25):
    imgBGR = cv2.imread(test_files_st2[i])
    imgRGB = cv2.cvtColor(imgBGR, cv2.COLOR_BGR2RGB)
    
    # a if condition else b
    predicted_class = fish[np.argmax(test_pred_22[i])]

    ax[i//5, i%5].imshow(imgRGB)
    ax[i//5, i%5].axis('off')
    ax[i//5, i%5].set_title("Predicted:{}".format(predicted_class))    

plt.show()

## Predict file 

In [None]:
test_files_st1, test_files_st2

In [None]:
result_list_1 = [x.split('/')[-1] for x in test_files_st1]
result_list_11 = [x.split('/')[-2] + '/' + x.split('/')[-1] for x in test_files_st2]
len(result_list_1), len(result_list_11)

In [None]:
result1 = pd.concat([pd.DataFrame(result_list_1), pd.DataFrame(test_pred_1)], axis=1, ignore_index=True)
result11 = pd.concat([pd.DataFrame(result_list_11), pd.DataFrame(test_pred_11)],axis=1, ignore_index=True)

In [None]:
result2 = pd.concat([pd.DataFrame(result_list_1), pd.DataFrame(test_pred_21)], axis=1, ignore_index=True)
result22 = pd.concat([pd.DataFrame(result_list_11), pd.DataFrame(test_pred_22)],axis=1, ignore_index=True)

In [None]:
names = ['image','ALB','BET','DOL','LAG','NoF','OTHER','SHARK','YFT']
result_1 = pd.concat([result1, result11], ignore_index=True)
result_2 = pd.concat([result2, result22], ignore_index=True)
result_1.columns = names
result_2.columns = names
result_2

In [None]:
result_1.to_csv('result_1.csv', index=False)
result_2.to_csv('result_2.csv', index=False)