<a href="https://www.kaggle.com/code/sergeyche/aquatrash?scriptVersionId=110191406" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
import os
import cv2
import random
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image
from tensorflow.keras.layers import Conv2D, Dropout, Dense, Flatten, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.utils import class_weight

In [None]:
# Load the annotations table from the dataset's CSV file; each row is later
# unpacked by prepare_data into an image name, four box values and a label.
df = pd.read_csv('../input/aquatrash/annotations.csv')

In [None]:
# Show 10 randomly chosen rows as a quick look at the annotations table.
df.sample(10)

In [None]:
# Directory that image file names from the annotations are resolved against.
BASE_PATH = '../input/aquatrash/Images'
# Sorted array of the distinct values in the 'class_name' column.
CLASSES = np.unique(df['class_name'])

### Check class balance

In [None]:
# Count how many annotation rows each class has.
df['class_name'].value_counts()

### Encoder for categorical data

In [None]:
# Fit the binarizer on the full class-name column so it knows every class;
# prepare_data later calls its transform() to encode the labels.
label_binarizer = LabelBinarizer()
label_binarizer.fit(df['class_name'])

### Compute class weights to pay more attention to under-represented classes

In [None]:
# 'balanced' weights, one per distinct class name, computed over all rows.
class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(df['class_name']),
    y=df['class_name'],
)
# Map each class position (0, 1, ...) to its weight.
clss_weights = {position: weight for position, weight in enumerate(class_weights)}
clss_weights

### Split the data into train, validation and test sets right away

In [None]:
# Hold out 20% of the rows as the test split, then 20% of the remaining rows
# as the validation split. The fixed random_state keeps both splits identical
# across runs, so results can be reproduced and compared.
train_data, test_data = train_test_split(
    df, test_size=0.2, shuffle=True, random_state=42
)
train_data, valid_data = train_test_split(
    train_data, test_size=0.2, shuffle=True, random_state=42
)

### This function loads each image, normalizes the bounding-box coordinates by the image size, and returns them together with the binarized (one-hot style) labels.

In [None]:
def prepare_data(data, target_size=(224, 224)):
    """Load the images listed in *data* and build model inputs and targets.

    Args:
        data: DataFrame whose rows unpack, in column order, into the image
            file name, box start x, start y, end x, end y and class label.
        target_size: ``(height, width)`` every image is resized to. Defaults
            to ``(224, 224)``.

    Returns:
        A tuple ``(images, coords, labels)`` where ``images`` is a float32
        array of the resized images with pixel values divided by 255,
        ``coords`` is a float32 array of ``(start_x, start_y, end_x, end_y)``
        with x values divided by the original image width and y values by
        the original image height, and ``labels`` is the result of
        ``label_binarizer.transform`` on the row labels.
    """
    images = []
    coords = []
    labels = []
    # itertuples yields one plain tuple per row in column order, instead of
    # relying on the positional indexer being iterable.
    rows = data.itertuples(index=False, name=None)
    for image_name, start_x, start_y, end_x, end_y, label in rows:
        image = tf.keras.utils.load_img(f'{BASE_PATH}/{image_name}')
        image = tf.keras.utils.img_to_array(image)
        # Remember the original size before resizing: the box values are
        # divided by it to become relative to the image dimensions.
        height, width = image.shape[:2]
        images.append(tf.image.resize(image, target_size).numpy())
        coords.append((
            float(start_x) / width,
            float(start_y) / height,
            float(end_x) / width,
            float(end_y) / height,
        ))
        labels.append(label)
    images = np.array(images, dtype='float32') / 255
    coords = np.array(coords, dtype='float32')
    labels = label_binarizer.transform(labels)

    return images, coords, labels

### Build the model: a VGG19 backbone with two fully connected (FC) output heads.

In [None]:
# VGG19 without its fully connected top, fed with 224x224 three-channel input.
vgg = tf.keras.applications.vgg19.VGG19(input_shape=(224, 224, 3), include_top=False)

# Freeze the backbone so only the layers added on top of it are trained.
vgg.trainable = False

# Flattened backbone output that both heads branch off from.
flatten = Flatten()(vgg.output)

### First output for bbox coords

In [None]:
# Bounding-box head: stack the hidden layers on top of the flattened features
# in order, then finish with a 4-unit sigmoid output named 'bbox_result'.
bbox = flatten
for head_layer in (
    Dropout(0.5),
    Dense(512, activation='relu'),
    Dense(128, activation='relu'),
    Dropout(0.5),
):
    bbox = head_layer(bbox)
bbox_result = Dense(4, activation='sigmoid', name='bbox_result')(bbox)

### Second output for labels

In [None]:
# Classification head: stack the hidden layers on top of the flattened
# features in order, then finish with a softmax over the binarizer's classes.
label = flatten
for head_layer in (
    Dropout(0.5),
    Dense(512, activation='relu'),
    Dense(256, activation='relu'),
    Dropout(0.5),
):
    label = head_layer(label)
num_classes = len(label_binarizer.classes_)
label_result = Dense(num_classes, activation='softmax', name='label_result')(label)

# One input, two outputs: the box values first, the class probabilities second.
model = Model(inputs=vgg.input, outputs=[bbox_result, label_result])

In [None]:
# Loss function for each output, keyed by the output layer's name.
losses = {
    "label_result": "categorical_crossentropy",
    "bbox_result": "mean_squared_error",
}

# Relative weight of each output's loss in the total loss.
loss_weights = {
    "label_result": 1.0,
    "bbox_result": 1.0,
}

# Pass loss_weights explicitly so that editing the dict above actually
# changes how the two losses are combined during training.
# NOTE(review): "accuracy" is reported for both outputs, including the
# bounding-box regression output - confirm that this is wanted.
model.compile(
    loss=losses,
    loss_weights=loss_weights,
    optimizer=Adam(0.00007),
    metrics=["accuracy"],
)

In [None]:
# Load images, box values and binarized labels for the train and test splits.
train_images, train_coords, train_labels = prepare_data(train_data)
test_images, test_coords, test_labels = prepare_data(test_data)

### Each output has its own target

In [None]:
# Targets are keyed by output layer name so each head gets its own target.
train_targets = dict(label_result=train_labels, bbox_result=train_coords)

test_targets = dict(label_result=test_labels, bbox_result=test_coords)

In [None]:
# Train for 100 epochs in batches of 16; the test split is evaluated after
# every epoch as validation data.
history = model.fit(
    x=train_images,
    y=train_targets,
    batch_size=16,
    epochs=100,
    validation_data=(test_images, test_targets),
)

### Plotting the training and validation loss

In [None]:
plt.figure(figsize=(15, 9))
# Training curves first, then validation curves, each with its legend label.
for history_key, legend_label in (
    ('bbox_result_loss', 'bbox_loss'),
    ('label_result_loss', 'label_loss'),
    ('val_bbox_result_loss', 'bbox_loss_val'),
    ('val_label_result_loss', 'label_loss_val'),
):
    plt.plot(history.history[history_key], label=legend_label)
plt.legend()
plt.show()

### Time to predict

In [None]:
 # Load images, box values and binarized labels for the validation split.
 # NOTE(review): this statement carries a leading space; outside a notebook
 # cell that is an unexpected indent - confirm and remove if run as a script.
 valid_images, valid_coords, valid_labels = prepare_data(valid_data)

In [None]:
# The model has two outputs, so predict returns the box predictions and the
# class probabilities as two separate arrays, in that order.
box_pred, label_pred = model.predict(valid_images)

In [None]:
plt.figure(figsize=(40, 20))
# Show up to 8 distinct validation samples on a 2x4 grid; sampling without
# replacement avoids drawing the same image twice.
sample_indices = random.sample(range(len(valid_images)), k=min(8, len(valid_images)))
for i, rand_idx in enumerate(sample_indices, start=1):
    plt.subplot(2, 4, i)
    image_path = BASE_PATH+'/'+valid_data['image_name'].iloc[rand_idx]
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None instead of raising when it cannot read a file.
        raise FileNotFoundError(f'Could not read image: {image_path}')
    # OpenCV loads images as BGR while matplotlib displays RGB; convert first
    # so the picture, and the RGB colors drawn below, are shown correctly.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Predicted box values are fractions of the image size; scale them back
    # to pixel coordinates of the original image.
    (start_x, start_y, end_x, end_y) = box_pred[rand_idx]
    start_x = int(start_x*image.shape[1])
    start_y = int(start_y*image.shape[0])
    end_x = int(end_x*image.shape[1])
    end_y = int(end_y*image.shape[0])
    pred_label = CLASSES[np.argmax(label_pred[rand_idx])]
    true_label = CLASSES[np.argmax(valid_labels[rand_idx])]
    # Green when the predicted class matches the true class, red otherwise;
    # the same color is used for the text and the box so they never disagree.
    color = (0, 255, 0) if true_label==pred_label else (255, 0, 0)
    cv2.putText(image, pred_label, (start_x, start_y), cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.5, color, 2)
    cv2.rectangle(image, (start_x, start_y), (end_x, end_y), color, 2)
    plt.xticks([])
    plt.yticks([])
    plt.imshow(image)