In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
from tqdm.notebook import tqdm
tqdm.pandas()
import pandas as pd
import matplotlib.pyplot as plt
import ast
import cv2
import seaborn as sns

In [None]:
# Load the competition's training CSV into a DataFrame.
reef_df = pd.read_csv('/kaggle/input/tensorflow-great-barrier-reef/train.csv')

In [None]:
# Preview the first rows and report how many rows were loaded.
print(reef_df.head())
print('No. of samples: ', len(reef_df))

In [None]:
# Rows whose 'annotations' string is the literal '[]' carry no bounding boxes.
is_unannotated = reef_df['annotations'] == '[]'
missing_annotations = reef_df[is_unannotated]
print(missing_annotations.head())
print('No. of missing samples: ', len(missing_annotations))

In [None]:
# Share of rows that have at least one bounding box, as a percentage with two decimals.
print(f"Percentage of samples with BBox: {(1-len(missing_annotations)/len(reef_df))*100:0.2f}%")

In [None]:
# Keep only the frames that have at least one bounding box.
has_bbox = reef_df['annotations'] != '[]'
# reset_index() stores the old row labels in a new 'index' column. Running this
# cell a second time would then fail ("cannot insert index, already exists"),
# so the old labels are only kept on the first run; later runs just renumber.
keep_old_labels = 'index' not in reef_df.columns
reef_df = reef_df[has_bbox].reset_index(drop=not keep_old_labels)
# Per-column count of missing values (rendered as the cell output).
reef_df.isna().sum()

In [None]:
import tensorflow as tf
from tensorflow.keras import layers

In [None]:
# Root folder of the competition dataset (note the trailing slash).
ROOT_DIR = "/kaggle/input/tensorflow-great-barrier-reef/"

In [None]:
# Cast the annotation strings to lists of dictionaries.
# ast.literal_eval only accepts Python literals, so unlike eval() it cannot
# execute arbitrary code read from the CSV. Values that are already parsed are
# passed through unchanged, which keeps the cell safe to re-run.
reef_df['annotations'] = reef_df['annotations'].progress_apply(
    lambda raw: ast.literal_eval(raw) if isinstance(raw, str) else raw
)
# ROOT_DIR ends with '/', so strip it to avoid a doubled slash in the image path.
reef_df['img_path'] = ROOT_DIR.rstrip('/') + '/train_images/video_' + \
                            reef_df.video_id.astype(str)+'/' + reef_df.video_frame.astype(str)+'.jpg'

In [None]:
reef_df.head()

In [None]:
def display(index):
    """Show one training frame with its bounding boxes drawn in red.

    NOTE(review): this name shadows IPython's built-in ``display`` for the rest
    of the notebook session; consider renaming it together with its callers.

    Parameters
    ----------
    index : int
        Positional row of the global ``reef_df`` to show. The row must provide
        'img_path' and a parsed 'annotations' list of dicts with the keys
        'x', 'y', 'width' and 'height'.

    Raises
    ------
    FileNotFoundError
        If the image file cannot be read.
    """
    img_meta = reef_df.iloc[index]
    # Use the path column the other cells read from instead of rebuilding the path.
    path = img_meta['img_path']
    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError(f"Could not read image: {path}")
    # OpenCV loads images as BGR; matplotlib expects RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    for bbox in img_meta['annotations']:
        x1, y1 = bbox['x'], bbox['y']
        x2, y2 = x1 + bbox['width'], y1 + bbox['height']
        cv2.rectangle(img, (x1, y1), (x2, y2), (255,0,0), 2)
    plt.imshow(img)
    plt.show()

display(40)

In [None]:
# Bar chart of how many rows of reef_df belong to each video.
video_count_ax = sns.countplot(data=reef_df, x='video_id')
video_count_ax.set(title='Training samples from each video')
plt.show()

In [None]:
# Count the boxes in every frame, then plot how those counts are distributed per video.
reef_df['num_bbox'] = reef_df['annotations'].apply(len)
fig, ax = plt.subplots(1,1,figsize=(20,5))
bbox_count_ax = sns.countplot(data=reef_df, x="num_bbox", hue='video_id', ax=ax)
bbox_count_ax.set(title='Number of COTS in each video frame')
plt.show()

In [None]:
# # Check whether all images have the same size (slow, so avoid re-running it)
# img_sizes = reef_df['img_path'].progress_apply(lambda x: cv2.imread(x).shape)
# print("Do all images have the same size?", img_sizes.nunique() == 1)

In [None]:
# Load the frame at position 16 as a sample; OpenCV reads BGR, so convert to RGB.
sample_row = reef_df.iloc[16]
image = cv2.cvtColor(cv2.imread(sample_row['img_path']), cv2.COLOR_BGR2RGB)

In [None]:
# Augmentation pipeline: random flips on both axes, a random rotation,
# then a random 720x720 crop.
augmentation = tf.keras.Sequential()
augmentation.add(layers.RandomFlip("horizontal_and_vertical"))
augmentation.add(layers.RandomRotation(0.3))
augmentation.add(layers.RandomCrop(720,720))

In [None]:
# Example images after augmentation, shown as a 3x3 grid and saved to disk.
plt.figure(figsize=(10, 10))
for i in range(9):
    augmented_image = augmentation(image)
    ax = plt.subplot(3, 3, i + 1)
    # The Keras preprocessing layers return floating-point pixels still on the
    # 0-255 scale; imshow clips float RGB data to [0, 1], which washes the image
    # out. Casting back to uint8 makes matplotlib use the 0-255 range.
    ax.imshow(np.asarray(augmented_image).astype("uint8"))
plt.savefig('/kaggle/working/aug')

In [None]:
def display_transformed_image(img, index, ax):
    """Show ``img`` on ``ax`` with the bounding boxes of row ``index`` drawn in red.

    The boxes are drawn on a copy, so the array passed by the caller is left
    untouched and can be reused for further processing.

    Parameters
    ----------
    img : numpy.ndarray
        Image to show. Assumed to have the same geometry as the original frame,
        since the box coordinates are used as-is -- TODO confirm for any
        resized or cropped input.
    index : int
        Positional row of the global ``reef_df`` whose 'annotations' are drawn.
    ax : matplotlib.axes.Axes
        Axes that receives the image.
    """
    # cv2.rectangle writes into the array it is given; work on a private copy.
    annotated = img.copy()
    for bbox in reef_df.iloc[index]['annotations']:
        x1, y1 = bbox['x'], bbox['y']
        x2, y2 = x1 + bbox['width'], y1 + bbox['height']
        cv2.rectangle(annotated, (x1, y1), (x2, y2), (255,0,0), 2)
    ax.imshow(annotated)

In [None]:
def he_hsv(img_demo):
    """Return an RGB copy of ``img_demo`` with its HSV value channel histogram-equalised.

    ``img_demo`` is interpreted as RGB; the input array itself is not modified.
    """
    hue, saturation, value = cv2.split(cv2.cvtColor(img_demo, cv2.COLOR_RGB2HSV))
    # Only the V channel is equalised; hue and saturation pass through unchanged.
    equalised_hsv = cv2.merge((hue, saturation, cv2.equalizeHist(value)))
    return cv2.cvtColor(equalised_hsv, cv2.COLOR_HSV2RGB)

def clahe_hsv(img, clip_limit=100.0, tile_grid_size=(10, 10)):
    """Return an RGB copy of ``img`` with CLAHE applied to its HSV value channel.

    Parameters
    ----------
    img : numpy.ndarray
        Image in RGB channel order, as produced by the cv2.COLOR_BGR2RGB
        conversions elsewhere in this notebook. The input is not modified.
    clip_limit : float, optional
        Passed to ``cv2.createCLAHE`` as ``clipLimit``.
    tile_grid_size : tuple of int, optional
        Passed to ``cv2.createCLAHE`` as ``tileGridSize``.

    Returns
    -------
    numpy.ndarray
        The enhanced image in RGB channel order.
    """
    # The input is RGB, so convert with RGB2HSV. Using BGR2HSV here while
    # converting back with HSV2RGB would swap the red and blue channels.
    hsv_img = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
    clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=tile_grid_size)
    # Only the V channel is enhanced; hue and saturation are left untouched.
    hsv_img[:, :, 2] = clahe.apply(hsv_img[:, :, 2])
    return cv2.cvtColor(hsv_img, cv2.COLOR_HSV2RGB)

index = 40
image = cv2.imread(reef_df.iloc[index]['img_path'])
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Compute both enhanced variants before anything is drawn, so the enhancement
# inputs never contain bounding boxes, whether or not the display helper
# draws on the array it receives.
image_he = he_hsv(image)
image_clahe = clahe_hsv(image)

# Side-by-side comparison: original frame, histogram equalisation, CLAHE.
fig, ax = plt.subplots(1,3,figsize=(20,20))
panels = (
    (image, 'Original'),
    (image_he, 'Histogram equalisation (V channel)'),
    (image_clahe, 'CLAHE (V channel)'),
)
for panel_ax, (panel_img, panel_title) in zip(ax, panels):
    display_transformed_image(panel_img, index, panel_ax)
    panel_ax.set_title(panel_title)
plt.show()

In [None]:
def bbox_to_yolo(df=None):
    """Add a 'yolo_annotations' column holding the centre point of every bounding box.

    Each entry of 'annotations' must be a list of dicts with the keys 'x', 'y',
    'width' and 'height'. The new column holds, per row, a list of
    ``{'x': centre_x, 'y': centre_y}`` dicts in the same order.

    NOTE(review): despite the name, only box centres in the input's units are
    produced; no width/height or normalisation by image size is added --
    confirm whether that is intended.

    Parameters
    ----------
    df : pandas.DataFrame, optional
        Frame to annotate in place. Defaults to the notebook-level ``reef_df``.

    Returns
    -------
    None
        The frame is modified in place.
    """
    if df is None:
        df = reef_df
    df['yolo_annotations'] = [
        [
            {'x': bbox['x'] + bbox['width'] / 2, 'y': bbox['y'] + bbox['height'] / 2}
            for bbox in bboxes
        ]
        for bboxes in df['annotations']
    ]

# Adds the 'yolo_annotations' column to reef_df in place.
bbox_to_yolo()

In [None]:
reef_df.head()

In [None]:
import numpy as np
from sklearn.model_selection import GroupKFold
# 5-fold GroupKFold split, grouped by the 'sequence' column; prints the row
# positions assigned to the train and test side of every fold.
X = reef_df.drop(columns=['annotations'])
y = reef_df['annotations']
groups = reef_df['sequence']
group_kfold = GroupKFold(n_splits=5)
fold_indices = group_kfold.split(X, y, groups)
for train_index, test_index in fold_indices:
    print("TRAIN:", train_index, "TEST:", test_index)

In [None]:
# Number of rows in each 'sequence' group (the grouping key used for the GroupKFold split).
reef_df['sequence'].value_counts()

In [None]:
#testing commit

In [None]:
# NOTE(review): looks like a leftover cell from testing commits -- consider removing.
print("Echo editing")

In [None]:
# NOTE(review): looks like a leftover cell from testing commits -- consider removing.
print("pushing test")

In [None]:
#dylan test commit

In [1]:
### Editing from github end.
### Testing