# Necessary libraries

In [1]:
import warnings
warnings.filterwarnings('ignore')

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from skimage.io import imread
from skimage.segmentation import mark_boundaries
from skimage.util import montage
from skimage.morphology import label

import gc
gc.enable()

## Authenticating for kaggle

In [5]:
from google.colab import files
files.upload()  # Upload the kaggle.json file

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"horvthbotondtrylvw","key":"809fb1d01438b6e4dc65d2324c14c1f7"}'}

## Uploading the authentication file

In [6]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

## Importing the input data from the kaggle competitions

In [7]:
!kaggle competitions download -c airbus-ship-detection -p /content

Downloading airbus-ship-detection.zip to /content
100% 28.6G/28.6G [06:22<00:00, 46.5MB/s]
100% 28.6G/28.6G [06:22<00:00, 80.2MB/s]


## Unzipping the folder (after this step is complete, the airbus-ship-detection.zip can be safely deleted - so it will not take up 30GB unnecessarily anymore)

In [8]:
!unzip /content/airbus-ship-detection.zip -d /content/airbus-ship-detection

[1;30;43mA streamkimeneten csak az utolsó 5000 sor látható.[0m
  inflating: /content/airbus-ship-detection/train_v2/f9775b70e.jpg  
  inflating: /content/airbus-ship-detection/train_v2/f977a470c.jpg  
  inflating: /content/airbus-ship-detection/train_v2/f9780bfaf.jpg  
  inflating: /content/airbus-ship-detection/train_v2/f9782bdfe.jpg  
  inflating: /content/airbus-ship-detection/train_v2/f9785ca49.jpg  
  inflating: /content/airbus-ship-detection/train_v2/f9785e462.jpg  
  inflating: /content/airbus-ship-detection/train_v2/f97a719d5.jpg  
  inflating: /content/airbus-ship-detection/train_v2/f97afa376.jpg  
  inflating: /content/airbus-ship-detection/train_v2/f97b85e3e.jpg  
  inflating: /content/airbus-ship-detection/train_v2/f97bd0b11.jpg  
  inflating: /content/airbus-ship-detection/train_v2/f97bd6b80.jpg  
  inflating: /content/airbus-ship-detection/train_v2/f97bfb1b2.jpg  
  inflating: /content/airbus-ship-detection/train_v2/f97c8cbbe.jpg  
  inflating: /content/airbus-ship-dete

# Preparing and loading the data

### Defining directory paths as variables

In [2]:
# Train and test directories
train_image_dir = '/content/airbus-ship-detection/train_v2/'
test_image_dir = "/content/airbus-ship-detection/test_v2/"

### Opening the train directory and sorting it's elements

In [3]:
train_images = os.listdir(train_image_dir)
train_images.sort()
print(f"There are {len(train_images)} images in train directory.")

There are 192556 images in train directory.


### Preparing ship-segmented masks

In [4]:
masks = pd.read_csv("/content/airbus-ship-detection/train_ship_segmentations_v2.csv")
masks.head(10)

Unnamed: 0,ImageId,EncodedPixels
0,00003e153.jpg,
1,0001124c7.jpg,
2,000155de5.jpg,264661 17 265429 33 266197 33 266965 33 267733...
3,000194a2d.jpg,360486 1 361252 4 362019 5 362785 8 363552 10 ...
4,000194a2d.jpg,51834 9 52602 9 53370 9 54138 9 54906 9 55674 ...
5,000194a2d.jpg,198320 10 199088 10 199856 10 200624 10 201392...
6,000194a2d.jpg,55683 1 56451 1 57219 1 57987 1 58755 1 59523 ...
7,000194a2d.jpg,254389 9 255157 17 255925 17 256693 17 257461 ...
8,0001b1832.jpg,
9,00021ddc3.jpg,108287 1 109054 3 109821 4 110588 5 111356 5 1...


### Converting the Run-length-encoding format into arrays, where the ship-containing pixels have 1 as value and the background-containing pixels have 0

In [5]:
def rle_decode(mask_rle, shape=(768,768)):
    '''
    Input arguments -
    mask_rle: Mask of one ship in the train image
    shape: Output shape of the image array
    '''
    s = mask_rle.split()                                                               # Split the mask of each ship that is in RLE format
    starts, lengths = [np.asarray(x, dtype=int) for x in (s[0:][::2], s[1:][::2])]     # Get the start pixels and lengths for which image has ship
    ends = starts + lengths - 1                                                        # Get the end pixels where we need to stop
    img = np.zeros(shape[0]*shape[1], dtype=np.uint8)                                  # A 1D vec full of zeros of size = 768*768
    for lo, hi in zip(starts, ends):                                                   # For each start to end pixels where ship exists
        img[lo:hi+1] = 1                                                               # Fill those values with 1 in the main 1D vector
    '''
    Returns -
    Transposed array of the mask: Contains 1s and 0s. 1 for ship and 0 for background
    '''
    return img.reshape(shape).T

def masks_as_image(in_mask_list):
    '''
    Input -
    in_mask_list: List of the masks of each ship in one whole training image
    '''
    all_masks = np.zeros((768, 768), dtype = np.int16)                                 # Creating 0s for the background
    for mask in in_mask_list:                                                          # For each ship rle data in the list of mask rle
        if isinstance(mask, str):                                                      # If the datatype is string
            all_masks += rle_decode(mask)                                              # Use rle_decode to create one mask for whole image
    '''
    Returns -
    Full mask of the training image whose RLE data has been passed as an input
    '''
    return np.expand_dims(all_masks, -1)

In [6]:
# Adding a new feature to the masks data frame named as ship. If Encoded pixel in any row is a string, there is a ship else there isn't.
masks['ships'] = masks['EncodedPixels'].map(lambda c_row: 1 if isinstance(c_row, str) else 0)
unique_img_ids = masks.groupby('ImageId').agg({'ships': 'sum'}).reset_index()
unique_img_ids.index+=1 # Incrimenting all the index by 1
# Adding two new features to unique_img_ids data frame. If ship exists in image, val is 1 else 0. And it's vec form
unique_img_ids['has_ship'] = unique_img_ids['ships'].map(lambda x: 1.0 if x>0 else 0.0)
# Eetriving the old masks data frame
masks.drop(['ships'], axis=1, inplace=True)
masks.index+=1

### Train 75%
### Validation 25%
### Testing images were in the project, so they will be used to evaluate the model

In [7]:
# Train - Test split
from sklearn.model_selection import train_test_split
train_ids, valid_ids = train_test_split(unique_img_ids, test_size = 0.25, stratify = unique_img_ids['ships'])

In [8]:
# Create train data frame
train_df = pd.merge(masks, train_ids)

# Create test data frame
valid_df = pd.merge(masks, valid_ids)

In [9]:
print("There are ~")
print(train_df.shape[0], 'training masks,')
print(valid_df.shape[0], 'validation masks.')

There are ~
173795 training masks,
57928 validation masks.


## Balancing the data, because most images do not have a ship on them

In [10]:
# Clipping the max value of grouped_ship_count to be 7, minimum to be 0
train_df['grouped_ship_count'] = train_df.ships.map(lambda x: (x+1)//2).clip(0,7)
# Check
train_df.grouped_ship_count.value_counts()

Unnamed: 0_level_0,count
grouped_ship_count,Unnamed: 1_level_1
0,112500
1,31838
2,11512
3,6428
4,4039
5,2898
6,2304
7,2276


### The dataset is large, so reducing samples from the majority class probably still leaves enough data to effectively train the model

In [11]:
def sample_ships(in_df, base_rep_val=1200):
    '''
    Input Args:
    in_df - dataframe we want to apply this function
    base_val - random sample of this value to be taken from the data frame
    '''
    if in_df['ships'].values[0]==0:
        return in_df.sample(base_rep_val//3)  # Random 1200/3 = 400 samples taken whose ship count is 0 in an image
    else:
        return in_df.sample(base_rep_val)    # Random 1200 samples taken whose ship count is not 0 in an image

In [12]:
# Creating groups of ship counts and applying the sample_ships functions to randomly undersample the ships
balanced_train_df = train_df.groupby('grouped_ship_count').apply(sample_ships)
balanced_train_df.grouped_ship_count.value_counts() # In each group there are now 1200 ships except 0 as we have decreased it even more to 400

Unnamed: 0_level_0,count
grouped_ship_count,Unnamed: 1_level_1
1,1200
2,1200
3,1200
4,1200
5,1200
6,1200
7,1200
0,400


### Showing previous and balanced data

In [None]:
plt.figure(figsize = (15, 5))
plt.suptitle("Train Data", fontsize = 18, color = 'r', weight = 'bold')
plt.subplot(1, 2, 1)
import seaborn as sns
sns.countplot(train_df.ships, palette = 'Set2')
plt.title("Ship Counts - Before Balancing", color = 'm', fontsize = 15)
plt.ylabel("Count", color = 'tab:pink', fontsize = 13)
plt.xlabel("# Ships in an image", color = 'tab:pink', fontsize = 13)
plt.subplot(1, 2, 2)
sns.countplot(balanced_train_df.ships, palette = 'Set2')
plt.title("Ship Counts - After Balancing", color = 'm', fontsize = 15)
plt.xlabel("# Ships in an image", color = 'tab:pink', fontsize = 13)
plt.tight_layout()

KeyboardInterrupt: 

In [13]:
# Parameters
BATCH_SIZE = 4                 # Train batch size
EDGE_CROP = 16                 # While building the model
NB_EPOCHS = 5                  # Training epochs
GAUSSIAN_NOISE = 0.1           # To be used in a layer in the model
UPSAMPLE_MODE = 'SIMPLE'       # SIMPLE ==> UpSampling2D, else Conv2DTranspose
NET_SCALING = None             # Downsampling inside the network
IMG_SCALING = (1, 1)           # Downsampling in preprocessing
VALID_IMG_COUNT = 400          # Valid batch size
MAX_TRAIN_STEPS = 200          # Maximum number of steps_per_epoch in training

In [14]:
# Image and Mask Generator
def make_image_gen(in_df, batch_size = BATCH_SIZE):
    '''
    Inputs -
    in_df - data frame on which the function will be applied
    batch_size - number of training examples in one iteration
    '''
    all_batches = list(in_df.groupby('ImageId'))                             # Group ImageIds and create list of that dataframe
    out_rgb = []                                                             # Image list
    out_mask = []                                                            # Mask list
    while True:                                                              # Loop for every data
        np.random.shuffle(all_batches)                                       # Shuffling the data
        for c_img_id, c_masks in all_batches:                                # For img_id and msk_rle in all_batches
            rgb_path = os.path.join(train_image_dir, c_img_id)               # Get the img path
            c_img = imread(rgb_path)                                         # img array
            c_mask = masks_as_image(c_masks['EncodedPixels'].values)         # Create mask of rle data for each ship in an img
            out_rgb += [c_img]                                               # Append the current img in the out_rgb / img list
            out_mask += [c_mask]                                             # Append the current mask in the out_mask / mask list
            if len(out_rgb)>=batch_size:                                     # If length of list is more or equal to batch size then
                yield np.stack(out_rgb)/255.0, np.stack(out_mask)            # Yeild the scaled img array (b/w 0 and 1) and mask array (0 for bg and 1 for ship)
                out_rgb, out_mask=[], []                                     # Empty the lists to create another batch

## Creating train and validation data

In [15]:
# Generate train data
train_gen = make_image_gen(balanced_train_df)

# Image and Mask
train_x, train_y = next(train_gen)

# Print the summary
print(f"train_x ~\nShape: {train_x.shape}\nMin value: {train_x.min()}\nMax value: {train_x.max()}")
print(f"\ntrain_y ~\nShape: {train_y.shape}\nMin value: {train_y.min()}\nMax value: {train_y.max()}")

train_x ~
Shape: (4, 768, 768, 3)
Min value: 0.0
Max value: 1.0

train_y ~
Shape: (4, 768, 768, 1)
Min value: 0
Max value: 1


In [16]:
# Prepare validation data
valid_x, valid_y = next(make_image_gen(valid_df, VALID_IMG_COUNT))
print(f"valid_x ~\nShape: {valid_x.shape}\nMin value: {valid_x.min()}\nMax value: {valid_x.max()}")
print(f"\nvalid_y ~\nShape: {valid_y.shape}\nMin value: {valid_y.min()}\nMax value: {valid_y.max()}")

valid_x ~
Shape: (400, 768, 768, 3)
Min value: 0.0
Max value: 1.0

valid_y ~
Shape: (400, 768, 768, 1)
Min value: 0
Max value: 1


In [25]:
!pip install tensorflow keras



### Using a Generator(ImageDataGenerator) to efficiently load and augment data
### The ships can be in any part of the picture in any direction - so rotation, horizontal and vertical flipping is relevant

In [28]:
# Augmenting Data using ImageDataGenerator
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Preparing image data generator arguments
dg_args = dict(rotation_range=20,   # Randomly rotate images by 20 degrees
    width_shift_range=0.1,          # Shift images by 10% horizontally
    height_shift_range=0.1,         # Shift images by 10% vertically
    zoom_range=[0.8, 1.2],          # Randomly zoom in or out by 20%
    horizontal_flip=True,           # Flip images horizontally
    brightness_range=[0.8, 1.2],    # Randomly change brightness
    fill_mode='nearest',            # Use 'nearest' to fill any gaps in images
    data_format='channels_last')    # Format (batch, height, width, channels)

In [29]:
image_gen = ImageDataGenerator(**dg_args)
label_gen = ImageDataGenerator(**dg_args)

def create_aug_gen(in_gen, seed = None):
    '''
    Takes in -
    in_gen - train data generator, seed value
    '''
    np.random.seed(seed if seed is not None else np.random.choice(range(9999)))  # Randomly assign seed value if not provided
    for in_x, in_y in in_gen:                                                    # For imgs and msks in train data generator
        seed = 12                                                                # Seed value for imgs and msks must be same else augmentation won't be same

        # Create augmented imgs
        g_x = image_gen.flow(255*in_x,                                           # Inverse scaling on imgs for augmentation
                             batch_size = in_x.shape[0],                         # batch_size = 3
                             seed = seed,                                        # Seed
                             shuffle=True)                                       # Shuffle the data

        # Create augmented masks
        g_y = label_gen.flow(in_y,
                             batch_size = in_x.shape[0],
                             seed = seed,
                             shuffle=True)

        '''Yeilds - augmented scaled imgs and msks array'''
        yield next(g_x)/255.0, next(g_y)

In [30]:
# Augment the train data
cur_gen = create_aug_gen(train_gen, seed = 42)
t_x, t_y = next(cur_gen)
print('x', t_x.shape, t_x.dtype, t_x.min(), t_x.max())
print('y', t_y.shape, t_y.dtype, t_y.min(), t_y.max())

x (4, 768, 768, 3) float32 0.0 1.0
y (4, 768, 768, 1) float32 0.0 1.0


In [31]:
gc.collect() # Block all the garbage that has been generated

0

## A brief introduction on U-NET architecture <a class="anchor"  id="h6"></a>

<img src="https://miro.medium.com/max/720/1*f7YOaE4TWubwaFF7Z1fzNw.png" width = 60%>

- The name U-NET itself is due to the shape of its architecture.
- Each blue box corresponds to a multi-channel feature map.
- The number of channels are denoted on top of the box.
- The x-y size is provided at the lower left edge of the box.
- The arrows shows the respective operations as mentioned on the bottom right of the image.
- This architecture consists of three sections: The contraction, The bottleneck, and the expansion section.
- But the heart of this architecture lies in the expansion section.
- This action would ensure that the features that are learned while contracting the image will be used to reconstruct it.

*For in-depth understanding do read this amazing [line by line explanation](https://towardsdatascience.com/unet-line-by-line-explanation-9b191c76baf5).*

## Building and Training U-NET from Scratch <a class="anchor"  id="h7"></a>

<img src = "https://i.stack.imgur.com/o5TBk.png" width = 45%>       

<img src = "https://www.researchgate.net/publication/333593451/figure/fig2/AS:765890261966848@1559613876098/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max.png" width = 45% height = 30%>

## /todo Building the model

#### Calculating the output shape of the feature map depending on strides, kernel size, input size, padding

<img src = "https://i.stack.imgur.com/qPKxm.png" width = 45%>

#### Metric and Loss for compiling the model

**Dice Coeffiient:-**        
<img src = "https://cdn-images-1.medium.com/max/1600/0*HuENmnLgplFLg7Xv" width = 45%>

<img src = "https://drive.google.com/uc?id=1XoP-1kwuScIj2Ee7rYrARylF8DRyJDF6" width = 45%>       

<img src = "https://pbs.twimg.com/media/FBmVmdHWQAAU7gq.png" width = 45% height = 20%>

<img src = "https://drive.google.com/uc?id=1oFWisqT_z0AKXvp1-JQ3LSjcwWrxjz2J" width = 45%>


*[Here](https://arxiv.org/pdf/2006.14822.pdf), you can find a survey of Loss functions for semantic segmentations.*        
*More info can be found [here](https://www2.cs.sfu.ca/~hamarneh/ecopy/cmig2019.pdf)*

- *Just in case you are not aware about callbacks we use in keras you can learn more about it [here](https://keras.io/api/callbacks/).*

## /todo Callbacks

## /todo Evaluation, prediction