<a href="https://colab.research.google.com/github/ArretVice/Whales_tails_classification/blob/master/Its_never_tails.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Imports**

In [0]:
#%%capture
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
import random
import zipfile
from PIL import Image, ImageOps
import h5py
from tqdm import tqdm
from collections import OrderedDict


# keras stuff
from keras.models import Sequential, Model, load_model
from keras.layers import Dense, Conv2D, MaxPool2D, Flatten, Dropout, Input, add
from keras.layers import Activation, LeakyReLU, ELU, BatchNormalization, Lambda
import keras.backend as K
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint
from keras.regularizers import l1_l2

Using TensorFlow backend.


**Download and unzip the competition data**

In [0]:
# downloading
!pip install kaggle
from google.colab import files
kaggle_token=files.upload() # select the kaggle.json file
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!kaggle competitions download -c humpback-whale-identification

# unpack every archive into a folder of the same name, then delete the .zip
# file to free disk space

archive_types=['train','test']
for archive_type in archive_types:
    archive_path=archive_type+'.zip'
    # exist_ok keeps the cell re-runnable when the folder is already there
    os.makedirs(archive_type, exist_ok=True)
    print(f'Extracting {archive_type} data...')
    with zipfile.ZipFile(archive_path) as z:
        # extract straight into the target folder instead of changing the
        # working directory, so a failure cannot leave the notebook in
        # an unexpected directory
        z.extractall(archive_type)
    print(archive_type.capitalize()+' data extracted and ready.')
    # the archive is removed only after it has been closed
    os.remove(archive_path)
# folder used later by the ModelCheckpoint callback
os.makedirs('saved_weights', exist_ok=True)



Saving kaggle.json to kaggle.json
Downloading sample_submission.csv to /content
  0% 0.00/498k [00:00<?, ?B/s]
100% 498k/498k [00:00<00:00, 63.0MB/s]
Downloading train.csv to /content
  0% 0.00/594k [00:00<?, ?B/s]
100% 594k/594k [00:00<00:00, 80.7MB/s]
Downloading test.zip to /content
 99% 1.34G/1.35G [00:16<00:00, 73.1MB/s]
100% 1.35G/1.35G [00:16<00:00, 85.4MB/s]
Downloading train.zip to /content
100% 4.15G/4.16G [01:14<00:00, 76.8MB/s]
100% 4.16G/4.16G [01:14<00:00, 59.6MB/s]
Extracting train data...
Train data extracted and ready.
Extracting test data...
Test data extracted and ready.


In [0]:
# show the working directory to confirm the extracted folders and CSV files exist
os.listdir()

['.config',
 'train',
 'saved_weights',
 'sample_submission.csv',
 'train.csv',
 'test',
 'kaggle.json',
 'sample_data']

**Load and explore data**

In [0]:
# read the training labels and use a lower-case name for the image column
df=pd.read_csv('train.csv').rename(columns={'Image':'image'})

# integer class index for every whale Id, numbered in sorted Id order
unique_labels=sorted(set(df.Id.values))
Id_to_label_dict=dict(zip(unique_labels, range(len(unique_labels))))
df['whale_type']=df.Id.map(Id_to_label_dict)
df.sample(5)

Unnamed: 0,image,Id,whale_type
23272,ea685fbba.jpg,w_60cf87c,1870
5272,35fb1669b.jpg,new_whale,0
5452,37bc1b579.jpg,w_778e474,2342
25210,fe57772d4.jpg,new_whale,0
17446,af0194be3.jpg,new_whale,0


In [0]:
# # stolen from here:
# # https://www.kaggle.com/hrmello/flow-from-dataframe-a-memory-friendly-approach


# # rewrite in SSF mode
# images=df[['image','Id']].sample(5).values.tolist()
# fig, m_axs = plt.subplots(1, len(images), figsize = (20, 10))

# for ii, c_ax in enumerate(m_axs):
#     image_to_show=Image.open('train/'+images[ii][0]).convert('L')
#     c_ax.imshow(image_to_show)
#     c_ax.set_title(images[ii][1])

**Custom functions for image preprocessing**

In [0]:
def custom_resize_image(full_image_path, target_width_height, epsilon=0.005):
    
    '''
    Open an image as greyscale and scale it to ``target_width_height``
    (width, height).

    When the image's aspect ratio differs from the target ratio by more
    than ``epsilon``, black borders are added first (left/right or
    top/bottom) so that the final resize does not distort the picture.
    Returns the resized PIL image.
    '''
    
    wanted_ratio=target_width_height[0]/target_width_height[1]
    picture=Image.open(full_image_path).convert('L')
    width, height = picture.width, picture.height

    needs_padding=abs(width/height-wanted_ratio)>epsilon
    if needs_padding:
        if width>height*wanted_ratio:
            # too wide for the target ratio: grow the height
            padded_width, padded_height = width, int(width/wanted_ratio)
        else:
            # too tall for the target ratio: grow the width
            padded_width, padded_height = int(height*wanted_ratio), height
        # half of the missing size goes on each side
        horizontal_pad=(padded_width-width)//2
        vertical_pad=(padded_height-height)//2
        picture=ImageOps.expand(picture,
                                border=(horizontal_pad, vertical_pad))

    return picture.resize(target_width_height)

In [0]:
def augment_image(image_to_show, num_rotations=5,
                  min_rotation=-15, max_rotation=15,
                  enable_upside_down=True, enable_rotations=True,
                  enable_mirroring=True):
    
    '''
    Build a stack of augmented copies of one image.

    The stack always starts with the unchanged image, optionally followed
    by a left-right mirrored copy, an upside-down copy and ``num_rotations``
    rotated copies whose angles are spread evenly between ``min_rotation``
    and ``max_rotation`` (cast to whole degrees).
    Pixel values are divided by 255 and every copy has the shape
    (height, width, 1); the result is a single array with the copies
    stacked along the first axis.
    '''
    
    width, height = image_to_show.size

    def as_scaled_array(picture):
        # scale pixel values by 1/255 and add a trailing channel axis
        return (np.array(picture)/255).reshape((height, width, 1))

    variants=[as_scaled_array(image_to_show)]

    if enable_mirroring:
        # left <-> right
        variants.append(as_scaled_array(ImageOps.mirror(image_to_show)))

    if enable_upside_down:
        # top <-> bottom
        variants.append(as_scaled_array(ImageOps.flip(image_to_show)))

    if enable_rotations:
        angles=np.linspace(min_rotation, max_rotation,
                           num_rotations).astype(int)
        variants.extend(as_scaled_array(image_to_show.rotate(angle))
                        for angle in angles)

    return np.stack(variants)

In [0]:
def randomly_augment_image(image_to_show):
    
    '''
    Return a randomly altered copy of the image that is never identical
    to the input: it is mirrored with probability 1/2 and rotated by a
    random whole angle between -15 and 15 degrees (0 degrees is only
    allowed when the image was mirrored).
    Used when a class has a single image, so a pair can still be formed.
    '''
    
    # coin toss: mirror left <-> right or not
    was_mirrored=random.choice([True, False])
    if was_mirrored:
        image_to_show=ImageOps.mirror(image_to_show)

    # without mirroring a zero rotation would return the original image,
    # so that angle is excluded in this case
    candidate_angles=[angle for angle in range(-15, 16)
                      if was_mirrored or angle!=0]

    # TODO: a random upside-down flip is still to be tested (not applied)
    return image_to_show.rotate(random.choice(candidate_angles))

In [0]:
def chunk_iterator(iterable, chunk_len):
    
    '''
    Yield successive lists of at most ``chunk_len`` items from ``iterable``
    (helper for iterating over mini-batches).

    Every chunk except possibly the last one holds ``chunk_len`` items.
    Nothing is yielded for an empty iterable, so callers never receive
    an empty mini-batch.

    Raises ValueError (on first iteration) if ``chunk_len`` is below 1.
    '''
    
    if chunk_len<1:
        raise ValueError('chunk_len must be at least 1')

    stack=[]
    for item in iterable:
        stack.append(item)
        if len(stack)>=chunk_len:
            yield stack
            stack=[]
    # emit the remainder only when there is one
    if stack:
        yield stack

In [0]:
def pair_generator(df, image_folder, target_w_h, n_rota, min_rota, max_rota,
                   en_updown, en_rota, en_mirror):
    
    '''
    Endless generator of training batches (any_whale vs any_whale).

    The whale type of every row of ``df`` is visited once per pass, in
    shuffled order, so frequent types are drawn proportionally more often.
    For each visited type two batches are yielded in turn:
    a positive one (two images of that type, label 1) and a negative one
    (an image of that type and an image of another type, label 0).

    Each yielded item is ``([x1, x2], y)``: ``x1`` and ``x2`` are the stacks
    returned by ``augment_image`` for the two images and ``y`` is a column
    holding the label once per augmented copy.

    ``df`` needs the columns 'image' and 'whale_type'; it is read once when
    iteration starts. Raises ValueError (on first iteration) if ``df`` holds
    fewer than two whale types, because no negative pair could be formed.
    '''
    
    all_images=df['image'].values.tolist()
    whale_types=df['whale_type'].values.tolist()

    # image names grouped by whale type, built once instead of filtering
    # the data frame twice on every step
    images_by_type={}
    for image_name, wtype in zip(all_images, whale_types):
        images_by_type.setdefault(wtype, []).append(image_name)

    if len(images_by_type)<2:
        raise ValueError('pair_generator needs images of at least two whale types')

    # whale_types gets shuffled below, so keep the row-aligned order separately
    row_types=list(whale_types)
    n_rows=len(all_images)

    def load_variants(image_name, randomize=False):
        # resize one image and return its augmented stack
        picture=custom_resize_image(image_folder+image_name, target_w_h)
        if randomize:
            picture=randomly_augment_image(picture)
        return augment_image(picture,
                             num_rotations=n_rota,
                             min_rotation=min_rota,
                             max_rotation=max_rota,
                             enable_upside_down=en_updown,
                             enable_rotations=en_rota,
                             enable_mirroring=en_mirror)

    def pick_other_type_image(wtype):
        # uniform draw over all images whose type differs from wtype
        while True:
            row=random.randrange(n_rows)
            if row_types[row]!=wtype:
                return all_images[row]

    while True:
        random.shuffle(whale_types)
        
        for wtype in whale_types:
            same_type_images=images_by_type[wtype]
            
            # positive pair first, then negative pair
            for label in (1, 0):
                first=random.choice(same_type_images)
                
                if label==1:
                    second=random.choice(same_type_images)
                else:
                    second=pick_other_type_image(wtype)

                # a positive pair made of one and the same file would be
                # trivially identical, so its second copy gets distorted
                same_file=(label==1 and second==first)

                x1=load_variants(first)
                x2=load_variants(second, randomize=same_file)
                y=label*np.ones((len(x1),1))
                
                yield [x1, x2], y

In [0]:
def pair_generator_v2(df, image_folder, target_w_h, n_rota, min_rota, max_rota,
                   en_updown, en_rota, en_mirror):
    
    '''
    Endless generator of training batches that alternates between two
    ways of choosing the anchor whale type:
    1. 'known_vs_known' - the type of a random row whose whale_type is not 0
    2. 'known_vs_any'   - the type of a random row of the whole frame
       (this may be type 0 as well)
    For each chosen type two batches are yielded in turn: a positive one
    (two images of that type, label 1) and a negative one (an image of that
    type and an image of another type, label 0).

    Each yielded item is ``([x1, x2], y)``: ``x1`` and ``x2`` are the stacks
    returned by ``augment_image`` for the two images and ``y`` is a column
    holding the label once per augmented copy.

    ``df`` needs the columns 'image' and 'whale_type'; it is read once when
    iteration starts. Raises ValueError (on first iteration) if ``df`` holds
    fewer than two whale types, because no negative pair could be formed.
    '''

    # whale_type 0 is treated as the unknown class; in this notebook that is
    # 'new_whale' - TODO confirm if the label mapping ever changes
    new_whale_type=0

    all_images=df['image'].values.tolist()
    any_whales=df['whale_type'].values.tolist()
    known_whales=[wtype for wtype in any_whales if wtype!=new_whale_type]
    possible_options=['known_vs_known','known_vs_any']

    # image names grouped by whale type, built once instead of filtering
    # the data frame twice on every step
    images_by_type={}
    for image_name, wtype in zip(all_images, any_whales):
        images_by_type.setdefault(wtype, []).append(image_name)

    # with two or more types at least one of them is a known whale,
    # so both options always have something to draw from
    if len(images_by_type)<2:
        raise ValueError('pair_generator_v2 needs images of at least two whale types')

    n_rows=len(all_images)

    def load_variants(image_name, randomize=False):
        # resize one image and return its augmented stack
        picture=custom_resize_image(image_folder+image_name, target_w_h)
        if randomize:
            picture=randomly_augment_image(picture)
        return augment_image(picture,
                             num_rotations=n_rota,
                             min_rotation=min_rota,
                             max_rotation=max_rota,
                             enable_upside_down=en_updown,
                             enable_rotations=en_rota,
                             enable_mirroring=en_mirror)

    def pick_other_type_image(wtype):
        # uniform draw over all images whose type differs from wtype
        while True:
            row=random.randrange(n_rows)
            if any_whales[row]!=wtype:
                return all_images[row]

    while True:
        for option in possible_options:

            # random.choice is already uniform, no prior shuffle is needed
            if option=='known_vs_known':
                wtype=random.choice(known_whales)
            else:
                wtype=random.choice(any_whales)

            same_type_images=images_by_type[wtype]

            # positive pair first, then negative pair
            for label in (1, 0):
                first=random.choice(same_type_images)

                if label==1:
                    second=random.choice(same_type_images)
                else:
                    second=pick_other_type_image(wtype)

                # a positive pair made of one and the same file would be
                # trivially identical, so its second copy gets distorted
                same_file=(label==1 and second==first)

                x1=load_variants(first)
                x2=load_variants(second, randomize=same_file)
                y=label*np.ones((len(x1),1))

                yield [x1, x2], y

In [0]:
def pair_generator_v3(df, image_folder, target_w_h, n_rota, min_rota, max_rota,
                   en_updown, en_rota, en_mirror):

    '''
    Endless generator of training batches that cycles through three
    ways of choosing the anchor whale type:
    1. 'known_vs_known'   - the type of a random row whose whale_type is not 0
    2. 'known_vs_any'     - the type of a random row of the whole frame
       (this may be type 0 as well)
    3. 'known_vs_unknown' - always type 0; this option is skipped when
       ``df`` contains no rows of type 0
    For each chosen type two batches are yielded in turn: a positive one
    (two images of that type, label 1) and a negative one (an image of that
    type and an image of another type, label 0).

    Each yielded item is ``([x1, x2], y)``: ``x1`` and ``x2`` are the stacks
    returned by ``augment_image`` for the two images and ``y`` is a column
    holding the label once per augmented copy.

    ``df`` needs the columns 'image' and 'whale_type'; it is read once when
    iteration starts. Raises ValueError (on first iteration) if ``df`` holds
    fewer than two whale types, because no negative pair could be formed.
    '''

    # whale_type 0 is treated as the unknown class; in this notebook that is
    # 'new_whale' - TODO confirm if the label mapping ever changes
    new_whale_type=0

    all_images=df['image'].values.tolist()
    any_whales=df['whale_type'].values.tolist()
    known_whales=[wtype for wtype in any_whales if wtype!=new_whale_type]

    # image names grouped by whale type, built once instead of filtering
    # the data frame twice on every step
    images_by_type={}
    for image_name, wtype in zip(all_images, any_whales):
        images_by_type.setdefault(wtype, []).append(image_name)

    # with two or more types at least one of them is a known whale,
    # so the first two options always have something to draw from
    if len(images_by_type)<2:
        raise ValueError('pair_generator_v3 needs images of at least two whale types')

    possible_options=['known_vs_known','known_vs_any']
    # anchoring on the unknown class only works when it has images, e.g. it
    # must be skipped when the frame was filtered down to known whales
    if new_whale_type in images_by_type:
        possible_options.append('known_vs_unknown')

    n_rows=len(all_images)

    def load_variants(image_name, randomize=False):
        # resize one image and return its augmented stack
        picture=custom_resize_image(image_folder+image_name, target_w_h)
        if randomize:
            picture=randomly_augment_image(picture)
        return augment_image(picture,
                             num_rotations=n_rota,
                             min_rotation=min_rota,
                             max_rotation=max_rota,
                             enable_upside_down=en_updown,
                             enable_rotations=en_rota,
                             enable_mirroring=en_mirror)

    def pick_other_type_image(wtype):
        # uniform draw over all images whose type differs from wtype
        while True:
            row=random.randrange(n_rows)
            if any_whales[row]!=wtype:
                return all_images[row]

    while True:
        for option in possible_options:

            # random.choice is already uniform, no prior shuffle is needed
            if option=='known_vs_known':
                wtype=random.choice(known_whales)
            elif option=='known_vs_unknown':
                wtype=new_whale_type
            else:
                wtype=random.choice(any_whales)

            same_type_images=images_by_type[wtype]

            # positive pair first, then negative pair
            for label in (1, 0):
                first=random.choice(same_type_images)

                if label==1:
                    second=random.choice(same_type_images)
                else:
                    second=pick_other_type_image(wtype)

                # a positive pair made of one and the same file would be
                # trivially identical, so its second copy gets distorted
                same_file=(label==1 and second==first)

                x1=load_variants(first)
                x2=load_variants(second, randomize=same_file)
                y=label*np.ones((len(x1),1))

                yield [x1, x2], y

**Creating model**

    Model parameters:

In [0]:
# (width, height) passed to custom_resize_image; the network input layer
# is declared as (height, width, 1)
TARGET_W_H = (600, 300) # img will be resized to this size to match input layer
# kernel regularizer shared by all convolution layers (l1=0.01, l2=0.001)
REG=l1_l2(0.01, 0.001) # L1 and L2 regularization

# augmentation switches forwarded to augment_image by the pair generators
EN_UPDOWN = False # add upside-down image to training batch
EN_MIRROR = True # add mirrored image to training batch

EN_ROTA = False   # add rotated images to training batch 
N_ROTA = 6       # number of rotated images to add (used only if EN_ROTA)
MIN_ROTA = -10   # min angle to rotate
MAX_ROTA = 10    # max angle to rotate

# data frame the training pairs are drawn from
#DATA = df[df['whale_type']!=0] # only known whales
DATA = df # all whales

    Model architecture:

In [0]:
# base siamese model: one shared encoder is applied to both input images
# and the element-wise absolute difference of the two encodings is
# mapped to a single "same whale" probability

# network input is (height, width, channels)
encoder_input_shape=(TARGET_W_H[1], TARGET_W_H[0], 1)

# encoder branch of a model
encoder_part=Sequential()

# four convolution stages: Conv2D -> BatchNorm -> ReLU -> 2x2 max-pooling
conv_stages=[(32, 5), (64, 5), (128, 3), (256, 3)]
for stage_index, (n_filters, kernel) in enumerate(conv_stages):
    # only the very first layer declares the input shape
    extra_kwargs={'input_shape': encoder_input_shape} if stage_index==0 else {}
    encoder_part.add(Conv2D(n_filters, kernel_size=(kernel, kernel),
                            padding='valid', strides=(1,1),
                            kernel_regularizer=REG, **extra_kwargs))
    encoder_part.add(BatchNormalization())
    encoder_part.add(Activation('relu'))
    encoder_part.add(MaxPool2D(pool_size=(2,2), strides=(2,2),
                               padding='valid'))

# dense head: 256 values per image, passed through a sigmoid
encoder_part.add(Flatten())
#encoder_part.add(Dropout(0.2))
encoder_part.add(Dense(256))
encoder_part.add(BatchNormalization())
encoder_part.add(Activation('sigmoid'))

# pairs of images
input_image_1 = Input(encoder_input_shape)
input_image_2 = Input(encoder_input_shape)

# both inputs go through the very same encoder (shared weights)
encoded_image_1 = encoder_part(input_image_1)
encoded_image_2 = encoder_part(input_image_2)

# L1 distance layer between the two encoded outputs
l1_distance_layer = Lambda(lambda tensors: K.abs(tensors[0] - tensors[1]))
l1_distance = l1_distance_layer([encoded_image_1, encoded_image_2])

# prediction
prediction = Dense(units=1, activation='sigmoid')(l1_distance)
siamese_model = Model(outputs=prediction,
                      inputs=[input_image_1, input_image_2])

In [0]:
# upload pretrained model (optional)
# NOTE(review): the recorded output shows the upload being saved under its own
# file name in the working directory, while the next cell loads
# 'saved_weights/weights.hdf5' - confirm the file is moved/renamed first
files.upload()

Saving model_1024.hdf5 to model_1024.hdf5


In [0]:
# load a previously saved model if it is available; otherwise the freshly
# built (untrained) siamese_model is kept
try:
    siamese_model=load_model('saved_weights/weights.hdf5')
    print('Model loaded')
except Exception as load_error:
    # `except Exception` (unlike a bare except) lets KeyboardInterrupt through;
    # the reason is printed so a corrupt file is not mistaken for a missing one
    print('Unable to load saved model')
    print(f'Reason: {load_error!r}')

Model loaded


In [0]:
# print the layers, output shapes and parameter counts of the siamese model
siamese_model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_11 (InputLayer)           (None, 300, 600, 1)  0                                            
__________________________________________________________________________________________________
input_12 (InputLayer)           (None, 300, 600, 1)  0                                            
__________________________________________________________________________________________________
sequential_6 (Sequential)       (None, 256)          37124480    input_11[0][0]                   
                                                                 input_12[0][0]                   
__________________________________________________________________________________________________
lambda_6 (Lambda)               (None, 256)          0           sequential_6[1][0]               
          

In [0]:
# Adam optimizer with a very small learning-rate decay
opt=Adam(decay=1e-9, lr=1e-3)

# binary target: 1 for a pair of the same whale type, 0 otherwise
siamese_model.compile(optimizer=opt,
                      loss='binary_crossentropy',
                      metrics=['binary_accuracy'])

# save the model whenever the training binary accuracy reaches a new maximum
chkp = ModelCheckpoint(filepath='saved_weights/weights.hdf5',
                       monitor='binary_accuracy',
                       mode='max',
                       save_best_only=True,
                       verbose=1)

In [0]:
# endless stream of positive/negative image pairs drawn from DATA
training_pairs=pair_generator_v2(
    df=DATA, image_folder='train/', target_w_h=TARGET_W_H,
    n_rota=N_ROTA, min_rota=MIN_ROTA, max_rota=MAX_ROTA,
    en_updown=EN_UPDOWN, en_rota=EN_ROTA, en_mirror=EN_MIRROR)

# 50 epochs of 5000 generator steps; the checkpoint callback stores the best model
siamese_model.fit_generator(
    training_pairs,
    steps_per_epoch=5000,
    epochs=50,
    callbacks=[chkp])

Epoch 1/50

KeyboardInterrupt: ignored

In [0]:
# download saved model (the file written by the ModelCheckpoint callback)
# from the Colab machine to the local computer
files.download('saved_weights/weights.hdf5')

**Encoding training images**

In [0]:
# getting encoder part from model: the shared Sequential branch is looked up
# by type instead of trusting its position in the layer list; the original
# position (index 2) is kept as a fallback if no Sequential layer is found
encoder=next((layer for layer in siamese_model.layers
              if isinstance(layer, Sequential)),
             siamese_model.layers[2])

In [0]:
# encode each image using encoder part of siamese model
def encode_image(image, path='train/'):
    '''
    Return the encoder output for one image file as a flat array.

    ``image`` is the file name and ``path`` the folder prefix it is joined
    with. The image is resized to TARGET_W_H, scaled by 1/255 and fed to
    ``encoder`` as a batch of one.
    '''
    width, height = TARGET_W_H[0], TARGET_W_H[1]
    resized=custom_resize_image(path+image, TARGET_W_H)
    # batch of one greyscale image: (1, height, width, 1)
    batch=(np.array(resized)/255).reshape(1, height, width, 1)
    return encoder.predict(batch).reshape(-1)

In [0]:
# adding an encoding column to data frame: encode_image is called once per
# row, i.e. one encoder.predict call per training image
df['encoding']=df['image'].map(encode_image)

In [0]:
# keep only the label and its encoding, sorted by whale Id
# NOTE: this rebinds df and drops the 'image' and 'whale_type' columns, so the
# earlier cells that read them (pair generators, encoding cell) cannot be
# re-run afterwards without reloading train.csv
df=df[['Id','encoding']]
df=df.sort_values('Id').reset_index(drop=True)

In [0]:
# save encoded df
# NOTE(review): the 'encoding' column holds NumPy arrays, which CSV can only
# store as text - confirm the file can be parsed back before relying on it
df.to_csv('encoded_df.csv', index=False)

In [0]:
# download saved data frame (Id and encoding of every training image)
# for future use
files.download('encoded_df.csv')

In [0]:
# preview a few random rows of the encoded training frame
df.sample(5)

**Predicting on test data**

    Encode test images

In [0]:
# one row per file of the test folder, in sorted file-name order
test_images=sorted(os.listdir('test'))
df_test=pd.DataFrame({'Image': test_images})

In [0]:
def encode_test_image(image, path='test/'):
    '''
    Return the encoder output for one test image as a flat array.

    Same processing as ``encode_image``; only the default folder differs,
    so the work is delegated instead of being duplicated.
    '''
    return encode_image(image, path=path)

In [0]:
# encode every test image (one encoder.predict call per file)
df_test['image_encoding']=df_test['Image'].map(encode_test_image)

In [0]:
# preview a few random test rows together with their encodings
df_test.sample(5)

Unnamed: 0,Image,image_encoding
6658,d509ce3e2.jpg,"[0.028265195, 0.5033969, 0.014187057, 0.247651..."
5946,bec2f30d5.jpg,"[0.015536418, 0.5867194, 0.014556223, 0.581924..."
4343,8b18358a2.jpg,"[0.43523717, 0.4687406, 0.42616588, 0.44306695..."
935,1c7f0f337.jpg,"[0.012962001, 0.10209691, 0.004889608, 0.17017..."
3752,7861b6fce.jpg,"[0.019346505, 0.48799893, 0.016251186, 0.91839..."


    Calculate distances between test and train encodings

In [0]:
def calculate_distance(x1, top_k=5, df_train=None, df_test=None, norm_ord=2):
    
    '''
    Rank the training labels by their distance to one encoding.

    x1       - encoding of the query image (1-D array)
    top_k    - number of labels to return
    df_train - frame with the columns 'Id' and 'encoding'; when omitted the
               notebook-level ``df`` is looked up at call time
    df_test  - unused, kept so existing calls stay valid
    norm_ord - order of the vector norm used as distance; the default 2 is
               the Euclidean distance, pass 1 for the L1 distance

    For every Id the smallest distance over all of its training encodings
    is kept; the Ids with the ``top_k`` smallest distances are returned as
    one space-separated string, closest first. Ties keep the order in which
    the Ids first appear in ``df_train``. An empty frame gives ''.
    '''
    
    if df_train is None:
        # resolved on each call, so the frame current at call time is used
        # instead of whatever ``df`` was when the function was defined
        df_train=df

    ids=df_train['Id'].tolist()
    if not ids:
        return ''

    # all distances in one vectorised step instead of a Python-level loop
    encodings=np.stack(df_train['encoding'].tolist())
    distances=np.linalg.norm(encodings-x1, ord=norm_ord, axis=1).tolist()

    # smallest distance per Id; membership is tested explicitly so that an
    # exact match (distance 0) can never be overwritten by a larger value
    best_distance={}
    for whale_id, distance in zip(ids, distances):
        if whale_id not in best_distance or distance<best_distance[whale_id]:
            best_distance[whale_id]=distance

    # sort by value (stable) and pick top_k labels
    ranked_ids=sorted(best_distance, key=best_distance.get)
    
    return ' '.join(ranked_ids[:top_k])

In [0]:
# # pandas.map with progress bar
# # https://stackoverflow.com/questions/52153037/how-to-use-tqdm-with-map-for-dataframes

# tqdm.pandas() # looks terrible in colab notebooks
# df_test['Id']=df_test['image_encoding'].progress_map(calculate_distance)

# for every test image store the space-separated labels returned by
# calculate_distance (this step ranks labels; the encoding itself was
# done in an earlier cell)
df_test['Id']=df_test['image_encoding'].map(calculate_distance)
print('Test images are encoded!')

Test images are encoded!


In [0]:
# images with top 5 predicted labels (5 is the default top_k of
# calculate_distance); only the two columns written to the submission are kept
subs=df_test[['Image','Id']]

In [0]:
# preview the first rows of the submission frame
subs.head()

Unnamed: 0,Image,Id
0,00028a005.jpg,w_cf8ce56 w_edce644 w_3a241cf new_whale w_f602022
1,000dcf7d8.jpg,w_f0fe284 new_whale w_700ebb4 w_698fcbe w_8cee3d3
2,000e7c7df.jpg,w_70d0b3c w_14461a7 w_4c218b5 w_ae6ac74 w_e16924b
3,0019c34f4.jpg,w_c158581 w_bd1c3d5 w_d875f4d w_7547b9a w_093d284
4,001a4d292.jpg,w_d1e0f06 new_whale w_697c75f w_3d67c3b w_aaf3463


In [0]:
# create a submission file with the columns Image and Id (no index column)
subs.to_csv('submissions.csv', index=False)

In [0]:
# download submission file from the Colab machine to the local computer
files.download('submissions.csv')