# Humpback Whale Identifaction

In [15]:
import pandas as pd
import matplotlib.pyplot as plt
import math
import seaborn as sns
from glob import glob

from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.model_selection import train_test_split

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
from keras.applications import VGG16
from keras.applications.resnet50 import ResNet50
from keras.callbacks import TensorBoard

from functools import partial
from multiprocessing import Pool

from PIL import Image

import numpy as np

%matplotlib inline

## 0. Importing Data and Primitive Analysis

In [2]:
train_df = pd.read_csv('Datas/train.csv')
train_df.head()
train_images = glob("Datas/train/*jpg")
test_images = glob("../input/test/*jpg")

In [3]:
train_df["Image"] = train_df["Image"].map( lambda x : "Datas/train/"+x)
ImageToLabelDict = dict(zip(train_df["Image"], train_df["Id"]))

In [4]:
SIZE = 100
#image are imported and resized
def ImportImage( filename):
    img = Image.open(filename).resize( (SIZE,SIZE))
    img = np.array(img)
    if img.ndim == 2: #imported BW picture and converting to "dumb RGB"
        img = np.tile( img, (3,1,1)).transpose((1,2,0))
    return img
x_train = np.array([ImportImage(img) for img in train_images])
x = x_train

In [5]:
class LabelOneHotEncoder():
    def __init__(self):
        self.ohe = OneHotEncoder()
        self.le = LabelEncoder()
    def fit_transform(self, x):
        features = self.le.fit_transform( x)
        return self.ohe.fit_transform( features.reshape(-1,1))
    def transform( self, x):
        return self.ohe.transform( self.la.transform( x.reshape(-1,1)))
    def inverse_tranform( self, x):
        return self.le.inverse_transform( self.ohe.inverse_tranform( x))
    def inverse_labels( self, x):
        return self.le.inverse_transform(x)

In [6]:
y = list(map(ImageToLabelDict.get, train_images))
lohe = LabelOneHotEncoder()
y_cat = lohe.fit_transform(y)

In [7]:
#use of an image generator for preprocessing and data augmentation
x_train = x_train.reshape( (-1,SIZE,SIZE,3))
input_shape = x_train[0].shape
#x_train = x_train.astype("float32")
y_train = y_cat

image_gen = ImageDataGenerator(
    #featurewise_center=True,
    #featurewise_std_normalization=True,
	rescale=1./255,
    rotation_range=15,
    width_shift_range=.15,
    height_shift_range=.15,
    horizontal_flip=True)

#training the image preprocessing
image_gen.fit(x_train, augment=True)

## 1. Advanced Data Tackling

In [8]:
def plotImages(images_arr, n_images=4):
    fig, axes = plt.subplots(n_images, n_images, figsize=(12,12))
    axes = axes.flatten()
    for img, ax in zip( images_arr, axes):
        if img.ndim != 2:
            img = img.reshape((SIZE,SIZE))
        ax.imshow( img, cmap="Greys_r")
        ax.set_xticks(())
        ax.set_yticks(())
    plt.tight_layout()

## 2. Image augmentation with Keras prebuilt

In [9]:
#use of an image generator for preprocessing and data augmentation
x = x.reshape((-1,SIZE,SIZE,1))
x_train = x.astype("float32")
y_train = y_cat

image_gen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True,
    rotation_range=15,
    width_shift_range=.15,
    height_shift_range=.15,
    horizontal_flip=True)

#training the image preprocessing
image_gen.fit(x_train, augment=True)

In [10]:
x_train = x_train.reshape( (-1,SIZE,SIZE,3))
input_shape = x_train[0].shape
#x_train = x_train.astype("float32")
y_train = y_cat

## 3. Building and Training Model

In [12]:
batch_size = 16
num_classes = len(y_cat.toarray()[0])
epochs = 10
input_shape = x_train[0].shap

AttributeError: 'numpy.ndarray' object has no attribute 'shap'

In [None]:
model = Sequential()

#picking vgg16 as pretrained (base) model https://keras.io/applications/#vgg16
conv_base = VGG16(weights="imagenet", include_top=False, input_shape=input_shape)
for layer in conv_base.layers:
    layer.trainable = False

#maybe unfreeze last layer
conv_base.layers[-2].trainable = True

model.add(conv_base)
model.add(Flatten())
model.add(Dropout(0.33))
model.add(Dense(48, activation='relu')) #64
model.add(Dropout(0.33))
model.add(Dense(48, activation='relu')) #48
model.add(Dropout(0.33))
model.add(Dense(num_classes, activation='softmax'))

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])
model.summary()
model.fit_generator(image_gen.flow(x_train, y_train.toarray(), batch_size=batch_size),
          steps_per_epoch=x_train.shape[0] // epochs,
          epochs=epochs,
          verbose=1,
          callbacks=[TensorBoard(log_dir='./tmp/log', write_graph=True)])

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 3, 3, 512)         14714688  
_________________________________________________________________
flatten_3 (Flatten)          (None, 4608)              0         
_________________________________________________________________
dropout_7 (Dropout)          (None, 4608)              0         
_________________________________________________________________
dense_7 (Dense)              (None, 48)                221232    
_________________________________________________________________
dropout_8 (Dropout)          (None, 48)                0         
_________________________________________________________________
dense_8 (Dense)              (None, 48)                2352      
_________________________________________________________________
dropout_9 (Dropout)          (None, 48)                0         
__________

In [27]:
model = Sequential()

#picking vgg16 as pretrained (base) model https://keras.io/applications/#vgg16
conv_base = ResNet50(weights="imagenet", include_top=False, input_shape=input_shape)
for layer in conv_base.layers:
    layer.trainable = False

#maybe unfreeze last layer
conv_base.layers[-2].trainable = True

model.add(conv_base)
model.add(Flatten())
model.add(Dropout(0.33))
model.add(Dense(48, activation='relu')) #64
model.add(Dropout(0.33))
model.add(Dense(48, activation='relu')) #48
model.add(Dropout(0.33))
model.add(Dense(num_classes, activation='softmax'))

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])
model.summary()
model.fit_generator(image_gen.flow(x_train, y_train.toarray(), batch_size=batch_size),
          steps_per_epoch=x_train.shape[0] // epochs,
          epochs=epochs,
          verbose=1)

ValueError: Input size must be at least 197x197; got `input_shape=(100, 100, 3)`