# Load Dependencies and initialize variables

In [1]:
import numpy as np
import pandas as pd
import os
import cv2
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
import keras
from keras.optimizers import SGD
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, Lambda, Cropping2D, MaxPooling2D
from keras.utils import np_utils
from collections import Counter
%matplotlib inline
print(keras.__version__)

Using TensorFlow backend.


2.0.3


In [2]:
# define classes
classes = ["adult_males", "subadult_males", "adult_females", "juveniles", "pups"]
classes_with_negative = ["adult_males", "subadult_males", "adult_females", "juveniles", "pups", "negative"]

file_names = os.listdir('/input/Train/')
file_names = sorted(file_names, key=lambda 
                    item: (int(item.partition('.')[0]) if item[0].isdigit() else float('inf'), item)) 

write_dir = 'submission.csv'

# dataframe to grab coords fromimg = cv2.imread(direct+"/input/Train/6.jpg")

coords = pd.read_csv('coords.csv')

# Re-arrange sea-lion classes

In [3]:
#removing into their own classes
adult_males = coords[coords.klass == 0]
subadult_males = coords[coords.klass == 1]
adult_females = coords[coords.klass == 2]
juveniles = coords[coords.klass == 3]
pups = coords[coords.klass == 4]

#removing the klass column since its the class value
adult_males = adult_males.drop('klass', axis=1)
adult_females = adult_females.drop('klass', axis=1)
subadult_males = subadult_males.drop('klass', axis=1)
juveniles = juveniles.drop('klass', axis=1)
pups = pups.drop('klass', axis=1)

#create new df to modify
coords_df = pd.DataFrame(index=file_names,columns=classes)

In [4]:
tmpcls = {'adult_males': adult_males, 'subadult_males': subadult_males,
        'adult_females': adult_females, 'juveniles': juveniles,
        'pups':pups}
catted = pd.concat(tmpcls)

g = catted.groupby(['image_id', pd.Grouper(level=0)])[['x', 'y']]
coords_df = g.apply(lambda x: list(zip(*x.values.T))).unstack()

indexer = []
for i in coords_df.index:
    indexer.append(str(i)+'.jpg')

coords_df.index = indexer

coords_df.fillna('[]', inplace=True)

# Making into useable form

In [5]:
x = []
y = []

#Track errors

for fname in coords_df.index[:55]:
    # Read currnt File
    image_2 = cv2.imread("/input/Train/" + fname)
    #copy image2 (undotted)
    cut = np.copy(image_2)
    #Checks off, works fine
    
    # Iterate through
    try:
        for lion_class in classes:

            for coordinates in coords_df[lion_class][fname]:

                thumb = image_2[ coordinates[1]-16 : coordinates[1]+16 , coordinates[0]-16 : coordinates[0]+16, : ]

                if np.shape(thumb) == (32, 32, 3):

                    x.append(thumb)

                    y.append(lion_class)
                
    except (IndexError):
        pass
    
    # Adding Negative Values to arrays: x and y
    for i in range(0,np.shape(cut)[0],112):
        
        for j in range(0,np.shape(cut)[1],112):                
            
            thumb = cut[ i:i+32, j:j+32, :]
            
            if np.amin(cv2.cvtColor(thumb, cv2.COLOR_BGR2GRAY)) != 0:
                
                if np.shape(thumb) == (32,32,3):
                    
                    x.append(thumb)
                    
                    y.append("negative")
x = np.array(x)
y = np.array(y)

In [6]:
def ifErrors():
    print('In file: ', tpp) 
    print('At class: ',qpp)
    print('With coords: \n' , coords_df.ix[tpp][qpp])
    print('last error at coords: ' ,epp)
    plt.imshow(x[len(x)-1])

# ML! Using Keras
---
Tensorflow backend

In [15]:
encoder = LabelBinarizer()
encoder.fit(y)
y = encoder.transform(y).astype(float)
#y = label_binarize(y, classes=classes)

In [16]:
model = Sequential()

model.add(Lambda(lambda x: (x / 255.0) - 0.5, input_shape=(32,32,3)))

model.add(Conv2D(64, (3, 3), activation='relu', name='conv1_1', padding='same'))
model.add(Conv2D(64, (3, 3), activation='relu', name='conv1_2', padding='same'))
model.add(MaxPooling2D((2, 2), strides=(2, 2)))

model.add(Conv2D(128, (3, 3), activation='relu', name='conv2_1', padding='same'))
model.add(Conv2D(128, (3, 3), activation='relu', name='conv2_2', padding='same'))
model.add(MaxPooling2D((2, 2), strides=(2, 2)))

model.add(Conv2D(256, (3, 3), activation='relu', name='conv3_1', padding='same'))
model.add(Conv2D(256, (3, 3), activation='relu', name='conv3_2', padding='same'))
model.add(Conv2D(256, (3, 3), activation='relu', name='conv3_3', padding='same'))
model.add(MaxPooling2D((2, 2), strides=(2, 2)))

model.add(Conv2D(512, (3, 3), activation='relu', name='conv4_1', padding='same'))
model.add(Conv2D(512, (3, 3), activation='relu', name='conv4_2', padding='same'))
model.add(Conv2D(512, (3, 3), activation='relu', name='conv4_3', padding='same'))
model.add(MaxPooling2D((2, 2), strides=(2, 2)))

model.add(Conv2D(512, (3, 3), activation='relu', name='conv5_1', padding='same'))
model.add(Conv2D(512, (3, 3), activation='relu', name='conv5_2', padding='same'))
model.add(Conv2D(512, (3, 3), activation='relu', name='conv5_3', padding='same'))
model.add(MaxPooling2D((2, 2), strides=(2, 2)))

model.add(Flatten())

model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(6, activation='softmax'))

sgd = SGD(lr=0.02, decay=1e-6, momentum=0.6, nesterov=True)

model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
history = model.fit(x, y, epochs=15,
                    validation_split=0.2,
                    verbose=2)

Train on 69620 samples, validate on 17406 samples
Epoch 1/15
190s - loss: 0.2110 - acc: 0.9522 - val_loss: 0.2179 - val_acc: 0.9489
Epoch 2/15
189s - loss: 0.1412 - acc: 0.9599 - val_loss: 0.1815 - val_acc: 0.9478
Epoch 3/15
189s - loss: 0.1364 - acc: 0.9608 - val_loss: 0.1741 - val_acc: 0.9479
Epoch 4/15


In [None]:
model.save('NOAA_MODEL.h5')

In [None]:
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()
# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

This cell is not yet ready 


for fname in test_files:
    img = cv2.imread( direct + 'input/Test/' + fname)
    
    x_test = 

In [None]:
img = cv2.imread("/input/Train/14.jpg")

x_test = []

for i in range(0,np.shape(img)[0],32):
    for j in range(0,np.shape(img)[1],32):                
        thumb = img[ i:i+32, j:j+32, :]        
        if np.shape(thumb) == (32,32,3):
            x_test.append(thumb)

x_test = np.array(x_test)

In [None]:
y_predicted = model.predict(x_test, verbose=0)

In [None]:
y_predicted = encoder.inverse_transform(y_predicted)

In [None]:
print(Counter(y_predicted).items())

In [None]:
reference = pd.read_csv('/input/train.csv')
reference.ix[14]