# Please enter your data

UserName: 

# Imports and variables

In [1]:
# Importing important stuff
import sys
sys.path.append('..')
from First_start.imports import *
from First_start.multi_gpu import *
from First_start.init_vars import *
from First_start.start_functions import *
%load_ext autoreload
%autoreload 2
%matplotlib inline

Using TensorFlow backend.


Imports imported
multi_gpu imported
Vars initialized (FISH_DICTS, seed)


# Explanation


Sidenote:

If you ever need more or new ideas, check out the kernels of the fisheries-monitoring competition on Kaggle:

https://www.kaggle.com/c/the-nature-conservancy-fisheries-monitoring/kernels

## Fish types

In [None]:
# Show the species reference picture that is hosted at the URL below
species_key_url = "https://kaggle2.blob.core.windows.net/competitions/kaggle/5568/media/species-ref-key.jpg"
Image(url=species_key_url)

# Important Functions

Make_parallel(model, gpu_count): You have 4 GPUs; to use all four, call this function with your Keras model.



# Code

In [None]:
# Folders that hold the training pictures (Kaggle data and Imagenet data)
paths = ['../Data/Kaggle/train/*', '../Data/Imagenet/*']

# get_xy gives back six values:
#   x_train, y_train, x_test, y_test, image_df, dummy_df
# test_size is 0 here, so the two test values are thrown away.
x_train, y_train, _, _, image_df, dummy_df = get_xy(
    folders_paths=paths,
    img_size=(224, 224),
    test_size=0.0,
    seed=seed,
)

# Quick look at the shapes of the training variables
print(x_train.shape, y_train.shape)

In [None]:
# Number of rows per fish type, ordered by the count in the 'image' column
fish_type_counts = image_df.groupby('fish_type').count()
fish_type_counts.sort_values(by='image')

# Fish Classification Methods

## Neural Networks, VGG16 and VGG19:
Some teams used self-built neural networks or the pretrained VGG16 or VGG19 networks. This method takes time, although it is faster than before because we now use GPUs.

The VGG16 and VGG19 models can be found at the links below. These are pretrained neural networks, mostly trained on the ImageNet data.

VGG16: https://gist.github.com/baraldilorenzo/07d7802847aaad0a35d3 

VGG19: https://gist.github.com/baraldilorenzo/8d096f48a1be4a2d660d

Below, you can also find the custom neural networks built by the groups.

In [None]:
# VGG16 without its top layers, used as a feature extractor.
# Only the training images are transformed here (the test-set call is left out).
vgg16 = VGG16(include_top=False)
x_train_vgg16 = vgg16.predict(x_train)

# VGG19 with imagenet weights.
vgg19 = VGG19(weights='imagenet')
# When this flag is set, VGG19 is cut off after the layer 'block4_pool'
# instead of running through all of its layers.
truncate_vgg19 = True
if truncate_vgg19:
    block4_output = vgg19.get_layer('block4_pool').output
    vgg19 = Model(input=vgg19.input, output=block4_output)
x_train_vgg19 = vgg19.predict(x_train)

# Show both feature shapes; the models further down are built on top of them
x_train_vgg19.shape, x_train_vgg16.shape

## Neural Networks that were built last time by the team members

In [None]:
def model_standalone(optimizer, init):
    """Build and compile a small CNN that classifies the images directly.

    Args:
        optimizer: Optimizer name or instance handed to ``model.compile``.
        init: Weight initialisation applied to every convolutional and dense
            layer (for example ``'uniform'``).

    Returns:
        The compiled ``Sequential`` model; its last layer is an 8-unit softmax.
    """
    model = Sequential()
    # Channels-last input shape, matching the (7, 7, 512) and (14, 14, 512)
    # feature shapes used by the other model builders in this notebook.
    model.add(Convolution2D(32, 3, 3, input_shape=(224, 224, 3), border_mode='same',
                            activation='relu', init=init, W_constraint=maxnorm(3)))
    model.add(Dropout(0.2))
    model.add(Convolution2D(32, 3, 3, activation='relu', border_mode='same',
                            init=init, W_constraint=maxnorm(3)))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(512, activation='relu', init=init, W_constraint=maxnorm(3)))
    model.add(Dropout(0.5))
    model.add(Dense(8, activation='softmax', init=init))
    # `init` is a layer argument, not a compile option, so it is applied to
    # the layers above and not passed to compile().
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

def model_VGG16_top(optimizer, init):
    """Build and compile the classifier that is put on top of the VGG16 features.

    Args:
        optimizer: Optimizer name or instance handed to ``model.compile``.
        init: Weight initialisation applied to every convolutional and dense
            layer (for example ``'uniform'``).

    Returns:
        The compiled ``Sequential`` model; it takes inputs of shape
        (7, 7, 512) and ends in an 8-unit softmax.
    """
    model = Sequential()
    model.add(Convolution2D(64, 3, 3, input_shape=(7, 7, 512), border_mode='same',
                            activation='relu', init=init, W_constraint=maxnorm(3)))
    model.add(Dropout(0.2))
    model.add(Convolution2D(32, 3, 3, activation='relu', border_mode='same',
                            init=init, W_constraint=maxnorm(3)))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(512, activation='relu', init=init, W_constraint=maxnorm(3)))
    model.add(Dropout(0.5))
    model.add(Dense(8, activation='softmax', init=init))
    # `init` is a layer argument, not a compile option, so it is applied to
    # the layers above and not passed to compile().
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def model_VGG19_top(optimizer, init):
    """Build and compile the classifier that is put on top of the VGG19 features.

    Args:
        optimizer: Optimizer name or instance handed to ``model.compile``.
        init: Weight initialisation applied to both dense layers
            (for example ``'uniform'``).

    Returns:
        The compiled ``Sequential`` model; it takes inputs of shape
        (14, 14, 512) and ends in an 8-unit softmax.
    """
    model = Sequential()
    model.add(Flatten(input_shape=(14, 14, 512)))
    model.add(Dense(256, activation='relu', init=init))
    model.add(Dropout(0.5))
    # Softmax (not sigmoid) so the 8 outputs form one probability distribution,
    # which is what categorical_crossentropy expects.
    model.add(Dense(8, activation='softmax', init=init))
    # `init` is a layer argument, not a compile option, so it is applied to
    # the layers above and not passed to compile().
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Parameter values that the grid search will try out
param_grid = dict(
    optimizer=['adam', 'sgd'],
    nb_epoch=[2],
    batch_size=[5],
    init=['uniform'],
)

# Model builder and data used by the grid search.
# Only training data is used: test_size is 0 at the top, so there are
# no test examples for x or y.
model_base = model_VGG16_top
x, y = x_train_vgg16, y_train

In [None]:
# Wrap the model builder for scikit-learn and set up the grid search
model = KerasClassifier(build_fn=model_base, verbose=0)
grid = GridSearchCV(estimator=model, param_grid=param_grid)

# Run the search
results = grid.fit(x, y)

# Pull the per-combination scores out of the cross-validation results
means, stds, params = (
    results.cv_results_[key]
    for key in ('mean_test_score', 'std_test_score', 'params')
)

# Best combination first, then one line per tried combination
print('Best: {}, using {}'.format(results.best_score_, results.best_params_))

for mean, stdev, param in zip(means, stds, params):
    print('M={} (sd={}) with {}'.format(mean, stdev, param))

# Fish Finding Methods

## SIFT Model

Team2 developed a way to locate fish in a picture using the SIFT model. It seems to work fine. For further information, check the SIFT notebook of Team2 and ask the Team2 members. Below is an example of the SIFT model. It takes the features of one picture and tries to match them with another picture in order to find the fish in that picture.

In [None]:
# First, define a train and a test picture and load both pictures as grayscale
sift_path = '../Data/Kaggle/train/'
train_pic_path = sift_path + 'LAG/img_00091.jpg'
test_pic_path = sift_path + 'LAG/img_01512.jpg'
train_pic = cv2.imread(train_pic_path, cv2.IMREAD_GRAYSCALE)
test_pic = cv2.imread(test_pic_path, cv2.IMREAD_GRAYSCALE)

# cv2.imread returns None instead of raising when a file cannot be read,
# so fail here with a clear message rather than later with an indexing error.
for pic, pic_path in ((train_pic, train_pic_path), (test_pic, test_pic_path)):
    if pic is None:
        raise IOError('Could not read image: {}'.format(pic_path))

# Creating a picture of the fish manually so that features of the fish can be found on this picture
# (this is the left picture, that was manually found on the big picture, a picture of solely the fish)
img_rows, img_cols = 350, 425
crop_top, crop_left = 100, 525  # found by trying multiple rectangles
template = np.zeros([img_rows, img_cols], dtype='uint8')  # initialisation of the template
template[:, :] = train_pic[crop_top:crop_top + img_rows, crop_left:crop_left + img_cols]

# Plotting it just to check if the fish is really in this picture.
# plt.figure (not plt.subplots) so that no extra full-size axes is created
# underneath the two subplots.
plt.figure(figsize=(10, 7))
plt.subplot(121)
plt.imshow(template, cmap='gray')
plt.subplot(122)
plt.imshow(train_pic, cmap='gray')

Now that we have manually cut out a picture of only the fish from the big picture, we can apply the SIFT model,
a feature matching model. The algorithm takes the features of the above picture and tries to match them against
the test picture in order to find the fish in that picture. (Note: the code below does the matching with OpenCV's `cv2.matchTemplate`.)

In [None]:
# Copy the width and the height (assuming that this fish will also take about the same space in the picture)
w, h = template.shape[::-1]


# All the 6 methods for comparison in a list
methods = ['cv2.TM_CCOEFF', 'cv2.TM_CCOEFF_NORMED', 'cv2.TM_CCORR',
           'cv2.TM_CCORR_NORMED', 'cv2.TM_SQDIFF', 'cv2.TM_SQDIFF_NORMED']

for meth in methods:
    # Work on a fresh copy: cv2.rectangle draws in place, so without the copy
    # the boxes of earlier methods would stay on test_pic and be part of the
    # picture that the later methods are matched against.
    img = test_pic.copy()
    # Look the constant up on the cv2 module instead of calling eval()
    method = getattr(cv2, meth.split('.')[-1])

    # Apply template Matching
    res = cv2.matchTemplate(img, template, method)
    min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)

    # If the method is TM_SQDIFF or TM_SQDIFF_NORMED, take minimum
    if method in [cv2.TM_SQDIFF, cv2.TM_SQDIFF_NORMED]:
        top_left = min_loc
    else:
        top_left = max_loc
    bottom_right = (top_left[0] + w, top_left[1] + h)

    cv2.rectangle(img, top_left, bottom_right, 255, 2)

    # plt.figure (not plt.subplots) so that no extra full-size axes is
    # created underneath the two subplots.
    plt.figure(figsize=(12, 7))
    plt.subplot(121)
    plt.imshow(res, cmap='gray')
    plt.title('Matching Result')
    plt.xticks([])
    plt.yticks([])
    plt.subplot(122)
    plt.imshow(img, cmap='gray')
    plt.title('Detected Point')
    plt.xticks([])
    plt.yticks([])
    plt.suptitle(meth)

    plt.show()

# Below, we see the results of those algorithms. From the squares, we can see that the features of the fish
# were detected in most cases. For further information, please see the "SIFT model" notebook. Also, team2
# suggested that waves could interfere with the fish features and that it is therefore sometimes difficult for the
# model to find the right fish (see their attempts to find a shark with the SIFT model in the SIFT notebook)

# How does evaluation work?
Evaluation is straightforward, and I can explain it to you. However, it is only possible twice per day!

In [None]:
# Evaluate (only possible two times per day!)
# Load the final test set; the call returns the image names and the image data.
x_final_names, x_final = get_final_test(files_path='../Data/Kaggle/test1/*.jpg')

# Predict outcome
# Run the images through `vgg16` first, then pass the resulting features to
# the fitted grid search (`results`) to get the predicted probabilities.
x_final_vgg16 = vgg16.predict(x_final)
print(x_final_vgg16.shape)
x_pred = results.predict_proba(x_final_vgg16)

# Make pretty and save
# Put the image names next to the predicted probabilities, one row per image.
proba_df = pd.concat([pd.Series(x_final_names), pd.DataFrame(x_pred)], axis=1)
# Column names are the last '_'-separated part of each dummy_df column name.
# NOTE(review): presumably dummy_df starts with an 'image' column followed by
# one column per class in the same order as x_pred -- confirm against get_xy.
proba_df.columns = [col.split('_')[-1] for col in dummy_df.columns]
# Keep only the file name (the text after the last '/') of each image path.
proba_df.image = proba_df.image.apply(lambda name: name.split('/')[-1])
proba_df.to_csv('output_file.csv', index=False)
proba_df.head()