# Train FishyFish

#### Dependencies

In [1]:
import numpy as np
import tensorflow as tf
from importlib import reload
import os
import pickle
import fish_data as fd
from datetime import datetime
import pandas as pd

## Load dictionaries and datafiles

In [2]:
# Load the per-filename label metadata; its 'valid_set' flag drives the
# train/validation split below.
# NOTE: pickle.load can execute arbitrary code -- only load files you created.
with open('label_dictionary.pickle', 'rb') as pickle_file :
    label_dictionary = pickle.load(pickle_file)

In [3]:
# Read the precomputed training artefacts. The file order below matches the
# order of the names on the left-hand side.
box_preds, labels_df, annotated_boxes, embedding_df, FiNoF_prob = [
    pd.read_pickle(pickle_path)
    for pickle_path in ('box_preds.pickle',
                        'onehot_df.pickle',
                        'annotated_boxes.pickle',
                        'embedding_dataframe.pickle',
                        'FiNoF_prob.pickle')
]

In [4]:
# Partition every known filename into the training or validation list,
# according to the 'valid_set' flag stored in label_dictionary.
master = fd.generate_filenames_list()
train_fnames, valid_fnames = [], []

for fname in master :
    in_valid_set = label_dictionary.get(fname).get('valid_set') == True
    (valid_fnames if in_valid_set else train_fnames).append(fname)

print("There are {} filenames in the training set".format(len(train_fnames)))
print("There are {} filenames in the validation set".format(len(valid_fnames)))


There are 3577 filenames in the training set
There are 200 filenames in the validation set


In [5]:
# Number of validation images. Presumably read by the graph/session scripts
# that are %run -i below -- TODO confirm.
valid_size = len(valid_fnames)

In [6]:
# Free the label dictionary; no later cell of this notebook references it.
del label_dictionary

## Set up FishyFish

In [7]:
# Model version and run mode. With initiate_FishyFish = True the session
# script starts from freshly initialised variables (see its log output).
version_ID = 'v1.0'
initiate_FishyFish = True

# Everything belonging to this version lives under <cwd>/FishyFish/<version_ID>.
wd = os.getcwd()
md = '{}/FishyFish/{}'.format(wd, version_ID)
tensorboard_path = '{}/Tensorboard_logs'.format(md)

# Create the model directory on first use.
if not os.path.exists(md) :
    os.makedirs(md)

In [8]:
# -i runs the script in this notebook's namespace, so it can read the variables
# defined above and leaves its own definitions behind.
# Presumably defines the model hyper-parameters -- TODO confirm.
%run -i 'FishyFish/FishyFish_PARAMS.py'

In [9]:
# Runs the graph script in the notebook namespace. Presumably it builds the
# `fishyfish` tf.Graph and the placeholders used by later cells -- TODO confirm.
%run -i 'FishyFish/FishyFish_GRAPH.py'

In [10]:
# Per-class weights keyed by class index 0..7.
# NOTE(review): the values track (1 - class prior) when compared with the
# sample-submission priors, in the order ALB, BET, DOL, LAG, OTHER, SHARK,
# YFT, NoF. This is inferred from the numbers; confirm against fish_data.
class_weight_dictionary = dict(enumerate([.5449,
                                          .9470,
                                          .9690,
                                          .9823,
                                          .9208,
                                          .9534,
                                          .8057,
                                          .8769]))

In [11]:
# Development aid: pick up edits to fish_data.py without restarting the kernel,
# then print the signature of the batch-preparation function used next.
reload(fd)
help(fd.prepare_FishyFish_batch)

Help on function prepare_FishyFish_batch in module fish_data:

prepare_FishyFish_batch(f_list, embedding_df, annotated_fovea_directory, predicted_fovea_directory, annotated_boxes, box_preds, label_df, FiNoF_prob_series, class_weight_dictionary, fov_weight_predicted=0.2, fov_crop=64)
    Function retrieves arrays for training or prediction of FishyFish model.



In [12]:
# Build the validation arrays once, from the held-out filenames.
(valid_embeddings,
 valid_FiNoF,
 valid_OH_labels,
 v_label_weights,
 valid_fov_stack,
 v_fovea_weights) = fd.prepare_FishyFish_batch(
    f_list = valid_fnames,
    embedding_df = embedding_df,
    annotated_fovea_directory = 'data/annotated_fovea_train/',
    predicted_fovea_directory = 'data/predicted_fovea_train/',
    annotated_boxes = annotated_boxes,
    box_preds = box_preds,
    label_df = labels_df,
    FiNoF_prob_series = FiNoF_prob,
    class_weight_dictionary = class_weight_dictionary,
    fov_weight_predicted = 0.2,
    fov_crop = 64,
)

## Train FishyFish

In [13]:
# Run the training session script in the notebook namespace. With
# initiate_FishyFish = True the log below shows variables being initialised
# from scratch.
%run -i 'FishyFish/FishyFish_SESSION.py'

Weight and bias variables initialized!

Checkpoint saver initialized!

Tensorboard initialized!
To view your tensorboard dashboard summary, run the following on the command line:

tensorboard --logdir='/Users/ccthomps/Documents/ML_Projects/Kaggle Competitions/FF3/FishyFish/v1.0/Tensorboard_logs'


TRAINING FishyFish v1.0...
Batch Cost value: 2.74246
Batch Cost value: 2.66478
Batch Cost value: 2.65528
Batch Cost value: 2.65302
Batch Cost value: 2.53499
Batch Cost value: 2.75206
Epoch 1 completed : 3456 coarse images observed in 357.936472 s (9.99339346452518 images/sec). Model Saved!
Batch Cost value: 2.55449
Batch Cost value: 2.51175
Batch Cost value: 2.43609
Batch Cost value: 2.39202
Batch Cost value: 2.44911
Epoch 2 completed : 6912 coarse images observed in 343.271835 s (10.420313102588215 images/sec). Model Saved!
Epoch 3 completed : 10368 coarse images observed in 327.095985 s (10.935627962538275 images/sec). Model Saved!
Epoch 4 completed : 13824 coarse images observed in 349.951

#### Note
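`fish_data` was updated to retrieve images faster. The cells below reload the module and resume training from the latest checkpoint; epoch time drops from roughly 350 s to roughly 100 s.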

In [16]:
# Reload fish_data to pick up the faster image-retrieval code, then confirm
# the public signature of prepare_FishyFish_batch is unchanged.
reload(fd)
help(fd.prepare_FishyFish_batch)

Help on function prepare_FishyFish_batch in module fish_data:

prepare_FishyFish_batch(f_list, embedding_df, annotated_fovea_directory, predicted_fovea_directory, annotated_boxes, box_preds, label_df, FiNoF_prob_series, class_weight_dictionary, fov_weight_predicted=0.2, fov_crop=64)
    Function retrieves arrays for training or prediction of FishyFish model.



In [17]:
# Switch to resume mode: the next session run restores the saved weights
# (see its log output) instead of re-initialising them.
initiate_FishyFish = False

In [18]:
# Resume training from the latest checkpoint, using the reloaded fish_data.
%run -i 'FishyFish/FishyFish_SESSION.py'

Loading FishyFish version v1.0
Metadata dictionary loaded!
Initializing restorer...
Weights and biases retrieved!  Picking up at 7 epochs completed : 24192 training images observed
Checkpoint saver initialized!

Tensorboard initialized!
To view your tensorboard dashboard summary, run the following on the command line:

tensorboard --logdir='/Users/ccthomps/Documents/ML_Projects/Kaggle Competitions/FF3/FishyFish/v1.0/Tensorboard_logs'


TRAINING FishyFish v1.0...
Epoch 8 completed : 27648 coarse images observed in 105.136339 s (34.022489598006636 images/sec). Model Saved!
Epoch 9 completed : 31104 coarse images observed in 102.770539 s (34.80569465535254 images/sec). Model Saved!
Epoch 10 completed : 34560 coarse images observed in 101.748661 s (35.15525378756581 images/sec). Model Saved!
Epoch 11 completed : 38016 coarse images observed in 71.718883 s (49.87528877157777 images/sec). Model Saved!
Epoch 12 completed : 41472 coarse images observed in 73.451684 s (48.69867925696571 images/

## Predict Stage 2 Test set

#### Delete unneeded objects

In [19]:
# Release the training-only dataframes before loading the test-set inputs.
del box_preds, labels_df, annotated_boxes, embedding_df, FiNoF_prob

#### Load dataframes

In [26]:
# Test-set inputs for the prediction cell. Both frames are looked up there
# with .loc[<list of test file paths>, :].
t_embedding_df = pd.read_pickle('test_embeddings_df.pickle')
t_FiNoF_df = pd.read_pickle('test_FiNoF_dataframe.pickle')

In [27]:
# Sanity check: rows are keyed by test image path and hold one FiNoF column.
t_FiNoF_df.head()

Unnamed: 0,FiNoF
data/test_stg2/image_00001.jpg,0.841103
data/test_stg2/image_00002.jpg,0.882154
data/test_stg2/image_00003.jpg,0.833156
data/test_stg2/image_00004.jpg,0.626342
data/test_stg2/image_00005.jpg,0.810221


In [28]:
# List the stage-2 test images.
# Hidden entries such as macOS '.DS_Store' are filtered out rather than removed
# by name: list.remove() raises ValueError when the file is absent, which made
# this cell fail on other machines.
all_test_entries = fd.generate_filenames_list('data/test_stg2/', False)
print(len(all_test_entries))
test_fnames = [fname for fname in all_test_entries
               if not os.path.basename(fname).startswith('.')]
print(len(test_fnames))

12154
12153


In [34]:
# Prediction-time settings: side length of the square fovea crop, and the
# number of images fed to each session.run call.
fov_crop, batch_size = 64, 128

In [31]:
from scipy import misc

In [41]:
# Rebuild the graph before predicting. The shapes printed below show that the
# Prediction branch accepts any batch size (leading dimension '?').
%run -i 'FishyFish/FishyFish_GRAPH.py'

Tensor("Training/Network/concat:0", shape=(128, 289), dtype=float32)
Tensor("Validation/Network/concat:0", shape=(200, 289), dtype=float32)
Tensor("Prediction/Network/concat:0", shape=(?, 289), dtype=float32)


In [46]:
# NOTE(review): this cell was executed out of order (In [46], after the
# prediction cell In [43]). The prediction cell below restores the checkpoint
# itself, so this run looks unnecessary for producing the submission -- confirm
# and remove.
%run -i 'FishyFish/FishyFish_SESSION.py'

Loading FishyFish version v1.0
Metadata dictionary loaded!
Initializing restorer...
Weights and biases retrieved!  Picking up at 100 epochs completed : 345600 training images observed
Checkpoint saver initialized!

Tensorboard initialized!
To view your tensorboard dashboard summary, run the following on the command line:

tensorboard --logdir='/Users/ccthomps/Documents/ML_Projects/Kaggle Competitions/FF3/FishyFish/v1.0/Tensorboard_logs'


TRAINING FishyFish v1.0...


### New Session Call: restore the latest checkpoint and predict the Stage 2 test set

In [43]:
# Stage-2 test prediction.
# Restores the latest FishyFish checkpoint and predicts class probabilities for
# every path in test_fnames, batch_size images per session.run call. The result
# is the `predictions` DataFrame, indexed by 'img_XXXXX.jpg' names.
initiate_FishyFish = False

# Column order of the values returned by test_predictions. It differs from the
# alphabetical submission order, which is applied in a later cell.
CLASS_COLUMNS = ['ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT', 'NoF']
TEST_FOVEA_DIR = 'data/predicted_fovea_test_stg2/'
MAX_CROP_OFFSET = 8   # the crop origin is drawn from [0, 8) on each axis
REPORT_EVERY = 1000   # progress message interval, in images


def _load_test_fovea(key) :
    """Read, crop and normalise the predicted fovea image for one test key.

    `key` is a test path such as 'data/test_stg2/image_00001.jpg'; the fovea
    with the same file name is read from TEST_FOVEA_DIR.
    """
    fov = misc.imread(TEST_FOVEA_DIR + os.path.basename(key), mode = 'RGB')

    # NOTE(review): the crop origin is random and unseeded, so repeated runs
    # give slightly different predictions. Seed NumPy or use a fixed crop if
    # reproducibility matters.
    rand_y = np.random.randint(0, MAX_CROP_OFFSET)
    rand_x = np.random.randint(0, MAX_CROP_OFFSET)
    fov = fov[rand_y:rand_y+fov_crop, rand_x:rand_x+fov_crop, :]
    fov = fd.process_fovea(fov, pixel_norm = 'centre', mutation = False)

    # Fallback for images too small to yield a full crop.
    # NOTE(review): scipy.misc.imresize returns data rescaled to uint8, which
    # would undo the normalisation applied by process_fovea. Confirm whether
    # the resize should happen before normalising.
    if fov.shape[0] != fov_crop or fov.shape[1] != fov_crop :
        fov = misc.imresize(fov, size = [fov_crop, fov_crop, 3])
    return fov


def _submission_name(key) :
    """Map 'data/test_stg2/image_00001.jpg' to the index label 'img_00001.jpg'."""
    return 'img' + os.path.basename(key)[len('image'):]


def _predict_batch(session, batch_keys) :
    """Run test_predictions on one batch of keys; return a labelled DataFrame."""
    # Stack once instead of growing the array with np.concatenate per image.
    fov_stack = np.stack([_load_test_fovea(key) for key in batch_keys], 0)
    feed_dict = {test_fovea : fov_stack,
                 test_embedding : np.array(t_embedding_df.loc[batch_keys, :]),
                 test_fish_prob : np.array(t_FiNoF_df.loc[batch_keys, :])}
    batch_predictions = session.run(test_predictions, feed_dict = feed_dict)
    return pd.DataFrame(batch_predictions, columns = CLASS_COLUMNS,
                        index = [_submission_name(key) for key in batch_keys])


with tf.Session(graph = fishyfish) as session :

    # Predicting with uninitialised variables is meaningless, so fail early and
    # clearly when there is nothing to restore.
    if 'meta_dictionary.pickle' in os.listdir(md) and initiate_FishyFish != True:
        print("Loading FishyFish version {}".format(version_ID))
        with open(md+'/meta_dictionary.pickle', 'rb') as  handle :
            meta_dict = pickle.load(handle)
        print("Metadata dictionary loaded!")
        latest_entry = meta_dict.get(max(meta_dict))
        total_fovea = latest_entry.get('examples_trained')
        epochs_completed = latest_entry.get('Num_epochs')
        restorer = tf.train.Saver()
        print("Initializing restorer...")
        checkpoint_path = tf.train.latest_checkpoint(md)
        if checkpoint_path is None :
            raise FileNotFoundError("No checkpoint found in {}".format(md))
        restorer.restore(session, checkpoint_path)
        print("Weights and biases retrieved!  Picking up at {} epochs completed : {} training images observed".format(epochs_completed, total_fovea))
    else :
        raise FileNotFoundError("No trained model metadata found in {}; train FishyFish before predicting".format(md))

    print("Running Predictor on Test Stage 2 images...")

    # Collect one frame per batch and concatenate once at the end.
    # DataFrame.append inside the loop copied the whole frame each time and is
    # removed in pandas 2.0.
    batch_frames = []
    next_report = 0
    for start in range(0, len(test_fnames), batch_size) :
        batch_keys = test_fnames[start:start+batch_size]

        if start + len(batch_keys) > next_report :
            print("{} Images Predicted".format(next_report))
            print(batch_frames[-1].tail() if batch_frames
                  else pd.DataFrame([], columns = CLASS_COLUMNS, index = []))
            next_report += REPORT_EVERY

        if start + batch_size >= len(test_fnames) :
            print("Last batch!")

        batch_frames.append(_predict_batch(session, batch_keys))

    # An empty file list yields an empty, correctly-labelled frame.
    if batch_frames :
        predictions = pd.concat(batch_frames)
    else :
        predictions = pd.DataFrame([], columns = CLASS_COLUMNS, index = [])
    print("Predictions dataframe completed: {}".format(predictions.shape))

Loading FishyFish version v1.0
Metadata dictionary loaded!
Initializing restorer...
Weights and biases retrieved!  Picking up at 100 epochs completed : 345600 training images observed
Running Predictor on Test Stage 2 images...
0 Images Predicted
Empty DataFrame
Columns: [ALB, BET, DOL, LAG, OTHER, SHARK, YFT, NoF]
Index: []
1000 Images Predicted
                    ALB       BET       DOL       LAG     OTHER     SHARK  \
img_00892.jpg  0.548212  0.026032  0.017724  0.014391  0.079373  0.019568   
img_00893.jpg  0.586959  0.040778  0.007689  0.008119  0.050512  0.026874   
img_00894.jpg  0.641482  0.027025  0.011967  0.018537  0.058704  0.030805   
img_00895.jpg  0.497042  0.027035  0.010030  0.010612  0.067048  0.020047   
img_00896.jpg  0.475203  0.020909  0.013028  0.012355  0.083507  0.027271   

                    YFT       NoF  
img_00892.jpg  0.239968  0.054732  
img_00893.jpg  0.156802  0.122267  
img_00894.jpg  0.132384  0.079095  
img_00895.jpg  0.293905  0.074281  
img_0089

In [44]:
# Put the class columns in alphabetical order (NoF before OTHER), the order
# used by the sample-submission header.
submission_columns = ['ALB', 'BET', 'DOL', 'LAG', 'NoF', 'OTHER', 'SHARK', 'YFT']
predictions = predictions.loc[:, submission_columns]
print(predictions.shape)
print(predictions.head())

(12153, 8)
                    ALB       BET       DOL       LAG       NoF     OTHER  \
img_00001.jpg  0.535022  0.028432  0.014646  0.010732  0.055540  0.105633   
img_00002.jpg  0.570507  0.032258  0.016639  0.011342  0.087563  0.084693   
img_00003.jpg  0.471532  0.019831  0.013213  0.008973  0.179539  0.057357   
img_00004.jpg  0.460593  0.022332  0.010387  0.010235  0.125917  0.100986   
img_00005.jpg  0.627514  0.033911  0.012950  0.008420  0.097066  0.051287   

                  SHARK       YFT  
img_00001.jpg  0.022855  0.227140  
img_00002.jpg  0.023967  0.173033  
img_00003.jpg  0.042900  0.206654  
img_00004.jpg  0.026336  0.243214  
img_00005.jpg  0.020732  0.148121  


In [45]:
# Write the predictions as a gzip-compressed CSV; the index (image name) is
# written as the 'image' column.
# NOTE(review): pandas does not infer compression from a '.gzip' suffix, so
# pass compression='gzip' when reading this file back.
predictions.to_csv('submission1.gzip', header = True, index_label = 'image', compression = 'gzip')

In [47]:
# Visual check of the reordered prediction columns.
predictions.head()

Unnamed: 0,ALB,BET,DOL,LAG,NoF,OTHER,SHARK,YFT
img_00001.jpg,0.535022,0.028432,0.014646,0.010732,0.05554,0.105633,0.022855,0.22714
img_00002.jpg,0.570507,0.032258,0.016639,0.011342,0.087563,0.084693,0.023967,0.173033
img_00003.jpg,0.471532,0.019831,0.013213,0.008973,0.179539,0.057357,0.0429,0.206654
img_00004.jpg,0.460593,0.022332,0.010387,0.010235,0.125917,0.100986,0.026336,0.243214
img_00005.jpg,0.627514,0.033911,0.01295,0.00842,0.097066,0.051287,0.020732,0.148121


In [48]:
# Read the submission back from 'submission1.gzip', the file this notebook
# writes. No cell writes 'submission1.csv', so reading it depended on a stale
# file from an earlier session.
# compression is explicit because the '.gzip' suffix is not auto-detected.
preds_test = pd.read_csv('submission1.gzip', compression = 'gzip')

In [74]:
# Check that the last rows reach img_12153.jpg, i.e. every test image is present.
preds_test.tail()

Unnamed: 0,image,ALB,BET,DOL,LAG,NoF,OTHER,SHARK,YFT
12148,img_12149.jpg,0.590766,0.026334,0.009492,0.010876,0.096344,0.083578,0.014749,0.167859
12149,img_12150.jpg,0.513181,0.019348,0.012869,0.008757,0.172034,0.048063,0.033198,0.19255
12150,img_12151.jpg,0.58797,0.016504,0.014765,0.010676,0.120458,0.07077,0.020984,0.157875
12151,img_12152.jpg,0.538854,0.022332,0.011924,0.019628,0.090406,0.081634,0.03135,0.203869
12152,img_12153.jpg,0.666385,0.02759,0.012147,0.00835,0.082996,0.062171,0.021622,0.118739


In [51]:
# Leftover debugging aid: list the working directory to find the
# sample-submission files.
os.listdir()

['.DS_Store',
 '.git',
 '.ipynb_checkpoints',
 '__pycache__',
 'annotated_boxes.pickle',
 'Box Annotations.ipynb',
 'box_preds.pickle',
 'data',
 'embedding_dataframe.pickle',
 'FiNoF_prob.pickle',
 'fish_data.py',
 'Fishery Data Exploration and Preprocessing.ipynb',
 'FishFinder',
 'FishFinder_Model_v1.0.ipynb',
 'FishFinder_Model_v1.1.ipynb',
 'FishFinder_Model_v1.2.ipynb',
 'FishNoF',
 'FishNoF_Model.ipynb',
 'FishyFish',
 'FishyFish Data Retrieval.ipynb',
 'label_dictionary.pickle',
 'onehot_df.pickle',
 'prediction_dictionary.pickle',
 'sample_submission_stg1.csv',
 'sample_submission_stg2.csv',
 'sample_submission_stg2.csv.zip',
 'Store_coarse_images.ipynb',
 'submission1.csv',
 'submission1.gzip',
 'test_box_preds_df.pickle',
 'test_embeddings_df.pickle',
 'test_FiNoF_dataframe.pickle',
 'Train FishyFish v1.0.ipynb']

In [59]:
# Stage-1 sample submission. Its rows are reused unchanged as the stage-1 part
# of the final file.
empty = pd.read_csv('sample_submission_stg1.csv')

In [60]:
# Every row carries the same baseline probabilities (see the output below).
empty.head()

Unnamed: 0,image,ALB,BET,DOL,LAG,NoF,OTHER,SHARK,YFT
0,img_00005.jpg,0.455003,0.052938,0.030969,0.017734,0.123081,0.079142,0.046585,0.194283
1,img_00007.jpg,0.455003,0.052938,0.030969,0.017734,0.123081,0.079142,0.046585,0.194283
2,img_00009.jpg,0.455003,0.052938,0.030969,0.017734,0.123081,0.079142,0.046585,0.194283
3,img_00018.jpg,0.455003,0.052938,0.030969,0.017734,0.123081,0.079142,0.046585,0.194283
4,img_00027.jpg,0.455003,0.052938,0.030969,0.017734,0.123081,0.079142,0.046585,0.194283


In [63]:
# Stack the stage-1 baseline rows on top of the stage-2 predictions.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in 2.0. Row labels are kept (no ignore_index), as append did.
full = pd.concat([empty, preds_test])

In [65]:
# index = False leaves the row labels out of the file; after stacking the two
# frames they are no longer unique.
full.to_csv('submission1_v2.csv', index = False)

In [73]:
# Re-read the combined file so the rows get a fresh 0..N-1 index, then check
# the last rows.
test_full = pd.read_csv('submission1_v2.csv')
test_full.tail()

Unnamed: 0,image,ALB,BET,DOL,LAG,NoF,OTHER,SHARK,YFT
13148,img_12149.jpg,0.590766,0.026334,0.009492,0.010876,0.096344,0.083578,0.014749,0.167859
13149,img_12150.jpg,0.513181,0.019348,0.012869,0.008757,0.172034,0.048063,0.033198,0.19255
13150,img_12151.jpg,0.58797,0.016504,0.014765,0.010676,0.120458,0.07077,0.020984,0.157875
13151,img_12152.jpg,0.538854,0.022332,0.011924,0.019628,0.090406,0.081634,0.03135,0.203869
13152,img_12153.jpg,0.666385,0.02759,0.012147,0.00835,0.082996,0.062171,0.021622,0.118739


In [68]:
# Stage-2 sample submission. Its 'image' column holds the image names in the
# form 'test_stg2/image_XXXXX.jpg' (see the tail() output below).
sub_template = pd.read_csv('sample_submission_stg2.csv')

In [72]:
# Compare the template's naming and row count with test_full.
sub_template.tail()

Unnamed: 0,image,ALB,BET,DOL,LAG,NoF,OTHER,SHARK,YFT
13148,test_stg2/image_12149.jpg,0.455003,0.052938,0.030969,0.017734,0.123081,0.079142,0.046585,0.194283
13149,test_stg2/image_12150.jpg,0.455003,0.052938,0.030969,0.017734,0.123081,0.079142,0.046585,0.194283
13150,test_stg2/image_12151.jpg,0.455003,0.052938,0.030969,0.017734,0.123081,0.079142,0.046585,0.194283
13151,test_stg2/image_12152.jpg,0.455003,0.052938,0.030969,0.017734,0.123081,0.079142,0.046585,0.194283
13152,test_stg2/image_12153.jpg,0.455003,0.052938,0.030969,0.017734,0.123081,0.079142,0.046585,0.194283


In [75]:
# Image names from the template, used to relabel test_full row by row.
indexes = sub_template['image']

In [76]:
# Relabel the rows with the template's image names.
# The assignment is positional, so first verify that both frames have the same
# length and that every stage-2 row carries the same numeric image id in both.
# Otherwise predictions would be silently attached to the wrong images.
# Stage-1 rows are not compared: they all hold identical baseline values.
if len(indexes) != len(test_full) :
    raise ValueError("Template has {} rows but test_full has {}".format(len(indexes), len(test_full)))
is_stage2 = indexes.str.startswith('test_stg2/').values
template_ids = indexes[is_stage2].str.extract(r'(\d+)\.jpg$', expand = False)
current_ids = test_full.loc[is_stage2, 'image'].str.extract(r'(\d+)\.jpg$', expand = False)
if not (template_ids.values == current_ids.values).all() :
    raise ValueError("Row order of test_full does not match the submission template")
test_full['image'] = indexes.values

In [77]:
# Confirm the names now use the 'test_stg2/image_XXXXX.jpg' form and the
# probabilities are unchanged.
test_full.tail()

Unnamed: 0,image,ALB,BET,DOL,LAG,NoF,OTHER,SHARK,YFT
13148,test_stg2/image_12149.jpg,0.590766,0.026334,0.009492,0.010876,0.096344,0.083578,0.014749,0.167859
13149,test_stg2/image_12150.jpg,0.513181,0.019348,0.012869,0.008757,0.172034,0.048063,0.033198,0.19255
13150,test_stg2/image_12151.jpg,0.58797,0.016504,0.014765,0.010676,0.120458,0.07077,0.020984,0.157875
13151,test_stg2/image_12152.jpg,0.538854,0.022332,0.011924,0.019628,0.090406,0.081634,0.03135,0.203869
13152,test_stg2/image_12153.jpg,0.666385,0.02759,0.012147,0.00835,0.082996,0.062171,0.021622,0.118739


In [79]:
# Final submission file: one 'image' column plus the eight class probabilities.
test_full.to_csv('sub_hurry.csv', index = False)

In [80]:
# Expect (13153, 9): the same row count as the stage-2 template, and nine columns.
test_full.shape

(13153, 9)