# Retrieve Data for FishyFish Training

#### Dependencies

In [15]:
import numpy as np
import tensorflow as tf
from importlib import reload
import os
import pickle
import fish_data as fd
from datetime import datetime
import pandas as pd
from scipy import misc

## Load label_dictionary

In [4]:
with open('label_dictionary.pickle', 'rb') as handle :
    label_dictionary = pickle.load(handle)

In [5]:
print(len(label_dictionary))

3777


## Create DataFrame of annotated fovea embeddings

In [4]:
labels = pd.DataFrame([], columns = ['ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT', 'NoF'], index = [])
box_preds = pd.DataFrame([], columns = ['scale', 'y_offset', 'x_offset'], index = [])
annotated_boxes = pd.DataFrame([], columns = ['scale', 'y_offset', 'x_offset'], index = [])
FiNoF_prob = pd.Series([], index = [])

In [5]:
for key in list(label_dictionary.keys()) :
    #print(label_dictionary.get(key))
    #print(np.expand_dims(label_dictionary.get(key).get('box_preds'), 0))
    labels = labels.append(pd.DataFrame(label_dictionary.get(key).get('onehot'), index = [key], columns =['ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT', 'NoF']) )
    box_preds = box_preds.append( pd.DataFrame(np.expand_dims(label_dictionary.get(key).get('box_preds'), 0), index = [key] , columns = ['scale', 'y_offset', 'x_offset'] ))
    
    if label_dictionary.get(key).get('scale') is not None :
        annotated_boxes = annotated_boxes.append(pd.DataFrame(np.expand_dims(np.concatenate([label_dictionary.get(key).get('scale'),label_dictionary.get(key).get('coord') ], 0),0), 
                                                             columns = ['scale', 'y_offset', 'x_offset'], index = [key]))
    FiNoF_prob = FiNoF_prob.append(pd.Series(label_dictionary.get(key).get('FiNoF'), index = [key]))

In [6]:
embedding_df = pd.read_pickle('embedding_dataframe.pickle')
print(embedding_df.shape)

(3777, 32)


In [7]:
labels.to_pickle('onehot_df.pickle')
box_preds.to_pickle('box_preds.pickle')
annotated_boxes.to_pickle('annotated_boxes.pickle')
FiNoF_prob.to_pickle('FiNoF_prob.pickle')

## Store Annotated Fovea in appropriate directories

In [18]:
for key in list(label_dictionary.keys()) :
    if label_dictionary.get(key).get('scale') is not None :
        scale = label_dictionary.get(key).get('scale')
        yxcoord = label_dictionary.get(key).get('coord')
        fov = fd.retrieve_fovea(key, yxcoord, scale[0], fov_dim = 72)
        new_path = 'data/annotated_fovea_train/'+key[11:]
        misc.imsave(new_path, fov, format = 'JPEG' )

## Store Predicted Fovea in appropriate directories

In [25]:
for key in list(label_dictionary.keys()) :
    box = label_dictionary.get(key).get('box_preds')
    scale = box[0]
    yxcoord = box[1:]
    fov = fd.retrieve_fovea(key, yxcoord, scale, fov_dim = 72)
    new_path = 'data/predicted_fovea_train/'+key[11:]
    misc.imsave(new_path, fov, format = 'JPEG' )

In [27]:
os.listdir('data/annotated_fovea_train/')

['ALB', 'BET', 'DOL', 'LAG', 'NoF', 'OTHER', 'SHARK', 'YFT']

# Test Stage 2 Images

#### Get all filenames

In [45]:
test_fnames = fd.generate_filenames_list('data/test_stg2/', False)
print(len(test_fnames))


12154


In [46]:
test_fnames.remove('data/test_stg2/.DS_Store')
print(len(test_fnames))

12153


### Run through FishFinder and store FiNoF , box specs in pandas dataframe

In [47]:
version_ID = 'v1.2'
initiate_FishFinder = False 

wd = os.getcwd()
md = wd+'/FishFinder/'+version_ID
if not os.path.exists(md) :
    os.makedirs(md)
tensorboard_path = md+'/Tensorboard_logs'


In [48]:
%run -i 'FishFinder/FishFinder_PARAMS.py'

In [49]:
%run -i 'FishFinder/FishFinder_GRAPH.py'

In [52]:
with tf.Session(graph = fish_finder) as session :
    if 'meta_dictionary.pickle' in os.listdir(md) and initiate_FishFinder != True:
        print("Loading FishFinder_MT version {}".format(version_ID))
        with open(md+'/meta_dictionary.pickle', 'rb') as  handle :
            meta_dict = pickle.load(handle)
        print("Metadata dictionary loaded!")
        total_fovea = meta_dict.get(np.max([key for key in meta_dict])).get('fovea_trained')
        epochs_completed = meta_dict.get(np.max([key for key in meta_dict])).get('Num_epochs')
        restorer = tf.train.Saver()
        print("Initializing restorer...")
        restorer.restore(session, tf.train.latest_checkpoint(md))
        print("Weights and biases retrieved!  Picking up at {} epochs completed : {} training images observed".format(epochs_completed, total_fovea))
    print("Running Predictor on Test Stage 2 images...")
    
    keys_list = test_fnames.copy()
    t_embedding_arr = np.zeros([len(keys_list), 32])
    t_FiNoF = np.zeros([len(keys_list), 1])
    t_box_preds = np.zeros([len(keys_list),3])
    cursor = 0
    while len(keys_list) > batch_size :
        for i in range(batch_size) :
            coarse = misc.imresize(misc.imread(keys_list.pop(0), mode = 'RGB'), size = [64, 112,3], mode = 'RGB')
            if i == 0 :
                coarse_arr = np.expand_dims(coarse, 0)
            else :
                coarse_arr = np.concatenate([coarse_arr, np.expand_dims(coarse,0)], 0)
        
        feed_dict = {coarse_images_for_prediction : coarse_arr}
        FiNoF_Probability, Box_Predictions, coarse_embedding = session.run([stack_FishNoF_preds, stack_box_preds, stack_dense_output], feed_dict = feed_dict)
        
        for i in range(batch_size) :
            t_FiNoF[cursor, :] = FiNoF_Probability[i]
            t_box_preds[cursor, :] = Box_Predictions[i,:]
            t_embedding_arr[cursor, :] = coarse_embedding[i, :]
            if (cursor % 1024) == 0 :
                print("{} images embedded".format(cursor))
                print("Length of prediction_keys : {}".format(len(keys_list)))
            cursor += 1
                
    for i in range(len(keys_list)) :
        coarse = misc.imresize(misc.imread(keys_list.pop(0), mode = 'RGB'), size = [64, 112,3], mode = 'RGB')
        if i == 0 :
            coarse_arr = np.expand_dims(coarse, 0)
        else :
            coarse_arr = np.concatenate([coarse_arr, np.expand_dims(coarse,0)], 0)
        
    feed_dict = {coarse_images_for_prediction : coarse_arr}
    FiNoF_Probability, Box_Predictions, coarse_embedding = session.run([stack_FishNoF_preds, stack_box_preds, stack_dense_output], feed_dict = feed_dict)

    for i in range(batch_size) :
        t_FiNoF[cursor, :] = FiNoF_Probability[i]
        t_box_preds[cursor, :] = Box_Predictions[i,:]
        t_embedding_arr[cursor, :] = coarse_embedding[i, :]
        cursor += 1

            

Loading FishFinder_MT version v1.2
Metadata dictionary loaded!
Initializing restorer...
Weights and biases retrieved!  Picking up at 665 epochs completed : 2230912 training images observed
Running Predictor on Test Stage 2 images...
0 images embedded
Length of prediction_keys : 12025
1024 images embedded
Length of prediction_keys : 11001
2048 images embedded
Length of prediction_keys : 9977
3072 images embedded
Length of prediction_keys : 8953
4096 images embedded
Length of prediction_keys : 7929
5120 images embedded
Length of prediction_keys : 6905
6144 images embedded
Length of prediction_keys : 5881
7168 images embedded
Length of prediction_keys : 4857
8192 images embedded
Length of prediction_keys : 3833
9216 images embedded
Length of prediction_keys : 2809
10240 images embedded
Length of prediction_keys : 1785
11264 images embedded
Length of prediction_keys : 761


IndexError: index 121 is out of bounds for axis 0 with size 121

#### Note that is ok.  Made mistake in number of iterations for the final garbage loop

In [53]:
t_FiNoF.shape

(12153, 1)

In [54]:
t_box_preds.shape

(12153, 3)

In [55]:
t_embedding_arr.shape

(12153, 32)

In [56]:
t_FiNoF_df = pd.DataFrame(t_FiNoF, columns = ['FiNoF'], index = test_fnames.copy())
print(t_FiNoF_df.head())

                                   FiNoF
data/test_stg2/image_00001.jpg  0.841103
data/test_stg2/image_00002.jpg  0.882154
data/test_stg2/image_00003.jpg  0.833156
data/test_stg2/image_00004.jpg  0.626342
data/test_stg2/image_00005.jpg  0.810221


In [57]:
t_box_preds_df = pd.DataFrame(t_box_preds, columns = ['scale', 'y_off', 'x_off'], index = test_fnames.copy())
t_embedding_df = pd.DataFrame(t_embedding_arr, columns = list(range(32)), index = test_fnames.copy())

In [58]:
t_embedding_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,22,23,24,25,26,27,28,29,30,31
data/test_stg2/image_00001.jpg,-0.758492,-0.889309,-0.759618,-0.271521,0.825022,-0.988796,-0.996101,-0.845239,0.820566,-0.472032,...,-0.835609,0.999243,0.99991,-0.878443,0.984017,-0.7169,0.949253,0.967637,0.468045,-0.93742
data/test_stg2/image_00002.jpg,-0.997257,0.828025,-0.982014,-0.5901,0.754886,-0.956849,-0.990188,0.50284,0.999367,-0.89262,...,-0.945684,0.999979,0.998693,-0.995464,0.20839,-0.346967,0.877636,-0.783396,-0.995806,-0.997825
data/test_stg2/image_00003.jpg,-0.982845,0.130662,-0.299713,-0.942765,-0.771721,-0.99901,-0.947415,-0.063972,0.998624,-0.452502,...,-0.97159,0.999959,0.999806,-0.917787,0.133872,-0.615132,0.998438,0.743631,-0.99369,-0.984879
data/test_stg2/image_00004.jpg,-0.424008,-0.979936,0.533171,-0.358645,0.336902,-0.641456,-0.94202,-0.965239,-0.233703,0.469364,...,-0.993246,0.986702,0.972661,-0.131879,-0.275115,-0.932732,0.802958,-0.441994,-0.854718,-0.999425
data/test_stg2/image_00005.jpg,-0.97528,0.503223,-0.484824,-0.941582,0.791352,-0.946153,-0.771835,-0.976238,0.789828,0.318252,...,-0.591972,0.999536,0.999898,-0.451718,0.943795,-0.945395,0.665864,0.384008,-0.997119,-0.996266


In [59]:
t_box_preds_df.head()

Unnamed: 0,scale,y_off,x_off
data/test_stg2/image_00001.jpg,0.288571,0.227286,0.219619
data/test_stg2/image_00002.jpg,0.24504,0.426259,0.24495
data/test_stg2/image_00003.jpg,0.314292,0.205844,0.205153
data/test_stg2/image_00004.jpg,0.413568,0.273896,0.397684
data/test_stg2/image_00005.jpg,0.279387,0.369376,0.198613


In [61]:
t_FiNoF_df.to_pickle('test_FiNoF_dataframe.pickle')

In [62]:
t_embedding_df.to_pickle('test_embeddings_df.pickle')

In [63]:
t_box_preds_df.to_pickle('test_box_preds_df.pickle')

## Make Fovea for Test Stage 2

In [75]:
for key in test_fnames :
    scale = t_box_preds_df['scale'].loc[key]
    yxcoord = np.array(t_box_preds_df.loc[key, ['y_off', 'x_off']])

    fov = fd.retrieve_fovea(key, yxcoord, scale, fov_dim = 72)
    new_path = 'data/predicted_fovea_test_stg2/'+key[15:]

    misc.imsave(new_path, fov, format = 'JPEG' )

In [76]:
key

'data/test_stg2/image_12153.jpg'

In [79]:
'img'+key[20:]

'img_12153.jpg'