# Nature Conservancy Fisheries Kaggle Competition
## Starter Dictionary Generation


#### Dependencies

In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import json
import pickle
import matplotlib.pyplot as plt
% matplotlib inline
from scipy import misc
import os 
import fish_data as fd

### Training set df from csv file
The CSV file loaded below was produced by `PROPIGATE_NoF.ipynb`.

In [2]:
# Read the fovea label table from CSV, using the file's first column as the
# DataFrame index.
pre_training_df = pd.read_csv('fovea_labels_NoF_prop.csv', index_col = 0)

In [3]:
# Preview the first rows of the loaded table.
pre_training_df.head()

Unnamed: 0,filename,fovea_coordinates,fovea_label,image_label
0,data/train/ALB/img_07377.jpg,"(0, 0)",Unknown,ALB
1,data/train/ALB/img_07377.jpg,"(0, 246)",Unknown,ALB
2,data/train/ALB/img_07377.jpg,"(0, 492)",Unknown,ALB
3,data/train/ALB/img_07377.jpg,"(0, 736)",Unknown,ALB
4,data/train/ALB/img_07377.jpg,"(112, 0)",Unknown,ALB


In [4]:
# Keep only the rows whose fovea label is something other than 'Unknown'.
# The boolean-mask selection goes through `.loc` because the `.ix` indexer is
# deprecated and no longer exists in pandas >= 1.0. `.copy()` makes
# training_df independent of pre_training_df.
training_df = pre_training_df.loc[pre_training_df['fovea_label'] != 'Unknown'].copy()

In [5]:
# Report the filtered table's dimensions and the distinct fovea labels it holds.
print("Shape of training_df: {}".format(training_df.shape))
print("Unique fovea labels: {}".format(training_df['fovea_label'].unique()))

Shape of training_df: (13370, 4)
Unique fovea labels: ['NoF' 'BET' 'YFT' 'OTHER' 'DOL' 'ALB' 'LAG' 'SHARK']


In [6]:
# Replace the index with a fresh 0..n-1 range (the old index values are
# discarded, not kept as a column), then preview the result.
training_df = training_df.reset_index(drop = True)
training_df.head()

Unnamed: 0,filename,fovea_coordinates,fovea_label,image_label
0,data/train/ALB/img_07377.jpg,"(224, 736)",NoF,ALB
1,data/train/NoF/img_00673.jpg,"(0, 0)",NoF,NoF
2,data/train/NoF/img_00673.jpg,"(0, 223)",NoF,NoF
3,data/train/NoF/img_00673.jpg,"(0, 446)",NoF,NoF
4,data/train/NoF/img_00673.jpg,"(0, 669)",NoF,NoF


### Generate training_set dictionary

In [7]:
# Build one dictionary entry per row of training_df, keyed by
# "<filename>;_yx_<y_offset>_<x_offset>".
training_dict = {}

for ix in training_df.index:
    # Scalar lookups use `.loc`: the `.ix` indexer is deprecated and is absent
    # from pandas >= 1.0, so `.ix`-based access fails on current pandas.
    fname = training_df.loc[ix, 'filename']

    # 'fovea_coordinates' is stored as text of the form "(y, x)": the part
    # between the opening parenthesis and the first comma is the y offset,
    # the part between that comma and the closing parenthesis is the x offset.
    yx = training_df.loc[ix, 'fovea_coordinates']
    comma = yx.find(',')
    y_off = int(yx[1:comma].strip())
    x_off = int(yx[comma + 1:-1].strip())

    key = fname + ';_yx_' + str(y_off) + '_' + str(x_off)
    training_dict[key] = {'f': fname,
                          'fovea_label': training_df.loc[ix, 'fovea_label'],
                          'image_label': training_df.loc[ix, 'image_label'],
                          'coordinates': {'y_offset': y_off, 'x_offset': x_off},
                          # Every entry is given the same fixed scale value.
                          'scale': 0.75}

In [8]:
# Number of entries stored in the training dictionary.
print(len(training_dict))

13370


In [9]:
# Serialize the training dictionary to 'training_dictionary.pickle'.
with open('training_dictionary.pickle', 'wb') as ftd:
    pickle.dump(training_dict, ftd)

In [10]:
# Round-trip check: reload the pickle and confirm it holds as many entries as
# the in-memory training_dict.
with open('training_dictionary.pickle', 'rb') as handle:
    test_open = pickle.load(handle)

sizes_match = len(test_open) == len(training_dict)
print("Dictionary of training set examples loaded correctly: {}".format(sizes_match))

Dictionary of training set examples loaded correctly: True


### Generate high-resolution image dictionary

In [11]:
# Gather every image path in one list: first the paths found under
# 'data/train/' (scanned with subfolders=True), then those found under
# 'data/test_stg1/' (scanned with subfolders=False).
f_list = (
    fd.generate_filenames_list(subdirectory='data/train/', subfolders=True)
    + fd.generate_filenames_list(subdirectory='data/test_stg1/', subfolders=False)
)

In [12]:
# Total number of image paths collected.
len(f_list)

4777

In [13]:
# First ten paths in the list.
f_list[0:10]

['data/train/ALB/img_00003.jpg',
 'data/train/ALB/img_00010.jpg',
 'data/train/ALB/img_00012.jpg',
 'data/train/ALB/img_00015.jpg',
 'data/train/ALB/img_00019.jpg',
 'data/train/ALB/img_00020.jpg',
 'data/train/ALB/img_00029.jpg',
 'data/train/ALB/img_00032.jpg',
 'data/train/ALB/img_00037.jpg',
 'data/train/ALB/img_00038.jpg']

In [14]:
# Last ten paths in the list.
f_list[-10:]

['data/test_stg1/img_07833.jpg',
 'data/test_stg1/img_07859.jpg',
 'data/test_stg1/img_07872.jpg',
 'data/test_stg1/img_07893.jpg',
 'data/test_stg1/img_07895.jpg',
 'data/test_stg1/img_07905.jpg',
 'data/test_stg1/img_07906.jpg',
 'data/test_stg1/img_07908.jpg',
 'data/test_stg1/img_07910.jpg',
 'data/test_stg1/img_07921.jpg']

In [15]:
# Record, for every path in f_list, its image-level label and pixel
# dimensions, keyed by the path itself.
image_dictionary = {}

for f in f_list:
    if 'test_stg1' in f:
        # Paths containing 'test_stg1' are tagged with the placeholder 'TEST'.
        img_lab = 'TEST'
    else:
        # Otherwise the label is the text between 'train/' and '/img_',
        # e.g. 'ALB' in 'data/train/ALB/img_00003.jpg'.
        img_lab = f[f.find('train/')+6:f.find('/img_')]

    # Only the array's height and width are used. plt.imread stands in for
    # scipy.misc.imread, which was deprecated in SciPy 1.0 and removed in 1.2;
    # colour-mode conversion is unnecessary because it does not affect the
    # first two dimensions.
    shape = plt.imread(f).shape
    image_dictionary[f] = {'f': f, 'image_label': img_lab, 'y_size': shape[0], 'x_size': shape[1]}

In [16]:
# Number of entries stored in the image dictionary.
print(len(image_dictionary))

4777


In [17]:
# Inspect the entry stored for the first path in f_list.
image_dictionary.get(f_list[0])

{'f': 'data/train/ALB/img_00003.jpg',
 'image_label': 'ALB',
 'x_size': 1280,
 'y_size': 720}

In [18]:
# Inspect the entry stored for the last path in f_list.
image_dictionary.get(f_list[-1])

{'f': 'data/test_stg1/img_07921.jpg',
 'image_label': 'TEST',
 'x_size': 1280,
 'y_size': 720}

In [19]:
# Serialize the image dictionary to 'image_dictionary.pickle'.
with open('image_dictionary.pickle', 'wb') as fid:
    pickle.dump(image_dictionary, fid)

In [20]:
# Round-trip check: reload the pickle and confirm it holds as many entries as
# the in-memory image_dictionary.
with open('image_dictionary.pickle', 'rb') as handle:
    test_open = pickle.load(handle)
# The message names the image dictionary, which is what this cell verifies.
print("Dictionary of images loaded correctly: {}".format(len(test_open) == len(image_dictionary)))

Dictionary of training set examples loaded correctly: True
