# Model for Nature Conservancy Fisheries Kaggle Competition

#### Dependencies

In [1]:
import fish_data as fd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
%matplotlib inline
import os

#### Helper functions

In [2]:
# Print the fish_data module's help text (its description and function
# summaries) so the helper functions used below are documented in the notebook.
help(fd)

Help on module fish_data:

NAME
    fish_data

DESCRIPTION
    fish_data module contains the helper functions for the model build of the
    Nature Conservancy Fisheries Kaggle Competition.
    
    Dependencies:
        * numpy as np
        * os
        * scipy.ndimage as ndimage
        * scipy.misc as misc
        * scipy.special as special
        * matplotlib.pyplot as plt

FUNCTIONS
    count_nodes(std_y, std_x, pool_steps, final_depth)
        Calculates the number of flattened nodes after a number of 'VALID' pool
        steps of strides = [1,2,2,1]
    
    generate_balanced_epoch(min_each, shuffle=True)
        Function to generate a list of filenames to be used for each training epoch
        with a corresponding label array.  Most file names will be used  multiple  times
        in order that each fish is drawn into a training batch an equivalent number of
        times.
    
    generate_filenames_list()
        Iterates through the 'data/train' folders of the working dir

#### Generate a list of filenames

In [3]:
# Build the master list of filenames with the fish_data helper, then report
# how many entries it contains.
fish_filenames = fd.generate_filenames_list()
num_master_files = len(fish_filenames)
print("There are {} filenames in the master set list".format(num_master_files))

There are 3777 filenames in the master set list


In [4]:
# Build the label array for every filename in the master list (indices 0
# through len(fish_filenames) are passed to fd.make_label).
fish_label_arr = fd.make_label(fish_filenames, 0, len(fish_filenames))

# A bare `fish_label_arr.shape` in the middle of a cell is evaluated and then
# discarded (only the last expression of a cell is displayed), so print it
# explicitly to make the shape visible.
print(fish_label_arr.shape)

# Show the first five label rows as the cell's displayed output.
fish_label_arr[0:5,:]

array([[1, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0]])

In [5]:
# Hold out a fixed number of samples for validation.
valid_size = 300
# Fixed seed so that Restart & Run All reproduces the same train/validation
# split; without random_state the split differs on every run.
split_seed = 42
files_train, files_val, y_train, y_val = train_test_split(
    fish_filenames,
    fish_label_arr,
    test_size=valid_size,
    random_state=split_seed,
)
# Sanity check: label-array shapes for the training and validation sets.
print([x.shape for x in [y_train, y_val]])

[(3477, 8), (300, 8)]


#### Debugging Graph and session calls with input pipeline

## Graph and Session Runs

#### Graph parameters

In [11]:
# ---- Preprocessing ----
std_y, std_x = 300, 500

# ---- General ----
num_channels = 3
num_labels = 8
batch_size = 30
stddev = 0.2

# ---- Convolution ----
# One kernel size and one depth per convolution layer (six entries each).
kernel_sizes = [12] + [3] * 5
conv_depths = [64, 128, 256, 512, 256, 128]
# Depth of the last convolution layer.
final_depth = conv_depths[-1]

# ---- Dropout ----
kp = 0.75

# ---- Fully connected ----
fc1_depth, fc2_depth = 256, 64

# ---- Regularization ----
beta = 0.1

# ---- Learning rate ----
init_rate = 0.005
per_steps = 6000
decay_rate = 0.75


#### Session parameters

In [12]:
# Number of training epochs.
num_epochs = 5
# Path for the TensorBoard summary file to be written. Built with os.path.join
# so the separator is correct on every platform instead of a hard-coded '/'.
logs_path = os.path.join(os.getcwd(), 'TB_logs')
# NOTE(review): presumably the validation interval used by SESSION.py —
# confirm the unit (steps vs. batches) against that script.
valid_every = 25

In [16]:
# Execute GRAPH.py with IPython's %run magic; the -i flag runs the script in
# this notebook's interactive namespace. Presumably it reads the parameter
# variables defined in the cells above — confirm against GRAPH.py.
%run -i 'GRAPH.py'

In [17]:
# Execute SESSION.py with IPython's %run magic; the -i flag runs the script in
# this notebook's interactive namespace. Presumably it depends on names created
# by the cells above, including the GRAPH.py run — confirm against SESSION.py.
%run -i 'SESSION.py'

Initialized!

(100, 167, 3) [1 0 0 0 0 0 0 0]
(100, 167, 3) [1 0 0 0 0 0 0 0]
(100, 167, 3) [0 0 0 0 1 0 0 0]
(100, 167, 3) [1 0 0 0 0 0 0 0]
(100, 167, 3) [1 0 0 0 0 0 0 0]
(100, 167, 3) [1 0 0 0 0 0 0 0]
(100, 167, 3) [0 0 0 0 0 0 0 1]
(100, 167, 3) [0 0 0 0 1 0 0 0]
(100, 167, 3) [1 0 0 0 0 0 0 0]
(100, 167, 3) [1 0 0 0 0 0 0 0]


<tensorflow.python.ops.variables.Variable at 0x1145720b8>