# Model for Nature Conservancy Fisheries Kaggle Competition

#### Dependencies

In [None]:
import fish_data as fd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
%matplotlib inline
import os

#### Helper functions

In [None]:
# Print the built-in help for the project's `fish_data` module (imported as `fd`)
# so the helper functions used in the cells below can be looked up.
help(fd)

#### Generate the list of filenames for a balanced training epoch set, together with the corresponding label array

In [None]:
# Argument handed to the epoch-set generator; judging by its name it is the
# minimum number of samples per class — TODO confirm against `help(fd)`.
min_each = 2000
# pre_X: sequence of image filenames; pre_y: the matching label array.
# Both are split together by `train_test_split` in the next cell.
pre_X, pre_y = fd.generate_epoch_set_list_and_label_array(min_each)

#### Split into training and validation sets

In [None]:
# Number of samples held out for validation. Because this is an int,
# `train_test_split` treats it as an absolute count rather than a fraction.
valid_size = 500
# NOTE(review): no `random_state` is passed, so the split (and therefore the
# validation set) differs on every run — confirm this is intended.
X_train_filenames, X_valid_filenames, y_train, y_valid = train_test_split(pre_X, pre_y, test_size = valid_size)

In [None]:
# Report the size of each split: number of filenames and shape of the label array.
# (The quoted variable names in the messages previously lacked their closing quote.)
print("File names in 'X_train_filenames': {}".format(len(X_train_filenames)))
print("Shape of y_train: {}".format(y_train.shape))
print("File names in 'X_valid_filenames': {}".format(len(X_valid_filenames)))
print("Shape of y_valid: {}".format(y_valid.shape))



#### Set the master standardization parameters

In [None]:
# Target image dimensions passed to `fd.make_batch`; the shape checks in this
# notebook expect the resulting arrays to be (n, std_y, std_x, 3).
std_y, std_x = 200, 330
aspect_ratio = std_x / std_y
print("Aspect ratio: {}".format(aspect_ratio))

#### Download and standardize the validation image set into the environment.

In [None]:
# Smoke-test the batch helpers: request 4 items starting 2 positions before the
# end of the validation data, so the request has to straddle the end of the epoch.
wrap_len = 4

x_wrap_start = len(X_valid_filenames) - 2
X_test = fd.make_batch(X_valid_filenames, x_wrap_start, wrap_len, std_y, std_x, mutate = False)
batch_wraps_ok = X_test.shape == (wrap_len, std_y, std_x, 3)
print("Fn `make_batch` spans the end of an epoch correctly: {}".format(batch_wraps_ok))

y_wrap_start = y_valid.shape[0] - 2
y_test = fd.make_label(y_valid, y_wrap_start, wrap_len)
# 8 is the expected number of label columns.
label_wraps_ok = y_test.shape == (wrap_len, 8)
print("Fn `make_label` spans the end of an epoch correctly: {}".format(label_wraps_ok))

In [None]:

# Load the whole validation set as a single batch: start at offset 0 and take
# every validation filename, at the master std_y / std_x size.
# NOTE(review): `mutate = True` here, whereas the epoch-wrap check above uses
# `mutate = False`. If `mutate` applies random augmentation, a validation set
# would normally be left unmutated — confirm against `fd.make_batch`.
X_valid = fd.make_batch(X_valid_filenames, 0, len(X_valid_filenames), std_y, std_x, mutate = True)

In [None]:
# Sanity-check the validation array, then preview the first few images: each of
# the three channels on its own, followed by the recombined RGB image.
print("TEST")
print("  Shape of validation array is correct: {}".format(X_valid.shape == (valid_size, std_y, std_x, 3)))
print("  Average pixel value: {}".format(np.mean(X_valid)))

# Never index past the end if fewer than three validation images exist.
for i in range(min(3, len(X_valid_filenames))):
    print(X_valid_filenames[i])
    plt.figure(figsize=(10, 20))
    for channel in range(3):
        plt.subplot(1, 4, channel + 1)
        plt.imshow(X_valid[i][:, :, channel])
    plt.subplot(1, 4, 4)
    # Map the standardized values back to 0..255 for display (this assumes the
    # standardization was pixel / 255 - 0.5 — TODO confirm against fish_data).
    # Clip explicitly and cast to uint8 so out-of-range values are handled here
    # instead of relying on imshow's implicit clipping of integer RGB data.
    rgb = (X_valid[i] * 255.0) + (255.0 / 2)
    plt.imshow(np.clip(rgb, 0, 255).astype(np.uint8))
    plt.show()

## Graph and Session Runs

#### Graph parameters

In [None]:
# Hyper-parameters for the model graph. They are plain notebook globals,
# presumably read by GRAPH.py when it is run with `%run -i` further down.

# General
num_channels, num_labels = 3, 8
batch_size = 50
stddev = 0.5  # presumably the weight-initialization spread — TODO confirm in GRAPH.py

# convolution: twelve entries each, kernel sizes alternating 3 and 2
kernel_sizes = [3, 2] * 6
stride = 1
conv_depths = [8, 16, 32, 64, 128, 256] + [512] * 6
final_depth = conv_depths[-1]

# dropout keep-probabilities (1.0 keeps everything)
kp_convs = 1.0
kp_hidden = 0.75

# fully connected
fc1_depth, fc2_depth = 256, 64

# regularization
beta = 0.1

# Learning rate
init_rate = 0.01


In [None]:
# Plot the number of nodes in every layer (log scale) to eyeball how quickly the
# network narrows. Layer counts are derived from `conv_depths` rather than
# hard-coded, so the plot stays correct if convolution layers are added or removed.
# The third argument to `fd.count_nodes` advances every second layer (i // 2);
# presumably it is the number of pooling steps applied so far — TODO confirm.
nodes = [
    fd.count_nodes(std_y, std_x, i // 2, depth)
    for i, depth in enumerate(conv_depths)
]
# The two fully connected layers follow the convolution stack.
nodes = nodes + [fc1_depth, fc2_depth]

plt.figure()
plt.plot(range(1, len(nodes) + 1), nodes, '-')
plt.xlabel('Layer')
plt.ylabel('Num of Nodes')
plt.yscale('log')
plt.xlim(0, len(nodes) + 1)
plt.show()
    

#### Session parameters

In [None]:
# Settings for the training session, presumably read by SESSION.py when it is
# run with `%run -i` below.

# epochs
num_epochs = 5
# path for tensorboard summary file to be written; built with os.path.join so
# the separator is correct on every platform
logs_path = os.path.join(os.getcwd(), 'TB_logs')
# judging by the name, validation runs every `valid_every` steps — TODO confirm in SESSION.py
valid_every = 50

In [None]:
# Execute GRAPH.py inside this notebook's namespace (`-i`), so the script can
# use the variables defined in the cells above and anything it defines stays
# available afterwards. Presumably this builds the TensorFlow graph.
%run -i 'GRAPH.py'

In [None]:
# Execute SESSION.py inside this notebook's namespace (`-i`), so it shares the
# notebook's variables. Presumably this runs the training session on the graph
# created by GRAPH.py — run that cell first.
%run -i 'SESSION.py'