# Model for Nature Conservancy Fisheries Kaggle Competition

#### Dependencies

In [None]:
import fish_data as fd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
%matplotlib inline
import os

#### Helper functions

In [None]:
# Print the built-in help text for the project's fish_data helper module
# (imported above as `fd`) so the available helpers are listed in the notebook.
help(fd)

#### Build the validation set and generate the list of filenames for balanced training batches, with the corresponding label arrays

In [None]:
# Standard image size used when batches are built: the cells below check that
# make_batch output has shape (n, std_y, std_x, 3).
std_y, std_x = 500, 750
# NOTE(review): the printed ratio relies on true division (Python 3); under
# Python 2 integer division would print 1 -- confirm the kernel version.
aspect_message = "Aspect ratio: {}"
print(aspect_message.format(std_x / std_y))

In [None]:
# Number of images drawn for the validation set (used below when sampling the
# validation filenames and when checking the validation array shapes).
valid_size = 150

In [None]:
# Sample the validation filenames WITHOUT replacement. np.random.choice samples
# with replacement by default, which could put the same image into the
# validation set more than once and shrink its effective size.
# Note: with replace=False, numpy raises ValueError if valid_size exceeds the
# number of available filenames.
valid_filenames = np.random.choice(fd.generate_filenames_list(), valid_size, replace=False)

In [None]:
# Build the validation labels and report whether they have one row per
# validation image and 8 label columns.
y_valid = fd.make_label(valid_filenames, 0, valid_size)
print("Valid size and num_labels are correct for the valid set: {}".format(
    (valid_size, 8) == y_valid.shape))

# Build the validation images (normalized, not mutated) and report whether they
# were stacked into a (valid_size, std_y, std_x, 3) array.
X_valid = fd.make_batch(valid_filenames, 0, valid_size, std_y, std_x,
                        normalize=True, mutate=False)
print("Valid images have been resized and stacked: {}".format(
    (valid_size, std_y, std_x, 3) == X_valid.shape))

In [None]:
# Eyeball the first three validation examples: filename, label row, and image.
for idx in range(3):
    print(valid_filenames[idx])
    print(y_valid[idx])
    fd.show_panel(X_valid[idx])

In [None]:
# Argument handed to fd.generate_balanced_epoch; presumably the minimum number
# of filenames per class in one balanced epoch -- confirm against fish_data.
min_each = 1875
# Shuffled list of training filenames for one epoch.
# NOTE(review): this list is generated independently of valid_filenames; it is
# not visible here whether validation images are excluded from it -- confirm.
X_filenames = fd.generate_balanced_epoch(min_each, shuffle = True)


In [None]:
# Sanity-check batch generation across the epoch boundary: a batch of two that
# starts on the last filename is expected to continue with the first filename.
print("TEST batch generation at the end of an epoch")
X_batch_test = fd.make_batch(X_filenames, offset=len(X_filenames) - 1, batch_size=2,
                             std_y=std_y, std_x=std_x, normalize=True, mutate=True)
y_batch_test = fd.make_label(X_filenames, offset=len(X_filenames) - 1,
                             batch_size=2)
print("  Shape of batch array is correct: {}".format(X_batch_test.shape == (2, std_y, std_x, 3)))
# Check the batch that was just generated (the original inspected X_valid here,
# which says nothing about this test batch).
print("  Average pixel value is less than 0.1: {}".format(np.mean(X_batch_test) <= 0.1))
# np.all reduces over every element, so the comparison works whether make_label
# returns a 1-D row or a (1, num_labels) array for batch_size=1; the builtin
# all() would raise on a 2-D comparison result.
print("  End of epoch navigated correctly: {}".format(
    np.all(y_batch_test[-1, :] == fd.make_label(X_filenames,
                                                offset=0,
                                                batch_size=1))))
print("")
# Show the two examples in the batch next to the filenames they should map to:
# the last filename of the epoch, then the first.
for i, ix in enumerate([-1, 0]):
    print(X_filenames[ix])
    print(y_batch_test[i])
    fd.show_panel(X_batch_test[i])

## Graph and Session Runs

#### Graph parameters

In [None]:
# ---- General ----
num_channels = 3   # colour channels (matches the trailing 3 in the batch shape checks)
num_labels = 8     # label columns (matches the (valid_size, 8) label shape check)
batch_size = 25
stddev = 0.2       # presumably the weight-initialisation std dev -- confirm in GRAPH.py

# ---- Convolution ----
# One depth per convolution layer; every layer uses the same 3x3 kernel size.
conv_depths = [8, 8, 16, 16, 32, 32, 64, 64, 128, 128, 256, 256]
kernel_sizes = [3] * len(conv_depths)
stride = 1
final_depth = conv_depths[-1]

# ---- Dropout ----
kp_hidden = 0.50   # presumably the keep-probability for hidden layers -- confirm

# ---- Fully connected ----
fc1_depth = 256
fc2_depth = 64

# ---- Regularization ----
# Scaled by the number of convolution layers.
# NOTE(review): relies on true division (Python 3); under Python 2 the inner
# quotient would be 0 and beta would be 0 -- confirm the kernel version.
beta = 1e-1 * (1 / len(conv_depths))

# ---- Learning rate ----
init_rate = 1e-2


In [None]:
# Plot how many nodes fd.count_nodes reports for each of the 12 plotted layers.
# NOTE(review): each value below is used both as count_nodes' third argument
# and as the index into conv_depths, so only conv_depths[0..7] are ever read
# even though conv_depths has 12 entries -- confirm this is intended.
nodes = [fd.count_nodes(std_y, std_x, k, conv_depths[k])
         for k in (0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 7, 7)]

plt.figure()
# x axis is the 1-based layer number.
plt.plot(range(1, len(nodes) + 1), nodes, '-')
plt.xlabel('Layer')
plt.ylabel('Num of Nodes')
# Node counts span orders of magnitude, hence the log scale.
plt.yscale('log')
plt.xlim(0, 14)
plt.show()
    

#### Session parameters

In [None]:
# epochs
num_epochs = 5
# path for tensorboard summary file to be written
logs_path = os.getcwd()+'/TB_logs'
valid_every = 25

In [None]:
# Execute GRAPH.py inside this notebook's namespace (-i), so the script can use
# the variables defined in the cells above and its own names stay available
# afterwards. Presumably builds the TensorFlow graph -- confirm in GRAPH.py.
%run -i 'GRAPH.py'

In [None]:
# Execute SESSION.py inside this notebook's namespace (-i), so it shares the
# variables defined by the cells above. Presumably runs the training session
# -- confirm in SESSION.py.
%run -i 'SESSION.py'