# Model for Nature Conservancy Fisheries Kaggle Competition

#### Dependencies

In [None]:
import fish_data as fd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report 
import matplotlib.pyplot as plt
%matplotlib inline
import os
import pandas as pd
import json

#### Helper functions

In [None]:
# Print the documentation of the project-local fish_data module (imported as fd).
help(fd)

#### Generate a list of filenames

In [None]:
# Master (training) list: 'data/train/' is scanned with subfolders=True.
fish_filenames = fd.generate_filenames_list('data/train/', subfolders=True)
n_master_files = len(fish_filenames)
print("There are {} filenames in the master set list".format(n_master_files))

# Test list: 'data/test_stg1/' is scanned with subfolders=False.
test_filenames = fd.generate_filenames_list('data/test_stg1/', subfolders=False)
n_test_files = len(test_filenames)
print("There are {} filenames in the test set list".format(n_test_files))

#### Generate the labels for the master set list

In [None]:
# Build the label array for every file in the master list. The two string
# arguments are passed to fd.make_labels as-is (presumably the path fragments
# that bracket the class name inside each filename -- confirm in fish_data).
fish_label_arr = fd.make_labels(fish_filenames, 'train/', '/img')

# A bare `fish_label_arr.shape` in the middle of a cell is evaluated and then
# discarded, so print it explicitly to make the shape visible.
print("Label array shape: {}".format(fish_label_arr.shape))

# Column sums of the label array expected for this training set.
expected_class_counts = [1719, 200, 117, 67, 465, 299, 176, 734]

# np.array_equal returns a plain bool and reports False when the shapes do not
# match, whereas all(a == list) relies on the comparison yielding an iterable.
labels_ok = np.array_equal(np.sum(fish_label_arr, 0), expected_class_counts)
print("One-hot labels generated correctly: {}".format(labels_ok))

In [None]:
# Derive one epoch's file list and the matching labels from the master list,
# with the helper's shuffling switched off (shuffle = False).
# NOTE(review): the helper name suggests classes are balanced by resampling
# files -- confirm in fish_data. If files are repeated, the train/validation
# split of f_list in a later cell could place copies of the same image on
# both sides of the split.
f_list, f_labels = fd.generate_balanced_filenames_epoch(fish_filenames, fish_label_arr, shuffle = False)

#### Shuffle and split the master set list into training and validation sets

In [None]:
# Number of samples held out for validation; the rest is used for training.
valid_size = 300

# train_test_split shuffles by default and returns the four parts in the
# order (train files, validation files, train labels, validation labels).
split_parts = train_test_split(f_list, f_labels, test_size=valid_size)
files_train, files_val, y_train, y_val = split_parts

n_val_samples = y_val.shape[0]
n_train_samples = y_train.shape[0]
print("Validation set size: {}".format(n_val_samples))
print("Training set size: {}".format(n_train_samples))

In [None]:
# Process the entire validation set as a single batch: offset 0 and a
# batch_size equal to valid_size. Centre cropping is requested and mutation
# is switched off, in contrast to the example training batch.
val_data, val_labels = fd.process_batch(
    files_val,
    y_val,
    offset=0,
    batch_size=valid_size,
    std_size=256,
    crop_size=224,
    crop_mode='centre',
    normalize='custom',
    pixel_offset=100,
    pixel_factor=100.0,
    mutation=False,
    verbose=True,
)

In [None]:
# Process a small sample of five training files for visual inspection.
# normalize=None is passed here (presumably so the images remain viewable --
# confirm in fish_data), with random cropping and mutation enabled.
example_batch, example_labels = fd.process_batch(
    files_train,
    y_train,
    offset=0,
    batch_size=5,
    std_size=256,
    crop_size=224,
    crop_mode='random',
    normalize=None,
    pixel_offset=100,
    pixel_factor=100.0,
    mutation=True,
    verbose=True,
)

In [None]:
# Show every image of the example batch with its class index. Iterating over
# the batch itself (rather than a hard-coded range(5)) keeps this cell correct
# when the batch size used to build example_batch is changed.
for example_label, example_image in zip(example_labels, example_batch):
    # argmax over the label row gives the index of the largest entry.
    print("Fish Label: {}".format(np.argmax(example_label, 0)))
    fd.show_panel(example_image)

## Graph and Session Runs

#### Graph parameters

In [None]:
# Execute PARAMETERS.py with -i, i.e. inside this notebook's namespace, so the
# names it assigns become notebook variables.
%run -i 'PARAMETERS.py'

#### Session parameters

In [None]:
# Identifier for this run. Presumably read by the scripts executed with
# `%run -i` in the following cells -- confirm in GRAPH.py / SESSION.py.
version_ID = 'v2.1.0.0'

In [None]:
# Execute GRAPH.py inside this notebook's namespace (-i), so it can use the
# variables defined in the cells above.
%run -i 'GRAPH.py'

In [None]:
# Execute SESSION.py inside this notebook's namespace (-i), so it can use the
# variables defined by the earlier cells and scripts.
%run -i 'SESSION.py'

#### Notes during run 
