# FishNoFish JPG
Heather Fryling

Northeastern University

2023

Run this notebook to try out FishNet on the FishNoFish dataset. Before running, unzip your copy of the dataset and set the training, validation, and test directory variables (`training_dir`, `validation_dir`, `test_dir`) to the matching folders on your machine.

In [None]:
# A simple CNN to classify fish/no fish in jpeg images scaled and cropped to 64x64.
# This project can run on a CPU.

In [None]:
# Imports of external libraries
import os
import sys
import numpy as np
import torch
import cv2
import torchvision
import torch.nn.functional as F
import matplotlib.pyplot as plt
import pandas as pd
import json

In [None]:
# Root of the project; the results and data paths are built from it.
project_dir = './'
# Dataset flavour; it appears in both the results path and the data path.
data_type = 'jpg'
# Any unique identifier for this run. It names the folder that stores the run's results.
trial_id = 0

In [None]:
# Per-run output layout: <project_dir>/results/<data_type>/<trial_id>/{models,plots,data}.
res_dir = os.path.join(project_dir, 'results', data_type, str(trial_id))
model_dir, plot_dir, csv_dir = (
    os.path.join(res_dir, leaf) for leaf in ('models', 'plots', 'data')
)
# Create every output folder up front; folders that already exist are left alone.
for out_dir in (res_dir, model_dir, plot_dir, csv_dir):
    os.makedirs(out_dir, exist_ok=True)

In [None]:
# Path to the project's 'src' folder.
# NOTE(review): code_dir is not referenced again in the visible cells — confirm it is still needed.
code_dir = os.path.join(project_dir, 'src')

In [None]:
# Hyperparameters for this run.
# learning_rate, betas and eps configure the Adam optimizer.
learning_rate = 1e-3
betas = (0.9, 0.999)
eps = 1e-8
# Handed to FishNet64 as dropout_perc.
dropout = 0.7
# Number of train/validate iterations.
epochs = 250

In [None]:
# Snapshot of the hyperparameters, keyed by the names stored in params.json.
params = dict(
    learning_rate=learning_rate,
    betas=betas,
    epsilon=eps,
    dropout=dropout,
    epochs=epochs,
)

In [None]:
# Store the hyperparameters alongside the run's results.
param_json_path = os.path.join(res_dir, 'params.json')
with open(param_json_path, 'w') as param_file:
    param_file.write(json.dumps(params))

In [None]:
# custom imports
from src.dataset_definitions.fish_jpg_image_dataset import FishJPGImageDataset # Change this line to change the dataset type.
from src.util.custom_transforms import random_square_crop, center_square_crop
from src.util.data_manipulation import train_val_dataset
from src.neural_net.fish_nn import FishNet64
from src.neural_net.nn_functions import train
from src.neural_net.nn_tester import NeuralNetTester
from src.util.plotting import plot_loss, plot_accuracy

In [None]:
# Point data_dir at the location where you have unzipped and stored the data.
data_dir = os.path.join(project_dir, 'data', 'fishnofish64', data_type)
training_dir, validation_dir, test_dir = (
    os.path.join(data_dir, split) for split in ('training', 'validation', 'test')
)
# Listing the folders raises immediately if a path is wrong; the notebook shows the last listing.
os.listdir(test_dir)
os.listdir(validation_dir)

In [None]:
# Get the length of the shortest image edge in the training 'NoFish' folder.
# This is just for confirmation.
min_dimension = float('inf')
# NOTE: despite its name, fish_dir points at the 'NoFish' class folder.
fish_dir = os.path.join(training_dir, 'NoFish')
# Build the converter once instead of once per file.
transform = torchvision.transforms.ToTensor()
for f in os.listdir(fish_dir):
    img = cv2.imread(os.path.join(fish_dir, f))
    if img is None:
        # cv2.imread returns None for files it cannot decode; skip them
        # rather than crashing inside the transform.
        continue
    img = transform(img)
    # After ToTensor, dimension 0 is channels; 1 and 2 are the spatial sizes.
    rows, cols = img.shape[1], img.shape[2]
    min_dimension = min(min_dimension, rows, cols)
min_dimension

In [None]:
# Train and validation sets get a random square crop to 64x64. The test set gets a center square crop of 64x64.
# Each list is passed to FishJPGImageDataset below — presumably applied in order
# (crop, then torch.tensor); confirm in the dataset class.
custom_transforms = [random_square_crop, torch.tensor]
test_transforms = [center_square_crop, torch.tensor]

In [None]:
# Load the three dataset splits so they can be examined and used below.
# Training and validation share the random-crop transforms; the test split uses the center-crop transforms.
train_dataset = FishJPGImageDataset(training_dir, custom_transforms)
validation_dataset = FishJPGImageDataset(validation_dir, custom_transforms)
test_dataset = FishJPGImageDataset(test_dir, test_transforms)

In [None]:
# Take the first training sample (image and label); the notebook displays the image's shape.
img, label = train_dataset[0]
img.shape

In [None]:
# Convert the first image to a numpy array to examine it.
# Reorder the axes so that channels come last (axis 0 moves to the end).
channels_last = img.permute(1, 2, 0)
img_np = np.array(channels_last)
img_np.shape

In [None]:
# Histogram of every value in the sample image. Values should be 0-1.
pixel_values = img_np.ravel()
plt.hist(pixel_values, bins=50, density=True)
plt.xlabel("pixel values")
plt.ylabel("relative frequency")
plt.title("distribution of pixels")

In [None]:
# View a sample image.
plt.imshow(img_np)
plt.title('example image')
# Call show(); the bare attribute `plt.show` only references the function and renders nothing itself.
plt.show()

In [None]:
# Double check the image is 64x64 (the notebook displays the array's shape).
img_np.shape

In [None]:
# Initialize the neural net.
# The dropout hyperparameter is passed as FishNet64's dropout_perc argument.
net = FishNet64(dropout_perc=dropout)

In [None]:
# Define criterion: negative log-likelihood loss.
# F.nll_loss expects log-probabilities as input — presumably FishNet64 ends in log_softmax; confirm.
criterion = F.nll_loss

In [None]:
# Number of samples in the training set (displayed by the notebook).
len(train_dataset)

In [None]:
# Batch loaders for the train, validation and test splits; only the training data is shuffled.
make_loader = torch.utils.data.DataLoader
train_loader = make_loader(train_dataset, batch_size=32, shuffle=True)
val_loader = make_loader(validation_dataset, batch_size=32, shuffle=False)
test_loader = make_loader(test_dataset, batch_size=32, shuffle=False)

# Adam over the network's parameters, configured from the hyperparameters.
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate, betas=betas, eps=eps)
# Passed to train() as its log_interval argument.
log_interval = 10

# Histories passed into train() and the validation tester, then plotted and saved.
train_losses, train_accuracy, train_counter = [], [], []
val_losses, val_accuracy = [], []

In [None]:
# Tester used for the validation passes; it is constructed with model_dir.
# NOTE(review): presumably it writes the best_val_*_model.pt checkpoints that are
# later loaded from model_dir — confirm in NeuralNetTester.
val_tester = NeuralNetTester(model_dir)

In [None]:
# Main loop: one training pass followed by one validation pass per epoch.
# The history lists (train_losses, train_counter, train_accuracy, val_losses,
# val_accuracy) are passed in and read back afterwards for plotting and CSV export.
# NOTE: `epoch` stays defined after the loop and is reused by the test cells.
for epoch in range(epochs):
  train(epoch, len(train_dataset), net, train_loader, optimizer, criterion, log_interval, train_losses, train_counter, train_accuracy)
  # NOTE(review): filter=True presumably enables a heuristic that stops or flags a run
  # that does not appear to be training — confirm in NeuralNetTester.test.
  val_tester.test(epoch, net, optimizer, val_loader, criterion, val_losses, val_accuracy, filter=True, validation=True)

In [None]:
# Draw the loss and accuracy curves; the last argument of each helper is a path under plot_dir.
n_train = len(train_dataset)
loss_plot_path = os.path.join(plot_dir, f'{data_type}_train_loss.jpg')
accuracy_plot_path = os.path.join(plot_dir, f'{data_type}_train_accuracy.jpg')
plot_loss(val_losses, train_losses, train_counter, n_train, loss_plot_path)
plot_accuracy(val_accuracy, train_accuracy, train_counter, n_train, accuracy_plot_path)

In [None]:
# Highest value recorded in the training accuracy history.
print('Maximum train accuracy:', max(train_accuracy))

In [None]:
# Highest value recorded in the validation accuracy history.
print('Maximum validation accuracy:', max(val_accuracy))

In [None]:
# Print the position and value of every validation accuracy entry that is at least .9.
for i, acc in enumerate(val_accuracy):
    if acc >= .9:
        print(i, acc)

In [None]:
# Collect the validation history into a DataFrame so it can be saved as csv.
jpg_val_df = pd.DataFrame({
    'val_accuracy': val_accuracy,
    'val_loss': val_losses,
})

In [None]:
# Collect the training history into a DataFrame so it can be saved as csv.
jpg_train_df = pd.DataFrame({
    'train_accuracy': train_accuracy,
    'train_loss': train_losses,
})

In [None]:
# Write both histories to csv_dir, one file per split.
val_csv_path = os.path.join(csv_dir, f'{data_type}_val.csv')
train_csv_path = os.path.join(csv_dir, f'{data_type}_train.csv')
jpg_val_df.to_csv(val_csv_path)
jpg_train_df.to_csv(train_csv_path)

In [None]:
# Evaluate the checkpoint saved as 'best_val_acc_model.pt' on the test set.
print('Testing with the best validation accuracy model.')
bestmodelfname = 'best_val_acc_model.pt'
checkpoint = torch.load(os.path.join(model_dir, bestmodelfname))
# Epoch recorded in the checkpoint.
print(checkpoint['epoch'])
# Rebuild the architecture and restore the saved weights.
bestmodel = FishNet64(dropout_perc=dropout)
bestmodel.load_state_dict(checkpoint['model_state_dict'])
bestmodel.eval()
test_losses = []
test_accuracy = []
test_tester = NeuralNetTester(save_path=None)
# Evaluate the restored checkpoint (bestmodel). The original passed `net`, the
# final-epoch model, so the loaded checkpoint was never actually tested.
# NOTE(review): `epoch` and `optimizer` are left over from the training loop;
# presumably unused when validation=False and save_path=None — confirm in NeuralNetTester.test.
test_tester.test(epoch, bestmodel, optimizer, test_loader, criterion, test_losses, test_accuracy, filter=True, validation=False)
print("Test loss", test_losses[0])
print("Test accuracy", test_accuracy[0])

In [None]:
# Evaluate the checkpoint saved as 'best_val_loss_model.pt' on the test set.
print('Testing with the best validation loss model.')
bestmodelfname = 'best_val_loss_model.pt'
checkpoint = torch.load(os.path.join(model_dir, bestmodelfname))
# Epoch recorded in the checkpoint.
print(checkpoint['epoch'])
# Rebuild the architecture and restore the saved weights.
bestmodel = FishNet64(dropout_perc=dropout)
bestmodel.load_state_dict(checkpoint['model_state_dict'])
bestmodel.eval()
test_losses = []
test_accuracy = []
test_tester = NeuralNetTester(save_path=None)
# Evaluate the restored checkpoint (bestmodel). The original passed `net`, the
# final-epoch model, so the loaded checkpoint was never actually tested.
# NOTE(review): `epoch` and `optimizer` are left over from the training loop;
# presumably unused when validation=False and save_path=None — confirm in NeuralNetTester.test.
test_tester.test(epoch, bestmodel, optimizer, test_loader, criterion, test_losses, test_accuracy, filter=True, validation=False)
print("Test loss", test_losses[0])
print("Test accuracy", test_accuracy[0])