# Predict mussel biomass (grams) from mussel pixels

Use the `torch-py36` environment for the `pydensecrf` module.

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from glob import glob
from tqdm import tqdm

import pydensecrf.densecrf as dcrf
import pydensecrf.utils as utils

%matplotlib inline

In [None]:
# Root of the project data tree; the CSV tables are read from its 'Tables' sub-folder.
DATA_PATH = r'/scratch/gallowaa/cciw/Data'
# Folder that is searched for the '*_final.png' files
# (presumably the finished segmentation masks -- TODO confirm).
MASK_PATH = '/scratch/ssd/gallowaa/cciw/dataset_raw/Test/Lab/done'

In [None]:
# Locations of the three CSV tables kept under DATA_PATH/Tables.
imagetable_path, analysis_path, dive_path = (
    os.path.join(DATA_PATH, 'Tables', csv_name)
    for csv_name in ('ImageTable.csv', 'Analysis.csv', 'Dives.csv')
)

# Every table uses its first column as the index.
image_df = pd.read_csv(imagetable_path, index_col=0)
analysis_df = pd.read_csv(analysis_path, index_col=0, dtype={'Count': float})
dive_df = pd.read_csv(dive_path, index_col=0, parse_dates=['Date'])

# Attach the dive record to each analysis record; the outer join also keeps
# rows whose 'Dive Index' appears in only one of the two tables.
data_df = analysis_df.merge(dive_df, on='Dive Index', how='outer')

In [None]:
# Collect every '*_final.png' file found directly in MASK_PATH, in sorted
# order, and report how many there are.
# Earlier alternative (disabled): glob the test-set JPEGs under DATA_PATH with
#   'Videos_and_stills/TestingSet/Lab/*/*/*/Images/Quad*/*.jpg'
all_images = sorted(glob(os.path.join(MASK_PATH, '*_final.png')))
print(len(all_images))

In [None]:
# NOTE(review): the numbers below look like hand-recorded per-image values;
# what they measure is not stated in this notebook -- TODO confirm or remove.
#29, 29, 23, 29, 23, 29, 27, 29, 27, 29, 29, 29, 29, 29, 29, 25, 27, 29, 27, 29, 29, 29, 26, 29, Nan, 29, 29
# Show the sorted list of file paths as the cell output.
all_images

In [None]:
# Camera-distance correction table ("just for fun").
#
# Each row holds the number of vertical and horizontal grid squares found in
# one image; the trailing comment on each row is the label it was recorded
# for.  These settings could be determined automatically from the image if
# desired.  Rows are consumed by position, so their order matters.
scale = np.array([
    (16, 25),       # 2907-3
    (15, 25),       # 2908-1_7-10
    (16, 22),       # 2908-1_8-16
    (16, 20),       # 2908-3
    (13, 25),       # 2909-1
    (15, 20),       # 2910-1_7-09
    (13, 25),       # 2910-1_8-16
    (16, 23),       # 3537-1_7-02
    (14, 25),       # 3537-1_8-07
    (13, 24),       # 3537-3
    (15, 25),       # 3538-1_7-03
    (15, 25),       # 3538-1_8-08
    (16, 25),       # 3538-2_7-03
    (14, 25),       # 3538-2_8-08
    (14, 25),       # 3538-3_8-08
    (16, 25),       # 3539-1_2018-07-03
    (14.5, 19),     # 3539-1_2018-08-07
    (16, 25),       # 3539-2
    (11, 21),       # 3539-2_8-07
    (15, 22),       # 3539-3_8-07
    (16, 23),       # 3552-1_7-05
    (14, 25),       # 3552-1_8-09
    (15, 23),       # 3552-2_7-04
    (14, 25),       # 3553-3
    (14, 25),       # 3554-1
    (14, 25),       # 3783-2
    (15, 23),       # 3784-1_7-05
    (13, 25),       # 3784-1_8-14
    (13, 20),       # 3784-2
    (15.5, 24.25),  # 3788-1
    (16, 25),       # 3788-2
    (13, 25),       # 3796-1
    (16, 25),       # 3798-2
    (15.5, 25),     # 3800-3
    (15, 23),       # 3801-1
    (15.5, 25),     # 3801-3
])

In [None]:
# For every image, record the fraction of its pixel values that equal the
# second-smallest distinct value found in that image.
# NOTE(review): that value is presumably the "mussel" label of a two-valued
# mask -- confirm against how the '*_final.png' files are produced.
pix_ct = []
for image_path in tqdm(all_images):
    im = cv2.imread(image_path)
    if im is None:
        # cv2.imread reports an unreadable or missing file by returning None
        # instead of raising, which would otherwise surface later as an
        # unrelated IndexError.
        raise IOError('Could not read image: %s' % image_path)

    # np.unique returns the distinct values sorted ascending, so cts[1] is
    # the count of the second-smallest value (taken over all channels).
    _, cts = np.unique(im, return_counts=True)
    if len(cts) < 2:
        raise ValueError(
            'Image contains a single pixel value, cannot compute a '
            'pixel fraction: %s' % image_path)
    pix_ct.append(cts[1] / cts.sum())

# Compare biomass and fraction of mussel pixels

In [None]:
# Build the regression inputs: grid-corrected pixel fractions (x) and the lab
# measurements looked up for each image (y), then fit y ~ m * x + c.
pix_ct_np = np.asarray(pix_ct)

# The correction table is indexed by image position, so it must provide a row
# for every image.
if len(scale) < len(all_images):
    raise ValueError('scale has %d rows but there are %d images'
                     % (len(scale), len(all_images)))

lab_targets = np.zeros((len(all_images), 2))  # column 0 = biomass, 1 = count

for i, image_path in enumerate(all_images):

    # Adjust the pixel fraction by the size of the grid seen in this image,
    # relative to a 16 x 25 grid.
    pix_ct_np[i] = pix_ct_np[i] * (np.prod(scale[i]) / (16 * 25))

    # File name up to the first '.', cut at the '_scale' / '_mask' tag, with
    # the first 4 and last 8 characters dropped.
    # NOTE(review): presumably a fixed prefix and a trailing date/tag --
    # confirm against the file naming scheme.
    tag = '_scale' if 'scale' in image_path else '_mask'
    stem = os.path.basename(image_path).split('.')[0]
    root_fname = stem.split(tag)[0][4:-8]

    # Literal substring match: the fragment comes from a file name and must
    # not be interpreted as a regular expression; missing names never match.
    name_matches = image_df['Name'].str.contains(root_fname, regex=False, na=False)
    guid = image_df[name_matches]['Analysis Index'].astype('int64')
    analysis_ids = np.unique(guid.values)
    if len(analysis_ids) != 1:
        raise ValueError('Expected exactly one Analysis Index for %r, found %d'
                         % (root_fname, len(analysis_ids)))

    row = data_df[data_df['Analysis Index'] == analysis_ids[0]]
    if len(row) != 1:
        raise ValueError('Expected exactly one data row for Analysis Index %d, '
                         'found %d' % (analysis_ids[0], len(row)))

    # Take the scalar explicitly; assigning a size-1 array into a scalar slot
    # is deprecated in recent NumPy releases.
    lab_targets[i, 0] = row['Biomass'].values[0]
    lab_targets[i, 1] = row['Count'].values[0]

# Missing measurements are treated as zero.
lab_targets[np.isnan(lab_targets)] = 0
# Biomass rescaled so that the largest value maps to 1.
y = lab_targets[:, 0] / lab_targets[:, 0].max()
r_val = np.corrcoef(pix_ct_np, y)[1, 0]

# Least-squares line through (pix_ct_np, y): slope m and intercept c.
A = np.vstack([pix_ct_np, np.ones(len(pix_ct_np))]).T
m, c = np.linalg.lstsq(A, y, rcond=None)[0]

In [None]:
# Scatter the biomass values against the grid-corrected pixel fraction and
# overlay the fitted line y = m * x + c together with the correlation value.
fig, ax = plt.subplots(1, 1, figsize=(4, 4))
ax.scatter(pix_ct_np, y, s=40, marker='o', edgecolors='b', facecolors='none')

# NOTE(review): y was divided by its maximum before plotting, so the values on
# this axis are relative even though the label says grams -- confirm the
# intended label.
ax.set(
    xlabel='Fraction of Pixels Labelled Mussel',
    ylabel='Mussel Biomass (g)',
    xlim=(0, 0.15),
    ylim=(0, 1.05),
)

# Fitted line drawn across the visible x range.
x = np.linspace(0, 0.15)
ax.plot(x, m*x + c, 'b', linestyle='-')
ax.annotate(
    r'r = %.4f' % r_val,
    xy=(.06, .805),
    xycoords='axes fraction',
    fontsize=16,
)
ax.grid()

plt.tight_layout()

# Base name for the exported figure; saving is currently disabled.
fname = 'TestingSet_Lab_biomass_v_fract_mussel_pixels_v3'
#fig.savefig(fname + '.png')
#fig.savefig(fname + '.eps', format='eps')