### Evaluation of UQ Mapping classification from BenthoBox

In [209]:
import pandas as pd
import sys
import os
sys.path.append('/Users/uqmgonz1/Documents/GitHub/')
from Benthobox_evaluation.toolbox import mean_absolute_error as mae
import numpy as np
import scikits.bootstrap as bootstrap
import scipy
import plotly.graph_objs as go
import plotly.plotly as py
import plotly.offline as pyo
import plotly.tools as tls
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
pyo.init_notebook_mode(connected=True)
import matplotlib.pyplot as plt




# Directory holding the UQ Mapping CSV files. It defaults to the original
# location; set the UQ_MAPPING_DIR environment variable to run the notebook
# on another machine without editing every path below.
DATA_DIR=os.environ.get("UQ_MAPPING_DIR","/Users/uqmgonz1/Dropbox/projects/UQ_Mapping")

#read reference label annotations (CoralNet)
uq=pd.read_csv(os.path.join(DATA_DIR,"Confirmed Annotations Heron Feb2019.csv"))
#read Benthobox annotations
bbox=pd.read_csv(os.path.join(DATA_DIR,"PointTags_scaled.csv"))
#read mapping UQ v. KER codes file
lmap=pd.read_csv(os.path.join(DATA_DIR,"KER_UQ_map.csv"))
#read UQ simplification file
uqsimp=pd.read_csv(os.path.join(DATA_DIR,"UQ_simplified.csv"))

#Simplify UQ dataset: every value listed in uqsimp.old_label is replaced by
#the value on the same row of uqsimp.label; other labels are left unchanged
uq['Label']=uq['Label'].replace(uqsimp.old_label.tolist(),uqsimp.label.tolist())
#Translate KER CODES labelset to UQ: each identifier found in lmap.KER becomes
#the matching lmap['Heron Code']; unmapped identifiers are kept as they are
bbox['Label']=bbox.Identifier.replace(lmap.KER.tolist(),lmap['Heron Code'].tolist())


In [210]:
## SUMMARISE AND COMBINE DATASETS
# For every image present in both datasets, compute the percentage of that
# image's annotations carrying each label (separately for the reference and
# the BenthoBox annotations), take the absolute difference between the two,
# and summarise that difference per label.
#BenthoBox
bbox['Name']=bbox['Image Name'].map(os.path.basename)
bbox['Method']='Net_Scaled'
pred=bbox[['Method','Name','Label']]
#UQ
uq['Method']='Observed'
obs=uq[['Method','Name','Label']]
# Keep only images annotated by both methods so percentages are comparable
obs=obs[obs.Name.isin(pred.Name)]
pred=pred[pred.Name.isin(obs.Name)]
if obs.empty or pred.empty:
    raise ValueError("No image names are shared between the reference and BenthoBox annotations")

##Combined
# pd.concat replaces DataFrame.append, which newer pandas no longer provides
df=pd.concat([obs,pred],ignore_index=True)

# Number of annotations per (Method, Name, Label) ...
counts=df.groupby(['Method','Name','Label']).size().rename('count')
# ... expressed as a percentage of all annotations of that (Method, Name)
totals=counts.groupby(level=['Method','Name']).transform('sum')
df=(100*counts/totals).reset_index()

# One row per (Name, Label) with a column per method; a label missing for one
# method in an image counts as 0 % for that method
df=df.pivot_table(index=['Name','Label'], columns='Method',
              values='count').reset_index().fillna(value=0)

df['error']=(df['Observed']-df['Net_Scaled']).abs()

# Per-label summary. The bootstrap interval is computed once per label so that
# both bounds come from the same set of resamples (scikits.bootstrap defaults).
# 'std' is pandas' Series.std, i.e. the sample standard deviation (ddof=1).
summary_rows=[]
for label, errors in df.groupby('Label')['error']:
    ci_low, ci_high=bootstrap.ci(errors.values, statfunction=np.mean)
    summary_rows.append({'Label': label,
                         'mean': errors.mean(),
                         'std': errors.std(),
                         'cilow': ci_low,
                         'cimax': ci_high})
df=pd.DataFrame(summary_rows, columns=['Label','mean','std','cilow','cimax'])



In [211]:
## PLOT ERROR
# Bar chart of the mean error per label with asymmetric error bars.
# NOTE: this cell overwrites df['cilow'] / df['cimax'] (absolute bounds ->
# distances from the mean). Re-run the cell that builds df before re-running
# this one, otherwise the conversion is applied twice.
df[['cilow',"cimax"]]=df[['cilow',"cimax"]].astype('float')
# plotly draws error bars relative to the bar height, so turn the interval
# bounds into lengths below / above the mean
df['cilow']=df['mean']-df['cilow']
df['cimax']=df['cimax']-df['mean']
# Drop labels whose mean or interval is missing. The previous
# df[~df.isna()] was a cell-wise mask that left NaN cells as NaN and removed
# no rows.
df=df.dropna(subset=['mean','cilow','cimax'])

data=[
go.Bar(
    x=df['Label'],
    y=df['mean'],
    name='BenthoBox_scaled',
    error_y=dict(
            type='data',
            symmetric=False,
            array=df['cimax'],
            arrayminus=df['cilow']))]

layout = go.Layout(
    title='Abundance estimation Error'
)

fig=go.Figure(data=data, layout=layout)
# fig
pyo.iplot(fig, filename='error-bar')


In [212]:
# Display the per-label error summary; at this point cilow/cimax hold the
# error-bar lengths (distances from the mean) set by the plotting cell.
df

Unnamed: 0,Label,mean,std,cilow,cimax
0,ACR-BRA,7.022556,9.507944,1.278195,2.0
1,ACR-OTH,3.731707,4.009399,0.682927,1.146341
2,ACR-PE,8.622222,9.75052,1.496296,1.822222
3,ALC-SF,9.081967,10.96696,1.655738,2.327869
4,ALG_OTH,6.878981,5.322124,0.802548,0.878981
5,BRA_DIG_Ac,9.513514,15.08793,3.513514,6.756757
6,BRA_OTH,4.037037,3.654736,0.814815,1.148148
7,BRA_TAB-Ac,12.347826,15.80912,2.186335,2.881988
8,CAL_CCA_DC,7.360465,8.22976,1.046512,1.476744
9,Caul,9.333333,6.408328,4.0,5.333333
