In [1]:
import geopandas
import scipy.io
import numpy as np
import pandas as pd
import xarray as xr
from EchoPro import EchoPro
epro_2019 = EchoPro(init_file_path='./config_files/initialization_config.yml',
                    survey_year_file_path='./config_files/survey_year_2019_config.yml',
                    source=3,
                    bio_data_type=1,
                    age_data_status=1, 
                    exclude_age1=True)

A check of the initialization file needs to be done!
A check of the survey year file needs to be done!
Loading biological data ...
getting strata data
Do we need to set stratum_id or just use strata_df? Look into this!




In [2]:
# get geopandas representation of final_biomass_table
# allows us to plot transect data
df = epro_2019.final_biomass_table
gdf = geopandas.GeoDataFrame(df, 
                             geometry=geopandas.points_from_xy(df.Longitude, df.Latitude))

# reseting the index so that we can select data based on column name
gdf = gdf.reset_index()

## Generate keys

In [3]:
# import class to use it's functions 
from EchoPro.load_stratification_data import LoadStrataData

strata_class = LoadStrataData(epro_2019)

# get df relating the haul to the stratum
strata_haul_df = epro_2019.strata_df.reset_index()[['Haul', 'strata']].set_index('Haul')

# get the bins for the lengths
bins_len = epro_2019.params['bio_hake_len_bin']

# get the bins for the ages
bins_age = epro_2019.params['bio_hake_age_bin']

# get all specimen data that is necessary for key generation
spec_w_strata = epro_2019.specimen_df.drop('Specimen_Number', axis=1).copy().reset_index()

# add strata column 
spec_w_strata['Strata'] = spec_w_strata.apply(lambda x: strata_haul_df.loc[x[0]], 
                                              axis=1).values

spec_w_strata.set_index('Strata', inplace=True)

# spec_w_strata = spec_w_strata[(spec_w_strata['Sex'] != 3)].copy() # TODO: this should be for all sexes

age_len_key_da, age_len_key_wgt_da, age_len_key_norm_da = strata_class.get_age_key_das(spec_w_strata, 
                                                                                       bins_len, bins_age)

# TODO: it would probably be better to do an average of station 1 and 2 here... (Chu doesn't do this)
age_len_key_wgt_norm_da = age_len_key_wgt_da/age_len_key_wgt_da.sum(dim=['len_bins', 'age_bins'])

# each stratum's multiplier once normalized weight has been calculated
age2_wgt_proportion_da = 1.0 - age_len_key_wgt_norm_da.isel(age_bins=0).sum(dim='len_bins')/age_len_key_wgt_norm_da.sum(dim=['len_bins', 'age_bins'])

In [4]:
# get all specimen data that is necessary for key generation
spec_w_strata = epro_2019.specimen_df.drop('Specimen_Number', axis=1).copy().reset_index()

# add strata column 
spec_w_strata['Strata'] = spec_w_strata.apply(lambda x: strata_haul_df.loc[x[0]], 
                                              axis=1).values

spec_w_strata.set_index('Strata', inplace=True)

# reg_w0, reg_p = strata_class.get_length_val_reg_vals(len_name='Length', val_name="Weight", df=spec_w_strata)

# len_weight_ALL, len_nALL, norm_len_key_ALL = strata_class.generate_length_val_key(bins_len, reg_w0=None, reg_p=None, 
#                                                                                len_name='Length',
#                                                                                  val_name='Weight', df=spec_w_strata)

# spec_w_strata = spec_w_strata[(spec_w_strata['Sex'] != 3)] # TODO: this should be for all sexes
len_wgt_key_spec_da, len_key_spec_da, len_key_norm_spec_da = strata_class.get_weight_key_das(spec_w_strata, 
                                                                                bins_len, reg_w0=None, reg_p=None, 
                                                                                len_name='Length',
                                                                                val_name='Weight')

In [5]:
length_explode_df = epro_2019.length_df[['Sex', 'Length']].copy()
# add strata column 
length_explode_df['Strata'] = length_explode_df.reset_index().apply(lambda x: strata_haul_df.loc[x[0]], 
                                                                    axis=1).values

length_explode_df.reset_index(inplace=True)

length_explode_df.set_index('Strata', inplace=True)

length_explode_df = length_explode_df.explode(['Sex', 'Length'])

length_explode_df = length_explode_df.astype({'Haul': int, 
                                              'Sex': int, 
                                              'Length': np.float64})

In [6]:

# length_explode_df = length_explode_df[(length_explode_df['Sex'] != 3)] # TODO: this should be for all sexes

unique_strata = length_explode_df.index.unique().values

len_key_norm_length = np.empty((unique_strata.shape[0], bins_len.shape[0]), dtype=np.float64)
len_key_norm_length[:, :] = 0.0

stratum_ind = 0
for stratum in unique_strata:
    input_data = length_explode_df.loc[stratum]['Length'].values
    len_ind = strata_class.get_bin_ind(input_data, bins_len)

    len_key_n = np.array([i.shape[0] for i in len_ind])
    len_key_norm_length[stratum_ind, :] = len_key_n/np.sum(len_key_n)
    
    stratum_ind += 1
    
    
len_key_norm_length_da = xr.DataArray(data=len_key_norm_length, 
                                      coords={'strata': unique_strata, 'len_bins': bins_len})

In [7]:
len_key_norm_length_da.sel(strata=1)

In [8]:
len_key_norm_spec_da.sel(strata=1)

In [9]:
len_key_norm_ave = (len_key_norm_length_da + len_key_norm_spec_da)/2

In [10]:
len_key_norm_ave.sel(strata=1)

## Calculate the biomass density

In [11]:
# get the nasc dataframe 
nasc_df = epro_2019.load_nasc_data()

# calculates the interval for the area calculation
interval = (nasc_df['VL start'].iloc[1:].values - nasc_df['VL start'].iloc[:-1].values)
last_interval = nasc_df['VL end'].iloc[-1] - nasc_df['VL start'].iloc[-1]

interval = np.concatenate([interval, np.array([last_interval])])

median_interval = np.median(interval)

# remove outliers at the end of the transect
ind_outliers = np.argwhere(np.abs(interval - median_interval) > 0.05).flatten()
interval[ind_outliers] = nasc_df['VL end'].values[ind_outliers] - nasc_df['VL start'].values[ind_outliers]

In [12]:
bio_dense_df = nasc_df[['Stratum', 'NASC', 'Haul']].copy()
bio_dense_df['interval'] = interval
bio_dense_df['n_A'] = nasc_df.apply(lambda x: np.round(x.NASC/epro_2019.strata_ds.sig_b.sel(strata=x.Stratum).values), axis=1)
bio_dense_df['A'] = bio_dense_df['interval']*nasc_df['Spacing']
bio_dense_df['N_A'] = bio_dense_df['n_A']*bio_dense_df['A']

In [13]:
# bio_density = bio_dense_df.apply(lambda x: x.n_A*np.dot(len_key_norm_ave.sel(strata=x.Stratum), 
#                                           len_wgt_key_spec_da.sel(strata=x.Stratum)), axis=1)

bio_density_2_prop = bio_dense_df.apply(lambda x: x.n_A*np.dot(len_key_norm_ave.sel(strata=x.Stratum), 
                                        len_wgt_key_spec_da.sel(strata=x.Stratum))*age2_wgt_proportion_da.sel(strata=x.Stratum).values,
                                        axis=1)

# bio_density_2_prop = bio_dense_df.apply(lambda x: x.n_A*np.dot(len_key_norm_ave.sel(strata=x.Stratum), 
#                                         len_weight_ALL)*age2_wgt_proportion_da.sel(strata=x.Stratum).values,
#                                         axis=1)

In [14]:
bio_density_2_prop.reset_index().loc[750: 760]

Unnamed: 0,Transect,0
750,9,0.0
751,9,0.0
752,9,0.0
753,9,0.0
754,9,237.753004
755,9,1304.985465
756,9,2442.065049
757,9,1547.33273
758,9,2266.908851
759,9,1758.381592


In [15]:
epro_2019.final_biomass_table["nwgt_total"].reset_index().loc[750: 760]

Unnamed: 0,Transect,nwgt_total
750,9,0.0
751,9,0.0
752,9,0.0
753,9,0.0
754,9,232.112442
755,9,1273.975595
756,9,2384.025103
757,9,1510.56475
758,9,2213.043001
759,9,1716.60141


In [16]:
arr1 = epro_2019.final_biomass_table["nwgt_total"].values
arr2 = bio_density_2_prop.values

In [17]:
out = (np.abs(arr1 - arr2)/((arr1 + arr2)/2.0))*100

  out = (np.abs(arr1 - arr2)/((arr1 + arr2)/2.0))*100


In [18]:
no_nan_ind = np.logical_not(np.isnan(out)).flatten()

np.mean(out[no_nan_ind])

4.988339721326623

In [19]:
# get df relating the haul to the stratum
strata_haul_df = epro_2019.strata_df.reset_index()[['Haul', 'strata']].set_index('Haul')

# get all specimen data that is necessary for key generation
spec_w_strata = epro_2019.specimen_df.drop('Specimen_Number', axis=1).copy().reset_index()

# add strata column 
spec_w_strata['Strata'] = spec_w_strata.apply(lambda x: strata_haul_df.loc[x[0]], 
                                              axis=1).values

spec_w_strata.set_index('Strata', inplace=True)

In [20]:
length_explode_df = epro_2019.length_df[['Sex', 'Length']].copy()
# add strata column 
length_explode_df['Strata'] = length_explode_df.reset_index().apply(lambda x: strata_haul_df.loc[x[0]], 
                                                                    axis=1).values

length_explode_df.reset_index(inplace=True)

length_explode_df.set_index('Strata', inplace=True)

length_explode_df['Weight'] = np.nan

length_explode_df = length_explode_df.explode(['Sex', 'Length'])

length_explode_df = length_explode_df.astype({'Haul': int, 
                                              'Sex': int, 
                                              'Length': np.float64, 'Weight': np.float64})

In [21]:
from EchoPro.load_stratification_data import LoadStrataData
strata_class = LoadStrataData(epro_2019)
# get the bins for the lengths
bins_len = epro_2019.params['bio_hake_len_bin']
# get the bins for the ages
bins_age = epro_2019.params['bio_hake_age_bin']

bc = strata_class.get_biomass_constants(spec_w_strata, length_explode_df, bins_len, bins_age)
bc


In [22]:
nntk_male = bio_dense_df.apply(lambda x: np.round(x.n_A*(bc.len_M_prop.sel(strata=x.Stratum).values + bc.spec_M_prop.sel(strata=x.Stratum).values)), axis=1)
nntk_female = bio_dense_df.apply(lambda x: np.round(x.n_A*(bc.len_F_prop.sel(strata=x.Stratum).values + bc.spec_F_prop.sel(strata=x.Stratum).values)), axis=1)

bio_dense_df['nntk_male'] = nntk_male
bio_dense_df['nntk_female'] = nntk_female

In [23]:
nWgt_male_int = bio_dense_df.apply(lambda x: x.nntk_male*bc.len_wgt_M_prod.sel(strata=x.Stratum).values, axis=1)
nWgt_female_int = bio_dense_df.apply(lambda x: x.nntk_female*bc.len_wgt_F_prod.sel(strata=x.Stratum).values, axis=1)

bio_dense_df['nWgt_male'] = nWgt_male_int
bio_dense_df['nWgt_female'] = nWgt_female_int

In [24]:
nWgt_unsexed_int = bio_dense_df.apply(lambda x: (x.n_A - x.nntk_male - x.nntk_female)*bc.len_wgt_prod.sel(strata=x.Stratum).values, axis=1)
bio_dense_df['nWgt_unsexed'] = nWgt_unsexed_int

In [25]:
bio_dense_df['nWgt_total'] = bio_dense_df['nWgt_male'] + bio_dense_df['nWgt_female'] + bio_dense_df['nWgt_unsexed']

In [26]:
bio_dense_df['nWgt_total'].reset_index().loc[7554:7560]

Unnamed: 0,Transect,nWgt_total
7554,101,0.0
7555,101,4162.797172
7556,101,40.359888
7557,101,8263.883111
7558,101,221.57409
7559,101,32.021486
7560,101,48.032228


In [27]:
age_len_key_da, age_len_key_wgt_da, age_len_key_norm_da = strata_class.get_age_key_das(spec_w_strata, 
                                                                                       bins_len, bins_age)

# TODO: it would probably be better to do an average of station 1 and 2 here... (Chu doesn't do this)
age_len_key_wgt_norm_da = age_len_key_wgt_da/age_len_key_wgt_da.sum(dim=['len_bins', 'age_bins'])

# each stratum's multiplier once normalized weight has been calculated
age2_wgt_proportion_da = 1.0 - age_len_key_wgt_norm_da.isel(age_bins=0).sum(dim='len_bins')/age_len_key_wgt_norm_da.sum(dim=['len_bins', 'age_bins'])

In [28]:
nWgt_total_2_prop = bio_dense_df.apply(lambda x: x.nWgt_total*age2_wgt_proportion_da.sel(strata=x.Stratum).values,
                                        axis=1)

bio_dense_df['nWgt_total_2_prop'] = nWgt_total_2_prop

In [29]:
bio_dense_df['nWgt_total_2_prop'].reset_index().loc[753:760]

Unnamed: 0,Transect,nWgt_total_2_prop
753,9,0.0
754,9,232.171878
755,9,1274.325612
756,9,2384.692774
757,9,1510.974441
758,9,2213.644003
759,9,1717.056406
760,9,6522.610836


In [30]:
bio_dense_df['nWgt_total_2_prop'].sum()

302063975.4880831

In [36]:
stratum = 3
spec_w_strata[spec_w_strata['Sex'] == 1].loc[stratum].shape[0] + spec_w_strata[spec_w_strata['Sex'] == 2].loc[stratum].shape[0]


# TODO: this does not match data.bio.strata.matrix_NtotALL! need to find issue!


148

In [46]:
stratum = 8

length_explode_df.loc[stratum].shape[0]

292

In [31]:
nntk_male = np.round(bio_dense_df['n_A'].values*(spec_M_prop + len_M_prop))

NameError: name 'spec_M_prop' is not defined

In [None]:
nntk_male[730:740]

In [None]:
(spec_M_prop + len_M_prop)

In [None]:
bio_dense_df['n_A'].reset_index().loc[730:740] #len_wgt_prod

In [None]:
bio_dense_df['NASC'].reset_index().loc[734]

In [None]:
len_strata

In [None]:
# %%time
# lat_INPFC = [np.NINF, 36, 40.5, 43.000, 45.7667, 48.5, 55.0000]  # INPFC
# epro_2019.run_cv_analysis(lat_INPFC, kriged_data=False)

In [None]:
# CV_JH_mean = 0.13377365505687697
# CPU times: user 2min 31s, sys: 312 ms, total: 2min 31s
# Wall time: 2min 31s