In [1]:
import geopandas
import scipy.io
import numpy as np
import pandas as pd
import xarray as xr
from EchoPro import EchoPro
# Keyword arguments for the 2019 survey run, gathered in one place so the
# configuration is easy to scan and tweak.
survey_2019_kwargs = dict(
    init_file_path='./config_files/initialization_config.yml',
    survey_year_file_path='./config_files/survey_year_2019_config.yml',
    source=3,
    bio_data_type=1,
    age_data_status=1,
    exclude_age1=True,
)

# EchoPro object that every later cell reads its tables from.
epro_2019 = EchoPro(**survey_2019_kwargs)

A check of the initialization file needs to be done!
A check of the survey year file needs to be done!
Loading biological data ...
getting strata data
Do we need to set stratum_id or just use strata_df? Look into this!




In [2]:
# Wrap final_biomass_table in a GeoDataFrame so the transect data can be plotted.
df = epro_2019.final_biomass_table
transect_points = geopandas.points_from_xy(x=df['Longitude'], y=df['Latitude'])

# Resetting the index turns it into ordinary columns, so data can be
# selected by column name.
gdf = geopandas.GeoDataFrame(df, geometry=transect_points).reset_index()

## Generate keys

In [3]:
# Lookup table relating each haul to its stratum: a single 'strata' column
# indexed by 'Haul'.
strata_haul_df = epro_2019.strata_df.reset_index()[['Haul', 'strata']].set_index('Haul')

# Specimen data needed for key generation. reset_index() moves the former
# index into the first column (presumably the haul number -- verify against
# specimen_df). drop() already returns a new frame, so no extra copy is needed.
spec_w_strata = epro_2019.specimen_df.drop(columns='Specimen_Number').reset_index()

# Add the stratum of every specimen with ONE label-based lookup instead of a
# row-wise apply. The key is read positionally with .iloc: the previous
# `x[0]` on a labelled row relied on deprecated positional Series indexing.
# A haul that is absent from strata_haul_df still raises KeyError.
haul_keys = spec_w_strata.iloc[:, 0].to_numpy()
spec_w_strata['Strata'] = strata_haul_df.loc[haul_keys, 'strata'].to_numpy()

# Index by stratum; rebinding (rather than inplace=True) keeps the cell
# safe to re-run.
spec_w_strata = spec_w_strata.set_index('Strata')

In [4]:
# Sex/Length columns of the length table, with the former index moved into
# the first column (the astype below shows this column is named 'Haul').
length_with_haul = epro_2019.length_df[['Sex', 'Length']].reset_index()

# Add the stratum of every row with ONE label-based lookup instead of a
# row-wise apply. The key is read positionally with .iloc: the previous
# `x[0]` on a labelled row relied on deprecated positional Series indexing.
# A haul that is absent from strata_haul_df still raises KeyError.
haul_keys = length_with_haul.iloc[:, 0].to_numpy()
length_with_haul['Strata'] = strata_haul_df.loc[haul_keys, 'strata'].to_numpy()

# Index by stratum, expand the list-valued Sex/Length cells to one row per
# element, then fix the dtypes. Built as a single chain from a separately
# named frame so that re-running the cell gives the same result.
length_explode_df = (
    length_with_haul
    .set_index('Strata')
    .explode(['Sex', 'Length'])
    .astype({'Haul': int,
             'Sex': int,
             'Length': np.float64})
)

In [5]:
from EchoPro.load_stratification_data import LoadStrataData
# Helper object built from the survey instance; it provides
# get_biomass_constants (used below) and get_age_key_das (used later on).
strata_class = LoadStrataData(epro_2019)
# get the bins for the lengths
bins_len = epro_2019.params['bio_hake_len_bin']
# get the bins for the ages
bins_age = epro_2019.params['bio_hake_age_bin']

# Per-stratum constants computed from the specimen and length tables.
# Later cells read len_*_prop, spec_*_prop and len_wgt_*_prod from it via
# .sel(strata=...), so it is presumably an xarray Dataset -- TODO confirm.
bc = strata_class.get_biomass_constants(spec_w_strata, length_explode_df, bins_len, bins_age)
# display the result
bc


In [6]:
# array([  994.,   591.,   782.,  1353., 11877.,   982.,  1480.,   338.])

## Calculate the biomass density

In [7]:
# Load the NASC table.
nasc_df = epro_2019.load_nasc_data()

vl_start = nasc_df['VL start'].values
vl_end = nasc_df['VL end'].values

# Interval used for the area calculation: the distance between consecutive
# 'VL start' values. The last row has no successor, so its interval is its
# own 'VL end' minus 'VL start'.
last_interval = vl_end[-1] - vl_start[-1]
interval = np.append(np.diff(vl_start), last_interval)

median_interval = np.median(interval)

# Rows whose interval is more than 0.05 away from the median (outliers at
# the end of a transect) fall back to that row's own end - start distance.
ind_outliers = np.flatnonzero(np.abs(interval - median_interval) > 0.05)
interval[ind_outliers] = vl_end[ind_outliers] - vl_start[ind_outliers]

In [8]:
# Inspect sig_b for stratum 5, selecting by coordinate name (the same way
# the biomass-density cell looks it up).
float(epro_2019.strata_ds['sig_b'].sel(strata=5))

0.0037121744663321716

In [9]:
# epro_2019.strata_ds.sig_b.loc[5] = 0.00371214565751652

In [10]:
# float(epro_2019.strata_ds.sig_b.sel(strata=5))

In [11]:
# epro_2019.strata_ds.sig_b.sel(strata=5).values = 0.00371214565751652
# float(epro_2019.strata_ds.sig_b.sel(strata=5))

In [12]:
# Working table for the biomass density: the stratum, NASC and haul of every
# NASC row plus the interval computed above (assigned positionally).
bio_dense_df = nasc_df.loc[:, ['Stratum', 'NASC', 'Haul']].assign(interval=interval)

In [13]:
# 'wt' value of every haul, indexed by haul number.
wgt_vals = epro_2019.strata_df.reset_index().set_index('Haul')['wt']
wgt_vals_ind = wgt_vals.index

# TODO: replace this with DataSet representation
# Vectorised lookup instead of a row-wise apply: map every NASC row's haul to
# its 'wt' value, and use 0.0 for rows whose haul has no entry in wgt_vals.
# The explicit isin() mask (rather than fillna) keeps a NaN that is actually
# stored in wgt_vals distinguishable from "haul not present".
haul_has_weight = nasc_df['Haul'].isin(wgt_vals_ind)
mix_sa_ratio = nasc_df['Haul'].map(wgt_vals).where(haul_has_weight, 0.0)

# stored on nasc_df because the biomass-density cell reads it from there
nasc_df['mix_sa_ratio'] = mix_sa_ratio

In [14]:
# epro_2019.strata_ds.sig_b.loc[5] = 0.00371214565751652

# Look up sig_b for the stratum of every row with a single label-based
# selection instead of calling .sel() once per row inside DataFrame.apply.
# A stratum that is missing from strata_ds still raises KeyError.
strata_labels = nasc_df['Stratum'].to_numpy()
sig_b_per_row = epro_2019.strata_ds.sig_b.sel(strata=strata_labels).values

# n_A: (mix_sa_ratio * NASC) divided by the sig_b of the row's stratum, rounded
bio_dense_df['n_A'] = np.round((nasc_df['mix_sa_ratio']*nasc_df['NASC'])/sig_b_per_row)
# A: interval multiplied by the 'Spacing' column
bio_dense_df['A'] = bio_dense_df['interval']*nasc_df['Spacing']
# N_A: n_A scaled by A
bio_dense_df['N_A'] = bio_dense_df['n_A']*bio_dense_df['A']

In [15]:
# Per-row male/female proportion: the length-based plus the specimen-based
# proportion of the row's stratum. Both are fetched with one label-based
# selection per array instead of a .sel() call per row inside
# DataFrame.apply; a stratum missing from bc still raises KeyError.
strata_labels = bio_dense_df['Stratum'].to_numpy()
male_prop_per_row = (bc.len_M_prop.sel(strata=strata_labels).values
                     + bc.spec_M_prop.sel(strata=strata_labels).values)
female_prop_per_row = (bc.len_F_prop.sel(strata=strata_labels).values
                       + bc.spec_F_prop.sel(strata=strata_labels).values)

# n_A split by sex, rounded to whole numbers
nntk_male = np.round(bio_dense_df['n_A']*male_prop_per_row)
nntk_female = np.round(bio_dense_df['n_A']*female_prop_per_row)

bio_dense_df['nntk_male'] = nntk_male
bio_dense_df['nntk_female'] = nntk_female

In [16]:
# Per-row len_wgt_M_prod / len_wgt_F_prod of the row's stratum, fetched with
# one label-based selection per array instead of a .sel() call per row inside
# DataFrame.apply; a stratum missing from bc still raises KeyError.
strata_labels = bio_dense_df['Stratum'].to_numpy()
len_wgt_M_per_row = bc.len_wgt_M_prod.sel(strata=strata_labels).values
len_wgt_F_per_row = bc.len_wgt_F_prod.sel(strata=strata_labels).values

# sexed counts multiplied by the matching per-stratum factor
nWgt_male_int = bio_dense_df['nntk_male']*len_wgt_M_per_row
nWgt_female_int = bio_dense_df['nntk_female']*len_wgt_F_per_row

bio_dense_df['nWgt_male'] = nWgt_male_int
bio_dense_df['nWgt_female'] = nWgt_female_int

In [17]:
# Per-row len_wgt_prod of the row's stratum, fetched with one label-based
# selection instead of a .sel() call per row inside DataFrame.apply; a
# stratum missing from bc still raises KeyError.
strata_labels = bio_dense_df['Stratum'].to_numpy()
len_wgt_per_row = bc.len_wgt_prod.sel(strata=strata_labels).values

# Whatever part of n_A was assigned to neither males nor females, multiplied
# by the per-stratum factor.
unsexed_count = bio_dense_df['n_A'] - bio_dense_df['nntk_male'] - bio_dense_df['nntk_female']
nWgt_unsexed_int = unsexed_count*len_wgt_per_row
bio_dense_df['nWgt_unsexed'] = nWgt_unsexed_int

In [18]:
# Total = male + female + unsexed contributions of every row.
bio_dense_df['nWgt_total'] = (
    bio_dense_df['nWgt_male']
    .add(bio_dense_df['nWgt_female'])
    .add(bio_dense_df['nWgt_unsexed'])
)

In [19]:
# Spot-check rows 7554..7560 (inclusive) of the total. reset_index() gives a
# fresh 0..n-1 row index, so the rows can be picked by position.
bio_dense_df['nWgt_total'].reset_index().iloc[7554:7561]

Unnamed: 0,Transect,nWgt_total
7554,101,0.0
7555,101,4162.797172
7556,101,40.359888
7557,101,8263.883111
7558,101,221.57409
7559,101,32.021486
7560,101,48.032228


In [20]:
# Only specimens without any missing value take part in the age-length keys.
spec_w_strata = spec_w_strata.dropna(how='any')
(age_len_key_da,
 age_len_key_wgt_da,
 age_len_key_norm_da) = strata_class.get_age_key_das(spec_w_strata, bins_len, bins_age)

# TODO: it would probably be better to do an average of station 1 and 2 here... (Chu doesn't do this)
# Normalise the weight key so that it sums to one within each stratum.
stratum_total_wgt = age_len_key_wgt_da.sum(dim=['len_bins', 'age_bins'])
age_len_key_wgt_norm_da = age_len_key_wgt_da/stratum_total_wgt

# Each stratum's multiplier: one minus the share of normalised weight that
# sits in the first age bin. The share is deliberately still divided by the
# total of the normalised key so that the result stays exactly what it was
# for strata whose total weight is zero.
first_age_bin_wgt = age_len_key_wgt_norm_da.isel(age_bins=0).sum(dim='len_bins')
normalised_total_wgt = age_len_key_wgt_norm_da.sum(dim=['len_bins', 'age_bins'])
age2_wgt_proportion_da = 1.0 - first_age_bin_wgt/normalised_total_wgt

In [21]:
# Per-row multiplier of the row's stratum, fetched with one label-based
# selection instead of a .sel() call per row inside DataFrame.apply; a
# stratum missing from age2_wgt_proportion_da still raises KeyError.
strata_labels = bio_dense_df['Stratum'].to_numpy()
age2_prop_per_row = age2_wgt_proportion_da.sel(strata=strata_labels).values

# total scaled by the stratum's multiplier
nWgt_total_2_prop = bio_dense_df['nWgt_total']*age2_prop_per_row

bio_dense_df['nWgt_total_2_prop'] = nWgt_total_2_prop

In [22]:
# Reference values stored in a MATLAB file, used below as the comparison
# target for bio_dense_df['nWgt_total_2_prop'].
reference_mat_path = '../2019_consolidated_files/nWgt_ALL_ind_2_prop.mat'
mat = scipy.io.loadmat(reference_mat_path)
nwgt_total = mat['nWgt_ALL_int']

In [23]:
# Absolute difference between the reference grand total and the grand total
# computed in this notebook.
reference_sum = nwgt_total.sum()
computed_sum = bio_dense_df['nWgt_total_2_prop'].sum()
np.abs(reference_sum - computed_sum)

2545.195782184601

In [30]:
# Debug check: sum of 'length_average_haul' over the strata_df rows that
# belong to stratum 5.
epro_2019.strata_df.reset_index().set_index('strata').loc[5]['length_average_haul'].sum()

# FIXME: this value does not match the reference sum(data.bio.strata(5).Lave_j);
# the cause is still unknown.
# Next step: look at get_strata_ds().

1893.1079128839192

In [24]:
# %%time
# lat_INPFC = [np.NINF, 36, 40.5, 43.000, 45.7667, 48.5, 55.0000]  # INPFC
# epro_2019.run_cv_analysis(lat_INPFC, kriged_data=False)

In [25]:
# CV_JH_mean = 0.13377365505687697
# CPU times: user 2min 31s, sys: 312 ms, total: 2min 31s
# Wall time: 2min 31s