# EchoPro Python Workflow <a class="tocSkip">

# Import libraries and configure the Jupyter notebook

In [1]:
# libraries used in the Notebook
import matplotlib.pyplot as plt

# Python version of EchoPro
import EchoPro

# Allows us to grab the SemiVariogram class so we can use its models
from EchoPro.semivariogram import SemiVariogram as SV

# Allows us to easily use matplotlib widgets in our Notebook
%matplotlib widget
# %matplotlib inline

# Set up EchoPro for a specific survey year

## Initialize EchoPro object using configuration files

* `initialization_config.yml` -- parameters independent of survey year
* `survey_year_2019_config.yml` -- parameters specific to survey year
* `source` -- Define the region of data to use e.g. US, CAN, US & CAN
* `exclude_age1` -- States whether age 1 hake should be excluded from the analysis (`True` excludes them).

In [2]:
%%time
# Build the Survey object from the two YAML configuration files.
# NOTE(review): source=3 presumably selects the combined "US & CAN" region
# mentioned in the markdown for this section — confirm the integer-to-region mapping.
# NOTE(review): the output of this cell reports that checks of both config
# files still "need to be done" — confirm whether validation is pending.
survey_2019 = EchoPro.Survey(init_file_path='../config_files/initialization_config.yml',
                             survey_year_file_path='../config_files/survey_year_2019_config.yml',
                             source=3, 
                             exclude_age1=True)

A check of the initialization file needs to be done!
A check of the survey year file needs to be done!
CPU times: user 8.39 ms, sys: 852 µs, total: 9.24 ms
Wall time: 8.74 ms


## Load and process input data 
* This data is stored in `survey_2019`

In [3]:
%%time 
# Load the survey input data onto survey_2019.
# NOTE(review): the trailing comment looks like a leftover from an earlier call
# that passed file_type='biological' — confirm it can be deleted.
survey_2019.load_survey_data() #file_type='biological')

CPU times: user 1.78 s, sys: 21.6 ms, total: 1.8 s
Wall time: 1.82 s


In [4]:
# Preview the first rows of the specimen table held on survey_2019.
survey_2019.specimen_df.head()

Unnamed: 0_level_0,Sex,Length,Weight,Age
Haul,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1,24.0,0.08,1.0
1,1,23.0,0.06,1.0
1,1,22.0,0.06,1.0
1,1,21.0,0.06,1.0
1,2,22.0,0.06,1.0


## Compute the normalized biomass density
* The biomass density is stored in `survey_2019`

In [5]:
%%time
# Run the biomass-density computation; the result is kept on survey_2019
# (it is inspected via final_biomass_table in the next cell).
survey_2019.compute_biomass_density()

CPU times: user 331 ms, sys: 3.81 ms, total: 335 ms
Wall time: 335 ms


In [6]:
# Preview the first rows of the biomass table; it carries the
# normalized_biomass_density column alongside position and stratum columns.
survey_2019.final_biomass_table.head()

Unnamed: 0_level_0,Latitude,Longitude,Stratum,Spacing,normalized_biomass_density,geometry
Transect,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,34.397267,-121.143005,1,10.0,0.0,POINT (-121.14301 34.39727)
1,34.397391,-121.133196,1,10.0,0.0,POINT (-121.13320 34.39739)
1,34.397435,-121.123057,1,10.0,0.0,POINT (-121.12306 34.39744)
1,34.397394,-121.112871,1,10.0,0.0,POINT (-121.11287 34.39739)
1,34.397437,-121.102888,1,10.0,0.0,POINT (-121.10289 34.39744)


# Jolly-Hampton CV Analysis

* Compute the mean of the Jolly-Hampton CV value on data that has not been Kriged
* Note: the algorithm used to compute this value is random in nature

In [7]:
%%time
# Mean Jolly-Hampton CV on the non-kriged data (kriged_data=False).
# NOTE(review): the notebook text says this algorithm is random in nature and no
# seed is set in the visible cells, so the value may differ between runs —
# confirm whether run_cv_analysis accepts a seed.
CV_JH_mean = survey_2019.run_cv_analysis(kriged_data=False)

CPU times: user 1.56 s, sys: 18.5 ms, total: 1.58 s
Wall time: 1.58 s


In [8]:
# Report the mean CV formatted to four decimal places.
print(f"Mean Jolly-Hampton CV: {CV_JH_mean:.4f}")

Mean Jolly-Hampton CV: 0.1338


In [9]:
from EchoPro.cv_analysis.cv_analysis import get_transect_strata_info

In [10]:
import numpy as np

# Seven latitude edges passed to get_transect_strata_info as `lat_inpfc`
# (the strata table displayed further down lists six lat_INPFC strata, 0-5).
# The first edge is negative infinity, so the southernmost bin has no lower bound.
# -np.inf is used because the np.NINF alias was removed in NumPy 2.0.
lat_inpfc = (-np.inf, 36, 40.5, 43.000, 45.7667, 48.5, 55.0000)

In [12]:
# Build two tables from the non-kriged data (kriged_data=False) and the latitude
# edges in lat_inpfc: one per transect and one per stratum (both displayed below).
transect_info, strata_info = get_transect_strata_info(survey_2019, lat_inpfc, kriged_data=False)

In [13]:
# Same preview as the earlier final_biomass_table.head() cell, repeated after the
# call to get_transect_strata_info; the saved output is identical to the earlier one.
survey_2019.final_biomass_table.head()

Unnamed: 0_level_0,Latitude,Longitude,Stratum,Spacing,normalized_biomass_density,geometry
Transect,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,34.397267,-121.143005,1,10.0,0.0,POINT (-121.14301 34.39727)
1,34.397391,-121.133196,1,10.0,0.0,POINT (-121.13320 34.39739)
1,34.397435,-121.123057,1,10.0,0.0,POINT (-121.12306 34.39744)
1,34.397394,-121.112871,1,10.0,0.0,POINT (-121.11287 34.39739)
1,34.397437,-121.102888,1,10.0,0.0,POINT (-121.10289 34.39744)


In [14]:
# Select the rows under outer index label 0. The result is indexed by Transect,
# so transect_info appears to carry a two-level index.
# NOTE(review): the outer level is presumably the lat_INPFC stratum number — confirm.
transect_info.loc[0]

Unnamed: 0_level_0,max_longitude,min_longitude,mean_latitude,mean_spacing,biomass,distance,area
Transect,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,-120.441922,-121.143005,34.397152,10.0,0.0,34.809043,348.090427
2,-120.682281,-121.395492,34.563804,10.003386,0.0,35.340819,353.527864
3,-120.699543,-121.723972,34.730598,10.020945,0.0,50.660578,507.666861
4,-120.731016,-121.707938,34.897836,10.011714,0.0,48.213676,482.701558
5,-120.726321,-121.70255,35.064322,9.99037,0.0,48.082068,480.357629
6,-120.913148,-121.785844,35.230848,9.992087,0.0,42.895368,428.614233
7,-120.926083,-121.854221,35.397391,10.002347,0.0,45.527039,455.377221
8,-121.155829,-122.055597,35.56426,9.997312,0.0,44.044326,440.324873
9,-121.354667,-122.177445,35.730635,10.00605,323175.9,40.192208,402.165236
10,-121.503212,-122.216211,35.897795,10.002429,1313269.0,34.756645,347.650866


In [15]:
# Display the per-stratum table.
# NOTE(review): rhom, biomass and var_rhom are empty in the saved output —
# presumably they are filled in by a later step; confirm.
strata_info

Unnamed: 0_level_0,num_transects,total_transect_area,rhom,biomass,var_rhom
lat_INPFC_stratum,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,10,4246.476768,,,
1,24,10042.011575,,,
2,13,5773.924105,,,
3,15,7059.964585,,,
4,15,7067.793322,,,
5,36,19318.827007,,,


In [24]:
# Display the kriging results table.
# NOTE(review): this cell was executed as In [24], after the kriging cell
# (In [21]) that appears later in the notebook. On Restart & Run All it runs
# before kriging — presumably krig_results_gdf is not populated yet at this
# point; confirm and move this cell below the "Perform Kriging" section.
survey_2019.krig_results_gdf

Unnamed: 0,Latitude of centroid,Longitude of centroid,Area (km^2),Cell portion,geometry,krig_biomass_vp,krig_biomass_ep,krig_biomass_eps,area_calc,krig_biomass_vals
0,49.099727,-126.024144,21.436900,1.000000,POINT (-126.02414 49.09973),0.000000,0.295113,,6.250000,0.000000
1,49.057959,-126.024127,21.436900,1.000000,POINT (-126.02413 49.05796),0.000000,0.029303,,6.250000,0.000000
2,49.016196,-126.024110,21.436900,1.000000,POINT (-126.02411 49.01620),0.000000,0.264295,,6.250000,0.000000
3,48.974438,-126.024093,21.436900,1.000000,POINT (-126.02409 48.97444),0.000000,0.566269,,6.250000,0.000000
4,48.932686,-126.024076,21.436900,1.000000,POINT (-126.02408 48.93269),50128.409807,0.717068,1.234471,6.250000,0.313303
...,...,...,...,...,...,...,...,...,...,...
19838,50.446324,-129.937551,0.000555,0.000026,POINT (-129.93755 50.44632),0.000000,1.734090,,0.000162,0.000000
19839,39.965355,-126.048345,0.000473,0.000022,POINT (-126.04835 39.96536),0.000000,1.738201,,0.000138,0.000000
19840,38.233143,-123.004918,0.000284,0.000013,POINT (-123.00492 38.23314),0.000000,0.868506,,0.000083,0.000000
19841,48.073189,-127.110901,0.000230,0.000011,POINT (-127.11090 48.07319),0.000000,1.775854,,0.000067,0.000000


In [36]:
# Independent copy of the three columns needed for the virtual-transect work:
# centroid latitude, centroid longitude and the kriged biomass values.
# NOTE(review): depends on krig_results_gdf, which is read again after the kriging
# cells further down; in top-to-bottom order this cell precedes them — confirm
# and relocate below the kriging section.
reduced_table = survey_2019.krig_results_gdf[["Latitude of centroid", "Longitude of centroid", "krig_biomass_vals"]].copy()

In [37]:
# number of "virtual transects" within a latitude degree 
n_transect_per_lat = 5  # TODO: make this an input 

# latitude array with equal increment
# Scale latitude by n_transect_per_lat, add 0.5, round, then scale back: every
# centroid gets a label that is a multiple of 1/n_transect_per_lat degrees
# (0.2 here). E.g. 34.414815 -> round(172.57) / 5 = 34.6, as the saved output shows.
reduced_table["lat_eq_inc"] = np.round(reduced_table["Latitude of centroid"]*n_transect_per_lat + 0.5)/n_transect_per_lat

In [42]:
# Make lat_eq_inc the index so rows can be selected by virtual-transect latitude.
# set_index drops the column it consumes, so an unguarded second run of this cell
# would raise KeyError; the guard makes a re-run a no-op.
if reduced_table.index.name != "lat_eq_inc":
    reduced_table = reduced_table.set_index("lat_eq_inc")

In [None]:
# add columns to table
# Placeholder columns, initialised to NaN.
# NOTE(review): this cell shows "In [None]" (never executed) and the display of
# reduced_table in the following cell lacks these columns — confirm whether the
# cell is still needed.
reduced_table["biomass"] = np.nan
reduced_table["distance"] = np.nan
reduced_table["area"] = np.nan

In [44]:
# Rows assigned to the virtual transect labelled 34.6.
# NOTE(review): this is an exact float-label lookup; it relies on the literal 34.6
# matching the value produced by the rounding formula — confirm this stays robust
# for other labels or values of n_transect_per_lat.
reduced_table.loc[34.6]

Unnamed: 0_level_0,Latitude of centroid,Longitude of centroid,krig_biomass_vals
lat_eq_inc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
34.6,34.598967,-121.554049,0.0
34.6,34.596905,-121.503391,0.0
34.6,34.594819,-121.452735,0.0
34.6,34.592710,-121.402082,0.0
34.6,34.590577,-121.351432,0.0
...,...,...,...
34.6,34.510425,-122.146779,0.0
34.6,34.595840,-122.192680,0.0
34.6,34.414815,-120.707147,0.0
34.6,34.413401,-120.665926,0.0


In [39]:
# unique equal-spacing transects
# An earlier cell moves lat_eq_inc into the index (set_index drops the column), so
# reading reduced_table["lat_eq_inc"] raises KeyError when the notebook runs top to
# bottom. Read the labels from the index when they live there, else from the column.
if reduced_table.index.name == "lat_eq_inc":
    lat_eq_inc_labels = reduced_table.index
else:
    lat_eq_inc_labels = reduced_table["lat_eq_inc"]

# np.unique returns the sorted distinct labels as an array
uniq_lat_eq_inc = np.unique(lat_eq_inc_labels)

In [45]:
# List every virtual-transect latitude label together with its position.
# NOTE(review): this prints one line per label (106 in the saved output) and the
# loop body does nothing else — presumably scaffolding for per-transect work; confirm.
for ind, uniq_lat in enumerate(uniq_lat_eq_inc):
    
    print(ind, uniq_lat)

0 34.6
1 34.8
2 35.0
3 35.2
4 35.4
5 35.6
6 35.8
7 36.0
8 36.2
9 36.4
10 36.6
11 36.8
12 37.0
13 37.2
14 37.4
15 37.6
16 37.8
17 38.0
18 38.2
19 38.4
20 38.6
21 38.8
22 39.0
23 39.2
24 39.4
25 39.6
26 39.8
27 40.0
28 40.2
29 40.4
30 40.6
31 40.8
32 41.0
33 41.2
34 41.4
35 41.6
36 41.8
37 42.0
38 42.2
39 42.4
40 42.6
41 42.8
42 43.0
43 43.2
44 43.4
45 43.6
46 43.8
47 44.0
48 44.2
49 44.4
50 44.6
51 44.8
52 45.0
53 45.2
54 45.4
55 45.6
56 45.8
57 46.0
58 46.2
59 46.4
60 46.6
61 46.8
62 47.0
63 47.2
64 47.4
65 47.6
66 47.8
67 48.0
68 48.2
69 48.4
70 48.6
71 48.8
72 49.0
73 49.2
74 49.4
75 49.6
76 49.8
77 50.0
78 50.2
79 50.4
80 50.6
81 50.8
82 51.0
83 51.2
84 51.4
85 51.6
86 51.8
87 52.0
88 52.2
89 52.4
90 52.6
91 52.8
92 53.0
93 53.2
94 53.4
95 53.6
96 53.8
97 54.0
98 54.2
99 54.4
100 54.6
101 54.8
102 55.0
103 55.2
104 55.4
105 55.6


# Obtain Kriging Mesh Data

## Access Kriging mesh object
* Reads mesh data files specified by `survey_2019` 

In [17]:
# Obtain the kriging mesh object from the survey; it is transformed in the next
# cells and later passed to krig.run_biomass_kriging.
krig_mesh = survey_2019.get_kriging_mesh()

## Apply coordinate transformations
* Longitude transformation
* Lat/Lon to distance

### Transect points

In [18]:
# Apply the coordinate transformation to the transect points (coord_type='transect').
krig_mesh.apply_coordinate_transformation(coord_type='transect')

### Mesh points

In [19]:
# Apply the coordinate transformation to the mesh points (coord_type='mesh').
krig_mesh.apply_coordinate_transformation(coord_type='mesh')

# Perform Ordinary Kriging of biomass density

* transformed mesh points
* semi-variogram model
* normalized biomass density 

## Initialize Kriging routine

In [20]:
# Settings handed to survey_2019.get_kriging below.
kriging_params = {
    # kriging parameters
    "k_max": 10,
    "k_min": 3,
    "R": 0.0226287,
    "ratio": 0.001,
    # parameters for the semi-variogram model
    "s_v_params": {
        "nugget": 0.0,
        "sill": 0.95279,
        "ls": 0.0075429,
        "exp_pow": 1.5,
        "ls_hole_eff": 0.0,
    },
    # semi-variogram model function taken from the SemiVariogram class
    "s_v_model": SV.generalized_exp_bessel,
}

# uncomment to use widget values
# kriging_params.update(semi_vario.get_params_for_kriging())

# initialize the kriging routine with the settings above
krig = survey_2019.get_kriging(kriging_params)

## Perform Kriging
* Also generates total biomass at mesh points

In [21]:
%%time
# Run kriging on the transformed mesh. Per the notebook text this also generates
# total biomass at the mesh points; results are read from
# survey_2019.krig_results_gdf in the next cell.
krig.run_biomass_kriging(krig_mesh)

CPU times: user 5.31 s, sys: 6.08 s, total: 11.4 s
Wall time: 8.04 s


In [23]:
# Sum the krig_biomass_vals column and report it to three decimals; the message
# labels the total as kmt.
print(f"Total Kriged Biomass Estimate: {survey_2019.krig_results_gdf.krig_biomass_vals.sum():.3f} kmt")

Total Kriged Biomass Estimate: 1725.033 kmt
