# EchoPro Python Workflow <a class="tocSkip">

# Import libraries and configure the Jupyter notebook

In [1]:
# libraries used in the Notebook
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd 

# Python version of EchoPro
import EchoPro

# Allows us to grab the SemiVariogram class so we can use its models
from EchoPro.semivariogram import SemiVariogram as SV

# Allows us to easily use matplotlib widgets in our Notebook
%matplotlib widget
# %matplotlib inline

In [2]:
%%time
survey_2019 = EchoPro.Survey(init_file_path='../config_files/initialization_config.yml',
                             survey_year_file_path='../config_files/survey_year_2019_config.yml',
                             source=3, 
                             exclude_age1=True)

A check of the initialization file needs to be done!
A check of the survey year file needs to be done!
CPU times: user 10.8 ms, sys: 1.74 ms, total: 12.5 ms
Wall time: 11.3 ms


In [3]:
%%time 
# Load the survey data into `survey_2019`; later cells read attributes such as
# `survey_2019.gear_df` and `survey_2019.nasc_df` from it.
survey_2019.load_survey_data() 

CPU times: user 1.8 s, sys: 28.1 ms, total: 1.83 s
Wall time: 1.85 s


In [4]:
# Transect numbers passed to `compute_biomass_density(selected_transects=...)`.
# The original ordering is kept as-is (it is not sorted).
TX_selected = [
    4, 5, 11, 14, 17, 19, 20, 22, 34, 39,
    47, 49, 53, 55, 61, 69, 79, 82, 85, 87,
    89, 93, 95, 97, 100, 103, 105, 111, 123, 125,
    131, 133, 135, 137, 140, 13, 18, 31, 50, 73,
    74, 77, 80, 38, 66,
]

In [33]:
# Count the distinct index values of `survey_2019.gear_df`.
len(survey_2019.gear_df.index.unique())

100

In [46]:
# Calculate a stratum for every row from the Latitude column in nasc_df.
# `lat_inpfc` holds the bin edges in degrees latitude; the first edge is -inf so that
# every latitude below 36 falls into the first bin.
# `-np.inf` is used instead of `np.NINF`, which was removed in NumPy 2.0.
lat_inpfc = (-np.inf, 36, 40.5, 43.000, 45.7667, 48.5, 55.0000)

# right=False makes each bin closed on the left and open on the right: [edge_i, edge_i+1).
# Labels are 0-based bin numbers (0 .. number_of_bins - 1); latitudes >= 55 get NaN.
# NOTE(review): confirm that nasc_df["Stratum"] uses the same numbering and the same
# stratification before comparing against it.
nasc_calc_stratum = pd.cut(
    survey_2019.nasc_df["Latitude"],
    lat_inpfc,
    labels=range(len(lat_inpfc) - 1),
    right=False,
)

In [64]:
# Element-wise comparison of the latitude-derived stratum with the Stratum column
# already present in nasc_df; the result is displayed as the cell output.
calc_stratum_vals = nasc_calc_stratum.values.to_numpy()
stored_stratum_vals = survey_2019.nasc_df["Stratum"].values
are_they_equal = calc_stratum_vals == stored_stratum_vals
are_they_equal

array([False, False, False, ..., False, False, False])

In [67]:
# Number of entries where the two stratum assignments agree
# (np.count_nonzero counts the True entries of `are_they_equal`).
num_equal = np.count_nonzero(are_they_equal)
num_equal

1253

In [70]:
# Every entry of `are_they_equal` that was not counted as equal is a mismatch.
num_not_equal = are_they_equal.size - num_equal
num_not_equal

8025

In [78]:
# Share of entries (in percent) whose stored stratum differs from the computed one.
percent_not_equal = (num_not_equal/are_they_equal.size)*100
print(f"Percent that do not equal algorithmic way = {percent_not_equal} %")

Percent that do not equal algorithmic way = 86.49493425307179 %


## Compute the normalized biomass density
* The normalized biomass density is stored in `survey_2019.bio_calc.final_biomass_table` (column `normalized_biomass_density`)

In [5]:
%%time
# Compute the biomass density, passing the transect numbers in `TX_selected`.
# NOTE(review): the saved output of `final_biomass_table.index.unique()` lists transects
# (e.g. 15, 21, 51) that are not in `TX_selected` -- confirm how `selected_transects`
# is interpreted, or whether the saved outputs come from different runs.
survey_2019.compute_biomass_density(selected_transects=TX_selected)

CPU times: user 133 ms, sys: 6.14 ms, total: 139 ms
Wall time: 145 ms


In [6]:
# Preview the first rows of the biomass table (indexed by Transect).
survey_2019.bio_calc.final_biomass_table.head()

Unnamed: 0_level_0,Latitude,Longitude,Stratum,Spacing,normalized_biomass_density,geometry
Transect,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
13,36.397522,-122.363901,4,9.982472,6825.04289,POINT (-122.36390 36.39752)
13,36.397543,-122.3535,4,9.982472,35925.942494,POINT (-122.35350 36.39754)
13,36.397519,-122.343364,4,9.982472,38194.892945,POINT (-122.34336 36.39752)
13,36.397537,-122.33296,4,9.982472,48387.470681,POINT (-122.33296 36.39754)
13,36.397492,-122.322726,4,9.982472,94905.891709,POINT (-122.32273 36.39749)


In [12]:
# Mean Stratum value per Transect (index level 0). A non-integer mean means the rows
# of that transect do not all carry the same Stratum value.
survey_2019.bio_calc.final_biomass_table["Stratum"].groupby(level=0).mean()

Transect
13     4.000000
14     4.000000
15     4.000000
17     4.000000
18     4.000000
19     4.000000
20     4.000000
21     4.250000
22     5.000000
31     5.250000
38     5.000000
39     5.000000
47     5.000000
49     5.000000
50     5.000000
51     5.000000
53     5.000000
54     5.000000
55     5.000000
62     5.000000
63     5.000000
65     5.000000
66     5.000000
67     5.000000
69     5.000000
70     5.000000
71     5.000000
73     5.000000
76     5.000000
78     5.000000
79     5.000000
80     5.000000
81     5.000000
83     7.000000
85     7.000000
86     7.000000
87     7.390244
89     7.000000
90     7.000000
91     7.000000
92     7.000000
94     7.000000
95     7.000000
97     7.000000
98     7.000000
99     7.000000
100    7.000000
Name: Stratum, dtype: float64

In [10]:
# List the distinct Transect numbers present in the biomass table index.
survey_2019.bio_calc.final_biomass_table.index.unique()

Int64Index([ 13,  14,  15,  17,  18,  19,  21,  20,  22,  31,  38,  39,  47,
             49,  50,  51,  53,  54,  55,  62,  63,  65,  66,  67,  69,  70,
             71,  73,  76,  78,  79,  80,  81,  83,  85,  86,  87,  89,  91,
             92,  90,  94,  95,  97,  98,  99, 100],
           dtype='int64', name='Transect')

In [8]:
# Sum of the normalized biomass density over all rows of the biomass table.
survey_2019.bio_calc.final_biomass_table["normalized_biomass_density"].sum()
# Author's reference value ("normal"): 298438694.4207147
# NOTE(review): presumably the sum obtained without passing `selected_transects` -- confirm.

100846365.90314913

# Jolly-Hampton CV Analysis

* Compute the mean of the Jolly-Hampton CV value on data that has not been Kriged
* Note: the algorithm used to compute this value is stochastic, so the result varies from run to run

In [9]:
%%time
# Mean Jolly-Hampton CV computed on the data that has not been Kriged (kriged_data=False).
# NOTE(review): in the saved run this cell fails with
# "ValueError: Grouper and axis must be same length", so `CV_JH_mean` is never assigned
# and none of the following cells were executed. Resolve the error before relying on
# this notebook.
# NOTE(review): no random seed is set anywhere in this notebook although the result is
# described as random -- check whether `run_cv_analysis` offers a way to seed it.
CV_JH_mean = survey_2019.run_cv_analysis(kriged_data=False)

ValueError: Grouper and axis must be same length

In [None]:
# Report the mean CV formatted to four decimal places.
print(f"Mean Jolly-Hampton CV: {CV_JH_mean:.4f}")

# Obtain Kriging Mesh Data

## Access Kriging mesh object
* Reads mesh data files specified by `survey_2019` 

In [None]:
# Obtain the Kriging mesh object from `survey_2019`; it is reused by the plotting,
# coordinate-transformation, semi-variogram and Kriging cells below.
krig_mesh = survey_2019.get_kriging_mesh()

### Plot the Mesh, Transects and smoothed isobath contour

* Generate interactive map using the Folium package
* Mesh points are in gray
* Transect points are represented by a changing color gradient
* Smoothed contour points (200m isobath) are in blue 

In [None]:
# Build the layered map of points and display it as the cell output.
fmap = krig_mesh.plot_layered_points()
fmap

## Apply coordinate transformations
* Longitude transformation
* Lat/Lon to distance

### Transect points

In [None]:
# Apply the coordinate transformation to the transect points.
krig_mesh.apply_coordinate_transformation(coord_type='transect')

### Mesh points

In [None]:
# Apply the coordinate transformation to the mesh points.
# Presumably this populates `krig_mesh.transformed_mesh_df`, which is plotted next -- confirm.
krig_mesh.apply_coordinate_transformation(coord_type='mesh')

In [None]:
# Plot the transformed mesh points on a dedicated figure.
# With the interactive `%matplotlib widget` backend, a bare `plt.plot` draws onto
# whichever figure is currently active, so re-running the cell (or running it after
# another plot) piles points onto an existing figure. Creating the figure explicitly
# avoids that, and the title/labels let the figure stand on its own.
fig, ax = plt.subplots()
ax.plot(krig_mesh.transformed_mesh_df.x_mesh,
        krig_mesh.transformed_mesh_df.y_mesh, 'r*', markersize=1.25)
ax.set(title="Transformed mesh points", xlabel="x_mesh", ylabel="y_mesh")
plt.show()

# Compute biomass density Semi-Variogram and fit a model

* Compute the normalized semi-variogram using the normalized biomass density
* Fit a model to the semi-variogram values

## Compute the semi-variogram

### Initialize semi-variogram calculation
* Transformed transect points
* Parameters specific to semi-variogram algorithm

In [None]:
# Settings handed to the semi-variogram computation via `params`.
semi_vario_settings = {'nlag': 30, 'lag_res': 0.002}

# Set up the semi-variogram calculation for the Kriging mesh.
semi_vario = survey_2019.get_semi_variogram(krig_mesh, params=semi_vario_settings)

### Compute the normalized semi-variogram

In [None]:
%%time
# Run the semi-variogram calculation set up in the previous cell.
semi_vario.calculate_semi_variogram()

## Fit a model to the semi-variogram

* A widget to easily fit the model

In [None]:
# Display the interactive model-fitting widget.
# NOTE(review): values chosen in the widget are only used for Kriging if the
# `kriging_params.update(...)` line in the Kriging cell is uncommented.
semi_vario.get_widget()

# Perform Ordinary Kriging of biomass density

* transformed mesh points
* semi-variogram model
* normalized biomass density 

## Initialize Kriging routine

In [None]:
# Parameters of the semi-variogram model used during Kriging
semi_variogram_model_params = {
    'nugget': 0.0,
    'sill': 0.95279,
    'ls': 0.0075429,
    'exp_pow': 1.5,
    'ls_hole_eff': 0.0,
}

kriging_params = {
    # kriging parameters
    'k_max': 10,
    'k_min': 3,
    'R': 0.0226287,
    'ratio': 0.001,

    # semi-variogram model parameters and the model function they belong to
    's_v_params': semi_variogram_model_params,
    's_v_model': SV.generalized_exp_bessel,
}

# Uncomment to replace the hard-coded values with the ones chosen in the widget
# kriging_params.update(semi_vario.get_params_for_kriging())

# Initialize the Kriging routine
krig = survey_2019.get_kriging(kriging_params)

## Perform Kriging
* Also generates the total biomass at the mesh points (read from `survey_2019.krig_results_gdf` in the cells below)

In [None]:
%%time
# Run Kriging of the biomass on the mesh; the following cells read the results from
# `survey_2019.krig_results_gdf`.
krig.run_biomass_kriging(krig_mesh)

In [None]:
# Sum the Kriged biomass over all mesh points and scale by 1e-6 for reporting in kmt.
total_kriged_biomass_kmt = 1e-6*survey_2019.krig_results_gdf.krig_biomass_vals.sum()
print(f"Total Kriged Biomass Estimate: {total_kriged_biomass_kmt:.3f} kmt")

## Plot Kriged Biomass estimate in kmt

In [None]:
# Plot all mesh points, with the Kriged biomass scaled by 1e-6 (kmt) for display.
# The scaling is applied to a copy (`assign` returns a new frame) instead of overwriting
# `survey_2019.krig_results_gdf.krig_biomass_vals`. Overwriting made the cell
# non-idempotent: every re-run shrank the stored values by another factor of 1e-6 and
# silently changed the total computed from the same column.
krig_results_kmt_gdf = survey_2019.krig_results_gdf.assign(
    krig_biomass_vals=lambda gdf: 1e-6 * gdf.krig_biomass_vals
)
krig.plot_kriging_results(krig_results_kmt_gdf, krig_field_name="krig_biomass_vals")