# EchoPro Transect Subset Workflow

## Import libraries and configure the Jupyter notebook

In [None]:
# libraries used in the Notebook
import matplotlib.pyplot as plt
import math 
import numpy as np 

# Python version of EchoPro
import EchoPro

# Allows us to grab the SemiVariogram class so we can use its models
from EchoPro.computation import SemiVariogram as SV

# obtain all visualization routines
from EchoPro.visualization import plot_layered_points, plot_kriging_results

# Allows us to easily use matplotlib widgets in our Notebook
# %matplotlib widget

## Set up EchoPro for a specific survey year

### Initialize EchoPro object using configuration files

* `initialization_config.yml` -- parameters independent of survey year
* `survey_year_2019_config.yml` -- parameters specific to survey year
* `source` -- Defines the region of data to use, e.g. US, CAN, or US & CAN
* `exclude_age1` -- States whether age 1 hake should be excluded from the analysis.

In [None]:
%%time
survey_2019 = EchoPro.Survey(init_file_path='../config_files/initialization_config.yml',
                             survey_year_file_path='../config_files/survey_year_2019_config.yml',
                             source=3, 
                             exclude_age1=True)

### Load and process input data 
* This data is stored in `survey_2019`

In [None]:
%%time 
# load and process the input data; the results are kept on survey_2019
# (for example survey_2019.nasc_df, which is inspected in the next cell)
survey_2019.load_survey_data()

In [None]:
# preview the first rows of nasc_df to check what was loaded
survey_2019.nasc_df.head()

### Select a subset of the available transects to analyze

In [None]:
# obtain all unique transects in nasc_df: the unique values of its index,
# returned as an array (presumably nasc_df is indexed by transect number -- confirm)
unique_transects = survey_2019.nasc_df.index.unique().values

In [None]:
# percentage of the available transects to drop from the analysis
removal_percentage = 50.0

# fraction of transects that is kept, and the resulting count (rounded down)
keep_fraction = 1.0 - removal_percentage / 100.0
num_sel_transects = math.floor(len(unique_transects) * keep_fraction)

# random number generator with a fixed seed, so the draw is reproducible
rng = np.random.default_rng(seed=1234)

# draw the subset without replacement, so no transect is picked twice
drawn_transects = rng.choice(unique_transects, size=num_sel_transects, replace=False)
selected_transects = list(drawn_transects)

### Compute the areal biomass density on subset of transects
* The areal biomass density is stored in `survey_2019.bio_calc.transect_results_gdf` as `biomass_density_adult`

In [None]:
%%time
# compute the transect results using only the randomly selected transects;
# the output is read from survey_2019.bio_calc.transect_results_gdf below
survey_2019.compute_transect_results(selected_transects=selected_transects)

In [None]:
# preview the first rows of the transect results computed on the subset
survey_2019.bio_calc.transect_results_gdf.head()

In [None]:
# sum biomass_adult over the transect results and scale by 1e-6 to report it in kmt
transect_results = survey_2019.bio_calc.transect_results_gdf
total_biomass_kmt = 1e-6 * transect_results.biomass_adult.sum()
print(f"Total Biomass Estimate without Kriging: {total_biomass_kmt:.3f} kmt")

**Note:**

**After the biomass density has been calculated using the selected transects, all of the steps that we have previously run can be completed. However, we recommend computing the semi-variogram parameters using the full data set rather than a subset of transects. To remind the user of this, a warning is raised if the user chooses to run the semi-variogram routine (we will demonstrate this later).**

## Jolly-Hampton CV Analysis

* Compute the mean of the Jolly-Hampton CV value on data that has not been Kriged
* Note: the algorithm used to compute this value is random in nature

In [None]:
%%time
# mean Jolly-Hampton CV on the data that has not been kriged (kriged_data=False);
# the notebook text notes that the algorithm behind this value is random in nature
CV_JH_mean = survey_2019.run_cv_analysis(kriged_data=False)

In [None]:
# report the un-kriged mean CV to four decimal places
print(f"Mean Jolly-Hampton CV: {CV_JH_mean:.4f}")

## Obtain Kriging Mesh Data

### Access Kriging mesh object
* Reads mesh data files specified by `survey_2019` 

In [None]:
# obtain the kriging mesh object from the survey; it is reused below for
# plotting, coordinate transformation, the semi-variogram and kriging
krig_mesh = survey_2019.get_kriging_mesh()

#### Plot the Mesh, Transects and smoothed isobath contour

* Generate interactive map using the Folium package
* Mesh points are in gray
* Transect points are represented by a changing color gradient
* Smoothed contour points (200m isobath) are in blue 

In [None]:
# build the layered map from the kriging mesh object
# NOTE(review): plot_mesh_points=False presumably leaves the gray mesh-point layer
# out of this map -- confirm against plot_layered_points
fmap = plot_layered_points(krig_mesh, plot_mesh_points=False)
# a bare name on the last line of the cell makes the notebook display the map
fmap

## Apply coordinate transformations
* Longitude transformation
* Lat/Lon to distance

### Transect points

In [None]:
# apply the coordinate transformation to the transect points
krig_mesh.apply_coordinate_transformation(coord_type='transect')

### Mesh points

In [None]:
# apply the coordinate transformation to the mesh points; the result is read
# from krig_mesh.transformed_mesh_df in the plotting cell below
krig_mesh.apply_coordinate_transformation(coord_type='mesh')

In [None]:
# draw the transformed mesh points as small red star markers
mesh_df = krig_mesh.transformed_mesh_df
plt.plot(mesh_df.x_mesh, mesh_df.y_mesh, 'r*', markersize=1.25)
plt.show()

## Try to initialize the Semi-Variogram

In [None]:
# lag settings handed to the semi-variogram
semi_vario_params = {'nlag': 30, 'lag_res': 0.002}

# requesting the semi-variogram here is what triggers the reminder described below
semi_vario = survey_2019.get_semi_variogram(krig_mesh, params=semi_vario_params)

**As expected, a warning pops up and reminds us not to run the semi-variogram calculation and model fitting using the data that was generated from a subset of the full data set.**

## Perform Ordinary Kriging of areal biomass density

* transformed mesh points
* semi-variogram model
* areal biomass density

### Initialize Kriging routine

In [None]:
# parameters for the semi-variogram model
semi_variogram_params = {
    'nugget': 0.0,
    'sill': 0.95279,
    'ls': 0.0075429,
    'exp_pow': 1.5,
    'ls_hole_eff': 0.0,
}

kriging_params = {
    # kriging parameters
    'k_max': 10,
    'k_min': 3,
    'R': 0.0226287,
    'ratio': 0.001,
    # semi-variogram model parameters and the model function itself
    's_v_params': semi_variogram_params,
    's_v_model': SV.generalized_exp_bessel,
}

# initialize the kriging routine with the parameters above
krig = survey_2019.get_kriging(kriging_params)

### Perform Kriging
* Also generates total biomass at mesh points

In [None]:
%%time
# run the biomass kriging on the mesh; the results are read from
# survey_2019.bio_calc.kriging_results_gdf in the next cell
krig.run_biomass_kriging(krig_mesh)

In [None]:
# short handle on the kriging results held by the survey object
# (plain attribute access: no explicit copy is made here)
krig_results = survey_2019.bio_calc.kriging_results_gdf

Convert from kg to kmt

In [None]:
# sum the kriged biomass_adult values and scale by 1e-6 (kg -> kmt)
total_kriged_kmt = 1e-6 * krig_results.biomass_adult.sum()
print(f"Total Kriged Biomass Estimate: {total_kriged_kmt:.3f} kmt")

## Jolly-Hampton CV Analysis for Kriged data 

* Compute the mean of the Jolly-Hampton CV value on data that has been Kriged
* Note: the algorithm used to compute this value is random in nature

In [None]:
# mean Jolly-Hampton CV computed on the kriged data (kriged_data=True)
CV_JH_mean_kriged = survey_2019.run_cv_analysis(kriged_data=True)

In [None]:
# report the kriged mean CV to four decimal places
print(f"Mean Jolly-Hampton CV for data with Kriging: {CV_JH_mean_kriged:.4f}")

### Plot Kriged Biomass estimate in kmt

In [None]:
# plot mesh points with biomass values > 0
# Scale biomass_adult by 1e-6 (kg -> kmt) on a copy made only for plotting.
# krig_results is the object stored on survey_2019.bio_calc, so rescaling it in
# place would change the stored results and would shrink the values by another
# factor of 1e-6 every time this cell is re-run.
krig_results_kmt = krig_results.assign(biomass_adult=1e-6 * krig_results.biomass_adult)
plot_kriging_results(krig_results_kmt, krig_field_name="biomass_adult", greater_than_0=True)