### Setup

In [None]:
# Uncomment this cell if necessary libraries are not already installed

# ! pip install --upgrade pip

# ! pip install altair
# ! pip install ipywidgets
# ! pip install pandas
# ! pip install numpy


### Import Libraries

In [1]:
from IPython.display import display
from IPython.display import Javascript

import ipywidgets as widgets
import numpy as np 
import pandas as pd
import altair as alt


In [2]:
# enable extension for widgets
! jupyter nbextension enable --py widgetsnbextension

# disable the default 5000 limit on number of rows
alt.data_transformers.disable_max_rows()


Config option `kernel_spec_manager_class` not recognized by `EnableNBExtensionApp`.
Enabling notebook extension jupyter-js-widgets/extension...
      - Validating: [32mOK[0m


DataTransformerRegistry.enable('default')

### Load data from S3

In [3]:
# define CloudFront domain name (to access S3) and data

cloudfront = 'https://d3cu2src083cxg.cloudfront.net'
data_key = 'rfmo_query_result.csv'

data_location = '{}/{}'.format(cloudfront, data_key)


In [4]:
# load select columns
df_rfmo = pd.read_csv(data_location, 
                      usecols = ['year', 'name_rfmo', 'name_comm_group', 'name_fishing_entity', 'name_sector_type', 'catch_sum', 'real_value'])

# rename columns as needed
df_rfmo.rename(columns = {"name_rfmo": "rfmo", "name_comm_group": "commercial_group", "name_fishing_entity": "fishing_entity", "name_sector_type": "sector_type"}, 
               inplace = True)

# print shape (rows & columns) of dataframe
df_rfmo.shape


(1027995, 7)

In [5]:
# sample data

df_rfmo.head(3)


Unnamed: 0,rfmo,year,commercial_group,fishing_entity,sector_type,catch_sum,real_value
0,NEAFC,1950,Other fishes & inverts,Belgium,Industrial,1082.915358,1587554.0
1,NEAFC,1950,Other fishes & inverts,Belgium,Industrial,112.044683,164257.5
2,NEAFC,1950,Other fishes & inverts,Belgium,Industrial,1937.004283,2839648.0


In [6]:
# dataframe information

df_rfmo.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1027995 entries, 0 to 1027994
Data columns (total 7 columns):
 #   Column            Non-Null Count    Dtype  
---  ------            --------------    -----  
 0   rfmo              1027995 non-null  object 
 1   year              1027995 non-null  int64  
 2   commercial_group  1027995 non-null  object 
 3   fishing_entity    1027995 non-null  object 
 4   sector_type       1027995 non-null  object 
 5   catch_sum         1027995 non-null  float64
 6   real_value        1027995 non-null  float64
dtypes: float64(2), int64(1), object(4)
memory usage: 54.9+ MB


### Java script for downloading data

In [7]:
# java script function to download a dataframe into csv

def download_csv(dataframe_to_download, output_filename = 'results.csv'):
    
    js_download = """
    var csv = '%s';
    var filename = '%s';
    
    var blob = new Blob([csv], { type: 'text/csv;charset=utf-8;' });
    if (navigator.msSaveBlob) { // IE 10+
        navigator.msSaveBlob(blob, filename);
    } else {
        var link = document.createElement("a");
        if (link.download !== undefined) { // feature detection
            // Browsers that support HTML5 download attribute
            var url = URL.createObjectURL(blob);
            link.setAttribute("href", url);
            link.setAttribute("download", filename);
            link.style.visibility = 'hidden';
            document.body.appendChild(link);
            link.click();
            document.body.removeChild(link);
        }
    }
    """ % (dataframe_to_download.to_csv(index = False).replace('\n','\\n').replace("'","\'").replace('\r',''), output_filename)
    
    return Javascript(js_download)


In [8]:
# example for using java script to download dataframe

df_test = df_rfmo[(df_rfmo['year'] >= 2010) & (df_rfmo['year'] <= 2015)].groupby('year', as_index = False).agg({'catch_sum':'mean', 'real_value':'mean'}).round(2).copy()

download_csv(df_test)


<IPython.core.display.Javascript object>

In [9]:
# output file name can be defined while calling this function as shown here

download_csv(df_test, 'new_file_name.csv') 


<IPython.core.display.Javascript object>

### Data analysis

In [10]:
# Function definitions for simple data analysis scenarios

def average_catch_and_value_by_year(start_year = 2010, end_year = 2015):
    return df_rfmo[(df_rfmo['year'] >= start_year) & (df_rfmo['year'] <= end_year)].groupby('year', as_index = False).agg({'catch_sum':'mean', 'real_value':'mean'}).round(2).copy()

def average_catch_and_value_by_region():
    return df_rfmo.groupby('fishing_entity', as_index = False)[['catch_sum', 'real_value']].mean().copy()

def average_catch_and_value_by_year_and_region(start_year = 2010, end_year = 2015, fishing_entity = 'Canada'):
    return df_rfmo[(df_rfmo['year'] >= start_year) & (df_rfmo['year'] <= end_year) & (df_rfmo['fishing_entity'] == fishing_entity)].groupby('year', as_index = False).agg({'catch_sum':'mean', 'real_value':'mean'}).round(2).copy()

def catch_and_value_by_commercial_groups():
    return df_rfmo.groupby('commercial_group', as_index = False)[['catch_sum', 'real_value']].sum().copy()

def catch_and_value_by_commercial_groups_and_year(start_year = 2010, end_year = 2015):
    return df_rfmo[(df_rfmo['year'] >= start_year) & (df_rfmo['year'] <= end_year)].groupby(['year', 'commercial_group'], as_index = False)[['catch_sum', 'real_value']].sum().copy()


### Average catch sum & real value by year (parameters: start year, end year)

In [11]:
# Aggregate the data by Year, and display the result dataframe

df_table1 = average_catch_and_value_by_year(2005, 2010)
df_table1


Unnamed: 0,year,catch_sum,real_value
0,2005,595.99,146377000.0
1,2006,550.3,108178600.0
2,2007,523.11,103586800.0
3,2008,488.62,102787300.0
4,2009,496.13,105533200.0
5,2010,471.13,86749630.0


In [12]:
# download the dataframe into a csv

download_csv(df_table1, 'table1.csv')


<IPython.core.display.Javascript object>

### Average catch sum & real value by fishing entity or region

In [13]:
# Aggregate the data by Fishing entity, and display the result dataframe

df_table2 = average_catch_and_value_by_region()
df_table2.head(10)


Unnamed: 0,fishing_entity,catch_sum,real_value
0,Algeria,5.422545,7949.45
1,Angola,905.784217,2310087.0
2,Argentina,0.044136,62.44362
3,Azores Isl. (Portugal),43.395882,51081.1
4,Barbados,0.024249,13.58519
5,Belgium,185.975452,907248.2
6,Belize,25.080581,51045.15
7,Brazil,169.08592,77302.56
8,Bulgaria,84.628133,46455.46
9,Canada,2.973976,1354.338


In [14]:
# download the dataframe into a csv

download_csv(df_table2, 'table2.csv')


<IPython.core.display.Javascript object>

### Average catch sum & real value by year and fishing entity (parameters: start year, end year, region)

In [15]:
# Aggregate the data by Year, filter the data to a Region, and display the result dataframe

df_table3 = average_catch_and_value_by_year_and_region(2010, 2015, 'Iceland')
df_table3


Unnamed: 0,year,catch_sum,real_value
0,2010,1214.09,71481570.0
1,2011,1138.4,117280400.0
2,2012,1357.82,252423800.0
3,2013,1297.89,210824600.0
4,2014,1033.93,114645600.0
5,2015,1579.63,252277800.0


In [16]:
# download the dataframe into a csv

download_csv(df_table3, 'table3.csv')


<IPython.core.display.Javascript object>

### Total catch sum & real value of all commercial groups

In [17]:
# Aggregate the data by Commercial group, and display the result dataframe

df_table4 = catch_and_value_by_commercial_groups()
df_table4


Unnamed: 0,commercial_group,catch_sum,real_value
0,Anchovies,3147476.0,165098100000.0
1,Cod-likes,264534600.0,57431720000000.0
2,Crustaceans,14912850.0,243320400000.0
3,Flatfishes,32004710.0,528293300000.0
4,Herring-likes,124723400.0,39909540000000.0
5,Molluscs,14251900.0,458254100000.0
6,Other fishes & inverts,90735460.0,3704571000000.0
7,Perch-likes,70881650.0,9046523000000.0
8,"Salmon, smelts, etc",71308950.0,56585650000000.0
9,Scorpionfishes,33919010.0,2181282000000.0


In [18]:
# download the dataframe into a csv

download_csv(df_table4, 'table4.csv')


<IPython.core.display.Javascript object>

### Total catch sum & real value of all commercial groups by year (parameters: start year, end year)

In [19]:
# Aggregate the data by Year and Commercial group, and display the result dataframe

df_table5 = catch_and_value_by_commercial_groups_and_year(2010, 2011)
df_table5


Unnamed: 0,year,commercial_group,catch_sum,real_value
0,2010,Anchovies,19339.53,401948800.0
1,2010,Cod-likes,3081363.0,560213600000.0
2,2010,Crustaceans,239099.2,3289283000.0
3,2010,Flatfishes,341633.8,3186101000.0
4,2010,Herring-likes,1969670.0,945253500000.0
5,2010,Molluscs,193269.7,3390055000.0
6,2010,Other fishes & inverts,1537563.0,37468450000.0
7,2010,Perch-likes,1277467.0,128185200000.0
8,2010,"Salmon, smelts, etc",462386.2,48253510000.0
9,2010,Scorpionfishes,218162.3,6276247000.0


In [20]:
# download the dataframe into a csv

download_csv(df_table5, 'table5.csv')


<IPython.core.display.Javascript object>

# Visualizations

### Scripts for widgets, variables, and charts

In [21]:

# Chart 1 

# get unique list of years from the dataframe
years = list(df_rfmo['year'].unique())

# get unique list of regions from the dataframe
regions = list(df_rfmo['fishing_entity'].unique())

# define start year dropdown for chart 1
start_year_dropdown1 = widgets.Dropdown(
    options = years,
    value = 2010,
    description = 'Start Year :',
    disabled = False,
)

# define end year dropdown for chart 1
end_year_dropdown1 = widgets.Dropdown(
    options = years,
    value = 2015,
    description = 'End Year :',
    disabled = False,
)

# define region dropdown for chart 1
region_dropdown1 = widgets.Dropdown(
    options = regions,
    value = 'Iceland',
    description = 'Region',
    disabled = False,
)

def display_chart_1_filters():
       
    # display both start year and end year dropdowns, and also region
    display(start_year_dropdown1)
    display(end_year_dropdown1)
    display(region_dropdown1)

def display_chart_1():
    
    # call the function to display both the start year and end year dropdowns
    display_chart_1_filters()
    
    # get values selected in the start year and end year dropdowns, and also region
    start_year = start_year_dropdown1.value
    end_year = end_year_dropdown1.value
    region = region_dropdown1.value
    
    # create a temporary dataframe with above filters
    df1 = df_rfmo[(df_rfmo['year'] >= start_year) & (df_rfmo['year'] <= end_year) & (df_rfmo['fishing_entity'] == region)].groupby(['year', 'fishing_entity', 'commercial_group'], as_index=False)[['catch_sum']].sum().copy()

    # Altair visualization based on above dataframe
    chart1 = alt.Chart(df1).mark_bar().encode(
                x = alt.X('catch_sum', 
                          title = 'Total catch', 
                          axis = alt.Axis(titleFontSize = 14, 
                                          labelFontSize = 10)),
                y = alt.Y('year:N', 
                          title = 'Year', 
                          axis = alt.Axis(titleFontSize = 14, 
                                          labelFontSize = 12)),
                color = alt.Color('commercial_group', 
                                  legend = alt.Legend(title = 'Commercial Group', 
                                                      titleFontSize = 12, 
                                                      labelFontSize = 12, 
                                                      labelColor = 'steelblue', 
                                                      titleColor = 'steelblue')),
                tooltip = ['year', 'fishing_entity:N', 'commercial_group', 'catch_sum']
            ).properties(
                width = 600,
                height = 400,
                title = 'Total catch by Commercial Group and Year'
            ).configure_legend(orient = 'right'
            ).configure_title(fontSize = 20, 
                              color = 'teal'
            ).configure_axis(titleColor = 'steelblue')
    
    return chart1

def download_chart_1():
    
    # call the function to display both the start year and end year dropdowns
    display_chart_1_filters()
    
    # get values selected in the start year and end year dropdowns, and also region
    start_year = start_year_dropdown1.value
    end_year = end_year_dropdown1.value
    region = region_dropdown1.value
    
    # create a temporary dataframe with above filters
    df1 = df_rfmo[(df_rfmo['year'] >= start_year) & (df_rfmo['year'] <= end_year) & (df_rfmo['fishing_entity'] == region)].groupby(['year', 'fishing_entity', 'commercial_group'], as_index=False)[['catch_sum']].sum().copy()
    
    return df1


In [22]:

# Chart 2

# get unique list of years from the dataframe
years = list(df_rfmo['year'].unique())

# get unique list of regions from the dataframe
regions = list(df_rfmo['fishing_entity'].unique())

# define start year dropdown for chart 2
start_year_dropdown2 = widgets.Dropdown(
    options = years,
    value = 2010,
    description = 'Start Year :',
    disabled = False,
)

# define end year dropdown for chart 2
end_year_dropdown2 = widgets.Dropdown(
    options = years,
    value = 2015,
    description = 'End Year :',
    disabled = False,
)

# define region dropdown for chart 2
region_dropdown2 = widgets.Dropdown(
    options = regions,
    value = 'Iceland',
    description = 'Region',
    disabled = False,
)

def display_chart_2_filters():
       
    # display both start year and end year dropdowns, and also region
    display(start_year_dropdown2)
    display(end_year_dropdown2)
    display(region_dropdown2)

def display_chart_2():
    
    # call the function to display both the start year and end year dropdowns
    display_chart_2_filters()
    
    # get values selected in the start year and end year dropdowns, and also region
    start_year = start_year_dropdown2.value
    end_year = end_year_dropdown2.value
    region = region_dropdown2.value
    
    # create a temporary dataframe with above filters
    df2 = df_rfmo[(df_rfmo['year'] >= start_year) & (df_rfmo['year'] <= end_year) & (df_rfmo['fishing_entity'] == region)].groupby(['year', 'fishing_entity', 'commercial_group', 'sector_type'], as_index=False)[['catch_sum']].sum().copy()

    # Altair visualization based on above dataframe
    chart2 = alt.Chart(df2).mark_bar().encode(
                x = alt.X('year:N', 
                          title = 'Year', 
                          axis = alt.Axis(titleFontSize = 14, 
                                          labelFontSize = 12)),
                y = alt.Y('sum(catch_sum)', 
                          title = 'Total Catch', 
                          axis = alt.Axis(titleFontSize = 14, 
                                          labelFontSize = 10)),
                color = alt.Color('commercial_group', 
                                  legend = alt.Legend(title = 'Commercial Group', 
                                                      titleFontSize = 12, 
                                                      labelFontSize = 12, 
                                                      labelColor = 'steelblue', 
                                                      titleColor = 'steelblue')),
                tooltip = ['year', 'fishing_entity:N', 'commercial_group', 'catch_sum'],
                facet = alt.Facet('sector_type', 
                                  title = None, 
                                  columns = 2, 
                                  header = alt.Header(labelFontSize=14))
            ).properties(
                width = 300,
                height = 300,
                title = 'Total Catch by Commercial groups and Year - for all Sector types'
            ).configure_legend(orient = 'right'
            ).configure_title(fontSize = 20, 
                              color = 'teal'
            ).configure_axis(titleColor = 'steelblue'
            ).resolve_scale(y = 'independent')
    
    return chart2

def download_chart_2():
    
    # call the function to display both the start year and end year dropdowns
    display_chart_2_filters()
    
    # get values selected in the start year and end year dropdowns, and also region
    start_year = start_year_dropdown2.value
    end_year = end_year_dropdown2.value
    region = region_dropdown2.value
    
    # create a temporary dataframe with above filters
    df2 = df_rfmo[(df_rfmo['year'] >= start_year) & (df_rfmo['year'] <= end_year) & (df_rfmo['fishing_entity'] == region)].groupby(['year', 'fishing_entity', 'commercial_group', 'sector_type'], as_index=False)[['catch_sum']].sum().copy()
    
    return df2


In [23]:

# Chart 3

# get unique list of years from the dataframe
years = list(df_rfmo['year'].unique())

# get unique list of regions from the dataframe
regions = list(df_rfmo['fishing_entity'].unique())

# define start year dropdown for chart 3
start_year_dropdown3 = widgets.Dropdown(
    options = years,
    value = 2010,
    description = 'Start Year :',
    disabled = False,
)

# define end year dropdown for chart 3
end_year_dropdown3 = widgets.Dropdown(
    options = years,
    value = 2015,
    description = 'End Year :',
    disabled = False,
)

# define region dropdown for chart 3
region_dropdown3 = widgets.Dropdown(
    options = regions,
    value = 'Iceland',
    description = 'Region',
    disabled = False,
)

def display_chart_3_filters():
       
    # display both start year and end year dropdowns, and also region
    display(start_year_dropdown3)
    display(end_year_dropdown3)
    display(region_dropdown3)

def display_chart_3():
    
    # call the function to display both the start year and end year dropdowns
    display_chart_3_filters()
    
    # get values selected in the start year and end year dropdowns, and also region
    start_year = start_year_dropdown3.value
    end_year = end_year_dropdown3.value
    region = region_dropdown3.value
    
    # create a temporary dataframe with above filters
    df3 = df_rfmo[(df_rfmo['year'] >= start_year) & (df_rfmo['year'] <= end_year) & (df_rfmo['fishing_entity'] == region)][['year', 'fishing_entity', 'commercial_group', 'catch_sum']].copy()

    # Altair visualization based on above dataframe
    chart3 = alt.Chart(df3).mark_boxplot().encode(
                x = alt.X('catch_sum', 
                          title = 'Catch Sum', 
                          axis = alt.Axis(titleFontSize = 14, 
                                          labelFontSize = 12)),
                y = alt.Y('commercial_group', 
                          title = 'Commercial Group', 
                          axis = alt.Axis(titleFontSize = 14, 
                                          labelFontSize = 12)),
                color = alt.Color('commercial_group', 
                                  legend = alt.Legend(title = 'Commercial Group', 
                                                      titleFontSize = 12, 
                                                      labelFontSize = 12, 
                                                      labelColor = 'steelblue', 
                                                      titleColor = 'steelblue')),
                tooltip = ['year', 'fishing_entity:N', 'commercial_group', 'catch_sum'],
                size = 'count()'
            ).properties(
                width = 600,
                height = 500,
                title = 'Distribution of Catch Sum by Commercial groups'
            ).configure_legend(orient = 'right'
            ).configure_title(fontSize = 20, 
                              color = 'teal'
            ).configure_axis(titleColor = 'steelblue')

    return chart3

def download_chart_3():
    
    # call the function to display both the start year and end year dropdowns
    display_chart_3_filters()
    
    # get values selected in the start year and end year dropdowns, and also region
    start_year = start_year_dropdown3.value
    end_year = end_year_dropdown3.value
    region = region_dropdown3.value
    
    # create a temporary dataframe with above filters
    df3 = df_rfmo[(df_rfmo['year'] >= start_year) & (df_rfmo['year'] <= end_year) & (df_rfmo['fishing_entity'] == region)][['year', 'fishing_entity', 'commercial_group', 'catch_sum']].copy()

    return df3


In [24]:

# Chart 4

# get unique list of years from the dataframe
years = list(df_rfmo['year'].unique())

# get unique list of regions from the dataframe
regions = list(df_rfmo['fishing_entity'].unique())

# define start year dropdown for chart 4
start_year_dropdown4 = widgets.Dropdown(
    options = years,
    value = 2000,
    description = 'Start Year :',
    disabled = False,
)

# define end year dropdown for chart 4
end_year_dropdown4 = widgets.Dropdown(
    options = years,
    value = 2015,
    description = 'End Year :',
    disabled = False,
)

# define region dropdown for chart 4
region_dropdown4 = widgets.Dropdown(
    options = regions,
    value = 'Iceland',
    description = 'Region',
    disabled = False,
)

# define radio button to select the measure for chart 4
measure_chart_4 = widgets.RadioButtons(
    options = [('Total Catch', 'catch_sum'), ('Real Value', 'real_value')],
    value = 'catch_sum',
    description = 'Measure :',
    disabled = False
)

def display_chart_4_filters():
       
    # display both start year and end year dropdowns, region and measure selection
    display(start_year_dropdown4)
    display(end_year_dropdown4)
    display(region_dropdown4)
    display(measure_chart_4)

def display_chart_4():
    
    # call the function to display both the start year and end year dropdowns
    display_chart_4_filters()
    
    # get values selected in the start year and end year dropdowns, also from region
    start_year = start_year_dropdown4.value
    end_year = end_year_dropdown4.value
    region = region_dropdown4.value
    
    if measure_chart_4.value == 'real_value':
    
        # create a temporary dataframe with above filters
        df4 = df_rfmo[(df_rfmo['year'] >= start_year) & (df_rfmo['year'] <= end_year) & (df_rfmo['fishing_entity'] == region)].groupby(['year', 'fishing_entity', 'commercial_group'], as_index=False)[['real_value']].sum().copy()

        # Altair visualization based on above dataframe
        chart4 = alt.Chart(df4).mark_area().encode(
                        x = alt.X('year:N', 
                                  title = 'Year', 
                                  axis = alt.Axis(titleFontSize = 14, 
                                                  labelFontSize = 14)),
                        y = alt.Y('sum(real_value)', 
                                  title = 'Real Value', 
                                  axis = alt.Axis(titleFontSize = 14, 
                                                  labelFontSize = 14)),
                        color = alt.Color('commercial_group', 
                                          legend = alt.Legend(title = 'Commercial Group', 
                                                              titleFontSize = 14, 
                                                              labelFontSize = 14, 
                                                              labelColor = 'steelblue', 
                                                              titleColor = 'steelblue')),
                        tooltip = ['year', 'fishing_entity:N', 'commercial_group', 'real_value']
                    ).properties(
                        width = 800,
                        height = 600,
                        title = 'Real Value by Commercial Group and Year'
                    ).configure_legend(orient = 'bottom'
                    ).configure_title(fontSize = 20, color = 'teal'
                    ).configure_axis(titleColor = 'steelblue')
    else: 

        # create a temporary data frame with filters applied
        df4 = df_rfmo[(df_rfmo['year'] >= start_year) & (df_rfmo['year'] <= end_year) & (df_rfmo['fishing_entity'] == region)].groupby(['year', 'fishing_entity', 'commercial_group'], as_index=False)[['catch_sum']].sum().copy()

        # Altair visualization based on above dataframe
        chart4 = alt.Chart(df4).mark_area().encode(
                        x = alt.X('year:N', 
                                  title = 'Year', 
                                  axis = alt.Axis(titleFontSize = 14, 
                                                  labelFontSize = 14)),
                        y = alt.Y('sum(catch_sum)', 
                                  title = 'Total Catch', 
                                  axis = alt.Axis(titleFontSize = 14, 
                                                  labelFontSize = 14)),
                        color = alt.Color('commercial_group', 
                                          legend = alt.Legend(title = 'Commercial Group', 
                                                              titleFontSize = 14, 
                                                              labelFontSize = 14, 
                                                              labelColor = 'steelblue', 
                                                              titleColor = 'steelblue')),
                        tooltip = ['year', 'fishing_entity:N', 'commercial_group', 'catch_sum']
                    ).properties(
                        width = 800,
                        height = 600,
                        title = 'Total Catch by Commercial Group and Year'
                    ).configure_legend(orient = 'bottom'
                    ).configure_title(fontSize = 20, color = 'teal'
                    ).configure_axis(titleColor = 'steelblue')

    # return the chart based on measure selection by the user
    return chart4

def download_chart_4():
    
    # call the function to display both the start year and end year dropdowns
    display_chart_4_filters()
    
    # get values selected in the start year and end year dropdowns, also from region
    start_year = start_year_dropdown4.value
    end_year = end_year_dropdown4.value
    region = region_dropdown4.value
    
    if measure_chart_4.value == 'real_value':
    
        # create a temporary dataframe with above filters
        df4 = df_rfmo[(df_rfmo['year'] >= start_year) & (df_rfmo['year'] <= end_year) & (df_rfmo['fishing_entity'] == region)].groupby(['year', 'fishing_entity', 'commercial_group'], as_index=False)[['real_value']].sum().copy()

    else: 

        # create a temporary data frame with filters applied
        df4 = df_rfmo[(df_rfmo['year'] >= start_year) & (df_rfmo['year'] <= end_year) & (df_rfmo['fishing_entity'] == region)].groupby(['year', 'fishing_entity', 'commercial_group'], as_index=False)[['catch_sum']].sum().copy()

    # return the dataframe based on measure selection by the user
    return df4


In [25]:

# Chart 5

# get unique list of years from the dataframe
years = list(df_rfmo['year'].unique())

# get unique list of regions from the dataframe
regions = list(df_rfmo['fishing_entity'].unique())

# define start year dropdown for chart 5
start_year_dropdown5 = widgets.Dropdown(
    options = years,
    value = 1950,
    description = 'Start Year :',
    disabled = False,
)

# define end year dropdown for chart 5
end_year_dropdown5 = widgets.Dropdown(
    options = years,
    value = 2015,
    description = 'End Year :',
    disabled = False,
)

# define region dropdown for chart 5
region_dropdown5 = widgets.Dropdown(
    options = regions,
    value = 'Iceland',
    description = 'Region',
    disabled = False,
)

# define radio button to select the measure for chart 5
measure_chart_5 = widgets.RadioButtons(
    options = [('Total Catch', 'catch_sum'), ('Real Value', 'real_value')],
    value = 'catch_sum',
    description = 'Measure :',
    disabled = False
)

def display_chart_5_filters():
       
    # display both start year and end year dropdowns, region and measure selection
    display(start_year_dropdown5)
    display(end_year_dropdown5)
    display(region_dropdown5)
    display(measure_chart_5)

def display_chart_5():
    
    # call the function to display both the start year and end year dropdowns
    display_chart_5_filters()
    
    # get values selected in the start year and end year dropdowns, also from region
    start_year = start_year_dropdown5.value
    end_year = end_year_dropdown5.value
    region = region_dropdown5.value
    
    if measure_chart_5.value == 'real_value':
    
        # create a temporary dataframe with above filters
        df5 = df_rfmo[(df_rfmo['year'] >= start_year) & (df_rfmo['year'] <= end_year) & (df_rfmo['fishing_entity'] == region)].groupby(['year', 'fishing_entity', 'commercial_group'], as_index=False)[['real_value']].sum().copy()

        # Altair visualization based on above dataframe
        chart5 = alt.Chart(df5).mark_area().encode(
                        x = alt.X('year:N', 
                                  title = 'Year', 
                                  axis = alt.Axis(titleFontSize = 12, 
                                                  labelFontSize = 12)),
                        y = alt.Y('sum(real_value)', 
                                  title = 'Real Value', 
                                  axis = alt.Axis(titleFontSize = 12, 
                                                  labelFontSize = 12)),
                        color = alt.Color('commercial_group', 
                                          legend = alt.Legend(title = 'Commercial Group', 
                                                              titleFontSize = 14, 
                                                              labelFontSize = 14, 
                                                              labelColor = 'steelblue', 
                                                              titleColor = 'steelblue')),
                        row = "commercial_group:N",
                        tooltip = ['year', 'fishing_entity:N', 'commercial_group', 'real_value']
                    ).properties(
                        width = 600,
                        height = 70,
                        title = 'Real Value by Commercial Group and Year'
                    ).resolve_scale(y='independent'
                    ).configure_legend(orient = 'right'
                    ).configure_title(fontSize = 20, color = 'teal'
                    ).configure_axis(titleColor = 'steelblue')
    else:

        # create a temporary data frame with filters applied
        df5 = df_rfmo[(df_rfmo['year'] >= start_year) & (df_rfmo['year'] <= end_year) & (df_rfmo['fishing_entity'] == region)].groupby(['year', 'fishing_entity', 'commercial_group'], as_index=False)[['catch_sum']].sum().copy()

        # Altair visualization based on above dataframe
        chart5 = alt.Chart(df5).mark_area().encode(
                        x = alt.X('year:N', 
                                  title = 'Year', 
                                  axis = alt.Axis(titleFontSize = 12, 
                                                  labelFontSize = 12)),
                        y = alt.Y('sum(catch_sum)', 
                                  title = 'Total Catch', 
                                  axis = alt.Axis(titleFontSize = 12, 
                                                  labelFontSize = 12)),
                        color = alt.Color('commercial_group', 
                                          legend = alt.Legend(title = 'Commercial Group', 
                                                              titleFontSize = 14, 
                                                              labelFontSize = 14, 
                                                              labelColor = 'steelblue', 
                                                              titleColor = 'steelblue')),
                        row = "commercial_group:N",
                        tooltip = ['year', 'fishing_entity:N', 'commercial_group', 'catch_sum']
                    ).properties(
                        width = 600,
                        height = 70,
                        title = 'Total Catch by Commercial Group and Year'
                    ).resolve_scale(y='independent'
                    ).configure_legend(orient = 'right'
                    ).configure_title(fontSize = 20, color = 'teal'
                    ).configure_axis(titleColor = 'steelblue')

    # return the chart based on measure selection by the user
    return chart5

def download_chart_5():
    
    # call the function to display both the start year and end year dropdowns
    display_chart_5_filters()
    
    # get values selected in the start year and end year dropdowns, also from region
    start_year = start_year_dropdown5.value
    end_year = end_year_dropdown5.value
    region = region_dropdown5.value
    
    if measure_chart_5.value == 'real_value':
    
        # create a temporary dataframe with above filters
        df5 = df_rfmo[(df_rfmo['year'] >= start_year) & (df_rfmo['year'] <= end_year) & (df_rfmo['fishing_entity'] == region)].groupby(['year', 'fishing_entity', 'commercial_group'], as_index=False)[['real_value']].sum().copy()

    else:

        # create a temporary data frame with filters applied
        df5 = df_rfmo[(df_rfmo['year'] >= start_year) & (df_rfmo['year'] <= end_year) & (df_rfmo['fishing_entity'] == region)].groupby(['year', 'fishing_entity', 'commercial_group'], as_index=False)[['catch_sum']].sum().copy()

    # return the dataframe based on measure selection by the user
    return df5


### Total catch of Commercial groups by Year

In [26]:
display_chart_1()

Dropdown(description='Start Year :', index=60, options=(1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, …

Dropdown(description='End Year :', index=65, options=(1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 19…

Dropdown(description='Region', index=8, options=('Belgium', 'Bulgaria', 'Croatia', 'Denmark', 'Faeroe Isl. (De…

In [27]:
# download data behind the chart

chart1_data = download_chart_1()
download_csv(chart1_data, 'chart1_data.csv')


Dropdown(description='Start Year :', index=60, options=(1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, …

Dropdown(description='End Year :', index=65, options=(1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 19…

Dropdown(description='Region', index=8, options=('Belgium', 'Bulgaria', 'Croatia', 'Denmark', 'Faeroe Isl. (De…

<IPython.core.display.Javascript object>

### Total catch of Commercial groups by Year - for all Sector Types

In [28]:
display_chart_2()

Dropdown(description='Start Year :', index=60, options=(1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, …

Dropdown(description='End Year :', index=65, options=(1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 19…

Dropdown(description='Region', index=8, options=('Belgium', 'Bulgaria', 'Croatia', 'Denmark', 'Faeroe Isl. (De…

In [29]:
# download data behind the chart

chart2_data = download_chart_2()
download_csv(chart2_data, 'chart2_data.csv')


Dropdown(description='Start Year :', index=60, options=(1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, …

Dropdown(description='End Year :', index=65, options=(1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 19…

Dropdown(description='Region', index=8, options=('Belgium', 'Bulgaria', 'Croatia', 'Denmark', 'Faeroe Isl. (De…

<IPython.core.display.Javascript object>

### Distribution of Catch sum by Commercial groups

In [30]:
display_chart_3()

Dropdown(description='Start Year :', index=60, options=(1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, …

Dropdown(description='End Year :', index=65, options=(1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 19…

Dropdown(description='Region', index=8, options=('Belgium', 'Bulgaria', 'Croatia', 'Denmark', 'Faeroe Isl. (De…

In [31]:
# download data behind the chart

chart3_data = download_chart_3()
download_csv(chart3_data, 'chart3_data.csv')


Dropdown(description='Start Year :', index=60, options=(1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, …

Dropdown(description='End Year :', index=65, options=(1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 19…

Dropdown(description='Region', index=8, options=('Belgium', 'Bulgaria', 'Croatia', 'Denmark', 'Faeroe Isl. (De…

<IPython.core.display.Javascript object>

### Total Catch (or) Real value by Commercial Group and Year

In [32]:
display_chart_4()

Dropdown(description='Start Year :', index=50, options=(1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, …

Dropdown(description='End Year :', index=65, options=(1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 19…

Dropdown(description='Region', index=8, options=('Belgium', 'Bulgaria', 'Croatia', 'Denmark', 'Faeroe Isl. (De…

RadioButtons(description='Measure :', options=(('Total Catch', 'catch_sum'), ('Real Value', 'real_value')), va…

In [33]:
# download data behind the chart

chart4_data = download_chart_4()
download_csv(chart4_data, 'chart4_data.csv')


Dropdown(description='Start Year :', index=50, options=(1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, …

Dropdown(description='End Year :', index=65, options=(1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 19…

Dropdown(description='Region', index=8, options=('Belgium', 'Bulgaria', 'Croatia', 'Denmark', 'Faeroe Isl. (De…

RadioButtons(description='Measure :', options=(('Total Catch', 'catch_sum'), ('Real Value', 'real_value')), va…

<IPython.core.display.Javascript object>

### Total Catch (or) Real value by Year for every Commercial Group

In [34]:
display_chart_5()

Dropdown(description='Start Year :', options=(1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960…

Dropdown(description='End Year :', index=65, options=(1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 19…

Dropdown(description='Region', index=8, options=('Belgium', 'Bulgaria', 'Croatia', 'Denmark', 'Faeroe Isl. (De…

RadioButtons(description='Measure :', options=(('Total Catch', 'catch_sum'), ('Real Value', 'real_value')), va…

In [35]:
# download data behind the chart

chart5_data = download_chart_5()
download_csv(chart5_data, 'chart5_data.csv')


Dropdown(description='Start Year :', options=(1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960…

Dropdown(description='End Year :', index=65, options=(1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 19…

Dropdown(description='Region', index=8, options=('Belgium', 'Bulgaria', 'Croatia', 'Denmark', 'Faeroe Isl. (De…

RadioButtons(description='Measure :', options=(('Total Catch', 'catch_sum'), ('Real Value', 'real_value')), va…

<IPython.core.display.Javascript object>