### Setup

In [1]:
# Uncomment the lines below if the required libraries are not already installed.
# `%pip` is used instead of `! pip` so the packages are installed into the
# environment of the running kernel.

# %pip install --upgrade pip

# %pip install altair
# %pip install ipywidgets
# %pip install pandas
# %pip install numpy


### Import Libraries

In [2]:
from IPython.display import display
from IPython.display import Javascript

import ipywidgets as widgets
import numpy as np 
import pandas as pd
import altair as alt


In [3]:
# enable extension for widgets
# (shell command run through IPython's `!` escape; `nbextension` presumably targets
# the classic Notebook front end only - TODO confirm it is needed for the front end in use)
! jupyter nbextension enable --py widgetsnbextension

# disable the default 5000 limit on number of rows
# (without this, Altair refuses to render a chart whose data exceeds the limit)
alt.data_transformers.disable_max_rows()

# enable renderer
# (kept as the last expression, so the returned registry object is echoed as the cell output)
alt.renderers.enable('default')


Config option `kernel_spec_manager_class` not recognized by `EnableNBExtensionApp`.
Enabling notebook extension jupyter-js-widgets/extension...
      - Validating: [32mOK[0m


RendererRegistry.enable('default')

### Load data from S3

In [4]:
# CloudFront distribution that fronts the S3 bucket, and the object to fetch from it
cloudfront = 'https://d3cu2src083cxg.cloudfront.net'
data_key = 'rfmo_query_result.csv'

# full URL of the CSV file: <domain>/<key>
data_location = '/'.join((cloudfront, data_key))


In [5]:
# columns to read from the CSV, and the shorter names used in the rest of the notebook
columns_to_load = ['year', 'name_rfmo', 'name_comm_group', 'name_fishing_entity',
                   'name_sector_type', 'catch_sum', 'real_value']
short_names = {
    "name_rfmo": "rfmo",
    "name_comm_group": "commercial_group",
    "name_fishing_entity": "fishing_entity",
    "name_sector_type": "sector_type",
}

# load only the selected columns and rename them in one step
df_rfmo = pd.read_csv(data_location, usecols=columns_to_load).rename(columns=short_names)

# print shape (rows & columns) of dataframe
df_rfmo.shape


(1027995, 7)

In [6]:
# sample data: show the first three rows to check the loaded columns and their renamed headers

df_rfmo.head(3)


Unnamed: 0,rfmo,year,commercial_group,fishing_entity,sector_type,catch_sum,real_value
0,NEAFC,1950,Other fishes & inverts,Belgium,Industrial,1082.915358,1587554.0
1,NEAFC,1950,Other fishes & inverts,Belgium,Industrial,112.044683,164257.5
2,NEAFC,1950,Other fishes & inverts,Belgium,Industrial,1937.004283,2839648.0


In [7]:
# dataframe information: column dtypes, non-null counts and memory usage

df_rfmo.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1027995 entries, 0 to 1027994
Data columns (total 7 columns):
 #   Column            Non-Null Count    Dtype  
---  ------            --------------    -----  
 0   rfmo              1027995 non-null  object 
 1   year              1027995 non-null  int64  
 2   commercial_group  1027995 non-null  object 
 3   fishing_entity    1027995 non-null  object 
 4   sector_type       1027995 non-null  object 
 5   catch_sum         1027995 non-null  float64
 6   real_value        1027995 non-null  float64
dtypes: float64(2), int64(1), object(4)
memory usage: 54.9+ MB


# Visualizations

### Scripts for widgets, variables, and charts

In [8]:

# Chart 1

# dropdown options come from the distinct values present in the dataframe
years = list(df_rfmo['year'].unique())
regions = list(df_rfmo['fishing_entity'].unique())

# first year of the (inclusive) range shown in chart 1
start_year_dropdown1 = widgets.Dropdown(options=years, value=2010, description='Start Year :', disabled=False)

# last year of the (inclusive) range shown in chart 1
end_year_dropdown1 = widgets.Dropdown(options=years, value=2015, description='End Year :', disabled=False)

# fishing entity whose catch is shown in chart 1
region_dropdown1 = widgets.Dropdown(options=regions, value='Iceland', description='Region', disabled=False)

def display_chart_1_filters():
    """Render the chart 1 filter widgets in order: start year, end year, region."""
    for control in (start_year_dropdown1, end_year_dropdown1, region_dropdown1):
        display(control)

def display_chart_1():
    """Show the chart 1 filters and build a bar chart of total catch per year.

    Reads the current values of ``start_year_dropdown1``, ``end_year_dropdown1``
    and ``region_dropdown1``, keeps the rows of ``df_rfmo`` for that fishing
    entity within the inclusive year range, sums ``catch_sum`` per year and
    commercial group, and plots the totals as bars coloured by commercial group.

    The widget values are read once, when this function is called; no change
    handlers are registered here, so the calling cell has to be re-run after a
    selection is changed for the chart to reflect it.

    Returns
    -------
    alt.Chart
        The configured Altair chart.
    """
    # show the start year, end year and region dropdowns above the chart
    display_chart_1_filters()

    # current selections
    start_year = start_year_dropdown1.value
    end_year = end_year_dropdown1.value
    region = region_dropdown1.value

    # an inverted selection would match no rows and silently render an empty
    # chart, so treat the two years as an unordered pair
    if start_year > end_year:
        start_year, end_year = end_year, start_year

    # rows for the selected region within the inclusive year range
    selected = df_rfmo['year'].between(start_year, end_year) & (df_rfmo['fishing_entity'] == region)

    # total catch per year / commercial group (groupby already returns a new frame)
    df1 = df_rfmo[selected].groupby(['year', 'fishing_entity', 'commercial_group'],
                                    as_index=False)[['catch_sum']].sum()

    # Altair visualization based on above dataframe
    chart1 = alt.Chart(df1).mark_bar().encode(
        x=alt.X('catch_sum',
                title='Total catch',
                axis=alt.Axis(titleFontSize=14, labelFontSize=10)),
        y=alt.Y('year:N',
                title='Year',
                axis=alt.Axis(titleFontSize=14, labelFontSize=12)),
        color=alt.Color('commercial_group',
                        legend=alt.Legend(title='Commercial Group',
                                          titleFontSize=12,
                                          labelFontSize=12,
                                          labelColor='steelblue',
                                          titleColor='steelblue')),
        tooltip=['year', 'fishing_entity:N', 'commercial_group', 'catch_sum']
    ).properties(
        width=600,
        height=400,
        title='Total catch by Commercial Group and Year'
    ).configure_legend(
        orient='right'
    ).configure_title(
        fontSize=20,
        color='teal'
    ).configure_axis(
        titleColor='steelblue'
    )

    return chart1


In [9]:

# Chart 2

# dropdown options come from the distinct values present in the dataframe
years = list(df_rfmo['year'].unique())
regions = list(df_rfmo['fishing_entity'].unique())

# first year of the (inclusive) range shown in chart 2
start_year_dropdown2 = widgets.Dropdown(options=years, value=2010, description='Start Year :', disabled=False)

# last year of the (inclusive) range shown in chart 2
end_year_dropdown2 = widgets.Dropdown(options=years, value=2015, description='End Year :', disabled=False)

# fishing entity whose catch is shown in chart 2
region_dropdown2 = widgets.Dropdown(options=regions, value='Iceland', description='Region', disabled=False)

def display_chart_2_filters():
    """Render the chart 2 filter widgets in order: start year, end year, region."""
    for control in (start_year_dropdown2, end_year_dropdown2, region_dropdown2):
        display(control)

def display_chart_2():
    """Show the chart 2 filters and build per-sector bar charts of total catch.

    Reads the current values of ``start_year_dropdown2``, ``end_year_dropdown2``
    and ``region_dropdown2``, keeps the rows of ``df_rfmo`` for that fishing
    entity within the inclusive year range, sums ``catch_sum`` per year,
    commercial group and sector type, and plots one bar chart per sector type
    (two facets per row, each with its own y scale), coloured by commercial group.

    The widget values are read once, when this function is called; no change
    handlers are registered here, so the calling cell has to be re-run after a
    selection is changed for the chart to reflect it.

    Returns
    -------
    alt.Chart
        The configured, faceted Altair chart.
    """
    # show the start year, end year and region dropdowns above the chart
    display_chart_2_filters()

    # current selections
    start_year = start_year_dropdown2.value
    end_year = end_year_dropdown2.value
    region = region_dropdown2.value

    # an inverted selection would match no rows and silently render an empty
    # chart, so treat the two years as an unordered pair
    if start_year > end_year:
        start_year, end_year = end_year, start_year

    # rows for the selected region within the inclusive year range
    selected = df_rfmo['year'].between(start_year, end_year) & (df_rfmo['fishing_entity'] == region)

    # total catch per year / commercial group / sector (groupby already returns a new frame)
    df2 = df_rfmo[selected].groupby(['year', 'fishing_entity', 'commercial_group', 'sector_type'],
                                    as_index=False)[['catch_sum']].sum()

    # Altair visualization based on above dataframe
    chart2 = alt.Chart(df2).mark_bar().encode(
        x=alt.X('year:N',
                title='Year',
                axis=alt.Axis(titleFontSize=14, labelFontSize=12)),
        y=alt.Y('sum(catch_sum)',
                title='Total Catch',
                axis=alt.Axis(titleFontSize=14, labelFontSize=10)),
        color=alt.Color('commercial_group',
                        legend=alt.Legend(title='Commercial Group',
                                          titleFontSize=12,
                                          labelFontSize=12,
                                          labelColor='steelblue',
                                          titleColor='steelblue')),
        tooltip=['year', 'fishing_entity:N', 'commercial_group', 'catch_sum'],
        facet=alt.Facet('sector_type',
                        title=None,
                        columns=2,
                        header=alt.Header(labelFontSize=14))
    ).properties(
        width=300,
        height=300,
        title='Total Catch by Commercial groups and Year - for all Sector types'
    ).configure_legend(
        orient='right'
    ).configure_title(
        fontSize=20,
        color='teal'
    ).configure_axis(
        titleColor='steelblue'
    ).resolve_scale(
        # sectors differ in magnitude, so each facet gets its own y axis
        y='independent'
    )

    return chart2


In [10]:

# Chart 3

# dropdown options come from the distinct values present in the dataframe
years = list(df_rfmo['year'].unique())
regions = list(df_rfmo['fishing_entity'].unique())

# first year of the (inclusive) range shown in chart 3
start_year_dropdown3 = widgets.Dropdown(options=years, value=2010, description='Start Year :', disabled=False)

# last year of the (inclusive) range shown in chart 3
end_year_dropdown3 = widgets.Dropdown(options=years, value=2015, description='End Year :', disabled=False)

# fishing entity whose catch is shown in chart 3
region_dropdown3 = widgets.Dropdown(options=regions, value='Iceland', description='Region', disabled=False)

def display_chart_3_filters():
    """Render the chart 3 filter widgets in order: start year, end year, region."""
    for control in (start_year_dropdown3, end_year_dropdown3, region_dropdown3):
        display(control)

def display_chart_3():
    """Show the chart 3 filters and build a box plot of catch per commercial group.

    Reads the current values of ``start_year_dropdown3``, ``end_year_dropdown3``
    and ``region_dropdown3``, keeps the (un-aggregated) rows of ``df_rfmo`` for
    that fishing entity within the inclusive year range, and plots the
    distribution of ``catch_sum`` as one box per commercial group.

    The widget values are read once, when this function is called; no change
    handlers are registered here, so the calling cell has to be re-run after a
    selection is changed for the chart to reflect it.

    Returns
    -------
    alt.Chart
        The configured Altair chart.
    """
    # show the start year, end year and region dropdowns above the chart
    display_chart_3_filters()

    # current selections
    start_year = start_year_dropdown3.value
    end_year = end_year_dropdown3.value
    region = region_dropdown3.value

    # an inverted selection would match no rows and silently render an empty
    # chart, so treat the two years as an unordered pair
    if start_year > end_year:
        start_year, end_year = end_year, start_year

    # rows for the selected region within the inclusive year range
    selected = df_rfmo['year'].between(start_year, end_year) & (df_rfmo['fishing_entity'] == region)

    # individual records are kept (no aggregation) so the box plot sees the full
    # distribution; .loc with a boolean mask returns a new frame
    df3 = df_rfmo.loc[selected, ['year', 'fishing_entity', 'commercial_group', 'catch_sum']]

    # Altair visualization based on above dataframe
    chart3 = alt.Chart(df3).mark_boxplot().encode(
        x=alt.X('catch_sum',
                title='Catch Sum',
                axis=alt.Axis(titleFontSize=14, labelFontSize=12)),
        y=alt.Y('commercial_group',
                title='Commercial Group',
                axis=alt.Axis(titleFontSize=14, labelFontSize=12)),
        color=alt.Color('commercial_group',
                        legend=alt.Legend(title='Commercial Group',
                                          titleFontSize=12,
                                          labelFontSize=12,
                                          labelColor='steelblue',
                                          titleColor='steelblue')),
        tooltip=['year', 'fishing_entity:N', 'commercial_group', 'catch_sum'],
        size='count()'
    ).properties(
        width=600,
        height=500,
        title='Distribution of Catch Sum by Commercial groups'
    ).configure_legend(
        orient='right'
    ).configure_title(
        fontSize=20,
        color='teal'
    ).configure_axis(
        titleColor='steelblue'
    )

    return chart3


In [11]:

# Chart 4

# dropdown options come from the distinct values present in the dataframe
years = list(df_rfmo['year'].unique())
regions = list(df_rfmo['fishing_entity'].unique())

# first year of the (inclusive) range shown in chart 4
start_year_dropdown4 = widgets.Dropdown(options=years, value=2000, description='Start Year :', disabled=False)

# last year of the (inclusive) range shown in chart 4
end_year_dropdown4 = widgets.Dropdown(options=years, value=2015, description='End Year :', disabled=False)

# fishing entity whose data is shown in chart 4
region_dropdown4 = widgets.Dropdown(options=regions, value='Iceland', description='Region', disabled=False)

# which column chart 4 plots: (label shown to the user, dataframe column)
measure_chart_4 = widgets.RadioButtons(
    options=[('Total Catch', 'catch_sum'), ('Real Value', 'real_value')],
    value='catch_sum',
    description='Measure :',
    disabled=False,
)

def display_chart_4_filters():
    """Render the chart 4 filter widgets in order: start year, end year, region, measure."""
    for control in (start_year_dropdown4, end_year_dropdown4, region_dropdown4, measure_chart_4):
        display(control)

def display_chart_4():
    """Show the chart 4 filters and build a stacked area chart of the chosen measure.

    Reads the current values of ``start_year_dropdown4``, ``end_year_dropdown4``,
    ``region_dropdown4`` and ``measure_chart_4``, keeps the rows of ``df_rfmo``
    for that fishing entity within the inclusive year range, sums the chosen
    measure (``real_value`` or ``catch_sum``) per year and commercial group, and
    plots it as an area chart coloured by commercial group.

    The widget values are read once, when this function is called; no change
    handlers are registered here, so the calling cell has to be re-run after a
    selection is changed for the chart to reflect it.

    Returns
    -------
    alt.Chart
        The configured Altair chart for the selected measure.
    """
    # show the start year, end year, region and measure controls above the chart
    display_chart_4_filters()

    # current selections
    start_year = start_year_dropdown4.value
    end_year = end_year_dropdown4.value
    region = region_dropdown4.value

    # an inverted selection would match no rows and silently render an empty
    # chart, so treat the two years as an unordered pair
    if start_year > end_year:
        start_year, end_year = end_year, start_year

    # column to plot and its display label; anything other than 'real_value'
    # falls back to total catch
    if measure_chart_4.value == 'real_value':
        measure, measure_title = 'real_value', 'Real Value'
    else:
        measure, measure_title = 'catch_sum', 'Total Catch'

    # rows for the selected region within the inclusive year range
    selected = df_rfmo['year'].between(start_year, end_year) & (df_rfmo['fishing_entity'] == region)

    # total of the measure per year / commercial group (groupby already returns a new frame)
    df4 = df_rfmo[selected].groupby(['year', 'fishing_entity', 'commercial_group'],
                                    as_index=False)[[measure]].sum()

    # Altair visualization based on above dataframe; the two measures share one
    # chart definition and differ only in the plotted column and the titles
    chart4 = alt.Chart(df4).mark_area().encode(
        x=alt.X('year:N',
                title='Year',
                axis=alt.Axis(titleFontSize=14, labelFontSize=14)),
        y=alt.Y('sum({})'.format(measure),
                title=measure_title,
                axis=alt.Axis(titleFontSize=14, labelFontSize=14)),
        color=alt.Color('commercial_group',
                        legend=alt.Legend(title='Commercial Group',
                                          titleFontSize=14,
                                          labelFontSize=14,
                                          labelColor='steelblue',
                                          titleColor='steelblue')),
        tooltip=['year', 'fishing_entity:N', 'commercial_group', measure]
    ).properties(
        width=800,
        height=600,
        title='{} by Commercial Group and Year'.format(measure_title)
    ).configure_legend(
        orient='bottom'
    ).configure_title(
        fontSize=20,
        color='teal'
    ).configure_axis(
        titleColor='steelblue'
    )

    # return the chart based on measure selection by the user
    return chart4


In [12]:

# Chart 5

# dropdown options come from the distinct values present in the dataframe
years = list(df_rfmo['year'].unique())
regions = list(df_rfmo['fishing_entity'].unique())

# first year of the (inclusive) range shown in chart 5
start_year_dropdown5 = widgets.Dropdown(options=years, value=1950, description='Start Year :', disabled=False)

# last year of the (inclusive) range shown in chart 5
end_year_dropdown5 = widgets.Dropdown(options=years, value=2015, description='End Year :', disabled=False)

# fishing entity whose data is shown in chart 5
region_dropdown5 = widgets.Dropdown(options=regions, value='Iceland', description='Region', disabled=False)

# which column chart 5 plots: (label shown to the user, dataframe column)
measure_chart_5 = widgets.RadioButtons(
    options=[('Total Catch', 'catch_sum'), ('Real Value', 'real_value')],
    value='catch_sum',
    description='Measure :',
    disabled=False,
)

def display_chart_5_filters():
    """Render the chart 5 filter widgets in order: start year, end year, region, measure."""
    for control in (start_year_dropdown5, end_year_dropdown5, region_dropdown5, measure_chart_5):
        display(control)

def display_chart_5():
    """Show the chart 5 filters and build one small area chart per commercial group.

    Reads the current values of ``start_year_dropdown5``, ``end_year_dropdown5``,
    ``region_dropdown5`` and ``measure_chart_5``, keeps the rows of ``df_rfmo``
    for that fishing entity within the inclusive year range, sums the chosen
    measure (``real_value`` or ``catch_sum``) per year and commercial group, and
    plots it as a column of area charts, one row per commercial group, each with
    its own y scale.

    The widget values are read once, when this function is called; no change
    handlers are registered here, so the calling cell has to be re-run after a
    selection is changed for the chart to reflect it.

    Returns
    -------
    alt.Chart
        The configured Altair chart for the selected measure.
    """
    # show the start year, end year, region and measure controls above the chart
    display_chart_5_filters()

    # current selections
    start_year = start_year_dropdown5.value
    end_year = end_year_dropdown5.value
    region = region_dropdown5.value

    # an inverted selection would match no rows and silently render an empty
    # chart, so treat the two years as an unordered pair
    if start_year > end_year:
        start_year, end_year = end_year, start_year

    # column to plot and its display label; anything other than 'real_value'
    # falls back to total catch
    if measure_chart_5.value == 'real_value':
        measure, measure_title = 'real_value', 'Real Value'
    else:
        measure, measure_title = 'catch_sum', 'Total Catch'

    # rows for the selected region within the inclusive year range
    selected = df_rfmo['year'].between(start_year, end_year) & (df_rfmo['fishing_entity'] == region)

    # total of the measure per year / commercial group (groupby already returns a new frame)
    df5 = df_rfmo[selected].groupby(['year', 'fishing_entity', 'commercial_group'],
                                    as_index=False)[[measure]].sum()

    # Altair visualization based on above dataframe; the two measures share one
    # chart definition and differ only in the plotted column and the titles
    chart5 = alt.Chart(df5).mark_area().encode(
        x=alt.X('year:N',
                title='Year',
                axis=alt.Axis(titleFontSize=12, labelFontSize=12)),
        y=alt.Y('sum({})'.format(measure),
                title=measure_title,
                axis=alt.Axis(titleFontSize=12, labelFontSize=12)),
        color=alt.Color('commercial_group',
                        legend=alt.Legend(title='Commercial Group',
                                          titleFontSize=14,
                                          labelFontSize=14,
                                          labelColor='steelblue',
                                          titleColor='steelblue')),
        row="commercial_group:N",
        tooltip=['year', 'fishing_entity:N', 'commercial_group', measure]
    ).properties(
        width=600,
        height=70,
        title='{} by Commercial Group and Year'.format(measure_title)
    ).resolve_scale(
        # groups differ in magnitude, so each row gets its own y axis
        y='independent'
    ).configure_legend(
        orient='right'
    ).configure_title(
        fontSize=20,
        color='teal'
    ).configure_axis(
        titleColor='steelblue'
    )

    # return the chart based on measure selection by the user
    return chart5


### Total catch of Commercial groups by Year

In [13]:
# show the chart 1 filters and the chart for the current selections
# (the selections are read when the cell runs, so re-run it after changing a filter)
display_chart_1()

Dropdown(description='Start Year :', index=60, options=(1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, …

Dropdown(description='End Year :', index=65, options=(1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 19…

Dropdown(description='Region', index=8, options=('Belgium', 'Bulgaria', 'Croatia', 'Denmark', 'Faeroe Isl. (De…

### Total catch of Commercial groups by Year - for all Sector Types

In [14]:
# show the chart 2 filters and the chart for the current selections
# (the selections are read when the cell runs, so re-run it after changing a filter)
display_chart_2()

Dropdown(description='Start Year :', index=60, options=(1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, …

Dropdown(description='End Year :', index=65, options=(1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 19…

Dropdown(description='Region', index=8, options=('Belgium', 'Bulgaria', 'Croatia', 'Denmark', 'Faeroe Isl. (De…

### Distribution of Catch sum by Commercial groups

In [15]:
# show the chart 3 filters and the chart for the current selections
# (the selections are read when the cell runs, so re-run it after changing a filter)
display_chart_3()

Dropdown(description='Start Year :', index=60, options=(1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, …

Dropdown(description='End Year :', index=65, options=(1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 19…

Dropdown(description='Region', index=8, options=('Belgium', 'Bulgaria', 'Croatia', 'Denmark', 'Faeroe Isl. (De…

### Total Catch or Real Value by Commercial Group and Year

In [16]:
# show the chart 4 filters and the chart for the current selections
# (the selections are read when the cell runs, so re-run it after changing a filter)
display_chart_4()

Dropdown(description='Start Year :', index=50, options=(1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, …

Dropdown(description='End Year :', index=65, options=(1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 19…

Dropdown(description='Region', index=8, options=('Belgium', 'Bulgaria', 'Croatia', 'Denmark', 'Faeroe Isl. (De…

RadioButtons(description='Measure :', options=(('Total Catch', 'catch_sum'), ('Real Value', 'real_value')), va…

### Total Catch or Real Value by Year for every Commercial Group

In [17]:
# show the chart 5 filters and the chart for the current selections
# (the selections are read when the cell runs, so re-run it after changing a filter)
display_chart_5()

Dropdown(description='Start Year :', options=(1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960…

Dropdown(description='End Year :', index=65, options=(1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 19…

Dropdown(description='Region', index=8, options=('Belgium', 'Bulgaria', 'Croatia', 'Denmark', 'Faeroe Isl. (De…

RadioButtons(description='Measure :', options=(('Total Catch', 'catch_sum'), ('Real Value', 'real_value')), va…