# Creating a Multi-Panel Coordinated View with Altair

## Load Data

In [2]:
import pandas as pd

# Load the raw observation export (path is relative to the notebook's directory).
eod_data = pd.read_parquet('../data/0004691-251025141854904.parquet')

# Parse eventDate strictly as YYYY-MM-DD. Values that do not match become NaT
# (errors='coerce') and those rows are dropped on the next line.
eod_data['eventDate'] = pd.to_datetime(eod_data['eventDate'], format='%Y-%m-%d', errors='coerce')
eod_data = eod_data.dropna(subset=['eventDate'])

print(eod_data.shape)
# Keep the preview as the cell's last expression: a bare expression in the
# middle of a cell is evaluated but never rendered.
eod_data.head(n=1)

(30969425, 11)


## Create Base Chart

We have a lot of data (about 31 million rows), so we'll drill down into some key metrics for just a few species. We'll pick up to five species at random (with a fixed seed, so the choice is reproducible) and focus on the years 2015 and 2016.

In [3]:
import random

random.seed(200) # chose this after messing around, yields < 5000 rows

# restrict to the two years of interest
df = eod_data.loc[eod_data['eventDate'].dt.year.isin([2015, 2016])]

# random.choices samples WITH replacement, so the 5 draws may repeat a species.
# Drop repeats (keeping draw order) so `species` is a list of distinct names;
# later cells reuse sorted(species) as the domain of their colour / x scales.
# The set of selected species -- and therefore the filtered rows -- is unchanged.
species = list(dict.fromkeys(random.choices(df['species'].unique().tolist(), k=5)))

df = df[(df['species'].isin(species))]
print(df.shape) # important: need less than 5000 rows otherwise altair complains...
df.head(n=3)


(4978, 11)


Unnamed: 0,genus,species,countryCode,locality,stateProvince,individualCount,decimalLatitude,decimalLongitude,eventDate,recordedBy,checklist_id
1241,Elliotomyia,Elliotomyia chionogaster,BO,Yumao (Río Grande),Santa Cruz,,-19.100819,-63.57786,2015-06-03,obsr932635,29142.0
4322,Sporophila,Sporophila intermedia,CO,Laguna El Tabacal,Cundinamarca,,5.026326,-74.328316,2016-10-24,obsr519561,149441.0
7949,Diopsittaca,Diopsittaca nobilis,BR,Parque Municipal da Quineira,Mato Grosso,2.0,-15.468118,-55.748592,2016-08-14,obsr1311916,127972.0


We need to make a base chart. We'll make a scatter plot where x is the date (year-month) and y is the count of observations. We'll color by species.

In [4]:
import altair as alt
from IPython.display import display

# Helpers that collapse one (month, species) group into a tooltip-friendly string.
def _join_unique_days(dates):
    """Return the group's distinct dates as a sorted, comma-separated 'YYYY-MM-DD' string."""
    return ', '.join(sorted(dates.dt.strftime('%Y-%m-%d').unique()))


def _join_unique_countries(codes):
    """Return the group's distinct country codes as a sorted, comma-separated string."""
    return ', '.join(sorted(codes.unique()))


# One row per (month-end, species): the observation count plus the tooltip summaries.
monthly_groups = df.groupby([pd.Grouper(key='eventDate', freq='ME'), 'species'])
year_month_species = monthly_groups.agg(
    observations=('eventDate', 'size'),
    survey_dates=('eventDate', _join_unique_days),
    survey_countries=('countryCode', _join_unique_countries),
).reset_index(names=['eventDate', 'species'])
display(year_month_species.head(n=3))

# Encodings for the scatter plot, named individually so the chart definition stays short.
x_month = alt.X('yearmonth(eventDate):T', title='Year-Month').scale(type='time')
y_upper = year_month_species['observations'].max() + 10  # a little headroom above the tallest point
y_count = alt.Y('observations:Q', title='Count of Observations').scale(domain=[0, y_upper])
color_by_species = alt.Color('species:N', title='Species').scale(domain=sorted(species))
hover_fields = [
    'species:N',
    alt.Tooltip('observations:Q', title='Observations'),
    alt.Tooltip('survey_dates:N', title='Survey Dates'),
    alt.Tooltip('survey_countries:N', title='Survey Countries'),
]

# Base chart: monthly observation counts over time, one circle per species and month.
base = (
    alt.Chart(year_month_species)
    .mark_circle(size=60)
    .encode(x=x_month, y=y_count, color=color_by_species, tooltip=hover_fields)
    .properties(width=600, height=450, title='Observations over Time')
)

base

Unnamed: 0,eventDate,species,observations,survey_dates,survey_countries
0,2015-01-31,Aegolius harrisii,6,"2015-01-03, 2015-01-16, 2015-01-18, 2015-01-20",AR
1,2015-01-31,Diopsittaca nobilis,84,"2015-01-01, 2015-01-02, 2015-01-03, 2015-01-04...","BR, GY"
2,2015-01-31,Elliotomyia chionogaster,22,"2015-01-03, 2015-01-05, 2015-01-08, 2015-01-09...","AR, BO, PE"


We'll want a way to filter by species. We'll create a bar chart that shows the counts of observations per species. Selecting a species in this bar chart will filter the scatter plot to only show data for that species.


In [5]:
# Point selection keyed on the species field; it is attached to the bar chart below.
species_selection = alt.selection_point(fields=['species'])

species_order = sorted(species)
tallest_bar = df['species'].value_counts().max()

# Bars matching the selection keep their species colour; the rest fall back to light gray.
bar_color = (
    alt.when(species_selection)
    .then(alt.Color('species:N').scale(domain=species_order))
    .otherwise(alt.value('lightgray'))
)

# Bar chart of total observations per species, used as the species picker.
bar = (
    alt.Chart(df)
    .mark_bar()
    .encode(
        x=alt.X('species:N', title='Species').scale(domain=species_order),
        y=alt.Y('count():Q', title='Total Observations').scale(domain=[0, tallest_bar + 100]),
        color=bar_color,
        tooltip=[
            alt.Tooltip('species:N', title='Species'),
            alt.Tooltip('count():Q', title='Total Observations'),
        ],
    )
    .add_params(species_selection)
    .properties(width=300, height=400, title='Total Observations per Species')
)

bar


In [6]:
# Link the two views: the scatter plot keeps only the rows matching the species
# picked in the bar chart, and is placed to the right of it.
filtered_base = base.transform_filter(species_selection)
final_chart = bar | filtered_base

final_chart

We'll add one more chart to the view that also depends on the species selected in the bar chart.

We'll create a small map based on the coordinates of the observations for the selected species.

In [7]:
from vega_datasets import data

# see https://altair-viz.github.io/altair-tutorial/notebooks/09-Geographic-plots.html

# get world map data
world_country_data = alt.topo_feature(data.world_110m.url, 'countries')

# list of South American ISO 3166-1 numeric country codes
south_america_codes = [
    32, 68, 76, 152, 170, 218, 238, 254, 328, 600, 604, 740, 858, 862
]

# create map of South America layer (only the countries listed above are drawn)
south_america_map = alt.Chart(world_country_data).mark_geoshape(
    fill='lightgray',
    stroke='white'
).transform_filter(
    alt.FieldOneOfPredicate(field='id', oneOf=south_america_codes)
).properties(
    width=300,
    height=400
)

# create the observation points layer, without any selection filter yet
all_observation_points = alt.Chart(df).mark_circle().encode(
    longitude='decimalLongitude:Q',
    latitude='decimalLatitude:Q',
    color=alt.Color('species:N', title='Species').scale(domain=sorted(species)),
    tooltip=['species:N','eventDate:T','countryCode']
)

# Filtered variant for the coordinated view. species_selection is only attached
# (via add_params) to the bar chart, so a chart that filters on it renders
# correctly only when combined with `bar`. Displayed on its own it references a
# parameter the spec never defines -- most likely the JavaScript error that was
# seen when this cell showed the filtered map directly.
observation_points = all_observation_points.transform_filter(species_selection)

map_title = 'Observed Locations of Species'

# used by the combined view; needs `bar` alongside it to resolve the selection
observations_map = (south_america_map + observation_points).properties(
    title=map_title
)

# standalone preview: same layers, but unfiltered so it renders without the bar chart
(south_america_map + all_observation_points).properties(
    title=map_title
)

In [8]:
# Right-hand column: the species-filtered scatter plot stacked above the map.
detail_column = base.transform_filter(species_selection) & observations_map
# Full view: the bar chart (which carries the selection) on the left.
final_chart = bar | detail_column
final_chart