In [9]:
# Correcting the variables for the dataset
# Installing necessary packages
%pip install pandas
%pip install altair

# Importing libraries
import pandas as pd
import altair as alt

# Enabling Altair renderers for visualization
alt.renderers.enable('mimetype')


Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.
Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


RendererRegistry.enable('mimetype')

In [11]:
# Load the Tidy Tuesday bird-bath survey and normalise the column names used below.
# Reading and renaming in one expression keeps the cell idempotent on re-run.
url = 'https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-08-31/bird_baths.csv'
df = pd.read_csv(url).rename(columns={
    'bioregions': 'bioregion',
    'bird_type': 'bird_species',
    'bird_count': 'count'
})

# Bioregions that actually occur in the data (missing values excluded).
regions_with_data = df['bioregion'].dropna().unique()

# Total count per bioregion / urban-rural class / species.
# groupby drops rows whose key is NaN, so only regions with data remain and no
# additional chart-level filter is required.
df_aggregated = df.groupby(['bioregion', 'urban_rural', 'bird_species']).agg({'count': 'sum'}).reset_index()

# The CSV carries no coordinates, so the latitude/longitude channels previously
# pointed at columns that did not exist and the map drew nothing.
# NOTE(review): the (latitude, longitude) pairs below are rough, hand-estimated
# region centres used only to place the marks. Replace them with centroids
# computed from the official bioregion polygons before publishing.
bioregion_centroids = {
    'South Eastern Queensland': (-26.5, 152.5),
    'NSW North Coast': (-30.5, 152.5),
    'Sydney Basin': (-33.5, 150.8),
    'South Eastern Highlands': (-35.8, 148.6),
    'South East Coastal Plain': (-38.2, 145.6),
    'Brigalow Belt South': (-27.5, 149.5),
    'NSW South Western Slopes': (-34.3, 147.8),
    'Victorian Volcanic Plain': (-38.0, 143.2),
    'Victorian Midlands': (-37.0, 144.0),
    'Flinders Lofty Block': (-32.5, 138.7),
}
centroid_lat = {name: lat for name, (lat, lon) in bioregion_centroids.items()}
centroid_lon = {name: lon for name, (lat, lon) in bioregion_centroids.items()}
df_aggregated = df_aggregated.assign(
    latitude=df_aggregated['bioregion'].map(centroid_lat),
    longitude=df_aggregated['bioregion'].map(centroid_lon),
)

# Regions without a known centre cannot be positioned; report them instead of
# letting them vanish silently from the chart.
regions_without_centroid = sorted(set(df_aggregated['bioregion']) - set(bioregion_centroids))
if regions_without_centroid:
    print(f'No centroid available, omitted from the map: {regions_without_centroid}')
df_mappable = df_aggregated.dropna(subset=['latitude', 'longitude'])

# One circle per (bioregion, urban/rural, species) row, placed at the region centre.
# Marks for the same region share a position and therefore overlap.
map_with_data = alt.Chart(df_mappable).mark_circle().encode(
    latitude='latitude:Q',
    longitude='longitude:Q',
    color=alt.Color(
        'urban_rural:N',
        scale=alt.Scale(domain=['Urban', 'Rural'], range=['#1f77b4', '#ff7f0e']),
        legend=alt.Legend(title='Urban/Rural')
    ),
    # Domain and range need the same number of stops; the former three-point
    # domain paired with a two-point range left its last stop unused.
    size=alt.Size('count:Q', scale=alt.Scale(domain=[10, 100], range=[10, 50])),
    tooltip=['bioregion', 'urban_rural', 'bird_species', 'count']
).properties(
    width=800,
    height=500,
    title='Bird Bath Observations in Regions with Data'
)

map_with_data

<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


In [12]:

import pandas as pd
import altair as alt

# Read the dataset (adjust the file path if necessary) and normalise column names.
# Reading and renaming in one expression keeps the cell idempotent on re-run.
url = 'https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-08-31/bird_baths.csv'
df = pd.read_csv(url).rename(columns={
    'bioregions': 'bioregion',
    'bird_type': 'bird_species',
    'bird_count': 'count'
})

# Keep only rows that carry every field the charts rely on.
df_with_data = df.dropna(subset=['bioregion', 'bird_species', 'count'])

# Bioregions to display, each with a map position.
# The CSV carries no coordinates, so the latitude/longitude channels previously
# pointed at columns that did not exist and the map drew nothing.
# NOTE(review): the (latitude, longitude) pairs below are rough, hand-estimated
# region centres used only to place the marks. Replace them with centroids
# computed from the official bioregion polygons before publishing.
bioregion_centroids = {
    'South Eastern Queensland': (-26.5, 152.5),
    'NSW North Coast': (-30.5, 152.5),
    'Sydney Basin': (-33.5, 150.8),
    'South Eastern Highlands': (-35.8, 148.6),
    'South East Coastal Plain': (-38.2, 145.6),
    'Brigalow Belt South': (-27.5, 149.5),
    'NSW South Western Slopes': (-34.3, 147.8),
    'Victorian Volcanic Plain': (-38.0, 143.2),
    'Victorian Midlands': (-37.0, 144.0),
    'Flinders Lofty Block': (-32.5, 138.7),
}
regions_with_data = list(bioregion_centroids)

# Filter data for specified regions
df_filtered = df_with_data[df_with_data['bioregion'].isin(regions_with_data)]

# Aggregate data by bioregion, urban/rural classification, and bird species,
# then attach the region centre so the map channels have real columns to read.
df_aggregated = df_filtered.groupby(['bioregion', 'urban_rural', 'bird_species']).agg({'count': 'sum'}).reset_index()
centroid_lat = {name: lat for name, (lat, lon) in bioregion_centroids.items()}
centroid_lon = {name: lon for name, (lat, lon) in bioregion_centroids.items()}
df_aggregated = df_aggregated.assign(
    latitude=df_aggregated['bioregion'].map(centroid_lat),
    longitude=df_aggregated['bioregion'].map(centroid_lon),
)

# Shared pieces, defined once so the two charts cannot drift apart.
urban_rural_scale = alt.Scale(domain=['Urban', 'Rural'], range=['#1f77b4', '#ff7f0e'])
hover_fields = ['bioregion', 'urban_rural', 'bird_species', 'count']

# Map: one circle per aggregated row at its region centre. Marks belonging to
# the same region share a position and therefore overlap.
base_map_zoomed = alt.Chart(df_aggregated).mark_circle().encode(
    longitude='longitude:Q',
    latitude='latitude:Q',
    color=alt.Color(
        'urban_rural:N',
        scale=urban_rural_scale,
        legend=alt.Legend(title='Urban/Rural')
    ),
    # Domain and range need the same number of stops; the former three-point
    # domain paired with a two-point range left its last stop unused.
    size=alt.Size('count:Q', scale=alt.Scale(domain=[10, 100], range=[10, 50])),
    tooltip=hover_fields
).properties(
    width=800,
    height=500,
    title='Zoomed Bird Bath Observations Across Selected Australian Bioregions'
)

# Bar chart of the same aggregated counts; the legend is hidden because the map
# above already shows the Urban/Rural colour key.
bar_chart = alt.Chart(df_aggregated).mark_bar().encode(
    x=alt.X('bird_species:N', title='Bird Species'),
    y=alt.Y('count:Q', title='Number of Observations'),
    color=alt.Color(
        'urban_rural:N',
        scale=urban_rural_scale,
        legend=None
    ),
    tooltip=hover_fields
).properties(
    width=800,
    height=300,
    title='Bird Species Observations by Bioregion and Urban/Rural Classification'
)

# Stack the map above the bar chart and apply shared legend/title styling.
final_chart_zoomed = alt.vconcat(
    base_map_zoomed,
    bar_chart
).configure_legend(
    titleFontSize=15,
    labelFontSize=13
).configure_title(
    fontSize=20,
    fontWeight='bold'
)

final_chart_zoomed


<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


In [13]:
%pip install geopandas
import altair as alt
import geopandas as gpd
import pandas as pd

# Load the GeoJSON file containing Australia's bioregions
geojson_path = 'path_to_bioregions_geojson.geojson'  # Replace with actual path
bioregions_gdf = gpd.read_file(geojson_path)

# Filter for relevant bioregions
relevant_regions = [
    'South Eastern Queensland', 'NSW North Coast', 'Sydney Basin',
    'South Eastern Highlands', 'South East Coastal Plain',
    'Brigalow Belt South', 'NSW South Western Slopes',
    'Victorian Volcanic Plain', 'Victorian Midlands', 'Flinders Lofty Block'
]
filtered_gdf = bioregions_gdf[bioregions_gdf['bioregion_name'].isin(relevant_regions)]

# Convert to GeoJSON format for Altair
geojson_data = alt.Data(values=filtered_gdf.__geo_interface__['features'])

# Plot the map with Altair
bioregion_map = alt.Chart(geojson_data).mark_geoshape().encode(
    color=alt.Color('properties.bioregion_name:N', title='Bioregions'),
    tooltip=['properties.bioregion_name:N']
).properties(
    width=800,
    height=600,
    title='Selected Bioregions in Australia'
)

bioregion_map


Defaulting to user installation because normal site-packages is not writeable
Collecting geopandas
  Downloading geopandas-1.0.1-py3-none-any.whl (323 kB)
[K     |████████████████████████████████| 323 kB 6.9 MB/s eta 0:00:01
Collecting pyogrio>=0.7.2
  Downloading pyogrio-0.10.0-cp39-cp39-macosx_12_0_x86_64.whl (16.5 MB)
[K     |████████████████████████████████| 16.5 MB 1.6 MB/s eta 0:00:011
Collecting pyproj>=3.3.0
  Downloading pyproj-3.6.1-cp39-cp39-macosx_10_9_x86_64.whl (6.1 MB)
[K     |████████████████████████████████| 6.1 MB 40.7 MB/s eta 0:00:01
[?25hCollecting shapely>=2.0.0
  Downloading shapely-2.0.6-cp39-cp39-macosx_10_9_x86_64.whl (1.5 MB)
[K     |████████████████████████████████| 1.5 MB 12.6 MB/s eta 0:00:01
Collecting certifi
  Using cached certifi-2024.8.30-py3-none-any.whl (167 kB)
Installing collected packages: certifi, shapely, pyproj, pyogrio, geopandas
Successfully installed certifi-2024.8.30 geopandas-1.0.1 pyogrio-0.10.0 pyproj-3.6.1 shapely-2.0.6
You should

DataSourceError: path_to_bioregions_geojson.geojson: No such file or directory