# Geographic Hotspot Identification for Migration

## Description
This project will explore how visualization and spatial analytics can reveal geographic “hotspots” of migration risk using open data. The core idea is to combine multiple socioeconomic and environmental indicators — such as poverty, unemployment, education levels, remittances, and climate risk — into a composite migration-risk score for each region. The focus will be Latin America and the Caribbean.

In [None]:
# imports
import altair as alt
alt.data_transformers.disable_max_rows()
import polars as pl
from pathlib import Path
from vega_datasets import data
import geopandas as gpd


In [None]:
import os, sys
sys.path.insert(0, os.path.abspath(".."))  # add the parent directory to the import path

from Data.regions import (
    african_states,
    asia_pacific,
    eastern_europe,
    latin_america_caribbean,
    western_europe,
    north_america, aggregates, central_america, caribbean, south_america
)
from src.theme import lac_theme

In [None]:
# Load the migrant-stock CSV from the sibling Data directory and preview it.
migrants_stock = pl.read_csv(Path("../Data", "migrants_stock_undesa.csv"))
migrants_stock

In [None]:
# The source table is in wide format (one column per reference year); convert
# it to long format with a `year` column and a `migrants` value column.
# NOTE(review): newer polars releases rename `melt` to `unpivot` — confirm
# against the installed version before switching.
migrants_long = migrants_stock.melt(
    id_vars=[
        "index", "destination", "coverage", "type",
        "destination_code", "origin", "origin_code", "gender",
    ],
    value_vars=["1990", "1995", "2000", "2005", "2010", "2015", "2020", "2024"],
    variable_name="year",
    value_name="migrants",
)

# Text form of the migrant counts with placeholders and separators stripped.
migrants_text = (
    pl.col("migrants")
    .cast(pl.Utf8)                               # ensure string ops are valid
    # turn common placeholders into nulls
    .replace({"..": None, "—": None, "-": None, "": None, " ": ""})
    # remove commas/spaces/other characters that are neither digits nor dots
    .str.replace_all(r"[^\d.]", "")
)

migrants_long = migrants_long.with_columns([
    # year → integer
    pl.col("year").cast(pl.Int32),

    # Empty string after cleaning → null, done with a native expression rather
    # than a per-row Python callback. Nulls fall through `otherwise` unchanged.
    # strict=False turns anything that still fails to parse into null.
    pl.when(migrants_text == "")
    .then(None)
    .otherwise(migrants_text)
    .cast(pl.Int64, strict=False)
    .alias("migrants"),

    # clean country origin and destination names (drop asterisk markers)
    pl.col("origin").str.replace_all(r"\*", ""),
    pl.col("destination").str.replace_all(r"\*", ""),
])

migrants_long


In [None]:
# We want to be able to have regions as well as countries in our analysis.
# Each entry pairs a membership list with the label it produces. Order matters:
# the first list that contains the country wins.
_REGION_GROUPS = [
    (african_states, "Africa"),
    (asia_pacific, "Asia-Pacific"),
    (eastern_europe, "Eastern Europe"),
    (latin_america_caribbean, "Latin American and Caribbean"),
    (western_europe, "Western Europe"),
    (north_america, "North America"),
]
_SUBREGION_GROUPS = [
    (central_america, "Central America"),
    (caribbean, "Caribbean"),
    (south_america, "South America"),
]


def _label_by_membership(column, groups):
    """Build a when/then chain that labels `column` by list membership.

    Args:
        column: name of the column holding country names.
        groups: ordered (members, label) pairs; `groups` must not be empty.

    Returns:
        A polars expression yielding the label of the first matching group,
        or the original value of `column` when no group matches.
    """
    chain = None
    for members, label in groups:
        is_member = pl.col(column).is_in(members)
        branch = pl.when(is_member) if chain is None else chain.when(is_member)
        chain = branch.then(pl.lit(label))
    return chain.otherwise(pl.col(column))


migrants_long = migrants_long.with_columns([
    _label_by_membership("origin", _REGION_GROUPS).alias("origin_region"),
    _label_by_membership("origin", _SUBREGION_GROUPS).alias("origin_subregion"),
    _label_by_membership("destination", _REGION_GROUPS).alias("destination_region"),
    _label_by_membership("destination", _SUBREGION_GROUPS).alias("destination_subregion"),
])

# Drop rows whose origin or destination is listed in `aggregates`.
migrants_long = migrants_long.filter(
    ~pl.col("origin").is_in(aggregates)
    & ~pl.col("destination").is_in(aggregates)
)

# Rescale counts to millions, overwriting `migrants` in place.
# NOTE: re-running this cell on an already-scaled table divides a second time.
migrants_long = migrants_long.with_columns(
    (pl.col("migrants") / 1_000_000).alias("migrants")
)

In [None]:
# List the origins that matched none of the region lists.
# The region assignment falls back to the origin name itself when nothing
# matches, so `origin_region` is never null for a named origin. An unmapped
# origin is therefore one whose region label equals its own name (or is null).
unmapped_origins = (
    migrants_long
    .filter(
        pl.col("origin_region").is_null()
        | (pl.col("origin_region") == pl.col("origin"))
    )
    .select("origin")                                # keep only origin column
    .unique()                                        # no duplicates
    .sort("origin")
)

unmapped_list = unmapped_origins["origin"].to_list()
for country in unmapped_list:
    print("-", country)

print(f"\nTotal unmapped: {len(unmapped_list)}")

In [None]:
# Display the current contents of the long-format table.
migrants_long

In [None]:
def migration_from(df, save_path=None):
    """Bar chart of summed migrants per year, coloured by origin region.

    Args:
        df: table with `year`, `migrants` and `origin_region` columns.
        save_path: optional output file; when given, its parent directory is
            created if needed and the chart is written there.

    Returns:
        The Altair chart.
    """
    encodings = {
        "x": alt.X("year:N", title="Year"),
        "y": alt.Y("sum(migrants):Q", title="Total Migrants (Millions)"),
        "color": alt.Color("origin_region:N", title="Region of Origin"),
    }
    chart = (
        alt.Chart(df)
        .mark_bar()
        .encode(**encodings)
        .properties(title=alt.TitleParams(text="Migration by Origin Region"))
    )

    if save_path is None:
        return chart

    out_dir = Path(save_path).parent
    os.makedirs(out_dir, exist_ok=True)
    chart.save(save_path)
    print(f"Chart saved to: {save_path}")
    return chart


In [None]:
def migration_to(df, save_path=None):
    """Bar chart of summed migrants per year, coloured by destination region.

    Args:
        df: table with `year`, `migrants` and `destination_region` columns.
        save_path: optional output file; when given, its parent directory is
            created if needed and the chart is written there.

    Returns:
        The Altair chart.
    """
    bars = alt.Chart(df).mark_bar().encode(
        x=alt.X("year:N", title="Year"),
        # Same quantity as in `migration_from`, so the axis carries the same
        # title; the two charts are displayed side by side.
        y=alt.Y("sum(migrants):Q", title="Total Migrants (Millions)"),
        color=alt.Color("destination_region:N", title="Destination Region")
    )

    chart = bars.properties(
        title=alt.TitleParams(text="Migration by Destination Region")
    )

    if save_path is not None:
        os.makedirs(Path(save_path).parent, exist_ok=True)
        chart.save(save_path)
        print(f"Chart saved to: {save_path}")

    return chart


In [None]:
def combined_migration(df, save_path="../milestones/combined_LAC_flows.png"):
    """Place the destination-region and origin-region bar charts side by side.

    Args:
        df: table accepted by `migration_from` and `migration_to`.
        save_path: output file for the combined chart. Pass None to skip
            saving, as the other chart helpers in this notebook allow.

    Returns:
        The horizontally concatenated Altair chart with a shared y scale.
    """
    chart_from = migration_from(df)
    chart_to = migration_to(df)

    combined = (chart_to | chart_from).resolve_scale(y='shared')

    # Guard the export so that save_path=None no longer fails in Path(None).
    if save_path is not None:
        os.makedirs(Path(save_path).parent, exist_ok=True)
        combined.save(save_path)
        print(f"Combined chart saved to: {save_path}")

    return combined


# Run it
combined_migration(migrants_long, save_path="../milestones/chart_01_bars.png")

In [None]:
def migration_from_LAC(df, save_path=None):
    """Bar chart of migrants originating in LAC, coloured by origin sub-region.

    Args:
        df: table with `origin_region`, `origin_subregion`, `year` and
            `migrants` columns.
        save_path: optional output file; when given, its parent directory is
            created if needed and the chart is written there.

    Returns:
        The Altair chart.
    """
    lac_rows = df.filter(pl.col("origin_region") == "Latin American and Caribbean")

    encodings = {
        "x": alt.X("year:N", title="Year"),
        "y": alt.Y("sum(migrants):Q", title="Total Migrants (Millions)"),
        "color": alt.Color("origin_subregion:N", title="Origin Sub-Region"),
    }
    chart = (
        alt.Chart(lac_rows)
        .mark_bar()
        .encode(**encodings)
        .properties(title=alt.TitleParams(text="Migration from LAC by Sub-Region"))
    )

    if save_path is None:
        return chart

    out_dir = Path(save_path).parent
    os.makedirs(out_dir, exist_ok=True)
    chart.save(save_path)
    print(f"Chart saved to: {save_path}")
    return chart

migration_from_LAC(migrants_long, save_path="../milestones/chart_01_bars_LAC.png")

In [None]:
def migration_from_LAC(df, save_path=None):
    """Bar chart of migrants originating in LAC, coloured by gender.

    Note: this reuses the name of the sub-region variant defined in an earlier
    cell, so once this cell runs `migration_from_LAC` refers to this version.

    Args:
        df: table with `origin_region`, `gender`, `year` and `migrants`
            columns.
        save_path: optional output file; when given, its parent directory is
            created if needed and the chart is written there.

    Returns:
        The Altair chart.
    """
    lac_rows = df.filter(pl.col("origin_region") == "Latin American and Caribbean")

    year_axis = alt.X("year:N", title="Year")
    total_axis = alt.Y("sum(migrants):Q", title="Total Migrants (Millions)")
    gender_color = alt.Color("gender:N", title="Gender")

    chart = (
        alt.Chart(lac_rows)
        .mark_bar()
        .encode(x=year_axis, y=total_axis, color=gender_color)
        .properties(title=alt.TitleParams(text="Migration from LAC by Gender"))
    )

    if save_path is None:
        return chart

    os.makedirs(Path(save_path).parent, exist_ok=True)
    chart.save(save_path)
    print(f"Chart saved to: {save_path}")
    return chart

migration_from_LAC(migrants_long, save_path="../milestones/chart_01_bars_LAC_gender.png")

In [None]:
def migration_faceted_areas(df, save_path=None):
    """Faceted area charts of migrant stock to, within and from LAC.

    The table is split into three flows based on whether the origin and
    destination regions are Latin America and the Caribbean. Each flow becomes
    one facet, coloured by the counterpart region.

    Args:
        df: table with `origin_region`, `destination_region`, `year` and
            `migrants` columns.
        save_path: optional output file; when given, its parent directory is
            created if needed and the chart is written there.

    Returns:
        The faceted Altair chart.
    """
    lac = "Latin American and Caribbean"

    # Facet labels are defined once and reused for both the `flow_type` values
    # and the facet sort order, so the two cannot drift apart. Previously the
    # sort list named "Emigrants to LAC", which matched no facet.
    to_label = "Migrants to LAC"
    within_label = "Migrants within LAC"
    from_label = "Migrants from LAC"

    # Migrants TO LAC (color by origin)
    to_LAC = (
        df.filter(
            (pl.col("destination_region") == lac) &
            (pl.col("origin_region") != lac)
        )
        .with_columns([
            pl.lit(to_label).alias("flow_type"),
            pl.col("origin_region").alias("region_color")
        ])
    )

    # Migrants WITHIN LAC (single color)
    within_LAC = (
        df.filter(
            (pl.col("destination_region") == lac) &
            (pl.col("origin_region") == lac)
        )
        .with_columns([
            pl.lit(within_label).alias("flow_type"),
            pl.lit("Latin American and the Caribbean").alias("region_color")
        ])
    )

    # Migrants FROM LAC (color by destination)
    from_LAC = (
        df.filter(
            (pl.col("origin_region") == lac) &
            (pl.col("destination_region") != lac)
        )
        .with_columns([
            pl.lit(from_label).alias("flow_type"),
            pl.col("destination_region").alias("region_color")
        ])
    )

    all_flows = pl.concat([to_LAC, within_LAC, from_LAC])

    facet_order = [to_label, within_label, from_label]

    # Chart
    chart = (
        alt.Chart(all_flows)
        .mark_area(opacity=1)
        .encode(
            x=alt.X("year:O", title="Year"),
            y=alt.Y("sum(migrants):Q", title="Migrant stock (millions)"),
            color=alt.Color(
                "region_color:N",
                title="Region",
                scale=alt.Scale(scheme="tableau20")
            ),
            facet=alt.Facet(
                "flow_type:N",
                columns=3,
                sort=facet_order,
                title=None
            )
        )
        .properties(
            title=alt.TitleParams(
                text="Migration Flows Involving Latin America and the Caribbean"))
    )

    if save_path is not None:
        os.makedirs(Path(save_path).parent, exist_ok=True)
        chart.save(save_path)
        print(f"Chart saved to: {save_path}")

    return chart

# Run it
migration_faceted_areas(migrants_long, save_path="../milestones/chart_02_areas_LAC.png" )


In [None]:
import altair as alt
import polars as pl

def migration_heatmap_global(df, year=2024, save_path=None):
    """Heatmap of migrant stock between origin and destination regions.

    Rows for `year` are summed per (origin_region, destination_region) pair.
    The summed `migrants` value is copied unchanged into `migrants_millions`,
    so the input is expected to be in millions already.

    Args:
        df: polars table with `year`, `origin_region`, `destination_region`
            and `migrants` columns.
        year: reference year to plot.
        save_path: optional output file; when given, its parent directory is
            created if needed and the chart is written there.

    Returns:
        The layered Altair chart (coloured cells plus value labels).
    """
    region_order = [
        "Africa",
        "Asia-Pacific",
        "Eastern Europe",
        "Latin American and Caribbean",
        "North America",
        "Western Europe",
    ]

    # One row per origin/destination pair for the selected year.
    selected = df.filter(pl.col("year") == year)
    pair_totals = selected.group_by(["origin_region", "destination_region"]).agg(
        pl.col("migrants").sum().alias("migrants")
    )
    flows = pair_totals.with_columns(
        pl.col("migrants").alias("migrants_millions")
    ).to_pandas()

    # Axes shared by the cell layer and the label layer.
    font_sizes = {"labelFontSize": 12, "titleFontSize": 13}
    origin_axis = alt.X(
        "origin_region:N",
        title="Origin Region",
        sort=region_order,
        axis=alt.Axis(labelAngle=-45, **font_sizes),
    )
    destination_axis = alt.Y(
        "destination_region:N",
        title="Destination Region",
        sort=region_order,
        axis=alt.Axis(**font_sizes),
    )
    base = alt.Chart(flows).encode(x=origin_axis, y=destination_axis)

    # Coloured cells; the colour domain runs from zero to the largest cell.
    cell_color = alt.Color(
        "migrants_millions:Q",
        title="Migrants (millions)",
        scale=alt.Scale(
            scheme="blues",
            domain=[0, flows["migrants_millions"].max()],
        ),
    )
    cell_tooltip = [
        alt.Tooltip("origin_region:N", title="Origin"),
        alt.Tooltip("destination_region:N", title="Destination"),
        alt.Tooltip("migrants_millions:Q", format=".2f", title="Migrants (M)"),
    ]
    heatmap = base.mark_rect(stroke="white", strokeWidth=0.5).encode(
        color=cell_color,
        tooltip=cell_tooltip,
    )

    # Value labels, shown only on cells above 0.3.
    text = (
        base.mark_text(fontSize=10, fontWeight="bold", color="#08306B")
        .encode(text=alt.Text("migrants_millions:Q", format=".1f"))
        .transform_filter("datum.migrants_millions > 0.3")
    )

    heading = alt.TitleParams(
        text=f"Global Migration Flows by Region ({year})",
        subtitle="Each cell shows total migrant stock (millions) between origin and destination regions",
    )
    chart = (heatmap + text).properties(width=480, height=480, title=heading)

    if save_path is None:
        return chart

    os.makedirs(Path(save_path).parent, exist_ok=True)
    chart.save(save_path)
    print(f"Chart saved to: {save_path}")
    return chart


# Run it
migration_heatmap_global(migrants_long, year=2024, save_path="../milestones/chart_03_heatmap_flowa.png" )


In [None]:
def origin_dot_timeline(df,top_n: int = 10, save_path=None):
    """Dot timeline of migrant stock for the largest LAC origin countries.

    Origins are ranked by their summed `migrants` in the last reference year
    (2024). The `top_n` largest are plotted with one point per reference year;
    the last year is drawn as a square, earlier years as circles.

    Args:
        df: polars table with `origin_region`, `origin`, `year` and `migrants`
            columns.
        top_n: number of origin countries to keep.
        save_path: optional output file; when given, its parent directory is
            created if needed and the chart is written there.

    Returns:
        The Altair chart.
    """
    years = [1990, 1995, 2000, 2005, 2010, 2015, 2020, 2024]
    latest = years[-1]

    # Total per origin country and year, restricted to LAC origins.
    lac_only = df.filter(pl.col("origin_region") == "Latin American and Caribbean")
    totals = lac_only.group_by(["origin", "year"]).agg(
        pl.col("migrants").sum().alias("migrants")
    )

    # Rank origins by the latest year and keep the leading ones.
    ranked_latest = totals.filter(pl.col("year") == latest).sort(
        "migrants", descending=True
    )
    top_origins = ranked_latest.head(top_n)["origin"].to_list()

    # All years for the retained origins.
    plot_df = totals.filter(pl.col("origin").is_in(top_origins)).sort(
        ["origin", "year"]
    )

    # Every year but the last is a circle; the last is a square.
    shapes = ["circle" for _ in years[:-1]] + ["square"]

    encodings = {
        "y": alt.Y("origin:N", sort=top_origins[::-1], title="Origin"),
        "x": alt.X(
            "migrants:Q",
            title="Migrants (millions)",
            axis=alt.Axis(format=".0f"),
        ),
        "color": alt.Color(
            "year:N",
            title="Year",
            scale=alt.Scale(domain=years, scheme="blues"),
            legend=alt.Legend(direction="vertical"),
        ),
        "shape": alt.Shape(
            "year:N",
            scale=alt.Scale(domain=years, range=shapes),
            legend=None,
        ),
        "tooltip": [
            alt.Tooltip("origin:N", title="Origin"),
            alt.Tooltip("year:N", title="Year"),
            alt.Tooltip("migrants:Q", title="Migrants (M)", format=".2f"),
        ],
    }
    heading = alt.TitleParams(
        text=f"Top {top_n} Migrant Countries from Latin America and the Caribbean",
        subtitle="Each point represents migrant stock (millions) from origin countries in LAC to all destinations",
    )
    chart = (
        alt.Chart(plot_df)
        .mark_point(filled=True, size=100)
        .encode(**encodings)
        .properties(width=480, height=480, title=heading)
    )

    if save_path is None:
        return chart

    os.makedirs(Path(save_path).parent, exist_ok=True)
    chart.save(save_path)
    print(f"Chart saved to: {save_path}")
    return chart

origin_dot_timeline(migrants_long, top_n=15, save_path="../milestones/chart_04_dot_timeline_LAC.png")


In [None]:
def destination_dot_timeline(df,top_n: int = 10, save_path=None):
    """Dot timeline of LAC-origin migrant stock for the largest destinations.

    Destinations are ranked by the summed `migrants` of LAC origins in the
    last reference year (2024). The `top_n` largest are plotted with one point
    per reference year; the last year is drawn as a square, earlier years as
    circles.

    Args:
        df: polars table with `origin_region`, `destination`, `year` and
            `migrants` columns.
        top_n: number of destination countries to keep.
        save_path: optional output file; when given, its parent directory is
            created if needed and the chart is written there.

    Returns:
        The Altair chart.
    """
    # 1) Total per destination and year, restricted to LAC origins
    df = (df.filter((pl.col("origin_region") == "Latin American and Caribbean"))
        .group_by(["destination", "year"])
        .agg(pl.col("migrants").sum().alias("migrants"))
    )

    years = [1990, 1995, 2000, 2005, 2010, 2015, 2020, 2024]

    # 2) Rank destinations by the latest reference year
    latest = years[-1]
    top_destinations = (
        df.filter(pl.col("year") == latest)
        .sort("migrants", descending=True)
        .head(top_n)
        .select("destination")
        .to_series()
        .to_list()
    )

    # 3) Filter only those top destinations across all years
    plot_df = (
        df.filter(pl.col("destination").is_in(top_destinations))
        .sort(["destination", "year"])
    )

    # 4) Build the dot-timeline
    domain_years = years
    shapes = ["circle"] * (len(domain_years) - 1) + ["square"]

    chart = (
        alt.Chart(plot_df)
        .mark_point(filled=True, size=100)
        .encode(
            y=alt.Y("destination:N",
                sort=top_destinations[::-1],
                title="Destination"),

            x=alt.X("migrants:Q",
                title="Migrants (millions)",
                axis=alt.Axis(format=".0f")
            ),
            color=alt.Color(
                "year:N",
                title="Year",
                scale=alt.Scale(domain=domain_years, scheme="blues"),
                legend=alt.Legend(direction="vertical")
            ),
            shape=alt.Shape(
                "year:N",
                scale=alt.Scale(domain=domain_years, range=shapes),
                legend=None
            ),
            # Same tooltip layout as the origin timeline, so both charts
            # expose exact values on hover.
            tooltip=[
                alt.Tooltip("destination:N", title="Destination"),
                alt.Tooltip("year:N", title="Year"),
                alt.Tooltip("migrants:Q", title="Migrants (M)", format=".2f"),
            ]
        )
        .properties(
            width=480,
            height=480,
            title=alt.TitleParams(
            text=f"Top {top_n} Destination Countries from Latin America and the Caribbean",
            subtitle="Each point represents migrant stock (millions) from countries in LAC"))
    )
    if save_path is not None:
        os.makedirs(Path(save_path).parent, exist_ok=True)
        chart.save(save_path)
        print(f"Chart saved to: {save_path}")

    return chart

destination_dot_timeline(migrants_long, top_n=15, save_path="../milestones/chart_04_dot_timeline_LAC_destinations.png")

In [None]:
def migration_from_LAC_normalized(df, save_path=None):
    """Normalized bar chart of LAC-origin migrants split by origin sub-region.

    Each year's bar is normalized, so segments show shares rather than counts.

    Args:
        df: table with `origin_region`, `origin_subregion`, `year` and
            `migrants` columns.
        save_path: optional output file; when given, its parent directory is
            created if needed and the chart is written there.

    Returns:
        The Altair chart.
    """
    lac_rows = df.filter(pl.col("origin_region") == "Latin American and Caribbean")

    year_axis = alt.X("year:N", title="Year")
    share_axis = alt.Y("sum(migrants):Q", stack="normalize", title="Share of Migrants")
    subregion_color = alt.Color("origin_subregion:N", title="Region of Origin")
    heading = alt.TitleParams(text="Migration by Origin Subregion (Normalized)")

    bars = alt.Chart(lac_rows).mark_bar().encode(
        x=year_axis, y=share_axis, color=subregion_color
    )
    chart = bars.properties(title=heading)

    if save_path is None:
        return chart

    os.makedirs(Path(save_path).parent, exist_ok=True)
    chart.save(save_path)
    print(f"Chart saved to: {save_path}")
    return chart

# Example usage
migration_from_LAC_normalized(migrants_long)

In [None]:
def migration_from_LAC_normalized(df, save_path=None):
    """Normalized bar chart of LAC-origin migrants split by origin country.

    Note: this reuses the name of the sub-region variant defined in an earlier
    cell, so once this cell runs the name refers to this per-country version.

    Args:
        df: table with `origin_region`, `origin`, `year` and `migrants`
            columns.
        save_path: optional output file; when given, its parent directory is
            created if needed and the chart is written there.

    Returns:
        The Altair chart.
    """
    lac_rows = df.filter(pl.col("origin_region") == "Latin American and Caribbean")

    encodings = {
        "x": alt.X("year:N", title="Year"),
        "y": alt.Y("sum(migrants):Q", stack="normalize", title="Share of Migrants"),
        "color": alt.Color("origin:N", title="Country of Origin"),
    }
    bars = alt.Chart(lac_rows).mark_bar().encode(**encodings)
    chart = bars.properties(
        title=alt.TitleParams(text="Migration by Origin Country (Normalized)")
    )

    if save_path is None:
        return chart

    os.makedirs(Path(save_path).parent, exist_ok=True)
    chart.save(save_path)
    print(f"Chart saved to: {save_path}")
    return chart

# Example usage
migration_from_LAC_normalized(migrants_long)

In [None]:
def migration_from_LAC_normalized(df, subregion=None, save_path=None, top_n=6, year=2024):
    """Normalized bar chart of LAC-origin migrants for the leading countries.

    The `top_n` origin countries, ranked by summed `migrants` in `year`, keep
    their own colour; every other origin is grouped under "Others". Each
    year's bar is normalized, so segments show shares rather than counts.

    Note: this reuses the name of the simpler variants defined in earlier
    cells, so once this cell runs the name refers to this version.

    Args:
        df: polars table with `origin_region`, `origin`, `year` and `migrants`
            columns, plus `origin_subregion` when `subregion` is given.
        subregion: optional `origin_subregion` value to restrict the chart to.
            A falsy value means all of LAC.
        save_path: optional output file; when given, its parent directory is
            created if needed and the chart is written there.
        top_n: number of origin countries shown individually.
        year: reference year used to rank origin countries.

    Returns:
        The Altair chart.

    Raises:
        ValueError: if a required column is missing from `df`.
    """
    # Ensure expected columns exist, including the one the subregion filter
    # reads, so a missing column fails here with a clear message.
    expected_cols = {"origin_region", "origin", "year", "migrants"}
    if subregion:
        expected_cols.add("origin_subregion")
    missing = expected_cols - set(df.columns)
    if missing:
        raise ValueError(f"Missing required columns: {missing}")

    # Filter for Latin American and Caribbean origins
    df = df.filter(pl.col("origin_region") == "Latin American and Caribbean")

    if subregion:
        df = df.filter(pl.col("origin_subregion") == subregion)

    # Largest origins in the ranking year
    top_origins = (
        df.filter(pl.col("year") == year)
          .group_by("origin")
          .agg(pl.col("migrants").sum().alias("total_migrants"))
          .sort("total_migrants", descending=True)
          .head(top_n)
    )["origin"].to_list()

    df = df.with_columns([
        pl.when(pl.col("origin").is_in(top_origins))
          .then(pl.col("origin"))
          .otherwise(pl.lit("Others"))
          .alias("origin_grouped")
    ])

    if subregion:
        title_text = f"Migration by Country (Normalized, {subregion})"
    else:
        title_text = "Migration by Country (Normalized, LAC)"

    chart = (
        alt.Chart(df.to_pandas())
        .mark_bar()
        .encode(
            x=alt.X("year:N", title="Year"),
            y=alt.Y("sum(migrants):Q", stack="normalize", title="Share of Migrants"),
            color=alt.Color("origin_grouped:N", title="Country of Origin"),
            tooltip=[
                alt.Tooltip("year:N", title="Year"),
                alt.Tooltip("origin_grouped:N", title="Country"),
                # `migrants` is rescaled to millions earlier in the notebook,
                # so a zero-decimal format would round most values to 0 or 1.
                alt.Tooltip("sum(migrants):Q", title="Migrants (M)", format=".2f")
            ]
        )
        .properties(title=alt.TitleParams(text=title_text))
    )

    if save_path is not None:
        os.makedirs(Path(save_path).parent, exist_ok=True)
        chart.save(save_path)
        print(f"Chart saved to: {save_path}")

    return chart

# Example usage:
migration_from_LAC_normalized(migrants_long)


In [None]:
def combined_migration_subregions(df, save_path=None):
    """Show the normalized per-country charts of the three sub-regions in a row.

    Args:
        df: table accepted by `migration_from_LAC_normalized`.
        save_path: optional output file; when given, its parent directory is
            created if needed and the combined chart is written there.

    Returns:
        The horizontally concatenated Altair chart with a shared y scale.
    """
    subregion_names = ("Central America", "Caribbean", "South America")
    central, caribbean_panel, south = [
        migration_from_LAC_normalized(df, subregion=name)
        for name in subregion_names
    ]

    combined = (central | caribbean_panel | south).resolve_scale(y='shared')

    if save_path is None:
        return combined

    # Save as png
    os.makedirs(Path(save_path).parent, exist_ok=True)
    combined.save(save_path)
    print(f"Combined chart saved to: {save_path}")
    return combined


# Run it
combined_migration_subregions(migrants_long)

In [None]:
# Export one normalized per-country chart for each LAC sub-region to ../milestones.
migration_from_LAC_normalized(migrants_long, subregion="Central America", save_path="../milestones/chart_01_bars_LAC_CentralAmerica.png")
migration_from_LAC_normalized(migrants_long, subregion="Caribbean", save_path="../milestones/chart_01_bars_LAC_Caribbean.png")
migration_from_LAC_normalized(migrants_long, subregion="South America", save_path="../milestones/chart_01_bars_LAC_SouthAmerica.png")

In [None]:
# Read the country boundaries and preview the full attribute table.
gdf_ne = gpd.read_file(Path("../Data/un_shapefiles.zip"))  # zipped shapefile
gdf_ne

In [None]:
# Reload the shapefile and keep only the columns used downstream, so this
# cell always starts from an unmerged table.
gdf_ne = gpd.read_file(Path("../Data/un_shapefiles.zip"))[
    ["NAME", "CONTINENT", "POP_EST", "geometry", "SOV_A3"]
]

# Migration totals for 2024, one row per origin country.
migrants_long_2024 = migrants_long.filter(pl.col("year") == 2024)

_gender_lower = pl.col("gender").str.to_lowercase()
_migrants_col = pl.col("migrants")

migrants_by_country = migrants_long_2024.group_by(
    ["origin", "origin_region", "origin_subregion"]
).agg([
    _migrants_col.sum().alias("total_migrants_2024"),
    # Rows of the other gender contribute 0 to each gender-specific sum.
    pl.when(_gender_lower == "male")
    .then(_migrants_col)
    .otherwise(0)
    .sum()
    .alias("male_migrants_2024"),
    pl.when(_gender_lower == "female")
    .then(_migrants_col)
    .otherwise(0)
    .sum()
    .alias("female_migrants_2024"),
])

migrants_pd = migrants_by_country.to_pandas()

# Outer merge keeps countries that appear on only one side, so mismatched
# names can be inspected afterwards.
gdf_ne = gdf_ne.merge(
    migrants_pd,
    how="outer",
    left_on="NAME",
    right_on="origin",
)


In [None]:
# Shapefile rows that found no partner in the migration table (`origin` is empty).
missing_in_migrants = gdf_ne.loc[
    gdf_ne["origin"].isna(), ["NAME", "CONTINENT", "POP_EST"]
]
print("Countries missing in migration data:", len(missing_in_migrants))
display(missing_in_migrants.head(15))

# Migration rows that found no partner in the shapefile (`NAME` is empty).
missing_in_shapefile = gdf_ne.loc[
    gdf_ne["NAME"].isna(), ["origin", "origin_region", "origin_subregion"]
]
print("Countries missing in shapefile:", len(missing_in_shapefile))
display(missing_in_shapefile.head(15))


In [None]:

# Display the merged shapefile/migration table.
gdf_ne

In [None]:
# Keep the rows labelled as Latin America and the Caribbean and draw their shapes.
gdf_sel = gdf_ne[gdf_ne["origin_region"] == "Latin American and Caribbean"]
alt.Chart(gdf_sel).mark_geoshape()

In [None]:
# Keep every country whose CONTINENT is North or South America and draw their shapes.
gdf_sel = gdf_ne[gdf_ne["CONTINENT"].isin(["North America", "South America"])]
alt.Chart(gdf_sel).mark_geoshape()

In [None]:
# Compute each country's centroid in EPSG:3857, then convert the points back
# to EPSG:4326. The points replace the polygons as the geometry of a copy.
centroid_points = gdf_sel.geometry.to_crs(epsg=3857).centroid.to_crs(epsg=4326)
gdf_centroid = gpd.GeoDataFrame(data=gdf_sel.copy(), geometry=centroid_points)

alt.Chart(gdf_centroid).mark_geoshape()

In [None]:
# Expose the centroid coordinates as plain columns for the circle marks.
centroid_geometry = gdf_centroid.geometry
gdf_centroid["lon"], gdf_centroid["lat"] = centroid_geometry.x, centroid_geometry.y

# One circle per country: size by 2024 total, colour by origin sub-region.
circle_encodings = {
    "longitude": "lon:Q",
    "latitude": "lat:Q",
    "size": "total_migrants_2024:Q",
    "color": "origin_subregion:N",
}
alt.Chart(gdf_centroid).mark_circle().encode(**circle_encodings)

In [None]:
# Grey country outlines as the background layer.
basemap = alt.Chart(gdf_sel).mark_geoshape(
    fill='lightgray',
    stroke='white',
    strokeWidth=0.5,
)

# Bubbles placed at centroids computed inside the chart spec by geoCentroid.
centroid_expr = alt.expr.geoCentroid(None, alt.datum)
bubble_encodings = {
    "longitude": 'centroid[0]:Q',
    "latitude": 'centroid[1]:Q',
    "size": "total_migrants_2024:Q",
    "color": "origin_subregion:N",
}
bubbles = (
    alt.Chart(gdf_sel)
    .transform_calculate(centroid=centroid_expr)
    .mark_circle(stroke='black')
    .encode(**bubble_encodings)
)

# Identity projection with the y axis reflected.
(basemap + bubbles).project(type='identity', reflectY=True)

In [None]:
import altair as alt
import json


# LAC countries that have a shape (NAME present), in geographic coordinates.
gdf_lac = (
    gdf_ne
    .query("origin_region == 'Latin American and Caribbean'")
    .dropna(subset=["NAME"])
    .to_crs("EPSG:4326")
    .copy()
)

# Compute centroids in a projected CRS and convert the points back to
# longitude/latitude. Taking `.centroid` directly on EPSG:4326 geometries is
# inaccurate; this mirrors the 3857 round trip used for `gdf_centroid` earlier.
lac_centroids = gdf_lac.geometry.to_crs(epsg=3857).centroid.to_crs(epsg=4326)
gdf_lac["lon"] = lac_centroids.x
gdf_lac["lat"] = lac_centroids.y


geojson = json.loads(gdf_lac.to_json())

# Light-grey country outlines as the background layer.
basemap = alt.Chart(alt.Data(values=geojson["features"])).mark_geoshape(
    fill="#f5f5f5",
    stroke="#e0e0e0",
    strokeWidth=0.4
).properties(
    width=650,
    height=400,
    title={
        "text": "Migration Flows from Latin America and the Caribbean (2024)",
        "subtitle": ["Bubble size = total migrants", "Color = origin subregion"],
        "anchor": "start",
        "font": "Inter, Helvetica, Arial, sans-serif",
        "fontSize": 18,
        "subtitleFontSize": 12,
        "subtitleColor": "#4B5563"
    }
).project(type="mercator")

# One bubble per country: size by 2024 total, colour by origin sub-region.
bubbles = (
    alt.Chart(gdf_lac)
    .mark_circle(stroke="#111", strokeWidth=0.4, opacity=0.85)
    .encode(
        longitude="lon:Q",
        latitude="lat:Q",
        size=alt.Size(
            "total_migrants_2024:Q",
            scale=alt.Scale(range=[20, 1500]),
            legend=alt.Legend(title="Total Migrants (Millions 2024)")
        ),
        color=alt.Color(
            "origin_subregion:N",
            legend=alt.Legend(title="Origin Subregion"),
            scale=alt.Scale(scheme="tableau10")
        ),
        tooltip=[
            alt.Tooltip("NAME:N", title="Country"),
            alt.Tooltip("origin_region:N", title="Region"),
            alt.Tooltip("origin_subregion:N", title="Subregion"),
            # Totals are in millions, so show two decimals; a zero-decimal
            # format would round most countries to 0 or 1.
            alt.Tooltip("total_migrants_2024:Q", title="Total Migrants (Millions)", format=".2f"),
        ]
    )
)


lac_map = (basemap + bubbles).configure_view(strokeWidth=0)
lac_map


In [None]:
# Write the bubble map to the milestones folder, creating the folder if needed.
save_path = Path("../milestones") / "chart_05_bubbles_LAC_map.png"
save_path.parent.mkdir(parents=True, exist_ok=True)
lac_map.save(str(save_path), format="png")