paper: https://link.springer.com/article/10.1007/s11111-024-00452-9#Sec10

Prophet: https://facebook.github.io/prophet/docs/quick_start.html#python-api  
github: https://github.com/facebook/prophet  

| **Year**    | **Country/Region** | **Event / Policy**                     | **Migration Impact**                                                 |
| ----------- | ------------------ | -------------------------------------- | -------------------------------------------------------------------- |
| **2019**    | Argentina          | Severe financial crisis; inflation 54% | Argentinians start emigrating; fewer Venezuelans choose Argentina    |
|             | Venezuela          | Hyperinflation + shortages             | Mass exodus (\~4M migrants)                                          |
|             | Chile/Peru/Ecuador | Visa rules tighten                     | Venezuelan migration shifts patterns                                 |
|             | US–Mexico          | MPP (“Remain in Mexico”)               | Slows U.S. asylum, traps many                                        |
| **2020**    | Regional           | COVID closures                         | Migration slows temporarily                                          |
|             | U.S.               | Title 42 begins                        | Migrant expulsions surge                                             |
| **2021 Q1** | Colombia           | TPS regularization for Venezuelans     | Encourages local settlement                                          |
| **2021 Q2** | Chile              | New immigration law (April)            | Triggers Venezuelan & Haitian outflows                               |
| **2021 Q3** | Haiti              | Assassination + earthquake             | Haitian migration peaks                                              |
| **2022 Q1** | Argentina          | Inflation accelerates (\~55%)          | Argentinians emigrate; Venezuelans in Argentina begin leaving        |
| **2022 Q3** | Ecuador            | Venezuelan regularization              | Slows outflow temporarily                                            |
| **2022 Q4** | US–Mexico          | Title 42 expanded to Venezuelans       | Thousands stranded in Mexico                                         |
|             | Argentina          | Inflation \~95%, IMF tensions          | Argentinian emigration accelerates sharply                           |

In [None]:
import os
import gc
import numpy as np
import pandas as pd
from tqdm import tqdm
from pathlib import Path

# Geospatial
import geopandas as gpd
import dask_geopandas as dgpd
import rasterio
from rasterstats import zonal_stats
from osgeo import gdal, osr

# Statistics & Bayesian modeling
import pymc as pm
import arviz as az

# Visualisation
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

# Base directories.
# The defaults are the original author's local paths; set MAPINEQ_BASE_DIR /
# MAPINEQ_DATA_DIR in the environment to run the notebook on another machine
# without editing this cell.
BASE_DIR = Path(os.environ.get("MAPINEQ_BASE_DIR", "/Users/wenlanzhang/PycharmProjects/Mapineq/src/"))
DATA_DIR = Path(os.environ.get("MAPINEQ_DATA_DIR", "/Users/wenlanzhang/Downloads/PhD_UCL/Data/Oxford"))

In [None]:
# Load the bilateral monthly migration-flow table.
# pandas' default NA parsing converts the literal string "NA" to NaN, which
# would silently erase an ISO-2 country code "NA"; only empty fields are
# treated as missing here.
# NOTE(review): assumes country_from / country_to hold ISO-2 codes and that
# numeric columns carry no textual NaN markers -- confirm against the CSV.
df = pd.read_csv(
    DATA_DIR / "Migration/international_migration_flow.csv",
    keep_default_na=False,
    na_values=[""],
)

# Parse the month column once and derive the calendar parts from it.
df["migration_month"] = pd.to_datetime(df["migration_month"])
df['year'] = df["migration_month"].dt.year
df['month'] = df["migration_month"].dt.month

# Define the mapping of old codes to new codes
country_code_mapping = {
    'GR': 'EL',  # Greece (GR → EL)
    'GB': 'UK'   # United Kingdom (GB → UK)
}

# Apply the replacement to both columns
df['country_from'] = df['country_from'].replace(country_code_mapping)
df['country_to'] = df['country_to'].replace(country_code_mapping)

df
# len(df['country_from'].unique())

In [None]:
# country_list = ['EL', 'ES', 'FI', 'FR', 'HR', 'EE', 'DE', 'DK', 'BA', 'AT', 'BG',
#        'CH', 'CY', 'BE', 'CZ', 'AL', 'LU', 'LV', 'ME', 'IE', 'IS', 'IT',
#        'MK', 'MT', 'LI', 'NL', 'LT', 'HU', 'RS', 'SE', 'SI', 'TR', 'UA',
#        'SK', 'RO', 'NO', 'PL', 'PT', 'XK', 'UK']
# # df = df[(df['country_from'].isin(country_list)) | (df['country_to'].isin(country_list))]
# # df = df[(df['country_from'].isin(country_list)) & (df['country_to'].isin(country_list))]

# # df = df[(df['country_from'] == 'UA') & (df['country_to'].isin(country_list))]
# # df = df[df['num_migrants'] != 0]
# df = df[df['num_migrants'] >= 100]

# df

# VE 

In [None]:
# Flows into and out of VE, with zero-migrant rows dropped.
df_ve_in = df.loc[df['country_to'].eq('VE') & df['num_migrants'].ne(0)]
df_ve_out = df.loc[df['country_from'].eq('VE') & df['num_migrants'].ne(0)]

In [None]:
# Quarterly stacked bars of VE outflows, one colour per destination country.

# Independent copy so the column assignments below never write into `df`.
df_ve_out = df_ve_out.copy()

# Plain column assignment guarantees a datetime dtype even if the column
# arrives as strings, which the `.dt` accessor below requires.
df_ve_out["migration_month"] = pd.to_datetime(df_ve_out["migration_month"])
df_ve_out["migration_quarter"] = df_ve_out["migration_month"].dt.to_period("Q").astype(str)

# Migrants per (quarter, destination).
df_quarterly = df_ve_out.groupby(["migration_quarter", "country_to"], as_index=False)["num_migrants"].sum()

# Migrants per destination over the whole period.
df_country_totals = (
    df_quarterly
    .groupby("country_to", as_index=False)["num_migrants"]
    .sum()
    .rename(columns={"num_migrants": "total_migrants"})
)

# Keep only destinations whose overall total exceeds the threshold.
threshold = 10000
valid_countries = df_country_totals.loc[df_country_totals["total_migrants"] > threshold, "country_to"]
df_quarterly_filtered = df_quarterly[df_quarterly["country_to"].isin(valid_countries)].copy()

fig = px.bar(
    df_quarterly_filtered,
    x="migration_quarter",
    y="num_migrants",
    color="country_to",
    title=f"Migrants from VE Over Time (Quarterly, Only Countries > {threshold} Migrants)",
    labels={"migration_quarter": "Quarter", "num_migrants": "Number of Migrants"},
)

fig.update_layout(
    barmode="stack",
    xaxis_title="Quarter",
    yaxis_title="Number of Migrants",
    # Colour encodes the destination (`country_to`), so the legend lists
    # destination countries.
    legend_title="To Country",
)

fig.show()

In [None]:
# Quarterly stacked bars of flows into VE, one colour per origin country.

# Detach from `df` so the assignments below cannot touch a view.
df_ve_in = df_ve_in.copy()

# Ensure a datetime column, then label each row with its calendar quarter.
df_ve_in.loc[:, "migration_month"] = pd.to_datetime(df_ve_in["migration_month"])
df_ve_in.loc[:, "migration_quarter"] = df_ve_in["migration_month"].dt.to_period("Q").astype(str)

# Migrants per (quarter, origin).
df_quarterly_in = (
    df_ve_in
    .groupby(["migration_quarter", "country_from"], as_index=False)["num_migrants"]
    .sum()
)

# Migrants per origin over the whole period.
df_country_totals_in = (
    df_quarterly_in
    .groupby("country_from", as_index=False)["num_migrants"]
    .sum()
    .rename(columns={"num_migrants": "total_migrants"})
)

# Origins are kept only when their overall total exceeds the threshold.
threshold = 10000
valid_countries_in = df_country_totals_in.loc[
    df_country_totals_in["total_migrants"].gt(threshold), "country_from"
]
df_quarterly_in_filtered = df_quarterly_in.loc[
    df_quarterly_in["country_from"].isin(valid_countries_in)
].copy()

fig = px.bar(
    data_frame=df_quarterly_in_filtered,
    x="migration_quarter",
    y="num_migrants",
    color="country_from",
    labels={"migration_quarter": "Quarter", "num_migrants": "Number of Migrants"},
    title=f"Migrants to VE Over Time (Quarterly, Only Countries > {threshold} Migrants)",
).update_layout(
    barmode="stack",
    xaxis_title="Quarter",
    yaxis_title="Number of Migrants",
    legend_title="From Country",
)

fig.show()

### Waffle for R

In [None]:
# Total VE outflow per destination country, largest first.
out_migration_totals = (
    df_ve_out
    .groupby('country_to')['num_migrants']
    .sum()
    .sort_values(ascending=False)
)
out_migration_totals

In [None]:
# Histogram of the per-destination totals computed in `out_migration_totals`.
plt.figure(figsize=(10, 6))
plt.hist(out_migration_totals, edgecolor='black', bins=30)

plt.xlabel('Number of Migrants')
plt.ylabel('Number of Countries')
plt.title('Distribution of Migrant Counts per Destination Country')
plt.tight_layout()
plt.show()

In [None]:
# Split destinations into "major" ones (total above the threshold) and a
# single pooled "Other" entry holding everything else.
threshold = 0.05 * 1_000_000  # 50,000 migrants

is_major = out_migration_totals > threshold

# Copy before adding the extra label so the source Series is never modified.
major_countries = out_migration_totals[is_major].copy()

# Use the complement of the mask so totals exactly equal to the threshold
# are counted in "Other" instead of being dropped from both groups.
others_sum = out_migration_totals[~is_major].sum()
major_countries['Other'] = others_sum

major_countries

In [None]:
# The eleven largest VE outflow rows recorded for 2022-02-01.
df_ve_out_top = (
    df_ve_out
    .loc[df_ve_out['migration_month'] == '2022-02-01']
    .sort_values(by='num_migrants', ascending=False)
    .head(11)
)
df_ve_out_top[['country_to', 'num_migrants']]

In [None]:
# VE outflows for 2022-02-01: list the largest destinations individually and
# pool every remaining destination into one "Other" entry.
n_top = 8  # destinations shown individually; used for both slices below

df_month = df_ve_out[df_ve_out['migration_month'] == '2022-02-01']

migration_totals = df_month.groupby('country_to')['num_migrants'].sum()
sorted_totals = migration_totals.sort_values(ascending=False)

# NOTE(review): the name `top_10` is kept for compatibility, but it holds the
# first `n_top` (currently 8) destinations, not ten -- confirm the intended N.
top_10 = sorted_totals.head(n_top)
others_sum = sorted_totals.iloc[n_top:].sum()

# Use pd.concat instead of append
migration_final = pd.concat([top_10, pd.Series({'Other': others_sum})])

migration_final

### Heatmap

In [None]:
# Corridor heatmap: mean of num_migrants for selected origin/destination pairs.
origin_countries = ['VE', 'CL', 'AR', 'PE']
# Destinations can be the top N by volume or a hand-picked list.
destination_countries = ['US', 'ES', 'CO', 'BR', 'EC', 'PE', 'CL', 'AR']  # example subset

# Keep rows whose origin AND destination both appear in the lists above.
df_filtered = df.loc[
    df['country_from'].isin(origin_countries)
    & df['country_to'].isin(destination_countries)
]

# Average over every row available for each (origin, destination) pair.
avg_flows = df_filtered.groupby(['country_from', 'country_to'])['num_migrants'].mean().reset_index()

# Matrix layout for the heatmap: one row per origin, one column per destination.
heatmap_data = avg_flows.pivot(index='country_from', columns='country_to', values='num_migrants')

plt.figure(figsize=(10, 6))
sns.heatmap(heatmap_data, cmap='YlGnBu', annot=True, fmt=".0f")

plt.xlabel('Destination Country')
plt.ylabel('Origin Country')
plt.title('Average Monthly Migration Flows by Corridor')
plt.tight_layout()
plt.show()


## Exporting

### VE 

In [None]:
# Rebuild the VE inflow/outflow frames from `df`, dropping zero-migrant rows.
df_ve_in = df.loc[df['country_to'].eq('VE') & df['num_migrants'].ne(0)]
df_ve_out = df.loc[df['country_from'].eq('VE') & df['num_migrants'].ne(0)]

In [None]:
# Monthly flows into VE, one line per origin country (df_ve_in).
fig = px.line(
    data_frame=df_ve_in,
    x="migration_month",
    y="num_migrants",
    color="country_from",
    title="Migrants to VE Over Time",
    markers=True,
).update_layout(
    xaxis_title="Month",
    yaxis_title="Number of Migrants",
    legend_title="From Country",
)

fig.show()

In [None]:
# Monthly flows out of VE, one line per destination country (df_ve_out).
fig = px.line(
    data_frame=df_ve_out,
    x="migration_month",
    y="num_migrants",
    color="country_to",
    title="Migrants from VE Over Time",
    markers=True,
).update_layout(
    xaxis_title="Month",
    yaxis_title="Number of Migrants",
    legend_title="To Country",
)

fig.show()

### Haiti

In [None]:
# Flows into and out of HT, with zero-migrant rows dropped.
df_HT_in = df.loc[df['country_to'].eq('HT') & df['num_migrants'].ne(0)]
df_HT_out = df.loc[df['country_from'].eq('HT') & df['num_migrants'].ne(0)]

In [None]:
# Monthly flows into HT, one line per origin country (df_HT_in).
fig = px.line(
    data_frame=df_HT_in,
    x="migration_month",
    y="num_migrants",
    color="country_from",
    title="Migrants to HT Over Time",
    markers=True,
).update_layout(
    xaxis_title="Month",
    yaxis_title="Number of Migrants",
    legend_title="From Country",
)

fig.show()

In [None]:
# Monthly flows out of HT, one line per destination country (df_HT_out).
fig = px.line(
    data_frame=df_HT_out,
    x="migration_month",
    y="num_migrants",
    color="country_to",
    title="Migrants from HT Over Time",
    markers=True,
).update_layout(
    xaxis_title="Month",
    yaxis_title="Number of Migrants",
    legend_title="To Country",
)

fig.show()

### AR

In [None]:
# Flows into and out of AR, with zero-migrant rows dropped.
df_AR_in = df.loc[df['country_to'].eq('AR') & df['num_migrants'].ne(0)]
df_AR_out = df.loc[df['country_from'].eq('AR') & df['num_migrants'].ne(0)]

In [None]:
# Monthly flows into AR, one line per origin country (df_AR_in).
fig = px.line(
    data_frame=df_AR_in,
    x="migration_month",
    y="num_migrants",
    color="country_from",
    title="Migrants to AR Over Time",
    markers=True,
).update_layout(
    xaxis_title="Month",
    yaxis_title="Number of Migrants",
    legend_title="From Country",
)

fig.show()

In [None]:
# Monthly flows out of AR, one line per destination country (df_AR_out).
fig = px.line(
    data_frame=df_AR_out,
    x="migration_month",
    y="num_migrants",
    color="country_to",
    title="Migrants from AR Over Time",
    markers=True,
).update_layout(
    xaxis_title="Month",
    yaxis_title="Number of Migrants",
    legend_title="To Country",
)

fig.show()

## Importing 

### CO

In [None]:
# Flows into and out of CO, with zero-migrant rows dropped.
df_co_in = df.loc[df['country_to'].eq('CO') & df['num_migrants'].ne(0)]
df_co_out = df.loc[df['country_from'].eq('CO') & df['num_migrants'].ne(0)]

In [None]:
# Monthly flows into CO, one line per origin country (df_co_in).
fig = px.line(
    data_frame=df_co_in,
    x="migration_month",
    y="num_migrants",
    color="country_from",
    title="Migrants to CO Over Time",
    markers=True,
).update_layout(
    xaxis_title="Month",
    yaxis_title="Number of Migrants",
    legend_title="From Country",
)

fig.show()

In [None]:
# Monthly flows out of CO, one line per destination country (df_co_out).
fig = px.line(
    data_frame=df_co_out,
    x="migration_month",
    y="num_migrants",
    color="country_to",
    title="Migrants from CO Over Time",
    markers=True,
).update_layout(
    xaxis_title="Month",
    yaxis_title="Number of Migrants",
    legend_title="To Country",
)

fig.show()

### PE

In [None]:
# Flows into and out of PE, with zero-migrant rows dropped.
df_PE_in = df.loc[df['country_to'].eq('PE') & df['num_migrants'].ne(0)]
df_PE_out = df.loc[df['country_from'].eq('PE') & df['num_migrants'].ne(0)]

In [None]:
# Monthly flows into PE, one line per origin country (df_PE_in).
fig = px.line(
    data_frame=df_PE_in,
    x="migration_month",
    y="num_migrants",
    color="country_from",
    title="Migrants to PE Over Time",
    markers=True,
).update_layout(
    xaxis_title="Month",
    yaxis_title="Number of Migrants",
    legend_title="From Country",
)

fig.show()

In [None]:
# Monthly flows out of PE, one line per destination country (df_PE_out).
fig = px.line(
    df_PE_out,
    x="migration_month",
    y="num_migrants",
    color="country_to",
    markers=True,
    title="Migrants from PE Over Time"
)

fig.update_layout(
    xaxis_title="Month",
    yaxis_title="Number of Migrants",
    # Colour encodes `country_to`, so the legend lists destination countries.
    legend_title="To Country"
)

fig.show()

### CL

In [None]:
# Flows into and out of CL, with zero-migrant rows dropped.
df_cl_in = df.loc[df['country_to'].eq('CL') & df['num_migrants'].ne(0)]
df_cl_out = df.loc[df['country_from'].eq('CL') & df['num_migrants'].ne(0)]

In [None]:
# Monthly flows into CL, one line per origin country (df_cl_in).
fig = px.line(
    data_frame=df_cl_in,
    x="migration_month",
    y="num_migrants",
    color="country_from",
    title="Migrants to CL Over Time",
    markers=True,
).update_layout(
    xaxis_title="Month",
    yaxis_title="Number of Migrants",
    legend_title="From Country",
)

fig.show()

In [None]:
# Monthly flows out of CL, one line per destination country (df_cl_out).
fig = px.line(
    df_cl_out,
    x="migration_month",
    y="num_migrants",
    color="country_to",
    markers=True,
    title="Migrants from CL Over Time"
)

fig.update_layout(
    xaxis_title="Month",
    yaxis_title="Number of Migrants",
    # Colour encodes `country_to`, so the legend lists destination countries.
    legend_title="To Country"
)

fig.show()

In [None]:
# Rebuild the AR inflow/outflow frames from `df`, dropping zero-migrant rows.
df_AR_in = df.loc[df['country_to'].eq('AR') & df['num_migrants'].ne(0)]
df_AR_out = df.loc[df['country_from'].eq('AR') & df['num_migrants'].ne(0)]

In [None]:
# Monthly flows into AR, one line per origin country (df_AR_in).
fig = px.line(
    data_frame=df_AR_in,
    x="migration_month",
    y="num_migrants",
    color="country_from",
    title="Migrants to AR Over Time",
    markers=True,
).update_layout(
    xaxis_title="Month",
    yaxis_title="Number of Migrants",
    legend_title="From Country",
)

fig.show()

In [None]:
# Monthly flows out of AR, one line per destination country (df_AR_out).
fig = px.line(
    df_AR_out,
    x="migration_month",
    y="num_migrants",
    color="country_to",
    markers=True,
    title="Migrants from AR Over Time"
)

fig.update_layout(
    xaxis_title="Month",
    yaxis_title="Number of Migrants",
    # Colour encodes `country_to`, so the legend lists destination countries.
    legend_title="To Country"
)

fig.show()

### MX

In [None]:
# Flows into and out of MX, with zero-migrant rows dropped.
df_MX_in = df.loc[df['country_to'].eq('MX') & df['num_migrants'].ne(0)]
df_MX_out = df.loc[df['country_from'].eq('MX') & df['num_migrants'].ne(0)]

In [None]:
# Monthly flows out of MX, one line per destination country (df_MX_out).
fig = px.line(
    data_frame=df_MX_out,
    x="migration_month",
    y="num_migrants",
    color="country_to",
    title="Migrants from MX Over Time",
    markers=True,
).update_layout(
    xaxis_title="Month",
    yaxis_title="Number of Migrants",
    legend_title="To Country",
)

fig.show()

In [None]:
# Monthly flows into MX, one line per origin country (df_MX_in).
fig = px.line(
    data_frame=df_MX_in,
    x="migration_month",
    y="num_migrants",
    color="country_from",
    title="Migrants to MX Over Time",
    markers=True,
).update_layout(
    xaxis_title="Month",
    yaxis_title="Number of Migrants",
    legend_title="From Country",
)

fig.show()

### EC

In [None]:
# Flows into and out of EC, with zero-migrant rows dropped.
df_EC_in = df.loc[df['country_to'].eq('EC') & df['num_migrants'].ne(0)]
df_EC_out = df.loc[df['country_from'].eq('EC') & df['num_migrants'].ne(0)]

In [None]:
# Monthly flows into EC, one line per origin country (df_EC_in).
fig = px.line(
    data_frame=df_EC_in,
    x="migration_month",
    y="num_migrants",
    color="country_from",
    title="Migrants to EC Over Time",
    markers=True,
).update_layout(
    xaxis_title="Month",
    yaxis_title="Number of Migrants",
    legend_title="From Country",
)

fig.show()

In [None]:
# Monthly flows out of EC, one line per destination country (df_EC_out).
fig = px.line(
    data_frame=df_EC_out,
    x="migration_month",
    y="num_migrants",
    color="country_to",
    title="Migrants from EC Over Time",
    markers=True,
).update_layout(
    xaxis_title="Month",
    yaxis_title="Number of Migrants",
    legend_title="To Country",
)

fig.show()

### UY (with AR) 

In [None]:
# Flows into and out of UY, with zero-migrant rows dropped.
df_UY_in = df.loc[df['country_to'].eq('UY') & df['num_migrants'].ne(0)]
df_UY_out = df.loc[df['country_from'].eq('UY') & df['num_migrants'].ne(0)]

In [None]:
# Monthly flows out of UY, one line per destination country (df_UY_out).
fig = px.line(
    df_UY_out,
    x="migration_month",
    y="num_migrants",
    color="country_to",
    markers=True,
    title="Migrants from UY Over Time"
)

fig.update_layout(
    xaxis_title="Month",
    yaxis_title="Number of Migrants",
    # Colour encodes `country_to`, so the legend lists destination countries.
    legend_title="To Country"
)

fig.show()

In [None]:
# Monthly flows into UY, one line per origin country (df_UY_in).
fig = px.line(
    data_frame=df_UY_in,
    x="migration_month",
    y="num_migrants",
    color="country_from",
    title="Migrants to UY Over Time",
    markers=True,
).update_layout(
    xaxis_title="Month",
    yaxis_title="Number of Migrants",
    legend_title="From Country",
)

fig.show()

### DO  

In [None]:
# Flows into and out of DO, with zero-migrant rows dropped.
df_DO_in = df.loc[df['country_to'].eq('DO') & df['num_migrants'].ne(0)]
df_DO_out = df.loc[df['country_from'].eq('DO') & df['num_migrants'].ne(0)]

In [None]:
# Monthly flows into DO, one line per origin country (df_DO_in).
fig = px.line(
    data_frame=df_DO_in,
    x="migration_month",
    y="num_migrants",
    color="country_from",
    title="Migrants to DO Over Time",
    markers=True,
).update_layout(
    xaxis_title="Month",
    yaxis_title="Number of Migrants",
    legend_title="From Country",
)

fig.show()

In [None]:
# Monthly flows out of DO, one line per destination country (df_DO_out).
fig = px.line(
    df_DO_out,
    x="migration_month",
    y="num_migrants",
    color="country_to",
    markers=True,
    title="Migrants from DO Over Time"
)

fig.update_layout(
    xaxis_title="Month",
    yaxis_title="Number of Migrants",
    # Colour encodes `country_to`, so the legend lists destination countries.
    legend_title="To Country"
)

fig.show()

## MM to TH

In [None]:
# Flows into and out of MM, with zero-migrant rows dropped.
df_MM_in = df.loc[df['country_to'].eq('MM') & df['num_migrants'].ne(0)]
df_MM_out = df.loc[df['country_from'].eq('MM') & df['num_migrants'].ne(0)]

# Monthly flows into MM, one line per origin country.
fig = px.line(
    data_frame=df_MM_in,
    x="migration_month",
    y="num_migrants",
    color="country_from",
    title="Migrants to MM Over Time",
    markers=True,
).update_layout(
    xaxis_title="Month",
    yaxis_title="Number of Migrants",
    legend_title="From Country",
)

fig.show()

In [None]:
# Monthly flows out of MM, one line per destination country (df_MM_out).
fig = px.line(
    df_MM_out,
    x="migration_month",
    y="num_migrants",
    color="country_to",
    markers=True,
    title="Migrants from MM Over Time"
)

fig.update_layout(
    xaxis_title="Month",
    yaxis_title="Number of Migrants",
    # Colour encodes `country_to`, so the legend lists destination countries.
    legend_title="To Country"
)

fig.show()

In [None]:
# Flows into and out of TH, with zero-migrant rows dropped.
df_TH_in = df.loc[df['country_to'].eq('TH') & df['num_migrants'].ne(0)]
df_TH_out = df.loc[df['country_from'].eq('TH') & df['num_migrants'].ne(0)]

# Monthly flows into TH, one line per origin country.
fig = px.line(
    data_frame=df_TH_in,
    x="migration_month",
    y="num_migrants",
    color="country_from",
    title="Migrants to TH Over Time",
    markers=True,
).update_layout(
    xaxis_title="Month",
    yaxis_title="Number of Migrants",
    legend_title="From Country",
)

fig.show()

In [None]:
# Monthly flows out of TH, one line per destination country (df_TH_out).
fig = px.line(
    data_frame=df_TH_out,
    x="migration_month",
    y="num_migrants",
    color="country_to",
    title="Migrants from TH Over Time",
    markers=True,
).update_layout(
    xaxis_title="Month",
    yaxis_title="Number of Migrants",
    legend_title="To Country",
)

fig.show()

## SY TR

In [None]:
# Flows into and out of SY, with zero-migrant rows dropped.
df_SY_in = df.loc[df['country_to'].eq('SY') & df['num_migrants'].ne(0)]
df_SY_out = df.loc[df['country_from'].eq('SY') & df['num_migrants'].ne(0)]

# Monthly flows into SY, one line per origin country (df_SY_in).
fig = px.line(
    data_frame=df_SY_in,
    x="migration_month",
    y="num_migrants",
    color="country_from",
    title="Migrants to SY Over Time",
    markers=True,
).update_layout(
    xaxis_title="Month",
    yaxis_title="Number of Migrants",
    legend_title="From Country",
)

fig.show()

In [None]:
# Monthly flows out of SY, one line per destination country (df_SY_out).
fig = px.line(
    data_frame=df_SY_out,
    x="migration_month",
    y="num_migrants",
    color="country_to",
    title="Migrants from SY Over Time",
    markers=True,
).update_layout(
    xaxis_title="Month",
    yaxis_title="Number of Migrants",
    legend_title="To Country",
)

fig.show()

In [None]:
# Flows into and out of TR, with zero-migrant rows dropped.
df_TR_in = df.loc[df['country_to'].eq('TR') & df['num_migrants'].ne(0)]
df_TR_out = df.loc[df['country_from'].eq('TR') & df['num_migrants'].ne(0)]

# Monthly flows into TR, one line per origin country (df_TR_in).
fig = px.line(
    data_frame=df_TR_in,
    x="migration_month",
    y="num_migrants",
    color="country_from",
    title="Migrants to TR Over Time",
    markers=True,
).update_layout(
    xaxis_title="Month",
    yaxis_title="Number of Migrants",
    legend_title="From Country",
)

fig.show()

In [None]:
# Monthly flows out of TR, one line per destination country (df_TR_out).
fig = px.line(
    data_frame=df_TR_out,
    x="migration_month",
    y="num_migrants",
    color="country_to",
    title="Migrants from TR Over Time",
    markers=True,
).update_layout(
    xaxis_title="Month",
    yaxis_title="Number of Migrants",
    legend_title="To Country",
)

fig.show()

# Test: with Bayesian

In [None]:
# Step 1: Create unique flow ID (origin -> destination)
df["flow_id"] = df["country_from"] + "_" + df["country_to"]

# Create numeric indices for flows and months
df["flow_index"] = df["flow_id"].astype("category").cat.codes
df["time_index"] = (
    (df["migration_month"].dt.year - df["migration_month"].dt.year.min()) * 12 +
    (df["migration_month"].dt.month - df["migration_month"].dt.month.min())
)

# Check unique flow mappings and time indexing
flow_map = df[["flow_id", "flow_index"]].drop_duplicates().reset_index(drop=True)
time_range = df["migration_month"].min(), df["migration_month"].max()
df

In [None]:
# Your data:
flow_idx = df["flow_index"].values
time_idx = df["time_index"].values
y = df["num_migrants"].values
num_flows = df["flow_index"].nunique()
num_times = df["time_index"].nunique()

with pm.Model() as model:
    mu = pm.Normal("mu", mu=0, sigma=5)
    sigma_flow = pm.HalfNormal("sigma_flow", sigma=2)
    alpha = pm.Normal("alpha", mu=0, sigma=sigma_flow, shape=num_flows)
    sigma_time = pm.HalfNormal("sigma_time", sigma=2)
    beta = pm.Normal("beta", mu=0, sigma=sigma_time, shape=num_times)
    
    log_lambda = mu + alpha[flow_idx] + beta[time_idx]
    lambda_ = pm.math.exp(log_lambda)
    
    y_obs = pm.Poisson("y_obs", mu=lambda_, observed=y)

    trace = pm.sample(2000, tune=2000, target_accept=0.99, 
                      init='jitter+adapt_diag', chains=4, cores=4, max_treedepth=15, return_inferencedata=True)
    
    # trace = pm.sample(500, tune=500, chains=2, cores=2, target_accept=0.9)  # for simple test


| Term            | Meaning                                                                    |
| --------------- | -------------------------------------------------------------------------- |
| **mean**        | Posterior mean (best guess based on all chains)                            |
| **sd**          | Posterior standard deviation (uncertainty)                                 |
| **hdi\_3%-97%** | 94% credible interval (Bayesian version of confidence interval)            |
| **r\_hat**      | Should be \~1.00. Values >1.1 → **non-converged** (⚠️ yours are too high)  |
| **ess\_bulk**   | Effective sample size. You want this to be **much higher** (like >200)     |
| **trace plots** | Chain mixing over time — your `mu` and `sigma_*` are **not well-mixed** 😕 |


In [None]:
# Trace plots and summary table for the intercept and the two scale parameters.
az.plot_trace(data=trace, var_names=["mu", "sigma_flow", "sigma_time"])
az.summary(data=trace, var_names=["mu", "sigma_flow", "sigma_time"])

In [None]:
# Posterior expected count for one (flow, month) cell of the model.
# Change these two indices to inspect another cell; the printed label is
# looked up from `df`, so it always matches the indices actually used.
target_flow_index = 28
target_time_index = 38

# Extract posterior samples from trace (chains and draws flattened together)
mu_samples = trace.posterior["mu"].values.flatten()
alpha_samples = trace.posterior["alpha"].values[:, :, target_flow_index].flatten()
beta_samples = trace.posterior["beta"].values[:, :, target_time_index].flatten()

# Compute log-lambda samples
log_lambda_samples = mu_samples + alpha_samples + beta_samples

# Convert to rate (expected migrant count)
lambda_samples = np.exp(log_lambda_samples)

# Summary statistics
mean_prediction = lambda_samples.mean()
median_prediction = np.median(lambda_samples)
hdi_interval = az.hdi(lambda_samples, hdi_prob=0.94)

# Recover the human-readable corridor and month for the chosen indices.
# flow_id has the form "<origin>_<destination>".
flow_label = (
    df.loc[df["flow_index"] == target_flow_index, "flow_id"]
    .iloc[0]
    .replace("_", " to ")
)
month_label = (
    df.loc[df["time_index"] == target_time_index, "migration_month"]
    .iloc[0]
    .strftime("%Y-%m")
)

print(f"Estimated migrants from {flow_label} in {month_label}:")
print(f"  Posterior mean: {mean_prediction:.2f}")
print(f"  Posterior median: {median_prediction:.2f}")
print(f"  94% credible interval: {hdi_interval[0]:.2f} to {hdi_interval[1]:.2f}")


In [None]:
# Observed row(s) for flow_index 28 at time_index 38.
# Alternative lookups by country code:
# df[(df['country_from'] == 'UA') & (df['country_to'] == 'PL')]
# df[(df['country_from'] == 'UA') & (df['country_to'] == 'UK')]
df.loc[df['flow_index'].eq(28) & df['time_index'].eq(38)]