paper: https://link.springer.com/article/10.1007/s11111-024-00452-9#Sec10

Prophet: https://facebook.github.io/prophet/docs/quick_start.html#python-api  
github: https://github.com/facebook/prophet  

| **Year**    | **Country/Region** | **Event / Policy**                     | **Migration Impact**                                                 |
| ----------- | ------------------ | -------------------------------------- | -------------------------------------------------------------------- |
| **2019**    | Argentina          | Severe financial crisis; inflation 54% | Argentinians start emigrating; fewer Venezuelans choose Argentina    |
|             | Venezuela          | Hyperinflation + shortages             | Mass exodus (\~4M migrants)                                          |
|             | Chile/Peru/Ecuador | Visa rules tighten                     | Venezuelan migration shifts patterns                                 |
|             | US–Mexico          | MPP (“Remain in Mexico”)               | Slows U.S. asylum, traps many                                        |
| **2020**    | Regional           | COVID closures                         | Migration slows temporarily                                          |
|             | U.S.               | Title 42 begins                        | Migrant expulsions surge                                             |
| **2021 Q1** | Colombia           | TPS regularization for Venezuelans     | Encourages local settlement                                          |
| **2021 Q2** | Chile              | New immigration law (April)            | Triggers Venezuelan & Haitian outflows                               |
| **2021 Q3** | Haiti              | Assassination + earthquake             | Haitian migration peaks                                              |
| **2022 Q1** | Argentina          | Inflation accelerates (\~55%)          | Argentinians emigrate; Venezuelans in Argentina begin leaving        |
| **2022 Q3** | Ecuador            | Venezuelan regularization              | Slows outflow temporarily                                            |
| **2022 Q4** | US–Mexico          | Title 42 expanded to Venezuelans       | Thousands stranded in Mexico                                         |
|             | Argentina          | Inflation \~95%, IMF tensions          | Argentinian emigration accelerates sharply                           |

In [1]:
import os
import gc
import numpy as np
import pandas as pd
from tqdm import tqdm
from pathlib import Path

# Geospatial
import geopandas as gpd
import dask_geopandas as dgpd
import rasterio
from rasterstats import zonal_stats
from osgeo import gdal, osr

# Statistics & Bayesian modeling
import pymc as pm
import arviz as az

# Visualisation
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

# Base directories (absolute, machine-specific locations)
BASE_DIR = Path("/Users/wenlanzhang/PycharmProjects/Mapineq/src/")
DATA_DIR = Path("/Users/wenlanzhang/Downloads/PhD_UCL/Data/Oxford")

# Two-letter country codes of interest; 'CO' is currently left out.
country_list = [
    'CL',
    'BR',
    'ES',
    'PE',
    'US',
    'EC',
    'AR',
]  # 'CO',

In [2]:
# Load the monthly origin -> destination migration flows.
# pandas' default NA parsing would silently turn the ISO country code "NA"
# (Namibia) into NaN in country_from / country_to. Disable the default token
# list and treat only genuinely empty cells as missing.
df = pd.read_csv(
    DATA_DIR / "Migration/international_migration_flow.csv",
    keep_default_na=False,
    na_values=[""],
)

# Parse the month column once, then derive the calendar parts from the
# parsed values instead of re-parsing the strings for every new column.
df["migration_month"] = pd.to_datetime(df["migration_month"])
df["year"] = df["migration_month"].dt.year
df["month"] = df["migration_month"].dt.month

# # Define the mapping of old codes to new codes
# country_code_mapping = {
#     'GR': 'EL',  # Greece (GR → EL)
#     'GB': 'UK'   # United Kingdom (GB → UK)
# }

# # Apply the replacement to both columns
# df['country_from'] = df['country_from'].replace(country_code_mapping)
# df['country_to'] = df['country_to'].replace(country_code_mapping)

df
# len(df['country_from'].unique())

Unnamed: 0,country_from,country_to,migration_month,num_migrants,year,month
0,AD,AE,2019-01-01,12,2019,1
1,AD,AE,2019-02-01,2,2019,2
2,AD,AE,2019-03-01,1,2019,3
3,AD,AE,2019-04-01,7,2019,4
4,AD,AE,2019-05-01,0,2019,5
...,...,...,...,...,...,...
1563149,ZW,ZM,2022-08-01,138,2022,8
1563150,ZW,ZM,2022-09-01,162,2022,9
1563151,ZW,ZM,2022-10-01,149,2022,10
1563152,ZW,ZM,2022-11-01,104,2022,11


# Test: with Bayesian

In [None]:
# Step 1: Create unique flow ID (origin -> destination)
df["flow_id"] = df["country_from"] + "_" + df["country_to"]

# Integer code per distinct flow_id, used later to index per-flow parameters.
# NOTE: categorical codes are -1 for a missing flow_id.
df["flow_index"] = df["flow_id"].astype("category").cat.codes

# Months elapsed since the earliest observed month, so the earliest month is
# always index 0. The offset must be taken against the month of that earliest
# date, not against the smallest month number anywhere in the data: the two
# differ whenever the series does not start in January.
first_month = df["migration_month"].min()
df["time_index"] = (
    (df["migration_month"].dt.year - first_month.year) * 12
    + (df["migration_month"].dt.month - first_month.month)
)

# Check unique flow mappings and time indexing
flow_map = df[["flow_id", "flow_index"]].drop_duplicates().reset_index(drop=True)
time_range = df["migration_month"].min(), df["migration_month"].max()
df

In [None]:
# Observed counts plus the integer lookups into the per-flow / per-month
# effect vectors.
flow_idx = df["flow_index"].values
time_idx = df["time_index"].values
y = df["num_migrants"].values

# A negative index (e.g. the -1 code of a missing category) would silently
# wrap around to the last element of alpha/beta, so fail loudly instead.
if (flow_idx < 0).any() or (time_idx < 0).any():
    raise ValueError(
        "flow_index/time_index contain negative values; "
        "check for missing country codes or dates before fitting."
    )

# Size the effect vectors from the largest index rather than the number of
# distinct values, so every lookup stays in bounds even if the index values
# have gaps or do not start at 0.
num_flows = int(flow_idx.max()) + 1
num_times = int(time_idx.max()) + 1

# Poisson log-linear model:
#   log(lambda) = mu + alpha[flow] + beta[month]
# with zero-mean normal effects whose scales get half-normal priors.
with pm.Model() as model:
    mu = pm.Normal("mu", mu=0, sigma=5)
    sigma_flow = pm.HalfNormal("sigma_flow", sigma=2)
    alpha = pm.Normal("alpha", mu=0, sigma=sigma_flow, shape=num_flows)
    sigma_time = pm.HalfNormal("sigma_time", sigma=2)
    beta = pm.Normal("beta", mu=0, sigma=sigma_time, shape=num_times)

    log_lambda = mu + alpha[flow_idx] + beta[time_idx]
    lambda_ = pm.math.exp(log_lambda)

    y_obs = pm.Poisson("y_obs", mu=lambda_, observed=y)

    trace = pm.sample(2000, tune=2000, target_accept=0.99,
                      init='jitter+adapt_diag', chains=4, cores=4, max_treedepth=15, return_inferencedata=True)

    # trace = pm.sample(500, tune=500, chains=2, cores=2, target_accept=0.9)  # for simple test


| Term            | Meaning                                                                    |
| --------------- | -------------------------------------------------------------------------- |
| **mean**        | Posterior mean (best guess based on all chains)                            |
| **sd**          | Posterior standard deviation (uncertainty)                                 |
| **hdi\_3%-97%** | 94% highest-density interval (HDI), a Bayesian credible interval           |
| **r\_hat**      | Should be \~1.00. Values >1.1 → **non-converged** (⚠️ yours are too high)  |
| **ess\_bulk**   | Effective sample size. You want this to be **much higher** (like >200)     |
| **trace plots** | Chain mixing over time — your `mu` and `sigma_*` are **not well-mixed** 😕 |


In [None]:
# Trace plots and summary table for the intercept and the two effect scales.
hyper_params = ["mu", "sigma_flow", "sigma_time"]
az.plot_trace(trace, var_names=hyper_params)
az.summary(trace, var_names=hyper_params)

In [None]:
# import numpy as np
# import arviz as az

# The (flow, month) cell to summarise. Keep these in one place so the
# posterior slices and the printed heading cannot drift apart.
target_flow_index = 28
target_time_index = 38

# Extract posterior samples from trace (chains and draws flattened together)
mu_samples = trace.posterior["mu"].values.flatten()
alpha_samples = trace.posterior["alpha"].values[:, :, target_flow_index].flatten()
beta_samples = trace.posterior["beta"].values[:, :, target_time_index].flatten()

# Compute log-lambda samples
log_lambda_samples = mu_samples + alpha_samples + beta_samples

# Convert to rate (expected migrant count)
lambda_samples = np.exp(log_lambda_samples)

# Summary statistics
mean_prediction = lambda_samples.mean()
median_prediction = np.median(lambda_samples)
hdi_interval = az.hdi(lambda_samples, hdi_prob=0.94)

# Build the heading from the data rather than hard-coding countries and a
# month that may not match the indices above.
selected_rows = df[
    (df["flow_index"] == target_flow_index) & (df["time_index"] == target_time_index)
]
if selected_rows.empty:
    # No observed row for this cell: fall back to the raw indices.
    target_label = (
        f"for flow_index={target_flow_index}, time_index={target_time_index}"
    )
else:
    first_row = selected_rows.iloc[0]
    month_label = pd.Timestamp(first_row["migration_month"]).strftime("%Y-%m")
    target_label = (
        f"from {first_row['country_from']} to {first_row['country_to']} in {month_label}"
    )

print(f"Estimated migrants {target_label}:")
print(f"  Posterior mean: {mean_prediction:.2f}")
print(f"  Posterior median: {median_prediction:.2f}")
print(f"  94% credible interval: {hdi_interval[0]:.2f} to {hdi_interval[1]:.2f}")


In [None]:
# df[(df['country_from'] == 'UA') & (df['country_to'] == 'PL')]
# df[(df['country_from'] == 'UA') & (df['country_to'] == 'UK')]
# Show the observed row(s) for flow_index 28 at time_index 38.
cell_mask = df["flow_index"].eq(28) & df["time_index"].eq(38)
df.loc[cell_mask]