paper: https://link.springer.com/article/10.1007/s11111-024-00452-9#Sec10

Prophet: https://facebook.github.io/prophet/docs/quick_start.html#python-api  
github: https://github.com/facebook/prophet  

| Feature                            | **Prophet**                                    | **STL**                             |
| ---------------------------------- | ---------------------------------------------- | ----------------------------------- |
| **Purpose**                        | Forecasting + decomposition                    | Decomposition only (no forecasting) |
| **Components**                     | Trend + Seasonality + Holidays                 | Trend + Seasonality + Residuals     |
| **Handles missing data?**          | Yes                                            | No (must handle manually)           |
| **Handles irregular time series?** | Yes                                            | No (requires evenly spaced data)    |
| **Forecasting**                    | Yes (core feature)                             | No                                  |
| **Custom seasonality/holidays**    | Yes (add holidays, custom cycles)              | No                                  |
| **Trend modeling**                 | Piecewise linear or logistic with changepoints | LOESS smoothing (local regression)  |
| **Available in Python via**        | `prophet` package                              | `statsmodels.tsa.seasonal.STL`      |


In [None]:
import os
import gc
import numpy as np
import pandas as pd
from tqdm import tqdm
from pathlib import Path

# Geospatial
import geopandas as gpd
import dask_geopandas as dgpd
import rasterio
from rasterstats import zonal_stats
from osgeo import gdal, osr

# Statistics & Bayesian modeling
import pymc as pm
import arviz as az

# Visualisation
import matplotlib.pyplot as plt
import plotly.express as px

# Base directories: hard-coded absolute paths, so they only resolve on a
# machine with this exact folder layout.
# DATA_DIR is the root the migration CSV is read from (DATA_DIR / "Migration/...").
# NOTE(review): BASE_DIR is not referenced by any of the visible cells.
BASE_DIR = Path("/Users/wenlanzhang/PycharmProjects/Mapineq/src/")
DATA_DIR = Path("/Users/wenlanzhang/Downloads/PhD_UCL/Data/Oxford")

In [None]:
# Load the migration flow table (one row per origin/destination/month).
df = pd.read_csv(DATA_DIR / "Migration/international_migration_flow.csv")

# Parse the month column once, then derive the calendar parts from the
# parsed values instead of re-parsing the raw strings for each column.
df["migration_month"] = pd.to_datetime(df["migration_month"])
df["year"] = df["migration_month"].dt.year
df["month"] = df["migration_month"].dt.month

# Map the codes used in the CSV onto the alternative codes used downstream
country_code_mapping = {
    'GR': 'EL',  # Greece (GR → EL)
    'GB': 'UK',  # United Kingdom (GB → UK)
}

# Apply the replacement to both the origin and the destination column
for code_column in ('country_from', 'country_to'):
    df[code_column] = df[code_column].replace(country_code_mapping)

df
# len(df['country_from'].unique())

In [None]:
# country_list = ['EL', 'ES', 'FI', 'FR', 'HR', 'EE', 'DE', 'DK', 'BA', 'AT', 'BG',
#        'CH', 'CY', 'BE', 'CZ', 'AL', 'LU', 'LV', 'ME', 'IE', 'IS', 'IT',
#        'MK', 'MT', 'LI', 'NL', 'LT', 'HU', 'RS', 'SE', 'SI', 'TR', 'UA',
#        'SK', 'RO', 'NO', 'PL', 'PT', 'XK', 'UK']
# # df = df[(df['country_from'].isin(country_list)) | (df['country_to'].isin(country_list))]
# # df = df[(df['country_from'].isin(country_list)) & (df['country_to'].isin(country_list))]

# # df = df[(df['country_from'] == 'UA') & (df['country_to'].isin(country_list))]
# # df = df[df['num_migrants'] != 0]
# df = df[df['num_migrants'] >= 100]

# df

# Time Series

In [None]:
# Drop empty flows, then split what remains by direction relative to VE:
# df_ve_in holds flows arriving in VE, df_ve_out holds flows leaving VE.
has_migrants = df['num_migrants'] != 0
df_ve_in = df[has_migrants & (df['country_to'] == 'VE')]
df_ve_out = df[has_migrants & (df['country_from'] == 'VE')]

In [None]:
# Interactive chart of flows into VE: one line per origin country
fig = px.line(
    data_frame=df_ve_in,
    x="migration_month",
    y="num_migrants",
    color="country_from",
    markers=True,
    title="Migrants to VE Over Time",
)

# Human-readable axis and legend titles
layout_titles = {
    "xaxis_title": "Month",
    "yaxis_title": "Number of Migrants",
    "legend_title": "From Country",
}
fig.update_layout(**layout_titles)

fig.show()

In [None]:
# Interactive chart of flows out of VE: one line per destination country
fig = px.line(
    df_ve_out,
    x="migration_month",
    y="num_migrants",
    color="country_to",
    markers=True,
    title="Migrants from VE Over Time"
)

# The lines are split by destination (country_to), so the legend must say so
fig.update_layout(
    xaxis_title="Month",
    yaxis_title="Number of Migrants",
    legend_title="To Country"
)

fig.show()

# Top over the 4 years

In [None]:
# Sum every recorded outflow from VE per destination, largest total first
totals_by_destination = df_ve_out.groupby('country_to')['num_migrants'].sum()
out_migration_totals = totals_by_destination.sort_values(ascending=False)
out_migration_totals

In [None]:
# Histogram of the per-destination totals: shows how many destination
# countries fall into each migrant-count bin.
hist_fig, hist_ax = plt.subplots(figsize=(10, 6))
hist_ax.hist(out_migration_totals, bins=30, edgecolor='black')

hist_ax.set_title('Distribution of Migrant Counts per Destination Country')
hist_ax.set_xlabel('Number of Migrants')
hist_ax.set_ylabel('Number of Countries')
hist_fig.tight_layout()
plt.show()

In [None]:
# Keep destinations whose total reaches the threshold as individual entries
# and pool every remaining destination into a single 'Other' entry.
threshold = 0.05 * 1_000_000  # 50,000 migrants

# One mask and its complement, so a total exactly equal to the threshold is
# counted once (as a major destination) instead of being dropped entirely.
is_major = out_migration_totals >= threshold
others_sum = out_migration_totals[~is_major].sum()

# Copy before adding the extra row so the filtered selection is an
# independent Series.
major_countries = out_migration_totals[is_major].copy()
major_countries['Other'] = others_sum

major_countries

In [None]:
# The 11 largest flows out of VE recorded for 2022-02-01
is_feb_2022 = df_ve_out['migration_month'] == '2022-02-01'
ranked_feb_2022 = df_ve_out[is_feb_2022].sort_values(by='num_migrants', ascending=False)
df_ve_out_top = ranked_feb_2022.head(11)
df_ve_out_top[['country_to', 'num_migrants']]

# Top ranked on 2022-02-01

In [None]:
# Flows out of VE for 2022-02-01, summed per destination and ranked
in_selected_month = df_ve_out['migration_month'] == '2022-02-01'
df_month = df_ve_out[in_selected_month]

migration_totals = df_month.groupby('country_to')['num_migrants'].sum()
sorted_totals = migration_totals.sort_values(ascending=False)

# NOTE(review): despite its name, top_10 keeps the 8 largest destinations;
# everything ranked below them is pooled into others_sum.
top_10 = sorted_totals.iloc[:8]
others_sum = sorted_totals.iloc[8:].sum()

# Append the pooled remainder as a final 'Other' entry
other_entry = pd.Series({'Other': others_sum})
migration_final = pd.concat([top_10, other_entry])

migration_final

In [None]:
# Split all flows by direction relative to UA (zero counts are kept here):
# df_ua_in has UA as destination, df_ua_out has UA as origin.
arrives_in_ua = df['country_to'] == 'UA'
departs_from_ua = df['country_from'] == 'UA'
df_ua_in = df[arrives_in_ua]
df_ua_out = df[departs_from_ua]

In [None]:
# Static chart of flows into UA, drawing one line per origin country
ua_fig, ua_ax = plt.subplots(figsize=(8, 5))
origin_codes = df_ua_in['country_from']
for country in origin_codes.unique():
    origin_flows = df_ua_in[origin_codes == country]
    ua_ax.plot(origin_flows['migration_month'], origin_flows['num_migrants'], label=country)

# Titles and grid; each line carries a label, but no legend is drawn
ua_ax.set_title("Migrants to UA Over Time")
ua_ax.set_xlabel("Month")
ua_ax.set_ylabel("Number of Migrants")
ua_ax.grid(True)
ua_fig.tight_layout()

plt.show()

In [None]:
# Interactive chart of flows into UA: one line per origin country
fig = px.line(
    data_frame=df_ua_in,
    x="migration_month",
    y="num_migrants",
    color="country_from",
    markers=True,
    title="Migrants to UA Over Time",
)

# Human-readable axis and legend titles
layout_titles = {
    "xaxis_title": "Month",
    "yaxis_title": "Number of Migrants",
    "legend_title": "From Country",
}
fig.update_layout(**layout_titles)

fig.show()

In [None]:
# Interactive chart of flows out of UA
fig = px.line(
    df_ua_out,
    x="migration_month",
    y="num_migrants",
    color="country_to",  # separate line for each destination country
    markers=True,
    title="Migrants from UA Over Time"
)

# The lines are split by destination (country_to), so the legend must say so
fig.update_layout(
    xaxis_title="Month",
    yaxis_title="Number of Migrants",
    legend_title="To Country"
)

fig.show()

In [None]:
# Non-empty flows leaving PL
df_pl_out = df[(df['country_from'] == 'PL') & (df['num_migrants'] != 0)]

fig = px.line(
    df_pl_out,
    x="migration_month",
    y="num_migrants",
    color="country_to",  # separate line for each destination country
    markers=True,
    title="Migrants from PL Over Time"
)

# The lines are split by destination (country_to), so the legend must say so
fig.update_layout(
    xaxis_title="Month",
    yaxis_title="Number of Migrants",
    legend_title="To Country"
)

fig.show()

In [None]:
# Non-empty flows leaving DE
df_de_out = df[(df['country_from'] == 'DE') & (df['num_migrants'] != 0)]

fig = px.line(
    df_de_out,
    x="migration_month",
    y="num_migrants",
    color="country_to",  # separate line for each destination country
    markers=True,
    title="Migrants from DE Over Time"
)

# The lines are split by destination (country_to), so the legend must say so
fig.update_layout(
    xaxis_title="Month",
    yaxis_title="Number of Migrants",
    legend_title="To Country"
)

fig.show()

In [None]:
# Non-empty flows leaving CZ
df_cz_out = df[(df['country_from'] == 'CZ') & (df['num_migrants'] != 0)]

fig = px.line(
    df_cz_out,
    x="migration_month",
    y="num_migrants",
    color="country_to",  # separate line for each destination country
    markers=True,
    title="Migrants from CZ Over Time"
)

# The lines are split by destination (country_to), so the legend must say so
fig.update_layout(
    xaxis_title="Month",
    yaxis_title="Number of Migrants",
    legend_title="To Country"
)

fig.show()

In [None]:
# Download the example dataset from the Prophet repository.
# NOTE: this rebinds `df`, replacing the migration table loaded earlier.
EXAMPLE_CSV_URL = 'https://raw.githubusercontent.com/facebook/prophet/main/examples/example_wp_log_peyton_manning.csv'
df = pd.read_csv(EXAMPLE_CSV_URL)
df.head()
# 2905 rows

In [None]:
# Prophet is not imported anywhere else in the notebook, so bring it into
# scope here; without this the cell fails with a NameError.
from prophet import Prophet

# Fit a model with default settings on the example data
m = Prophet()
m.fit(df)

In [None]:
# Build the frame to predict on, asking Prophet for 365 additional periods
forecast_periods = 365
future = m.make_future_dataframe(periods=forecast_periods)
future.tail()
# 3270 rows

In [None]:
# Predict over the extended frame and preview the point forecast together
# with its lower/upper bounds for the last rows.
forecast = m.predict(future)
summary_columns = ['ds', 'yhat', 'yhat_lower', 'yhat_upper']
forecast[summary_columns].tail()

In [None]:
# Static plot of the forecast produced by the fitted model
fig1 = m.plot(fcst=forecast)

In [None]:
# Static plot of the individual forecast components
fig2 = m.plot_components(fcst=forecast)

In [None]:
# plot_plotly is not imported anywhere else in the notebook, so bring it
# into scope here; without this the cell fails with a NameError.
from prophet.plot import plot_plotly

# Interactive (plotly) version of the forecast plot
plot_plotly(m, forecast)

In [None]:
# plot_components_plotly is not imported anywhere else in the notebook, so
# bring it into scope here; without this the cell fails with a NameError.
from prophet.plot import plot_components_plotly

# Interactive (plotly) version of the components plot
plot_components_plotly(m, forecast)