# international_migration_flow

This note works to produce the dataset to be put in Mapineq database  
Source: Meta  
Publisher: HDX (https://data.humdata.org/dataset/international-migration-flows)   
New York Times Article: https://www.nytimes.com/interactive/2025/04/17/opinion/global-migration-facebook-data.html   
Special countries: BA, UA, XK, UK/GB, EL/GR 

In [1]:
import os
import gc
import rasterio
import numpy as np
import pandas as pd
from tqdm import tqdm
import geopandas as gpd
from pathlib import Path
import dask_geopandas as dgpd
from osgeo import gdal, osr
from rasterstats import zonal_stats

BASE_DIR = Path('/Users/wenlanzhang/PycharmProjects/Mapineq/src/data-wrangling/')
DATA_DIR = Path('/Users/wenlanzhang/Downloads/PhD_UCL/Data/Oxford')

# Load NTUS

In [2]:
# Load a GeoJSON file
gdf_2024 = gpd.read_file(DATA_DIR/"NUTS_RG_01M_2024_3035.geojson")

gdf_country_2024 = gdf_2024[gdf_2024['LEVL_CODE'] == 0]
gdf_country_2024 = gdf_country_2024[['CNTR_CODE', 'geometry']].reset_index(drop=True)
gdf_country_2024

Unnamed: 0,CNTR_CODE,geometry
0,EL,"MULTIPOLYGON (((6083881.558 1676236.011, 60838..."
1,ES,"MULTIPOLYGON (((3815056.85 1904971.998, 381575..."
2,FI,"MULTIPOLYGON (((4999905.463 5305310.537, 50031..."
3,FR,"MULTIPOLYGON (((9980485.23 -3029930.054, 99812..."
4,HR,"MULTIPOLYGON (((4809521.18 2624665.768, 480963..."
5,EE,"MULTIPOLYGON (((5200614.71 4159725.901, 520083..."
6,DE,"MULTIPOLYGON (((4355225.354 2715902.995, 43548..."
7,DK,"MULTIPOLYGON (((4650283.775 3591676.53, 465057..."
8,BA,"MULTIPOLYGON (((4866544.51 2485914.128, 486624..."
9,AT,"MULTIPOLYGON (((4354847.685 2714710.627, 43552..."


In [3]:
# Load a GeoJSON file
gdf_2021 = gpd.read_file(DATA_DIR/"NUTS_RG_01M_2021_3035.geojson")

gdf_country_2021 = gdf_2021[gdf_2021['LEVL_CODE'] == 0]
gdf_country_2021 = gdf_country_2021[['CNTR_CODE', 'geometry']].reset_index(drop=True)
uk_2021 = gdf_country_2021[gdf_country_2021['CNTR_CODE'] == 'UK']
uk_2021

Unnamed: 0,CNTR_CODE,geometry
34,UK,"MULTIPOLYGON (((3546135.14 4022028.934, 354660..."


In [4]:
# Concatenate with 2024 data
gdf_country = pd.concat(
    [gdf_country_2024, uk_2021],
    ignore_index=True  # Reset index
)
gdf_country

Unnamed: 0,CNTR_CODE,geometry
0,EL,"MULTIPOLYGON (((6083881.558 1676236.011, 60838..."
1,ES,"MULTIPOLYGON (((3815056.85 1904971.998, 381575..."
2,FI,"MULTIPOLYGON (((4999905.463 5305310.537, 50031..."
3,FR,"MULTIPOLYGON (((9980485.23 -3029930.054, 99812..."
4,HR,"MULTIPOLYGON (((4809521.18 2624665.768, 480963..."
5,EE,"MULTIPOLYGON (((5200614.71 4159725.901, 520083..."
6,DE,"MULTIPOLYGON (((4355225.354 2715902.995, 43548..."
7,DK,"MULTIPOLYGON (((4650283.775 3591676.53, 465057..."
8,BA,"MULTIPOLYGON (((4866544.51 2485914.128, 486624..."
9,AT,"MULTIPOLYGON (((4354847.685 2714710.627, 43552..."


In [5]:
country_list = gdf_country['CNTR_CODE'].unique()
len(country_list)
country_list

array(['EL', 'ES', 'FI', 'FR', 'HR', 'EE', 'DE', 'DK', 'BA', 'AT', 'BG',
       'CH', 'CY', 'BE', 'CZ', 'AL', 'LU', 'LV', 'ME', 'IE', 'IS', 'IT',
       'MK', 'MT', 'LI', 'NL', 'LT', 'HU', 'RS', 'SE', 'SI', 'TR', 'UA',
       'SK', 'RO', 'NO', 'PL', 'PT', 'XK', 'UK'], dtype=object)

# Load Migration

In [6]:
df = pd.read_csv(DATA_DIR/f"Migration/international_migration_flow.csv") 
df['year'] = pd.to_datetime(df['migration_month']).dt.year
df['month'] = pd.to_datetime(df['migration_month']).dt.month

# Define the mapping of old codes to new codes
country_code_mapping = {
    'GR': 'EL',  # Greece (GR → EL)
    'GB': 'UK'   # United Kingdom (GB → UK)
}

# Apply the replacement to both columns
df['country_from'] = df['country_from'].replace(country_code_mapping)
df['country_to'] = df['country_to'].replace(country_code_mapping)

df
# len(df['country_from'].unique())

Unnamed: 0,country_from,country_to,migration_month,num_migrants,year,month
0,AD,AE,2019-01,12,2019,1
1,AD,AE,2019-02,2,2019,2
2,AD,AE,2019-03,1,2019,3
3,AD,AE,2019-04,7,2019,4
4,AD,AE,2019-05,0,2019,5
...,...,...,...,...,...,...
1563149,ZW,ZM,2022-08,138,2022,8
1563150,ZW,ZM,2022-09,162,2022,9
1563151,ZW,ZM,2022-10,149,2022,10
1563152,ZW,ZM,2022-11,104,2022,11


In [10]:
len(df['country_to'].unique())
# df['country_to'].unique()

# df[df['country_to'].isna()].groupby('country_from').size().unique()
# df[df['country_from'].isna()].groupby('country_to').size().unique()

181

# Country Aggregation

In [28]:
def prepare_flow_data(df_input, flow_type: str, country_scope: str, country_list: list):
    # Determine the column based on flow direction
    flow_col = 'country_to' if flow_type == 'inflow' else 'country_from'

    # Optional: filter for EU countries
    if country_scope == 'eu_countries':
        df_input = df_input[df_input[flow_col].isin(country_list)]

    # Ensure 'month' column exists
    if 'month' not in df_input.columns:
        df_input['month'] = pd.to_datetime(df_input['migration_month']).dt.month

    # Yearly aggregation
    yearly = (
        df_input.groupby([flow_col, 'year'])
        .agg(migration_count_total=('num_migrants', 'sum'))
        .reset_index()
        .assign(month='all')  # Placeholder month
        .rename(columns={flow_col: 'geo', 'year': 'obsTime'})
    )

    # Monthly aggregation
    monthly = (
        df_input.groupby([flow_col, 'year', 'month'])
        .agg(migration_count_total=('num_migrants', 'sum'))
        .reset_index()
        .rename(columns={flow_col: 'geo', 'year': 'obsTime'})
    )

    # Combine both
    combined = pd.concat([yearly, monthly], ignore_index=True)

    # Melt to long format
    melted = combined.melt(
        id_vars=['geo', 'obsTime', 'month'],
        value_vars=['migration_count_total'],
        var_name='time_granularity',
        value_name='obsValue'
    )

    # Clean and tag
    melted['time_granularity'] = melted['time_granularity'].map({
        'migration_count_total': 'Total Migration'
    })
    melted['flow_type'] = flow_type
    melted['country'] = country_scope

    return melted

In [29]:
# Inflow
inflow_all = prepare_flow_data(df, 'inflow', 'all_countries', country_list)
# inflow_eu = prepare_flow_data(df, 'inflow', 'eu_countries', country_list)
# inflow_combined = pd.concat([inflow_all, inflow_eu], ignore_index=True)

# Outflow
outflow_all = prepare_flow_data(df, 'outflow', 'all_countries', country_list)
# outflow_eu = prepare_flow_data(df, 'outflow', 'eu_countries', country_list)
# outflow_combined = pd.concat([outflow_all, outflow_eu], ignore_index=True)

# Combine inflow and outflow into one final dataset
# migration_combined = pd.concat([inflow_combined, outflow_combined], ignore_index=True)
migration_combined = pd.concat([inflow_all, outflow_all], ignore_index=True)
migration_combined

Unnamed: 0,geo,obsTime,month,time_granularity,obsValue,flow_type,country
0,AD,2019,all,Total Migration,8390,inflow,all_countries
1,AD,2020,all,Total Migration,7908,inflow,all_countries
2,AD,2021,all,Total Migration,6923,inflow,all_countries
3,AD,2022,all,Total Migration,9685,inflow,all_countries
4,AE,2019,all,Total Migration,1248145,inflow,all_countries
...,...,...,...,...,...,...,...
18715,ZW,2022,8,Total Migration,4339,outflow,all_countries
18716,ZW,2022,9,Total Migration,4867,outflow,all_countries
18717,ZW,2022,10,Total Migration,6280,outflow,all_countries
18718,ZW,2022,11,Total Migration,5907,outflow,all_countries


In [30]:
migration_combined['geo'].unique()

array(['AD', 'AE', 'AF', 'AL', 'AM', 'AO', 'AR', 'AT', 'AU', 'AZ', 'BA',
       'BB', 'BD', 'BE', 'BF', 'BG', 'BH', 'BI', 'BJ', 'BN', 'BO', 'BR',
       'BS', 'BT', 'BW', 'BY', 'BZ', 'CA', 'CD', 'CF', 'CG', 'CH', 'CI',
       'CL', 'CM', 'CO', 'CR', 'CV', 'CY', 'CZ', 'DE', 'DJ', 'DK', 'DO',
       'DZ', 'EC', 'EE', 'EG', 'EL', 'ER', 'ES', 'ET', 'FI', 'FJ', 'FM',
       'FR', 'GA', 'GD', 'GE', 'GH', 'GM', 'GN', 'GQ', 'GT', 'GW', 'GY',
       'HK', 'HN', 'HR', 'HT', 'HU', 'ID', 'IE', 'IL', 'IN', 'IQ', 'IS',
       'IT', 'JM', 'JO', 'JP', 'KE', 'KG', 'KH', 'KI', 'KM', 'KR', 'KW',
       'KZ', 'LA', 'LB', 'LC', 'LK', 'LR', 'LS', 'LT', 'LU', 'LV', 'LY',
       'MA', 'MD', 'ME', 'MG', 'MK', 'ML', 'MM', 'MN', 'MO', 'MR', 'MT',
       'MU', 'MV', 'MW', 'MX', 'MY', 'MZ', 'NE', 'NG', 'NI', 'NL', 'NO',
       'NP', 'NZ', 'OM', 'PA', 'PE', 'PG', 'PH', 'PK', 'PL', 'PT', 'PY',
       'QA', 'RO', 'RS', 'RU', 'RW', 'SA', 'SB', 'SD', 'SE', 'SG', 'SI',
       'SK', 'SL', 'SN', 'SR', 'SS', 'ST', 'SV', 'S

# Individual Country

In [31]:
# Create inflow DataFrame
inflow_df = df.rename(columns={
    'country_to': 'geo',
    'country_from': 'country'
})
inflow_df['flow_type'] = 'inflow'

# Create outflow DataFrame
outflow_df = df.rename(columns={
    'country_from': 'geo',
    'country_to': 'country',
})
outflow_df['flow_type'] = 'outflow'

# Concatenate both
individual_df = pd.concat([inflow_df, outflow_df], ignore_index=True)

# Optional: reorder columns
individual_df = individual_df.rename(columns={
    'num_migrants': 'obsValue',
    'year': 'obsTime',
})
individual_df['time_granularity'] = 'Monthly'
individual_df = individual_df[['geo', 'obsTime', 'month', 'time_granularity', 'obsValue', 'flow_type', 'country']]
individual_df

Unnamed: 0,geo,obsTime,month,time_granularity,obsValue,flow_type,country
0,AE,2019,1,Monthly,12,inflow,AD
1,AE,2019,2,Monthly,2,inflow,AD
2,AE,2019,3,Monthly,1,inflow,AD
3,AE,2019,4,Monthly,7,inflow,AD
4,AE,2019,5,Monthly,0,inflow,AD
...,...,...,...,...,...,...,...
3126303,ZW,2022,8,Monthly,138,outflow,ZM
3126304,ZW,2022,9,Monthly,162,outflow,ZM
3126305,ZW,2022,10,Monthly,149,outflow,ZM
3126306,ZW,2022,11,Monthly,104,outflow,ZM


# Check
Same country, Same year, Same flow direction should have same number

In [13]:
# individual_df[(individual_df['geo'] == 'AE') & (individual_df['obsTime'] == 2019) & (individual_df['flow_type'] == 'inflow') & (individual_df['month'] == 1)]['obsValue'].sum()
individual_df[(individual_df['geo'] == 'AE') & (individual_df['obsTime'] == 2019) & (individual_df['flow_type'] == 'inflow')]['obsValue'].sum()

np.int64(1248145)

In [14]:
migration_combined[(migration_combined['geo'] == 'AE') & (migration_combined['obsTime'] == 2019) & (migration_combined['flow_type'] == 'inflow') & (migration_combined['month'] == 'all')]

Unnamed: 0,geo,obsTime,month,time_granularity,obsValue,flow_type,country
4,AE,2019,all,Total Migration,1248145,inflow,all_countries


In [15]:
migration_combined[(migration_combined['geo'] == 'AE') & (migration_combined['obsTime'] == 2019) & (migration_combined['flow_type'] == 'inflow') & (migration_combined['month'] != 'all')]['obsValue'].sum()

np.int64(1248145)

# Final Combine

In [16]:
final = pd.concat([individual_df, migration_combined], ignore_index=True)
final

Unnamed: 0,geo,obsTime,month,time_granularity,obsValue,flow_type,country
0,AE,2019,1,Monthly,12,inflow,AD
1,AE,2019,2,Monthly,2,inflow,AD
2,AE,2019,3,Monthly,1,inflow,AD
3,AE,2019,4,Monthly,7,inflow,AD
4,AE,2019,5,Monthly,0,inflow,AD
...,...,...,...,...,...,...,...
3145023,ZW,2022,8,Total Migration,4339,outflow,all_countries
3145024,ZW,2022,9,Total Migration,4867,outflow,all_countries
3145025,ZW,2022,10,Total Migration,6280,outflow,all_countries
3145026,ZW,2022,11,Total Migration,5907,outflow,all_countries


# Merge

In [17]:
merged_migration = final.merge(gdf_country, left_on='geo', right_on='CNTR_CODE', how='inner').drop(columns='CNTR_CODE')

merged_migration['geo_source'] = np.where(
    merged_migration['geo'] == 'UK',  # Condition
    'NUTS2021',                             # Value if True (UK)
    'NUTS2024'                              # Value if False (all others)
)

merged_migration

Unnamed: 0,geo,obsTime,month,time_granularity,obsValue,flow_type,country,geometry,geo_source
0,AL,2019,1,Monthly,0,inflow,AD,"MULTIPOLYGON (((5120936.933 2221189.677, 51208...",NUTS2024
1,AL,2019,2,Monthly,5,inflow,AD,"MULTIPOLYGON (((5120936.933 2221189.677, 51208...",NUTS2024
2,AL,2019,3,Monthly,2,inflow,AD,"MULTIPOLYGON (((5120936.933 2221189.677, 51208...",NUTS2024
3,AL,2019,4,Monthly,0,inflow,AD,"MULTIPOLYGON (((5120936.933 2221189.677, 51208...",NUTS2024
4,AL,2019,5,Monthly,0,inflow,AD,"MULTIPOLYGON (((5120936.933 2221189.677, 51208...",NUTS2024
...,...,...,...,...,...,...,...,...,...
677297,XK,2022,8,Total Migration,2690,outflow,all_countries,"POLYGON ((5201301.166 2301673.012, 5202865.563...",NUTS2024
677298,XK,2022,9,Total Migration,4658,outflow,all_countries,"POLYGON ((5201301.166 2301673.012, 5202865.563...",NUTS2024
677299,XK,2022,10,Total Migration,6711,outflow,all_countries,"POLYGON ((5201301.166 2301673.012, 5202865.563...",NUTS2024
677300,XK,2022,11,Total Migration,3905,outflow,all_countries,"POLYGON ((5201301.166 2301673.012, 5202865.563...",NUTS2024


In [18]:
len(merged_migration['geo'].unique())

39

In [19]:
merged_migration[merged_migration['geo'] == 'XK']

Unnamed: 0,geo,obsTime,month,time_granularity,obsValue,flow_type,country,geometry,geo_source
1824,XK,2019,1,Monthly,0,inflow,AD,"POLYGON ((5201301.166 2301673.012, 5202865.563...",NUTS2024
1825,XK,2019,2,Monthly,0,inflow,AD,"POLYGON ((5201301.166 2301673.012, 5202865.563...",NUTS2024
1826,XK,2019,3,Monthly,0,inflow,AD,"POLYGON ((5201301.166 2301673.012, 5202865.563...",NUTS2024
1827,XK,2019,4,Monthly,9,inflow,AD,"POLYGON ((5201301.166 2301673.012, 5202865.563...",NUTS2024
1828,XK,2019,5,Monthly,2,inflow,AD,"POLYGON ((5201301.166 2301673.012, 5202865.563...",NUTS2024
...,...,...,...,...,...,...,...,...,...
677297,XK,2022,8,Total Migration,2690,outflow,all_countries,"POLYGON ((5201301.166 2301673.012, 5202865.563...",NUTS2024
677298,XK,2022,9,Total Migration,4658,outflow,all_countries,"POLYGON ((5201301.166 2301673.012, 5202865.563...",NUTS2024
677299,XK,2022,10,Total Migration,6711,outflow,all_countries,"POLYGON ((5201301.166 2301673.012, 5202865.563...",NUTS2024
677300,XK,2022,11,Total Migration,3905,outflow,all_countries,"POLYGON ((5201301.166 2301673.012, 5202865.563...",NUTS2024


# Export

In [21]:
# # Ensure merged is a GeoDataFrame
gdf_merged = gpd.GeoDataFrame(merged_migration, geometry='geometry')

In [22]:
# # Export without the geometry column
merged_migration.drop(columns='geometry').to_csv(DATA_DIR/"Migration/Output/international_migration.csv", index=True, index_label="id")

In [28]:
# Export with the geometry column
gdf_merged_country = gdf_merged[(gdf_merged['country'] == 'all_countries') & (gdf_merged['month'] != 'all')]
gdf_merged_country.to_csv(DATA_DIR/"Migration/Processed/international_migration_all_country_bymonth.csv", index=True, index_label="id")

In [27]:
gdf_merged_country

Unnamed: 0,geo,obsTime,month,time_granularity,obsValue,flow_type,country,geometry,geo_source
673246,AL,2019,all,Total Migration,33450,inflow,all_countries,"MULTIPOLYGON (((5120936.933 2221189.677, 51208...",NUTS2024
673247,AL,2020,all,Total Migration,34778,inflow,all_countries,"MULTIPOLYGON (((5120936.933 2221189.677, 51208...",NUTS2024
673248,AL,2021,all,Total Migration,30131,inflow,all_countries,"MULTIPOLYGON (((5120936.933 2221189.677, 51208...",NUTS2024
673249,AL,2022,all,Total Migration,28662,inflow,all_countries,"MULTIPOLYGON (((5120936.933 2221189.677, 51208...",NUTS2024
673250,AT,2019,all,Total Migration,103587,inflow,all_countries,"MULTIPOLYGON (((4354847.685 2714710.627, 43552...",NUTS2024
...,...,...,...,...,...,...,...,...,...
675425,UK,2022,all,Total Migration,442728,outflow,all_countries,"MULTIPOLYGON (((3546135.14 4022028.934, 354660...",NUTS2021
675426,XK,2019,all,Total Migration,35189,outflow,all_countries,"POLYGON ((5201301.166 2301673.012, 5202865.563...",NUTS2024
675427,XK,2020,all,Total Migration,31446,outflow,all_countries,"POLYGON ((5201301.166 2301673.012, 5202865.563...",NUTS2024
675428,XK,2021,all,Total Migration,34511,outflow,all_countries,"POLYGON ((5201301.166 2301673.012, 5202865.563...",NUTS2024


In [24]:
# Reproject only once
gdf_country_ll = gdf_country.to_crs("EPSG:3857")

# Compute centroid and extract coordinates
gdf_country_ll['centroid'] = gdf_country_ll.geometry.centroid
gdf_country_ll['lon'] = gdf_country_ll['centroid'].x
gdf_country_ll['lat'] = gdf_country_ll['centroid'].y

country_coords = gdf_country_ll[['CNTR_CODE', 'lon', 'lat']].copy()
country_coords

gdf_country_ll

Unnamed: 0,CNTR_CODE,geometry,centroid,lon,lat
0,EL,"MULTIPOLYGON (((3298938.638 4315586.622, 32988...",POINT (2557818.465 4735784.59),2557818.0,4735785.0
1,ES,"MULTIPOLYGON (((455733.008 4874933.93, 456795....",POINT (-403205.361 4915380.798),-403205.4,4915381.0
2,FI,"MULTIPOLYGON (((3101760.532 11095778.494, 3112...",POINT (2923520.484 9578864.637),2923520.0,9578865.0
3,FR,"MULTIPOLYGON (((6172016.137 -2376935.15, 61727...",POINT (-138045.184 5490616.641),-138045.2,5490617.0
4,HR,"MULTIPOLYGON (((1822508.409 5868370.87, 182262...",POINT (1826178.186 5630635.442),1826178.0,5630635.0
5,EE,"MULTIPOLYGON (((2862671.48 8323714.126, 286289...",POINT (2843518.811 8111840.027),2843519.0,8111840.0
6,DE,"MULTIPOLYGON (((1163782.809 6033270.891, 11632...",POINT (1157958.025 6660331.023),1157958.0,6660331.0
7,DK,"MULTIPOLYGON (((1690946.745 7424862.027, 16914...",POINT (1118425.905 7554263.825),1118426.0,7554264.0
8,BA,"MULTIPOLYGON (((1886912.567 5661971.675, 18864...",POINT (1979510.332 5492972.626),1979510.0,5492973.0
9,AT,"MULTIPOLYGON (((1163214.211 6031504.349, 11637...",POINT (1575016.481 6040108.883),1575016.0,6040109.0


In [25]:
# Merge origin country (FROM)
df_from = df.merge(country_coords, left_on='country_from', right_on='CNTR_CODE', how='left')
df_from.rename(columns={'lon': 'start_lon', 'lat': 'start_lat'}, inplace=True)
df_from.drop(columns='CNTR_CODE', inplace=True)  # Drop to avoid conflict in next merge
df_from

Unnamed: 0,country_from,country_to,migration_month,num_migrants,year,month,start_lon,start_lat
0,AD,AE,2019-01,12,2019,1,,
1,AD,AE,2019-02,2,2019,2,,
2,AD,AE,2019-03,1,2019,3,,
3,AD,AE,2019-04,7,2019,4,,
4,AD,AE,2019-05,0,2019,5,,
...,...,...,...,...,...,...,...,...
1563149,ZW,ZM,2022-08,138,2022,8,,
1563150,ZW,ZM,2022-09,162,2022,9,,
1563151,ZW,ZM,2022-10,149,2022,10,,
1563152,ZW,ZM,2022-11,104,2022,11,,


In [26]:
# Merge destination country (TO)
df_to = df_from.merge(country_coords, left_on='country_to', right_on='CNTR_CODE', how='left')
df_to.rename(columns={'lon': 'end_lon', 'lat': 'end_lat'}, inplace=True)
df_to.drop(columns='CNTR_CODE', inplace=True)
df_to

Unnamed: 0,country_from,country_to,migration_month,num_migrants,year,month,start_lon,start_lat,end_lon,end_lat
0,AD,AE,2019-01,12,2019,1,,,,
1,AD,AE,2019-02,2,2019,2,,,,
2,AD,AE,2019-03,1,2019,3,,,,
3,AD,AE,2019-04,7,2019,4,,,,
4,AD,AE,2019-05,0,2019,5,,,,
...,...,...,...,...,...,...,...,...,...,...
1563149,ZW,ZM,2022-08,138,2022,8,,,,
1563150,ZW,ZM,2022-09,162,2022,9,,,,
1563151,ZW,ZM,2022-10,149,2022,10,,,,
1563152,ZW,ZM,2022-11,104,2022,11,,,,


In [27]:
clean_df = df_to.dropna(subset=['start_lon', 'start_lat', 'end_lon', 'end_lat'])
clean_df

Unnamed: 0,country_from,country_to,migration_month,num_migrants,year,month,start_lon,start_lat,end_lon,end_lat
26206,AL,AT,2019-01,34,2019,1,2.233391e+06,5.034732e+06,1.575016e+06,6.040109e+06
26207,AL,AT,2019-02,23,2019,2,2.233391e+06,5.034732e+06,1.575016e+06,6.040109e+06
26208,AL,AT,2019-03,36,2019,3,2.233391e+06,5.034732e+06,1.575016e+06,6.040109e+06
26209,AL,AT,2019-04,46,2019,4,2.233391e+06,5.034732e+06,1.575016e+06,6.040109e+06
26210,AL,AT,2019-05,26,2019,5,2.233391e+06,5.034732e+06,1.575016e+06,6.040109e+06
...,...,...,...,...,...,...,...,...,...,...
1527965,XK,UA,2022-08,8,2022,8,2.325391e+06,5.248771e+06,3.490131e+06,6.291143e+06
1527966,XK,UA,2022-09,12,2022,9,2.325391e+06,5.248771e+06,3.490131e+06,6.291143e+06
1527967,XK,UA,2022-10,2,2022,10,2.325391e+06,5.248771e+06,3.490131e+06,6.291143e+06
1527968,XK,UA,2022-11,6,2022,11,2.325391e+06,5.248771e+06,3.490131e+06,6.291143e+06


In [28]:
clean_df.to_csv(DATA_DIR/"Migration/Processed/Individual_Flow_3857.csv", index=True)