## Applying Negative Binomial Regression

## Model Specification
- #### Dependent variable:
    - Y: The migration flow (between regions)
- #### Independent variables:
    - ΔRisk (x₁): Difference in climate risk between the destination and the origin
    - ΔHousing (x₂): Difference in housing affordability between the destination and the origin
    - ΔUnemployment (x₃): Difference in unemployment rate between the destination and the origin

In [1]:
import pandas as pd
import numpy as np
import warnings
# Silence only deprecation-style noise. A blanket "ignore" would also hide
# numerical / convergence warnings that the count-model fits later in this
# notebook may emit, and those are needed to judge whether a fit is usable.
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=DeprecationWarning)
# Disable scientific notation & add commas
pd.set_option('display.float_format', '{:,.2f}'.format)
# Show all columns without truncation
pd.set_option('display.max_columns', None)

  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


In [2]:
import pandas as pd
# Origin-destination flow table; the path is relative to this notebook's folder.
flow_data_csv = '../../data/final/flow_data_final.csv'
flow_data = pd.read_csv(flow_data_csv)

In [3]:
# (number of rows, number of columns) of the loaded table
flow_data.shape

(88888, 410)

In [4]:
# Distinct destination codes in the table.
# NOTE: despite its name, `flow` holds destination codes, not flow counts.
flow = flow_data.loc[:, 'Destination_Code'].unique()
len(flow)

392

In [5]:
# Preview the first five rows (all columns are shown because
# display.max_columns is set to None above).
flow_data.head()

Unnamed: 0,Origin_Code,Destination_Code,Origin_Name,Destination_Name,Origin_State,Destination_State,Origin_Region,Destination_Region,Flow,Flow_MOE,Origin_Crosswalk2023_CBSA Code,Origin_Crosswalk2023_CBSA Title,Origin_Crosswalk2023_State Name,Origin_NRI_AVLN_EALPE,Origin_NRI_CFLD_EALPE,Origin_NRI_CWAV_EALPE,Origin_NRI_ERQK_EALPE,Origin_NRI_HAIL_EALPE,Origin_NRI_HWAV_EALPE,Origin_NRI_HRCN_EALPE,Origin_NRI_ISTM_EALPE,Origin_NRI_LNDS_EALPE,Origin_NRI_LTNG_EALPE,Origin_NRI_RFLD_EALPE,Origin_NRI_SWND_EALPE,Origin_NRI_TRND_EALPE,Origin_NRI_TSUN_EALPE,Origin_NRI_VLCN_EALPE,Origin_NRI_WFIR_EALPE,Origin_NRI_WNTW_EALPE,Origin_NRI_AVLN_EALB,Origin_NRI_CFLD_EALB,Origin_NRI_CWAV_EALB,Origin_NRI_ERQK_EALB,Origin_NRI_HAIL_EALB,Origin_NRI_HWAV_EALB,Origin_NRI_HRCN_EALB,Origin_NRI_ISTM_EALB,Origin_NRI_LNDS_EALB,Origin_NRI_LTNG_EALB,Origin_NRI_RFLD_EALB,Origin_NRI_SWND_EALB,Origin_NRI_TRND_EALB,Origin_NRI_TSUN_EALB,Origin_NRI_VLCN_EALB,Origin_NRI_WFIR_EALB,Origin_NRI_WNTW_EALB,Origin_NRI_AVLN_EXPPE,Origin_NRI_CFLD_EXPPE,Origin_NRI_CWAV_EXPPE,Origin_NRI_ERQK_EXPPE,Origin_NRI_HAIL_EXPPE,Origin_NRI_HWAV_EXPPE,Origin_NRI_HRCN_EXPPE,Origin_NRI_ISTM_EXPPE,Origin_NRI_LNDS_EXPPE,Origin_NRI_LTNG_EXPPE,Origin_NRI_RFLD_EXPPE,Origin_NRI_SWND_EXPPE,Origin_NRI_TRND_EXPPE,Origin_NRI_TSUN_EXPPE,Origin_NRI_VLCN_EXPPE,Origin_NRI_WFIR_EXPPE,Origin_NRI_WNTW_EXPPE,Origin_NRI_AVLN_EXPB,Origin_NRI_CFLD_EXPB,Origin_NRI_CWAV_EXPB,Origin_NRI_ERQK_EXPB,Origin_NRI_HAIL_EXPB,Origin_NRI_HWAV_EXPB,Origin_NRI_HRCN_EXPB,Origin_NRI_ISTM_EXPB,Origin_NRI_LNDS_EXPB,Origin_NRI_LTNG_EXPB,Origin_NRI_RFLD_EXPB,Origin_NRI_SWND_EXPB,Origin_NRI_TRND_EXPB,Origin_NRI_TSUN_EXPB,Origin_NRI_VLCN_EXPB,Origin_NRI_WFIR_EXPB,Origin_NRI_WNTW_EXPB,Origin_NRI_BUILDVALUE,Origin_NRI_POPULATION,Origin_NRI_AREA,Origin_Migration16to20_A_Inflow_Estimate,Origin_Migration16to20_A_Outflow_Estimate,Origin_Migration16to20_A_NetMigration_Estimate,Origin_Migration16to20_A_GrossMigration_Estimate,Origin_Migration16to20_A_Inflow_MOE,Origin_Migration16to2
0_A_Outflow_MOE,Origin_Migration16to20_A_NetMigration_MOE,Origin_Migration16to20_A_GrossMigration_MOE,Origin_HousingPI25_Metro Area,Origin_HousingPI25_GEOID.1,Origin_HousingPI25_1980,Origin_HousingPI25_1981,Origin_HousingPI25_1982,Origin_HousingPI25_1983,Origin_HousingPI25_1984,Origin_HousingPI25_1985,Origin_HousingPI25_1986,Origin_HousingPI25_1987,Origin_HousingPI25_1988,Origin_HousingPI25_1989,Origin_HousingPI25_1990,Origin_HousingPI25_1991,Origin_HousingPI25_1992,Origin_HousingPI25_1993,Origin_HousingPI25_1994,Origin_HousingPI25_1995,Origin_HousingPI25_1996,Origin_HousingPI25_1997,Origin_HousingPI25_1998,Origin_HousingPI25_1999,Origin_HousingPI25_2000,Origin_HousingPI25_2001,Origin_HousingPI25_2002,Origin_HousingPI25_2003,Origin_HousingPI25_2004,Origin_HousingPI25_2005,Origin_HousingPI25_2006,Origin_HousingPI25_2007,Origin_HousingPI25_2008,Origin_HousingPI25_2009,Origin_HousingPI25_2010,Origin_HousingPI25_2011,Origin_HousingPI25_2012,Origin_HousingPI25_2013,Origin_HousingPI25_2014,Origin_HousingPI25_2015,Origin_HousingPI25_2016,Origin_HousingPI25_2017,Origin_HousingPI25_2018,Origin_HousingPI25_2019,Origin_HousingPI25_2020,Origin_HousingPI25_2021,Origin_HousingPI25_2022,Origin_HousingPI25_2023,Origin_HousingPI25_2024,Origin_Labor_Force_2015,Origin_Labor_Force_2016,Origin_Labor_Force_2017,Origin_Labor_Force_2018,Origin_Labor_Force_2019,Origin_Labor_Force_2020,Origin_Labor_Force_2021,Origin_Labor_Force_2022,Origin_Labor_Force_2023,Origin_Labor_Force_2024,Origin_Employed_2015,Origin_Employed_2016,Origin_Employed_2017,Origin_Employed_2018,Origin_Employed_2019,Origin_Employed_2020,Origin_Employed_2021,Origin_Employed_2022,Origin_Employed_2023,Origin_Employed_2024,Origin_Unemployed_2015,Origin_Unemployed_2016,Origin_Unemployed_2017,Origin_Unemployed_2018,Origin_Unemployed_2019,Origin_Unemployed_2020,Origin_Unemployed_2021,Origin_Unemployed_2022,Origin_Unemployed_2023,Origin_Unemployed_2024,Origin_MSA_Name,Origin_Current_Residence_Population,Origin_Current_Residence_Popu
lation_MOE,Origin_Current_Residence_Nonmovers,Origin_Current_Residence_Nonmovers_MOE,Origin_Current_Residence_Movers_within_MSA,Origin_Current_Residence_Movers_within_MSA_MOE,Origin_Current_Residence_Movers_different_MSA,Origin_Current_Residence_Movers_different_MSA_MOE,Origin_Current_Residence_elsewhere_PR,Origin_Current_Residence_elsewhere_PR_MOE,Origin_Current_Residence_Movers_abroad,Origin_Current_Residence_Movers_abroad_MOE,Origin_total_population_equiv,Origin_exposure_pop_total,Origin_pct_pop_exposed,Origin_loss_pop_total,Origin_pct_pop_loss,Origin_building_equiv,Origin_exposure_bldg_total,Origin_pct_bldg_exposed,Origin_loss_bldg_total,Origin_pct_bldg_loss,Origin_loss_per_capita_pe,Origin_loss_per_capita_bldg,Origin_climate_exposure_total,Origin_climate_loss_total,Origin_Unemployment_Rate_2015,Origin_Unemployment_Rate_2016,Origin_Unemployment_Rate_2017,Origin_Unemployment_Rate_2018,Origin_Unemployment_Rate_2019,Origin_Unemployment_Rate_2020,Origin_Unemployment_Rate_2021,Origin_Unemployment_Rate_2022,Origin_Unemployment_Rate_2023,Origin_Unemployment_Rate_2024,Origin_Unemployed_2016_2020_sum,Origin_LaborForce_2016_2020_sum,Origin_avg_unemployment_rate_16_20,Origin_HousingPI25_2016_2020_mean,Destination_Crosswalk2023_CBSA Code,Destination_Crosswalk2023_CBSA Title,Destination_Crosswalk2023_State 
Name,Destination_NRI_AVLN_EALPE,Destination_NRI_CFLD_EALPE,Destination_NRI_CWAV_EALPE,Destination_NRI_ERQK_EALPE,Destination_NRI_HAIL_EALPE,Destination_NRI_HWAV_EALPE,Destination_NRI_HRCN_EALPE,Destination_NRI_ISTM_EALPE,Destination_NRI_LNDS_EALPE,Destination_NRI_LTNG_EALPE,Destination_NRI_RFLD_EALPE,Destination_NRI_SWND_EALPE,Destination_NRI_TRND_EALPE,Destination_NRI_TSUN_EALPE,Destination_NRI_VLCN_EALPE,Destination_NRI_WFIR_EALPE,Destination_NRI_WNTW_EALPE,Destination_NRI_AVLN_EALB,Destination_NRI_CFLD_EALB,Destination_NRI_CWAV_EALB,Destination_NRI_ERQK_EALB,Destination_NRI_HAIL_EALB,Destination_NRI_HWAV_EALB,Destination_NRI_HRCN_EALB,Destination_NRI_ISTM_EALB,Destination_NRI_LNDS_EALB,Destination_NRI_LTNG_EALB,Destination_NRI_RFLD_EALB,Destination_NRI_SWND_EALB,Destination_NRI_TRND_EALB,Destination_NRI_TSUN_EALB,Destination_NRI_VLCN_EALB,Destination_NRI_WFIR_EALB,Destination_NRI_WNTW_EALB,Destination_NRI_AVLN_EXPPE,Destination_NRI_CFLD_EXPPE,Destination_NRI_CWAV_EXPPE,Destination_NRI_ERQK_EXPPE,Destination_NRI_HAIL_EXPPE,Destination_NRI_HWAV_EXPPE,Destination_NRI_HRCN_EXPPE,Destination_NRI_ISTM_EXPPE,Destination_NRI_LNDS_EXPPE,Destination_NRI_LTNG_EXPPE,Destination_NRI_RFLD_EXPPE,Destination_NRI_SWND_EXPPE,Destination_NRI_TRND_EXPPE,Destination_NRI_TSUN_EXPPE,Destination_NRI_VLCN_EXPPE,Destination_NRI_WFIR_EXPPE,Destination_NRI_WNTW_EXPPE,Destination_NRI_AVLN_EXPB,Destination_NRI_CFLD_EXPB,Destination_NRI_CWAV_EXPB,Destination_NRI_ERQK_EXPB,Destination_NRI_HAIL_EXPB,Destination_NRI_HWAV_EXPB,Destination_NRI_HRCN_EXPB,Destination_NRI_ISTM_EXPB,Destination_NRI_LNDS_EXPB,Destination_NRI_LTNG_EXPB,Destination_NRI_RFLD_EXPB,Destination_NRI_SWND_EXPB,Destination_NRI_TRND_EXPB,Destination_NRI_TSUN_EXPB,Destination_NRI_VLCN_EXPB,Destination_NRI_WFIR_EXPB,Destination_NRI_WNTW_EXPB,Destination_NRI_BUILDVALUE,Destination_NRI_POPULATION,Destination_NRI_AREA,Destination_Migration16to20_A_Inflow_Estimate,Destination_Migration16to20_A_Outflow_Estimate,Destination_Migration16to
20_A_NetMigration_Estimate,Destination_Migration16to20_A_GrossMigration_Estimate,Destination_Migration16to20_A_Inflow_MOE,Destination_Migration16to20_A_Outflow_MOE,Destination_Migration16to20_A_NetMigration_MOE,Destination_Migration16to20_A_GrossMigration_MOE,Destination_HousingPI25_Metro Area,Destination_HousingPI25_GEOID.1,Destination_HousingPI25_1980,Destination_HousingPI25_1981,Destination_HousingPI25_1982,Destination_HousingPI25_1983,Destination_HousingPI25_1984,Destination_HousingPI25_1985,Destination_HousingPI25_1986,Destination_HousingPI25_1987,Destination_HousingPI25_1988,Destination_HousingPI25_1989,Destination_HousingPI25_1990,Destination_HousingPI25_1991,Destination_HousingPI25_1992,Destination_HousingPI25_1993,Destination_HousingPI25_1994,Destination_HousingPI25_1995,Destination_HousingPI25_1996,Destination_HousingPI25_1997,Destination_HousingPI25_1998,Destination_HousingPI25_1999,Destination_HousingPI25_2000,Destination_HousingPI25_2001,Destination_HousingPI25_2002,Destination_HousingPI25_2003,Destination_HousingPI25_2004,Destination_HousingPI25_2005,Destination_HousingPI25_2006,Destination_HousingPI25_2007,Destination_HousingPI25_2008,Destination_HousingPI25_2009,Destination_HousingPI25_2010,Destination_HousingPI25_2011,Destination_HousingPI25_2012,Destination_HousingPI25_2013,Destination_HousingPI25_2014,Destination_HousingPI25_2015,Destination_HousingPI25_2016,Destination_HousingPI25_2017,Destination_HousingPI25_2018,Destination_HousingPI25_2019,Destination_HousingPI25_2020,Destination_HousingPI25_2021,Destination_HousingPI25_2022,Destination_HousingPI25_2023,Destination_HousingPI25_2024,Destination_Labor_Force_2015,Destination_Labor_Force_2016,Destination_Labor_Force_2017,Destination_Labor_Force_2018,Destination_Labor_Force_2019,Destination_Labor_Force_2020,Destination_Labor_Force_2021,Destination_Labor_Force_2022,Destination_Labor_Force_2023,Destination_Labor_Force_2024,Destination_Employed_2015,Destination_Employed_2016,Destination_Employed_2017,
Destination_Employed_2018,Destination_Employed_2019,Destination_Employed_2020,Destination_Employed_2021,Destination_Employed_2022,Destination_Employed_2023,Destination_Employed_2024,Destination_Unemployed_2015,Destination_Unemployed_2016,Destination_Unemployed_2017,Destination_Unemployed_2018,Destination_Unemployed_2019,Destination_Unemployed_2020,Destination_Unemployed_2021,Destination_Unemployed_2022,Destination_Unemployed_2023,Destination_Unemployed_2024,Destination_MSA_Name,Destination_Current_Residence_Population,Destination_Current_Residence_Population_MOE,Destination_Current_Residence_Nonmovers,Destination_Current_Residence_Nonmovers_MOE,Destination_Current_Residence_Movers_within_MSA,Destination_Current_Residence_Movers_within_MSA_MOE,Destination_Current_Residence_Movers_different_MSA,Destination_Current_Residence_Movers_different_MSA_MOE,Destination_Current_Residence_elsewhere_PR,Destination_Current_Residence_elsewhere_PR_MOE,Destination_Current_Residence_Movers_abroad,Destination_Current_Residence_Movers_abroad_MOE,Destination_total_population_equiv,Destination_exposure_pop_total,Destination_pct_pop_exposed,Destination_loss_pop_total,Destination_pct_pop_loss,Destination_building_equiv,Destination_exposure_bldg_total,Destination_pct_bldg_exposed,Destination_loss_bldg_total,Destination_pct_bldg_loss,Destination_loss_per_capita_pe,Destination_loss_per_capita_bldg,Destination_climate_exposure_total,Destination_climate_loss_total,Destination_Unemployment_Rate_2015,Destination_Unemployment_Rate_2016,Destination_Unemployment_Rate_2017,Destination_Unemployment_Rate_2018,Destination_Unemployment_Rate_2019,Destination_Unemployment_Rate_2020,Destination_Unemployment_Rate_2021,Destination_Unemployment_Rate_2022,Destination_Unemployment_Rate_2023,Destination_Unemployment_Rate_2024,Destination_Unemployed_2016_2020_sum,Destination_LaborForce_2016_2020_sum,Destination_avg_unemployment_rate_16_20,Destination_HousingPI25_2016_2020_mean
0,10740,10180,"Albuquerque, NM Metro Area","Abilene, TX Metro Area",New Mexico,Texas,West,South,55.0,65.0,10740.0,"Albuquerque, NM",New Mexico,0.0,0.0,112315.94,9955888.73,133893.13,0.0,0.0,20347.17,82295.84,1825047.91,10992794.85,258283.25,842327.18,0.0,0.0,4069.0,83697.75,0.0,0.0,68.0,25121903.7,1189266.59,0.0,0.0,21002.44,125352.47,22934.21,2343843.58,79559.72,929170.25,0.0,0.0,3745743.64,22878.85,0.0,0.0,613518999001.89,10631724800000.0,10615856000000.0,0.0,0.0,10615856000000.0,625397958844.71,10615856000000.0,598693739196.26,10615856000000.0,10615856000000.0,0.0,0.0,800635572225.69,10615855994705.76,0.0,0.0,10052201230.4,153788997000.0,153791290418.0,0.0,0.0,153791290418.0,9917802828.2,153791290418.0,9833463095.09,153791290418.0,153791290418.0,0.0,0.0,12559649899.62,153791290264.68,153791290418.0,915160.0,9389.52,32109.0,36067.0,-3958.0,68176.0,2198.86,2203.05,3126.02,3099.07,"Albuquerque, NM",10740.0,3.5,3.5,3.2,3.4,3.3,3.3,3.3,3.3,3.2,3.1,3.0,3.0,3.1,3.2,3.3,3.4,3.4,3.5,3.4,3.3,3.3,3.4,3.3,3.3,3.4,3.8,4.1,4.3,4.1,3.8,3.9,3.6,3.6,3.7,3.7,3.7,3.8,3.9,3.8,4.0,4.2,4.8,5.2,5.2,5.3,423411.0,434627.0,437006.0,437532.0,442237.0,432465.0,441823.0,447518.0,457244.0,464045.0,397057.0,408349.0,412634.0,417401.0,421821.0,399796.0,413016.0,431011.0,441611.0,446147.0,26354.0,26278.0,24372.0,20131.0,20416.0,32669.0,28807.0,16507.0,15633.0,17898.0,"Albuquerque, NM Metro Area",907037.0,870.0,784690.0,4088.0,85970.0,3607.0,25175.0,1692.0,6934.0,1058.0,4268.0,930.0,10615856000000.0,76965107063974.31,725.0,24310960.75,0.0,2614451937106.0,1118899856407.98,42.8,33601723.44,0.0,26.56,36.72,78084006920382.3,57912684.2,6.22,6.05,5.58,4.6,4.62,7.55,6.52,3.69,3.42,3.86,123866.0,2183867.0,5.67,3.94,10180.0,"Abilene, 
TX",Texas,0.0,0.0,485441.03,19906.67,483001.07,1375110.42,2751.96,118521.39,40474.26,246081.19,799068.31,333215.29,2730724.55,0.0,0.0,60137.66,957269.67,0.0,0.0,5501.59,67120.4,8539877.17,702.6,188083.44,63924.93,10175.72,30008.07,3730847.39,1285781.74,1894376.57,0.0,0.0,5298566.05,29422.02,0.0,0.0,2046680800000.0,2048316400000.0,2046680800000.0,2046680800000.0,2046680800000.0,2046411449150.22,24676263430.46,2046680800000.0,315200650271.38,2046680800000.0,2046680800000.0,0.0,0.0,309913109617.89,2046680800000.0,0.0,0.0,31702035918.0,31701499000.0,31702035918.0,31702035918.0,31702035918.0,31699663206.96,464553290.05,31702035918.0,4136462362.0,31702035918.0,31702035918.0,0.0,0.0,4755645584.11,31702035918.0,31702035918.0,176438.0,2785.55,14074.0,12714.0,1360.0,26788.0,1184.83,1675.58,2061.05,2043.93,"Abilene, TX",10180.0,3.1,2.9,2.9,2.9,2.7,2.6,2.6,2.4,2.2,2.1,2.0,2.1,2.2,2.2,2.2,2.1,2.1,2.1,2.1,2.1,2.1,2.2,2.3,2.3,2.4,2.4,2.5,2.5,2.5,2.6,2.7,2.9,2.9,2.9,3.0,3.3,3.2,3.3,3.3,3.4,3.6,3.8,4.2,3.9,4.0,74433.0,75307.0,76199.0,77324.0,79031.0,78666.0,80760.0,82450.0,84633.0,86805.0,71545.0,72353.0,73380.0,74757.0,76665.0,74302.0,77106.0,79593.0,81692.0,83807.0,2888.0,2954.0,2819.0,2567.0,2366.0,4364.0,3654.0,2857.0,2941.0,2998.0,"Abilene, TX Metro Area",169276.0,377.0,135412.0,2025.0,18575.0,1619.0,10581.0,1135.0,3493.0,500.0,1215.0,439.0,2046680800000.0,21117964272469.95,1031.82,7651703.48,0.0,538934610606.0,326374110787.12,60.56,21144387.7,0.01,43.37,119.84,21444338383257.07,28796091.18,3.88,3.92,3.7,3.32,2.99,5.55,4.52,3.47,3.48,3.45,15070.0,386527.0,3.9,3.36
1,11100,10180,"Amarillo, TX Metro Area","Abilene, TX Metro Area",Texas,Texas,South,South,461.0,292.0,11100.0,"Amarillo, TX",Texas,0.0,0.0,1855952.23,122633.6,551586.21,66167.57,0.0,132395.49,48243.73,1268905.21,2680480.23,4322168.72,21177154.98,0.0,0.0,867259.05,3938399.33,0.0,0.0,19416.43,482568.6,19306476.47,42.06,0.0,23542.1,14910.65,71123.89,733373.72,4620226.3,9936425.73,0.0,0.0,9085237.09,761450.09,0.0,0.0,3115896674300.8,3116815600000.0,3115899200000.0,1653834376520.52,0.0,3115760017738.79,50657491889.15,3115899200000.0,41781363175.32,3115899200000.0,3115899200000.0,0.0,0.0,298892586018.8,3115896674300.8,0.0,0.0,44339625215.39,44338866000.0,44339643350.0,22024243518.71,0.0,44338748939.19,1481500515.59,44339643350.0,682899602.02,44339643350.0,44339643350.0,0.0,0.0,4606412233.85,44339625215.39,44339643350.0,268612.0,5236.11,11968.0,15075.0,-3107.0,27043.0,1097.63,1413.38,1833.75,1744.25,"Amarillo, TX",11100.0,2.6,2.6,2.3,2.5,2.6,2.5,2.5,2.5,2.3,2.1,2.1,2.1,2.0,2.1,2.1,2.2,2.3,2.3,2.3,2.3,2.3,2.4,2.5,2.5,2.6,2.7,2.8,2.7,2.8,2.7,2.7,2.7,2.8,2.8,2.8,2.8,2.9,2.9,3.0,3.0,3.2,3.4,3.5,3.5,3.5,129736.0,127554.0,127035.0,127270.0,127866.0,127438.0,131239.0,132267.0,134694.0,137915.0,125699.0,123437.0,123090.0,123647.0,124486.0,121002.0,125974.0,128132.0,130472.0,133592.0,4037.0,4117.0,3945.0,3623.0,3380.0,6436.0,5265.0,4135.0,4222.0,4323.0,"Amarillo, TX Metro Area",261187.0,418.0,216613.0,2317.0,31869.0,2021.0,7261.0,902.0,4707.0,663.0,737.0,289.0,3115899200000.0,26973131583944.18,865.66,37031346.34,0.0,753773936950.0,383510494640.14,50.88,45054793.14,0.01,137.86,167.73,27356642078584.31,82086139.48,3.11,3.23,3.11,2.85,2.64,5.05,4.01,3.13,3.13,3.13,21501.0,637163.0,3.37,3.0,10180.0,"Abilene, 
TX",Texas,0.0,0.0,485441.03,19906.67,483001.07,1375110.42,2751.96,118521.39,40474.26,246081.19,799068.31,333215.29,2730724.55,0.0,0.0,60137.66,957269.67,0.0,0.0,5501.59,67120.4,8539877.17,702.6,188083.44,63924.93,10175.72,30008.07,3730847.39,1285781.74,1894376.57,0.0,0.0,5298566.05,29422.02,0.0,0.0,2046680800000.0,2048316400000.0,2046680800000.0,2046680800000.0,2046680800000.0,2046411449150.22,24676263430.46,2046680800000.0,315200650271.38,2046680800000.0,2046680800000.0,0.0,0.0,309913109617.89,2046680800000.0,0.0,0.0,31702035918.0,31701499000.0,31702035918.0,31702035918.0,31702035918.0,31699663206.96,464553290.05,31702035918.0,4136462362.0,31702035918.0,31702035918.0,0.0,0.0,4755645584.11,31702035918.0,31702035918.0,176438.0,2785.55,14074.0,12714.0,1360.0,26788.0,1184.83,1675.58,2061.05,2043.93,"Abilene, TX",10180.0,3.1,2.9,2.9,2.9,2.7,2.6,2.6,2.4,2.2,2.1,2.0,2.1,2.2,2.2,2.2,2.1,2.1,2.1,2.1,2.1,2.1,2.2,2.3,2.3,2.4,2.4,2.5,2.5,2.5,2.6,2.7,2.9,2.9,2.9,3.0,3.3,3.2,3.3,3.3,3.4,3.6,3.8,4.2,3.9,4.0,74433.0,75307.0,76199.0,77324.0,79031.0,78666.0,80760.0,82450.0,84633.0,86805.0,71545.0,72353.0,73380.0,74757.0,76665.0,74302.0,77106.0,79593.0,81692.0,83807.0,2888.0,2954.0,2819.0,2567.0,2366.0,4364.0,3654.0,2857.0,2941.0,2998.0,"Abilene, TX Metro Area",169276.0,377.0,135412.0,2025.0,18575.0,1619.0,10581.0,1135.0,3493.0,500.0,1215.0,439.0,2046680800000.0,21117964272469.95,1031.82,7651703.48,0.0,538934610606.0,326374110787.12,60.56,21144387.7,0.01,43.37,119.84,21444338383257.07,28796091.18,3.88,3.92,3.7,3.32,2.99,5.55,4.52,3.47,3.48,3.45,15070.0,386527.0,3.9,3.36
2,11260,10180,"Anchorage, AK Metro Area","Abilene, TX Metro Area",Alaska,Texas,West,South,0.0,,11260.0,"Anchorage, AK",Alaska,3200886.29,2089.28,0.0,20806480.13,8332.63,0.0,0.0,34265.08,0.0,0.0,106635.32,0.0,82.87,0.0,54.96,154193.15,37689.08,4837.56,6596.12,0.0,81576922.3,12405.48,0.0,0.0,1138.99,0.0,0.0,885405.2,0.0,160.58,0.0,5751.85,2185518.37,112.87,116000000.0,3950999435.1,0.0,4620604800000.0,4616626000000.0,0.0,0.0,4616626000000.0,0.0,0.0,36142313379.65,4616626000000.0,4616626000000.0,0.0,938132838.94,840846454603.18,4616626000000.0,2000000.0,114320551.54,1281731.0,93413706000.0,93414728170.0,0.0,0.0,93414728170.0,0.0,0.0,1056583863.03,93414728170.0,93414728170.0,0.0,265442142.42,17146775130.56,93414728170.0,93414728170.0,397985.0,27486.92,20047.0,27239.0,-7192.0,47286.0,1608.94,1910.89,2560.95,2433.24,"Anchorage, AK",11260.0,3.1,3.3,3.7,3.6,3.7,3.6,3.6,3.1,3.2,2.7,2.6,2.7,2.8,2.8,2.9,3.0,3.1,3.1,3.1,3.2,3.3,3.5,3.7,3.8,4.1,4.5,4.7,4.5,4.3,4.2,4.2,4.3,4.2,4.2,4.2,4.2,4.3,4.2,4.3,4.1,4.3,4.3,4.4,4.5,4.5,202013.0,201178.0,201000.0,197370.0,195123.0,193818.0,196930.0,198072.0,199608.0,201117.0,190991.0,189200.0,188707.0,186347.0,185264.0,178084.0,184810.0,190342.0,191974.0,192744.0,11022.0,11978.0,12293.0,11023.0,9859.0,15734.0,12120.0,7730.0,7634.0,8373.0,"Anchorage, AK Metro Area",394199.0,455.0,323583.0,3200.0,47678.0,2718.0,13375.0,1248.0,6672.0,1022.0,2891.0,610.0,4616626000000.0,28585728700256.86,619.19,24350708.8,0.0,1588050378890.0,579073750268.55,36.46,84678849.31,0.01,61.18,212.77,29164802450525.41,109029558.11,5.46,5.95,6.12,5.58,5.05,8.12,6.15,3.9,3.82,4.16,60887.0,988489.0,6.16,4.24,10180.0,"Abilene, 
TX",Texas,0.0,0.0,485441.03,19906.67,483001.07,1375110.42,2751.96,118521.39,40474.26,246081.19,799068.31,333215.29,2730724.55,0.0,0.0,60137.66,957269.67,0.0,0.0,5501.59,67120.4,8539877.17,702.6,188083.44,63924.93,10175.72,30008.07,3730847.39,1285781.74,1894376.57,0.0,0.0,5298566.05,29422.02,0.0,0.0,2046680800000.0,2048316400000.0,2046680800000.0,2046680800000.0,2046680800000.0,2046411449150.22,24676263430.46,2046680800000.0,315200650271.38,2046680800000.0,2046680800000.0,0.0,0.0,309913109617.89,2046680800000.0,0.0,0.0,31702035918.0,31701499000.0,31702035918.0,31702035918.0,31702035918.0,31699663206.96,464553290.05,31702035918.0,4136462362.0,31702035918.0,31702035918.0,0.0,0.0,4755645584.11,31702035918.0,31702035918.0,176438.0,2785.55,14074.0,12714.0,1360.0,26788.0,1184.83,1675.58,2061.05,2043.93,"Abilene, TX",10180.0,3.1,2.9,2.9,2.9,2.7,2.6,2.6,2.4,2.2,2.1,2.0,2.1,2.2,2.2,2.2,2.1,2.1,2.1,2.1,2.1,2.1,2.2,2.3,2.3,2.4,2.4,2.5,2.5,2.5,2.6,2.7,2.9,2.9,2.9,3.0,3.3,3.2,3.3,3.3,3.4,3.6,3.8,4.2,3.9,4.0,74433.0,75307.0,76199.0,77324.0,79031.0,78666.0,80760.0,82450.0,84633.0,86805.0,71545.0,72353.0,73380.0,74757.0,76665.0,74302.0,77106.0,79593.0,81692.0,83807.0,2888.0,2954.0,2819.0,2567.0,2366.0,4364.0,3654.0,2857.0,2941.0,2998.0,"Abilene, TX Metro Area",169276.0,377.0,135412.0,2025.0,18575.0,1619.0,10581.0,1135.0,3493.0,500.0,1215.0,439.0,2046680800000.0,21117964272469.95,1031.82,7651703.48,0.0,538934610606.0,326374110787.12,60.56,21144387.7,0.01,43.37,119.84,21444338383257.07,28796091.18,3.88,3.92,3.7,3.32,2.99,5.55,4.52,3.47,3.48,3.45,15070.0,386527.0,3.9,3.36
3,12060,10180,"Atlanta-Sandy Springs-Alpharetta, GA Metro Area","Abilene, TX Metro Area",Georgia,Texas,South,South,14.0,20.0,12060.0,"Atlanta-Sandy Springs-Roswell, GA",Georgia,0.0,0.0,868460.24,10043727.15,5776473.22,1071012.18,342628.8,1107559.04,567110.62,16806162.49,14869148.78,6043721.3,81152701.21,0.0,0.0,67642.9,6806115.95,0.0,0.0,6915.3,42317360.2,48572900.07,4959.97,27801644.76,4116309.51,1830953.63,5894411.46,13431007.02,3919925.99,112998214.27,0.0,0.0,1083605.44,268482.54,0.0,0.0,5434841982989.7,70815714800000.0,70747402400000.0,8959189403045.31,70457341517855.14,70728513899753.16,25569404411610.82,70747402400000.0,982334959033.56,70747402400000.0,70747402400000.0,0.0,0.0,7975577326007.15,70747334836199.03,0.0,0.0,84581767645.07,1139024359000.0,1139038916046.0,131344234164.34,1135109508583.86,1138794634359.04,420236903267.72,1139038916046.0,14830243297.59,1139038916046.0,1139038916046.0,0.0,0.0,112550624460.83,1139037766984.64,1139038916046.0,6098914.0,9023.6,204029.0,205499.0,-1470.0,409528.0,6832.39,5610.04,8902.2,8778.43,"Atlanta-Sandy Springs-Roswell, GA",12060.0,2.6,2.6,2.6,2.5,2.4,2.4,2.4,2.4,2.4,2.4,2.5,2.4,2.4,2.4,2.4,2.3,2.3,2.3,2.4,2.4,2.5,2.6,2.8,2.9,2.9,3.0,2.9,2.8,2.5,2.1,2.0,1.8,1.8,2.4,2.7,2.8,2.9,3.0,3.2,3.3,3.5,4.0,4.4,4.3,4.3,2823186.0,2941622.0,3062806.0,3084021.0,3112308.0,3073330.0,3131956.0,3195607.0,3267132.0,3306439.0,2660071.0,2788801.0,2922392.0,2965739.0,3005468.0,2863398.0,3009697.0,3098454.0,3161919.0,3192562.0,163115.0,152821.0,140414.0,118282.0,106840.0,209932.0,122259.0,97153.0,105213.0,113877.0,"Atlanta-Sandy Springs-Alpharetta, GA Metro Area",5879040.0,2524.0,5033299.0,12226.0,605283.0,11114.0,179899.0,6042.0,24130.0,1954.0,36429.0,2784.0,70747402400000.0,614659862736493.8,868.81,145522463.88,0.0,19363661572782.0,9871665705947.08,50.98,262246690.14,0.0,23.86,43.0,624531528442440.9,407769154.02,5.78,5.2,4.58,3.84,3.43,6.83,3.9,3.04,3.22,3.44,728289.0,15274087.0,4.77,3.18,10180.0,"Abilene, 
TX",Texas,0.0,0.0,485441.03,19906.67,483001.07,1375110.42,2751.96,118521.39,40474.26,246081.19,799068.31,333215.29,2730724.55,0.0,0.0,60137.66,957269.67,0.0,0.0,5501.59,67120.4,8539877.17,702.6,188083.44,63924.93,10175.72,30008.07,3730847.39,1285781.74,1894376.57,0.0,0.0,5298566.05,29422.02,0.0,0.0,2046680800000.0,2048316400000.0,2046680800000.0,2046680800000.0,2046680800000.0,2046411449150.22,24676263430.46,2046680800000.0,315200650271.38,2046680800000.0,2046680800000.0,0.0,0.0,309913109617.89,2046680800000.0,0.0,0.0,31702035918.0,31701499000.0,31702035918.0,31702035918.0,31702035918.0,31699663206.96,464553290.05,31702035918.0,4136462362.0,31702035918.0,31702035918.0,0.0,0.0,4755645584.11,31702035918.0,31702035918.0,176438.0,2785.55,14074.0,12714.0,1360.0,26788.0,1184.83,1675.58,2061.05,2043.93,"Abilene, TX",10180.0,3.1,2.9,2.9,2.9,2.7,2.6,2.6,2.4,2.2,2.1,2.0,2.1,2.2,2.2,2.2,2.1,2.1,2.1,2.1,2.1,2.1,2.2,2.3,2.3,2.4,2.4,2.5,2.5,2.5,2.6,2.7,2.9,2.9,2.9,3.0,3.3,3.2,3.3,3.3,3.4,3.6,3.8,4.2,3.9,4.0,74433.0,75307.0,76199.0,77324.0,79031.0,78666.0,80760.0,82450.0,84633.0,86805.0,71545.0,72353.0,73380.0,74757.0,76665.0,74302.0,77106.0,79593.0,81692.0,83807.0,2888.0,2954.0,2819.0,2567.0,2366.0,4364.0,3654.0,2857.0,2941.0,2998.0,"Abilene, TX Metro Area",169276.0,377.0,135412.0,2025.0,18575.0,1619.0,10581.0,1135.0,3493.0,500.0,1215.0,439.0,2046680800000.0,21117964272469.95,1031.82,7651703.48,0.0,538934610606.0,326374110787.12,60.56,21144387.7,0.01,43.37,119.84,21444338383257.07,28796091.18,3.88,3.92,3.7,3.32,2.99,5.55,4.52,3.47,3.48,3.45,15070.0,386527.0,3.9,3.36
4,12220,10180,"Auburn-Opelika, AL Metro Area","Abilene, TX Metro Area",Alabama,Texas,South,South,16.0,20.0,12220.0,"Auburn-Opelika, AL",Alabama,0.0,0.0,0.0,176828.27,4535.42,1737426.35,100995.97,23141.52,34800.0,678591.79,350086.21,520151.16,1782363.86,0.0,0.0,3828.75,14007.06,0.0,0.0,0.0,756426.8,136278.03,6608.81,4668498.17,5074.35,109500.0,55871.8,313868.43,186984.33,1485170.28,0.0,0.0,38483.04,2998.4,0.0,0.0,0.0,2247766800000.0,2242999200000.0,2242999118472.42,2242600162460.86,2242998404092.42,520928758301.1,2242999200000.0,38868757691.72,2242999200000.0,2242999200000.0,0.0,0.0,169848506199.29,2242999118472.42,0.0,0.0,0.0,32706095000.0,32706692269.0,32706692151.38,32703066803.21,32706681499.02,8982175776.66,32706692269.0,560120004.8,32706692269.0,32706692269.0,0.0,0.0,2132375643.64,32706692151.38,32706692269.0,193362.0,1241.47,14713.0,11265.0,3448.0,25978.0,1376.92,1150.84,1779.62,1808.65,"Auburn-Opelika, AL",12220.0,3.5,3.3,3.1,3.0,2.9,2.9,2.9,2.9,2.8,2.7,2.6,2.6,2.6,2.6,2.6,2.6,2.7,2.7,2.7,2.7,2.7,2.7,2.7,2.7,2.7,2.8,2.8,3.6,4.1,4.1,4.1,3.8,3.7,3.7,3.6,3.8,3.5,3.6,3.6,3.6,3.4,2.7,2.8,2.7,2.7,79383.0,85911.0,87449.0,89011.0,90459.0,89567.0,89998.0,91963.0,94379.0,96616.0,74986.0,81461.0,83884.0,85777.0,87801.0,84331.0,87362.0,89808.0,92228.0,93749.0,4397.0,4450.0,3565.0,3234.0,2658.0,5236.0,2636.0,2155.0,2151.0,2867.0,"Auburn-Opelika, AL Metro Area",161867.0,330.0,133837.0,2064.0,12191.0,1424.0,12618.0,1279.0,2095.0,538.0,1126.0,381.0,2242999200000.0,20921006425690.22,932.72,5426756.36,0.0,556013768573.0,306030668106.07,55.04,7765762.44,0.0,28.07,40.16,21227037093796.29,13192518.8,5.54,5.18,4.08,3.63,2.94,5.85,2.93,2.34,2.28,2.97,19143.0,442397.0,4.33,3.54,10180.0,"Abilene, 
TX",Texas,0.0,0.0,485441.03,19906.67,483001.07,1375110.42,2751.96,118521.39,40474.26,246081.19,799068.31,333215.29,2730724.55,0.0,0.0,60137.66,957269.67,0.0,0.0,5501.59,67120.4,8539877.17,702.6,188083.44,63924.93,10175.72,30008.07,3730847.39,1285781.74,1894376.57,0.0,0.0,5298566.05,29422.02,0.0,0.0,2046680800000.0,2048316400000.0,2046680800000.0,2046680800000.0,2046680800000.0,2046411449150.22,24676263430.46,2046680800000.0,315200650271.38,2046680800000.0,2046680800000.0,0.0,0.0,309913109617.89,2046680800000.0,0.0,0.0,31702035918.0,31701499000.0,31702035918.0,31702035918.0,31702035918.0,31699663206.96,464553290.05,31702035918.0,4136462362.0,31702035918.0,31702035918.0,0.0,0.0,4755645584.11,31702035918.0,31702035918.0,176438.0,2785.55,14074.0,12714.0,1360.0,26788.0,1184.83,1675.58,2061.05,2043.93,"Abilene, TX",10180.0,3.1,2.9,2.9,2.9,2.7,2.6,2.6,2.4,2.2,2.1,2.0,2.1,2.2,2.2,2.2,2.1,2.1,2.1,2.1,2.1,2.1,2.2,2.3,2.3,2.4,2.4,2.5,2.5,2.5,2.6,2.7,2.9,2.9,2.9,3.0,3.3,3.2,3.3,3.3,3.4,3.6,3.8,4.2,3.9,4.0,74433.0,75307.0,76199.0,77324.0,79031.0,78666.0,80760.0,82450.0,84633.0,86805.0,71545.0,72353.0,73380.0,74757.0,76665.0,74302.0,77106.0,79593.0,81692.0,83807.0,2888.0,2954.0,2819.0,2567.0,2366.0,4364.0,3654.0,2857.0,2941.0,2998.0,"Abilene, TX Metro Area",169276.0,377.0,135412.0,2025.0,18575.0,1619.0,10581.0,1135.0,3493.0,500.0,1215.0,439.0,2046680800000.0,21117964272469.95,1031.82,7651703.48,0.0,538934610606.0,326374110787.12,60.56,21144387.7,0.01,43.37,119.84,21444338383257.07,28796091.18,3.88,3.92,3.7,3.32,2.99,5.55,4.52,3.47,3.48,3.45,15070.0,386527.0,3.9,3.36


# Step 0. Preprocessing

In [6]:
def _nonzero(series):
    """Return `series` with exact zeros replaced by NaN so it is safe to divide by."""
    return series.where(series != 0)


# Climate loss ratio = total climate loss / total climate exposure, per side of the pair.
# Zero denominators become NaN instead of +/-inf: NaN rows can be removed with
# dropna, whereas inf would survive dropna and break later scaling / fitting.
flow_data['Origin_loss_ratio'] = (
    flow_data['Origin_climate_loss_total'] / _nonzero(flow_data['Origin_climate_exposure_total'])
)
flow_data['Destination_loss_ratio'] = (
    flow_data['Destination_climate_loss_total'] / _nonzero(flow_data['Destination_climate_exposure_total'])
)

# 1: Within, 0: Between
flow_data["within_region"] = (flow_data["Origin_Region"] == flow_data["Destination_Region"]).astype(int)

# Flow expressed as a percentage of the origin's current-residence population
# (same zero-denominator guard as above).
flow_data["Flow_rate"] = 100 * (
    flow_data["Flow"] / _nonzero(flow_data["Origin_Current_Residence_Population"])
)

# Step 1. Select Variables

In [7]:
## Column names used by the model, grouped by role.

# y: origin-destination (OD) migration count
flow_col = "Flow"

# x1: climate risk (departure = origin side, arrival = destination side).
# Alternatives for the arrival side: Destination_loss_bldg_total or *_EALB / *_EALPE
dep_risk_col, arr_risk_col = "Origin_loss_ratio", "Destination_loss_ratio"

# x2: housing index, 2016-2020 mean
dep_hpi_col, arr_hpi_col = (
    "Origin_HousingPI25_2016_2020_mean",
    "Destination_HousingPI25_2016_2020_mean",
)

# x3: average unemployment rate, 2016-2020
dep_ue_col, arr_ue_col = (
    "Origin_avg_unemployment_rate_16_20",
    "Destination_avg_unemployment_rate_16_20",
)

# x4: within-region indicator
move_type = "within_region"

# x5: region labels
origin_region, destination_region = "Origin_Region", "Destination_Region"

# Columns inspected / used for training, in display order.
train_cols = [
    flow_col,
    origin_region,
    destination_region,
    dep_risk_col,
    arr_risk_col,
    dep_hpi_col,
    arr_hpi_col,
    dep_ue_col,
    arr_ue_col,
    move_type,
]

In [8]:
# Fixed seed so the preview shows the same rows on every Restart & Run All.
flow_data[train_cols].sample(3, random_state=42)

Unnamed: 0,Flow,Origin_Region,Destination_Region,Origin_loss_ratio,Destination_loss_ratio,Origin_HousingPI25_2016_2020_mean,Destination_HousingPI25_2016_2020_mean,Origin_avg_unemployment_rate_16_20,Destination_avg_unemployment_rate_16_20,within_region
39165,3.5,Midwest,Midwest,0.0,0.0,3.42,2.38,3.76,6.35,1
4153,141.0,South,South,0.0,0.0,4.56,3.54,3.82,4.33,1
70871,718.0,West,West,0.0,0.0,3.94,4.42,2.65,3.38,1


# Step 2. Delta

#### Delta features for Regression
- dRisk      : risk(dest) - risk(origin); standardized to interpret per-σ effect
- dHousing   : P/I(dest) - P/I(origin);
- dUnemp     : UnempRate(dest) - UnempRate(origin)

In [9]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

# Work on a copy so the raw flow table stays untouched.
df = flow_data.copy()

# Every delta is destination (arrival) minus origin (departure).
# x1 - dRisk
df["dRisk"] = df[arr_risk_col].astype(float) - df[dep_risk_col].astype(float)

# x2 - dHousing
df["dHousing"] = df[arr_hpi_col].astype(float) - df[dep_hpi_col].astype(float)

# x3 - dUnemp
df["dUnemp"] = df[arr_ue_col].astype(float) - df[dep_ue_col].astype(float)

# The loss ratios behind dRisk are quotients, so a zero exposure total yields
# +/-inf. dropna() keeps infinities and StandardScaler rejects them, so mark
# them as missing before filtering.
delta_cols = ["dRisk", "dHousing", "dUnemp"]
df[delta_cols] = df[delta_cols].replace([np.inf, -np.inf], np.nan)

# Eliminate NaN values in the outcome and the delta features
need = [flow_col] + delta_cols

# Distinct origin / destination codes before and after dropping incomplete rows.
print(len(df['Origin_Code'].unique()), len(df['Destination_Code'].unique()))

df = df.dropna(subset=need).copy()
print(len(df['Origin_Code'].unique()), len(df['Destination_Code'].unique()))


# Standardization (z-score); ravel() flattens the (n, 1) scaler output to one column.
df["dRisk_z"] = scaler.fit_transform(df[["dRisk"]]).ravel()


# In case population size should be considered:
# log size variables (the +1 keeps a zero population finite).
df["log_pop_origin"] = np.log(df["Origin_Current_Residence_Population"] + 1)
df["log_pop_dest"]   = np.log(df["Destination_Current_Residence_Population"] + 1)

392 392
367 367


## z-dRisk Variable check

In [10]:
# Summary statistics of the raw (unstandardized) risk delta.
mean_val, std_val, min_val, max_val = (
    df["dRisk"].mean(),
    df["dRisk"].std(),
    df["dRisk"].min(),
    df["dRisk"].max(),
)

# Width of the observed range, measured in standard deviations.
sigma_range = (max_val - min_val) / std_val

print("📊 dRisk Statistics")
for stat_label, stat_value in (
    ("mean", mean_val),
    ("standard deviation", std_val),
    ("min", min_val),
    ("max", max_val),
):
    print(f"{stat_label}: {stat_value:.6f}")
print(f"min~max, total std: {sigma_range:.2f} σ")


📊 dRisk Statistics
mean: 0.000000
standard deviation: 0.000003
min: -0.000011
max: 0.000011
min~max, total std: 7.45 σ


In [11]:
# Spot-check raw vs standardized risk deltas. dRisk is of order 1e-5, so under the
# notebook's two-decimal float format it renders as 0.0 / -0.0; dRisk_z is the readable column.
# The fixed seed keeps the displayed rows stable across re-runs.
df[['dRisk','dRisk_z']].sample(10, random_state=42)

Unnamed: 0,dRisk,dRisk_z
10505,-0.0,-2.1
60538,-0.0,-0.42
67726,-0.0,-1.68
2802,0.0,0.01
25358,-0.0,-0.72
80232,0.0,0.17
6892,-0.0,-2.03
63295,0.0,0.04
76480,-0.0,-0.59
12405,-0.0,-0.07


## Step 3. Possible Model Comparison

### 3.1. Set Formula

In [12]:
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.discrete.count_model import ZeroInflatedPoisson, ZeroInflatedNegativeBinomialP
import pandas as pd

# Candidate specifications. Each is a formula string whose left-hand side is the
# outcome column named by flow_col; a later cell compares `formula` against these
# exact strings to decide how to reshape the coefficients, so keep them unchanged.

# model 1: main effects of the three delta variables only
formula_base = f"""{flow_col} ~ dRisk_z 
                           + dHousing
                           + dUnemp
                           """

# model 2: each delta interacted with the within_region indicator
formula_move_type = f"""{flow_col} ~ dRisk_z * within_region
                           + dHousing * within_region
                           + dUnemp * within_region
                           """

# model 3: each delta interacted with within_region and the origin region (three-way)
formula_including_origin = f"""{flow_col} ~ dRisk_z * within_region * C(Origin_Region)
                           + dHousing * within_region * C(Origin_Region)
                           + dUnemp * within_region * C(Origin_Region)
                           """

# model 4: each delta interacted with origin region, destination region and their combination
formula_origin_destination = f"""{flow_col} ~ (dRisk_z + dHousing + dUnemp)
        * C(Origin_Region)
        * C(Destination_Region)"""


### 3.2 Formula selection

In [13]:
# Formula selection: the specification used by every model fit below.
# The IRR reshaping cell branches on this exact string, so assign one of the
# formula_* variables defined above rather than a hand-edited copy.
formula = formula_origin_destination

### 3.3 Model Comparison

#### Choose among: Poisson / Negative Binomial / Linear Regression 

In [14]:
# Dispersion check on the outcome: compare its variance with its mean.
flow_values = df[flow_col]
mean_y = flow_values.mean()
var_y = flow_values.var()
print("Variance-to-mean ratio:", var_y / mean_y)

Variance-to-mean ratio: 3137.7564597742125


In [15]:
# Candidate GLM families, all fitted with the same formula on the same data.
# NOTE(review): NegativeBinomial() is constructed without an alpha argument, so the
# dispersion is the statsmodels default rather than an estimate — confirm this is intended.
families = {
    "Poisson": sm.families.Poisson(),           # 1) Poisson
    "NegBin": sm.families.NegativeBinomial(),   # 2) Negative Binomial
    "Linear": sm.families.Gaussian(),           # 3) Linear (Gaussian)
}

results = {}
for model_name, family in families.items():
    results[model_name] = smf.glm(formula=formula, data=df, family=family).fit()

# One row per fitted model, ordered from lowest to highest AIC.
comp = pd.DataFrame(
    [
        {"Model": model_name, "AIC": fit.aic, "LogLik": fit.llf}
        for model_name, fit in results.items()
    ]
).sort_values("AIC")

# The first row after sorting is the lowest-AIC model.
best_model_name = comp.iloc[0]["Model"]

print("📊 Model Comparison (lower AIC better):")
print(comp)

print(f"\n✅ Best model selected (by AIC): {best_model_name}")


📊 Model Comparison (lower AIC better):
     Model           AIC         LogLik
1   NegBin    890,079.95    -444,975.98
2   Linear  1,274,311.56    -637,091.78
0  Poisson 29,541,377.72 -14,770,624.86

✅ Best model selected (by AIC): NegBin


## Step 4. Run the Best Model

In [16]:
best_res = results[best_model_name]

# --- Print result ---
print(best_res.summary())

# Kept as module-level names: irr_by_origin_within / irr_by_within read `coef` as a global.
coef = best_res.params
se = best_res.bse
pval = best_res.pvalues

if best_model_name in ["Poisson", "NegBin"]:
    # Log-link count models: exp(coef) is the incidence rate ratio.
    irr = np.exp(coef)
    result_table = pd.DataFrame({
        "coef": coef,
        "IRR": irr,
        "SE": se,
        "pval": pval
    })
    print("\n📑 Regression Results (coef, IRR, SE, p-value):")
else:
    # Gaussian family: coefficients are reported as-is, without an IRR column.
    result_table = pd.DataFrame({
        "coef": coef,
        "SE": se,
        "pval": pval
    })
    print("\n Regression Results (Gaussian family):")

# Print once; the original nested print() also emitted a stray "None" line.
print(result_table.to_string())


                 Generalized Linear Model Regression Results                  
Dep. Variable:                   Flow   No. Observations:                81906
Model:                            GLM   Df Residuals:                    81842
Model Family:        NegativeBinomial   Df Model:                           63
Link Function:                    Log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:            -4.4498e+05
Date:                Sat, 25 Oct 2025   Deviance:                   3.3205e+05
Time:                        19:49:43   Pearson chi2:                 8.89e+05
No. Iterations:                    23   Pseudo R-squ. (CS):             0.4067
Covariance Type:            nonrobust                                         
                                                                                coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------

## Step 5. Add more interpretability

Interpretability Module: IRR (Incident Rate Ratio) Breakdown
------------------------------------------------------------

Purpose:
- Convert regression coefficients (log-scale) into interpretable IRRs (exp(coef))
- Decompose results by Origin/Destination region and Within/Cross-region movement type
- Enable region- and flow-specific interpretation of marginal effects

Functions:
1. irr_by_origin_within(var)
   - Computes IRR for each combination of Origin Region × Within/Cross-region flow
   - Adjusts for interaction terms (Origin_Region × within_region)
   - Useful when model includes both Origin and within-region interactions

2. irr_by_within(var)
   - Computes IRR aggregated only by Within vs Cross-region movement
   - Simpler summary when no region-specific interactions are modeled

3. irr_by_origin_dest(var, coef)
   - Computes IRR for all Origin × Destination region pairs
   - Incorporates Origin, Destination, and their interaction effects
   - Produces a matrix-like IRR summary (OD interpretation)

Outputs:
- 'pivot' tables summarizing IRRs by flow type or region combination
- Facilitates interpretation of how ΔRisk, ΔHousing, and ΔUnemp affect migration
  across different regional patterns (e.g., Cross vs Within, Midwest vs South)

Interpretation:
- IRR > 1: positive association (↑ migration flow)
- IRR < 1: negative association (↓ migration flow)
- Region and within/cross distinctions clarify heterogeneity in response to Δ variables

In [17]:
def irr_by_origin_within(var):
    """Tabulate the IRR of `var` for each origin region, split into cross- and within-region flows.

    Reads the module-level `coef` mapping. The main effect `coef[var]` must exist;
    interaction terms that are absent contribute 0. "Midwest" is treated as the
    reference origin, so no origin-specific terms are added for it.

    Returns a DataFrame with columns Origin, within_region ("Cross"/"Within"),
    Variable and IRR (exp of the summed coefficients).
    """
    reference_region = "Midwest"
    rows = []
    for origin in ["Midwest", "Northeast", "South", "West"]:
        has_origin_terms = origin != reference_region
        for flow_label, is_within in (("Cross", False), ("Within", True)):
            # Interaction terms that apply to this cell, in the order they are summed.
            interaction_terms = []
            if has_origin_terms:
                interaction_terms.append(f"{var}:C(Origin_Region)[T.{origin}]")
            if is_within:
                interaction_terms.append(f"{var}:within_region")
                if has_origin_terms:
                    interaction_terms.append(
                        f"{var}:within_region:C(Origin_Region)[T.{origin}]"
                    )

            log_effect = coef[var]
            for term in interaction_terms:
                log_effect += coef.get(term, 0)

            rows.append({
                "Origin": origin,
                "within_region": flow_label,
                "Variable": var,
                "IRR": np.exp(log_effect),
            })
    return pd.DataFrame(rows)

# --- helper: Compute IRR by Cross vs Within region ---
def irr_by_within(var):
    """Return the IRR of `var` for cross-region and within-region flows.

    Reads the module-level `coef` mapping. The cross-region IRR is exp(coef[var]);
    the within-region IRR additionally includes the `{var}:within_region` term,
    which counts as 0 when it is absent.

    Returns a two-row DataFrame with columns within_region, Variable and IRR.
    """
    cross_effect = coef[var]
    within_effect = coef[var]
    within_effect += coef.get(f"{var}:within_region", 0)

    return pd.DataFrame([
        {"within_region": "Cross", "Variable": var, "IRR": np.exp(cross_effect)},
        {"within_region": "Within", "Variable": var, "IRR": np.exp(within_effect)},
    ])

def irr_by_origin_dest(var, coef):
    """Tabulate the IRR of `var` for every Origin × Destination region pair.

    `coef` is a mapping with a `.get(name, default)` method from coefficient
    name to value. For each pair the main effect, the origin term, the
    destination term and the origin × destination term are summed; any term
    that is absent counts as 0. "Midwest" is treated as the reference region,
    so region-specific terms are skipped for it.

    Returns a DataFrame with columns Origin, Destination and one column named
    after `var` holding exp of the summed coefficients.
    """
    regions = ["Midwest", "Northeast", "South", "West"]
    reference_region = "Midwest"

    records = []
    for origin in regions:
        origin_term = f"{var}:C(Origin_Region)[T.{origin}]"
        origin_shifts = origin != reference_region
        for dest in regions:
            dest_term = f"C(Destination_Region)[T.{dest}]"
            dest_shifts = dest != reference_region

            log_effect = coef.get(var, 0)
            if origin_shifts:
                # Origin-specific shift
                log_effect += coef.get(origin_term, 0)
            if dest_shifts:
                # Destination-specific shift
                log_effect += coef.get(f"{var}:{dest_term}", 0)
            if origin_shifts and dest_shifts:
                # Shift specific to this origin-destination pair
                log_effect += coef.get(f"{origin_term}:{dest_term}", 0)

            records.append({
                "Origin": origin,
                "Destination": dest,
                var: np.exp(log_effect),
            })
    return pd.DataFrame(records)

# Interpretation-friendly reshaping: turn the fitted coefficients into an IRR
# table (`pivot`) whose layout matches the interaction structure of the selected
# formula. Every branch defines and prints `pivot`.
coef = result_table["coef"]
effect_vars = ["dRisk_z", "dHousing", "dUnemp"]

if formula == formula_including_origin:
    # Origin region x within/cross breakdown
    summary = pd.concat([irr_by_origin_within(effect_var) for effect_var in effect_vars])

    # pivot for better readability
    pivot = summary.pivot(index=["Origin","within_region"], columns="Variable", values="IRR").round(2)

    print("\n📊 Interpretation-friendly IRR summary (Origin × Cross vs Within):")
    print(pivot)

elif formula == formula_move_type:
    # Within/cross breakdown only
    summary = pd.concat([irr_by_within(effect_var) for effect_var in effect_vars])

    # pivot for better readability
    pivot = summary.pivot(index="within_region", columns="Variable", values="IRR").round(2)
    pivot = pivot.reset_index()

    print("\n📊 Interpretation-friendly IRR summary (Cross vs Within):")
    print(pivot)

elif formula == formula_origin_destination:
    # Origin x destination breakdown: one IRR column per variable, joined on the region pair
    irr_risk    = irr_by_origin_dest("dRisk_z", coef)
    irr_housing = irr_by_origin_dest("dHousing", coef)
    irr_unemp   = irr_by_origin_dest("dUnemp", coef)

    pivot = irr_risk.merge(irr_housing, on=["Origin","Destination"]).merge(irr_unemp, on=["Origin","Destination"])
    pivot = pivot.round(2)

    print("\n📊 Interpretation-friendly OD IRR summary:")
    print(pivot)

elif formula == formula_base:
    # No interaction terms: each variable has a single IRR.
    pivot = pd.DataFrame({
        "Variable": effect_vars,
        "IRR": [np.exp(coef[effect_var]) for effect_var in effect_vars],
    }).round(2)

    print("\n📊 Interpretation-friendly IRR summary (main effects):")
    print(pivot)

else:
    # Fail loudly instead of leaving `pivot` undefined or stale from an earlier run.
    raise ValueError("No IRR reshaping is defined for the selected formula")


📊 Interpretation-friendly OD IRR summary:


## Step 6. Regression Result

In [18]:
# Echo the selected specification so the IRR table below can be read against it.
print(formula)

Flow ~ (dRisk_z + dHousing + dUnemp)
        * C(Origin_Region)
        * C(Destination_Region)


In [19]:
# IRR table built by the reshaping cell for the selected formula (values rounded to 2 decimals).
pivot

Unnamed: 0,Origin,Destination,dRisk_z,dHousing,dUnemp
0,Midwest,Midwest,0.93,0.94,0.9
1,Midwest,Northeast,1.77,1.09,0.87
2,Midwest,South,1.27,0.79,0.7
3,Midwest,West,1.16,0.96,0.86
4,Northeast,Midwest,0.2,0.83,1.1
5,Northeast,Northeast,0.97,0.88,0.94
6,Northeast,South,1.22,0.73,0.76
7,Northeast,West,2.01,0.86,0.81
8,South,Midwest,0.91,1.2,1.31
9,South,Northeast,1.11,1.15,1.09


## Next Step: Shapley Value calculation