### This file is not intended for production use; all it does is calibrate the models for the LODES dataset and save the model objects.

In [None]:
# Merge the dataframes -- DEPRECATED
# Build name -> value lookups from the attribute table, then attach the
# origin-side and destination-side values to every flow row.
o_map = dict(zip(attrdf['name'], attrdf['o_attr']))
d_map = dict(zip(attrdf['name'], attrdf['d_attr']))
p_map = dict(zip(attrdf['name'], attrdf['pop']))

# (new flow column, flow column holding the location name, lookup to apply)
for new_col, name_col, lookup in (
    ('o_attr', 'origin', o_map),
    ('d_attr', 'dest', d_map),
    ('o_pop', 'origin', p_map),
    ('d_pop', 'dest', p_map),
):
    flowdf[new_col] = flowdf[name_col].map(lookup)

In [None]:
# Create costs via Euclidean distance -- DEPRECATED
# The cost of a flow is the straight-line distance between the (lat, lon)
# pairs of its origin and destination rows in attrdf.
coords = attrdf[['lat', 'lon']].values
dists = pdist(coords)
names = attrdf['name'].values

# Expand the condensed pdist vector into a full symmetric matrix. Indexing the
# condensed vector directly is only valid for i < j, so flows whose origin comes
# after their destination (or equals it) would otherwise read the wrong entry.
# The square form has a zero diagonal, giving intra-location flows a cost of 0.
dist_matrix = squareform(dists)

# Map each name to the position of its first occurrence in attrdf, built once
# instead of scanning the name array for every flow row.
name_to_idx = {}
for idx, name in enumerate(names):
    name_to_idx.setdefault(name, idx)

o_idx = flowdf['origin'].map(name_to_idx)
d_idx = flowdf['dest'].map(name_to_idx)

# Fail loudly if a flow references a location that has no attribute row.
unknown = o_idx.isna() | d_idx.isna()
if unknown.any():
    raise KeyError(
        "{} flow row(s) reference a name missing from attrdf['name']".format(int(unknown.sum()))
    )

# Keep cost_arr as an (n, 1) column vector, as before; store it flat in flowdf.
cost_arr = dist_matrix[o_idx.values.astype(int), d_idx.values.astype(int)].reshape(-1, 1)
flowdf['cost'] = cost_arr.ravel()

## Calibrate models

In [None]:
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from tqdm import trange
from pysal.model.spint import Gravity, Production
from scipy.spatial.distance import pdist, squareform

In [None]:
# Load data (these filenames are where it is locally for me)
# The identifier columns are read through a str converter so they stay text
# (presumably to keep leading zeros in FIPS-style codes -- confirm).
attr_path = '../../../../data/attrs.csv'
flow_path = '../../../../data/lodes-flows.csv'

attrdf = pd.read_csv(attr_path, index_col=0, converters={'name' : str})
flowdf = pd.read_csv(flow_path, index_col=0, converters={'origin' : str, 'dest' : str})

In [None]:
# Fit unconstrained gravity model to data
# Flows and costs are passed as (n, 1) column vectors; the origin and
# destination inputs each carry two columns (attribute, population).
origin_cols = ['o_attr', 'o_pop']
dest_cols = ['d_attr', 'd_pop']

flows = flowdf['count'].values[:, np.newaxis]
origins = flowdf[origin_cols].values
destinations = flowdf[dest_cols].values
cost = flowdf['cost'].values[:, np.newaxis]

gravity_model = Gravity(flows, origins, destinations, cost, cost_func='pow')  # better results with pow over exp
unconstrained = gravity_model.fit()
unconstrained.pseudoR2

In [None]:
# Fit production-constrained model to data
# Origins enter as labels (the raw 'origin' column) instead of attribute columns;
# flows, destinations and cost are reused from the unconstrained-model cell.
origin_labels = flowdf['origin'].values
production_model = Production(flows, origin_labels, destinations, cost, cost_func='pow')
production = production_model.fit()
production.pseudoR2

In [None]:
# Local calibration of the production-constrained model. The result is indexed
# by statistic name further down (e.g. localprod['pseudoR2']).
local_model = Production(
    flows,
    flowdf['origin'].values,
    destinations,
    cost,
    cost_func='pow',
)
localprod = local_model.local()

## Plotting

In [None]:
# Load shapefile for mapping
counties = gpd.read_file('../../../../data/tl_2018_us_county.shp')

# STATEFP codes to exclude: everything outside the contiguous US (non CONUS).
non_conus_statefp = ['02', '15', '60', '66', '69', '72', '78']
outside_conus = counties['STATEFP'].isin(non_conus_statefp)
counties.drop(index=counties.index[outside_conus], inplace=True)  # drop non CONUS

In [None]:
# Attach each origin's local pseudo-R2 to its county polygon and draw the map.
# Assumes localprod['pseudoR2'] is ordered like the sorted unique origin labels
# -- confirm against the spint local() implementation.
origin_ids = np.unique(flowdf['origin'].values)
r2_by_origin = dict(zip(origin_ids, localprod['pseudoR2']))

# Counties whose GEOID never appears as an origin get NaN.
counties['pseudoR2'] = counties['GEOID'].map(r2_by_origin)
counties.plot(column='pseudoR2')
plt.show()