In [None]:
import contextily as cx
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
from shapely.wkt import loads
import pandas as pd
from tqdm import trange, tqdm
import pickle
import multiprocessing as mp

### Define functions

In [None]:
# Lookup table: 1-based city id -> display name of the city.
# The ids are assigned by position in the tuple below (enumerate starts at 1).
map_city_new = dict(
    enumerate(
        (
            "New York",
            "Los Angeles",
            "Chicago",
            "Houston",
            "Phoenix",
            "Philadelphia",
            "San Antonio",
            "San Diego",
            "Dallas",
            "San Jose",
        ),
        start=1,
    )
)

# load mobility data
def load_city_data(city_id):
    """Load the mobility matrix, CBG centroids and id mapping for one city.

    All paths are relative to the current working directory.

    Parameters
    ----------
    city_id : int
        Key into ``map_city_new``; also embedded in the mobility file names.

    Returns
    -------
    matrix : numpy.ndarray
        Array read from ``data/Mobility/cbg_visit_2018-06_{city_id}.npy`` with
        its main diagonal overwritten with 0.
    gdf : geopandas.GeoDataFrame
        Rows of ``cbgs_of_cities/{city_name}_city.shp`` indexed by integer
        ``CBG_Code``. The geometry is parsed from the WKT ``Centroid`` column
        and reprojected to EPSG:3857; the ``Centroid``, ``Shape_Leng`` and
        ``Shape_Area`` columns are dropped.
    id_dict : object
        Whatever is pickled in ``data/Mobility/id_dict_{city_id}.pkl``.
    """
    city_name = map_city_new[city_id].replace(" ", "_")

    # Forward slashes work on every platform (Windows included). The previous
    # ".\data\Mobility\..." literals relied on invalid escape sequences
    # (\d, \M, \c, \i) and only resolved as paths on Windows.
    mobility_dir = "./data/Mobility"

    matrix = np.load(f"{mobility_dir}/cbg_visit_2018-06_{city_id}.npy")
    # Zero the main diagonal in place (presumably to discard self-visits).
    np.fill_diagonal(matrix, 0)

    gdf = gpd.read_file(f"cbgs_of_cities/{city_name}_city.shp")
    # Swap the shapefile geometry for the centroid points stored as WKT text.
    gdf = gpd.GeoDataFrame(gdf, geometry=gpd.GeoSeries(loads(gdf["Centroid"]))).drop(
        columns=["Centroid", "Shape_Leng", "Shape_Area"]
    )
    gdf = gdf.set_index("CBG_Code")
    gdf.index = gdf.index.astype(int)
    # EPSG:3857 (Web Mercator) is the CRS the figure is drawn in.
    gdf = gdf.to_crs(epsg=3857)

    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file; only load id_dict files that come from a trusted source.
    with open(f"{mobility_dir}/id_dict_{city_id}.pkl", "rb") as file:
        id_dict = pickle.load(file)
    return matrix, gdf, id_dict


# plot flow network
def plot_on(city_id, edgelist, edgelist_top, gdf, alpha=0.005, divs=[10, 2]):
    """Draw two edge lists side by side on a basemap and save the figure as PNG.

    The left panel shows ``edgelist`` and the right panel ``edgelist_top``.
    Every edge becomes a blue straight line between the ``gdf`` points of its
    source and target. The figure is written to
    ``"{city name}_{alpha}_{divs joined by '__'}.png"`` at 300 dpi in the
    current working directory.

    Parameters
    ----------
    city_id : int
        Key into ``map_city_new``; used for the title and the file name.
    edgelist, edgelist_top : pandas.Series
        Edge weights whose index has levels named ``"source"`` and ``"target"``
        holding labels of ``gdf.index``.
    gdf : geopandas.GeoDataFrame
        Point geometries indexed by node id; ``gdf.crs`` is passed to the
        basemap call.
    alpha : float
        Multiplier turning an edge weight into line opacity (clipped to [0, 1]).
    divs : sequence of two numbers
        Per-panel divisor: line width is ``exp(weight) / divs[panel]``. The
        list default is never mutated here.

    Notes
    -----
    The whole render is attempted up to 100 times; any exception is printed and
    the attempt is repeated (presumably to ride out transient basemap tile
    download failures -- TODO confirm). A failed attempt now closes its figure
    so half-drawn figures do not pile up, and a message is printed if every
    attempt fails, because in that case no file is written.
    """
    max_attempts = 100
    for _ in range(max_attempts):
        fig = None
        try:
            # Node coordinates are the same for both panels; look them up once.
            node_x = gdf["geometry"].x
            node_y = gdf["geometry"].y

            fig, axs = plt.subplots(1, 2)
            fig.suptitle(f"{map_city_new[city_id]}, alpha={alpha}, divs={divs}")
            for j, (el, ax) in enumerate(zip([edgelist, edgelist_top], axs)):
                edges = el.reset_index()
                src_x = edges["source"].map(node_x).to_numpy()
                src_y = edges["source"].map(node_y).to_numpy()
                tgt_x = edges["target"].map(node_x).to_numpy()
                tgt_y = edges["target"].map(node_y).to_numpy()

                # Per-edge style, computed once instead of inside the loop.
                weights = el.values
                widths = np.exp(weights) / divs[j]
                alphas = np.clip(weights * alpha, 0, 1)

                # Invisible plot of the points: only sets the axes extent
                # before the basemap tiles are fetched for that extent.
                gdf.plot(alpha=0, ax=ax)
                cx.add_basemap(ax, source=cx.providers.CartoDB.Positron, crs=gdf.crs)
                ax.axis("off")

                for i in trange(len(weights)):
                    ax.plot(
                        [src_x[i], tgt_x[i]],
                        [src_y[i], tgt_y[i]],
                        c="b",
                        alpha=alphas[i],
                        linewidth=widths[i],
                    )
            fig.tight_layout()
            fig.savefig(
                f"{map_city_new[city_id]}_{alpha}_{'__'.join([str(x) for x in divs])}.png",
                dpi=300,
            )
            break
        except Exception as e:
            print(e)
            # Discard the partially drawn figure before the next attempt.
            if fig is not None:
                plt.close(fig)
            continue
    else:
        # Loop ended without a successful `break`: nothing was saved.
        print(
            f"plot_on: all {max_attempts} attempts failed; no figure was saved "
            f"for {map_city_new.get(city_id, city_id)}"
        )

### Visualize the original flow network and the network of each source's 130 most frequent destinations

In [None]:
# new york as an example
city_id = 1

# number of strongest outgoing edges kept per source in the reduced network
tops = 130
matrix, gdf, id_dict = load_city_data(city_id)

# Long-format edge list: one row per (source, target) pair with positive flow.
cbg_ids = list(id_dict.values())
edgelist = pd.DataFrame(matrix, index=cbg_ids, columns=cbg_ids).stack()
edgelist = edgelist[edgelist > 0]
edgelist.index.names = ["source", "target"]


def _rank_score(x):
    """Replace one source's flows by a rank-based score in (0, 1].

    ``argsort(argsort(x))`` is the 0-based ascending rank of every value, so
    the score is ``(n - rank) / n`` with ``n = len(x)``.
    NOTE(review): this gives the largest flow the smallest score (1/n) and the
    smallest flow a score of 1, while plot_on draws higher scores thicker and
    more opaque -- confirm this direction is intended.
    """
    return (len(x) - np.argsort(np.argsort(x)).droplevel(0)) / len(x)


# Keep the `tops` largest flows per source; droplevel(1) removes the index
# level that is dropped right after groupby().nlargest() to get back to
# (source, target).
edgelist_top = edgelist.groupby("source").nlargest(tops).droplevel(1)
edgelist_top = edgelist_top.groupby("source").apply(_rank_score)
edgelist = edgelist.groupby("source").apply(_rank_score)

left_div = 11
right_div = 10.5
# Arguments follow plot_on(city_id, edgelist, edgelist_top, gdf, alpha, divs);
# the earlier call passed matrix and id_dict as well and omitted city_id,
# which does not match that signature.
plot_on(
    city_id,
    edgelist,
    edgelist_top,
    gdf,
    alpha=0.008,
    divs=[left_div, right_div],
)