In [1]:
import json
import os
from netrc import netrc

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import psycopg2
from fiona.crs import from_epsg
from scipy.stats import zscore

user, acct, passwd = netrc().authenticators("harris")

%matplotlib inline

In [2]:
# Load the 2015 census tract geometries for state 17, counties 31 and 43,
# reprojected by the database to EPSG:3528. Each tract is indexed by an integer
# geoid built in SQL as state * 1e9 + county * 1e6 + tract.
census_conn = psycopg2.connect(database = "census", user = user, password = passwd,
                               host = "saxon.harris.uchicago.edu", port = 5432)
try:
    geo_tracts = gpd.read_postgis("SELECT state::bigint * 1000000000 + county * 1000000 + tract geoid, "
                                  "ST_Transform(geom, 3528) geom "
                                  "FROM census_tracts_2015 "
                                  "WHERE state = 17 AND (county = 31 OR county = 43);",
                                  con = census_conn,
                                  index_col = "geoid", geom_col = "geom", crs = from_epsg(3528))
finally:
    # Close explicitly: the connection was previously created inline and never
    # released, and a psycopg2 `with` block would only end the transaction.
    census_conn.close()

# Dissolve the boundary file into a single geometry in the same CRS as the tracts.
chicago = gpd.read_file("chicago.geojson").to_crs(epsg = 3528).unary_union

# Sorted geoids of the tracts whose centroid lies inside that geometry.
chi_tracts = sorted(geo_tracts[geo_tracts.centroid.within(chicago)].index)

In [3]:
# Header-less edge list: one row per (A, B) pair with its link weight.
traces = pd.read_csv("c_adjacencies.csv", names = ["A", "B", "link"])

# Square it up into an A x B matrix; pairs absent from the file get weight 0.
mtraces = traces.pivot(index = "A", columns = "B", values = "link").fillna(0).sort_index()

# Restrict rows to the tracts in chi_tracts. `reindex` turns tracts that never
# appear as "A" into all-NaN rows, which `dropna` then removes; `.loc` with a
# list containing missing labels raises KeyError in current pandas instead.
mtraces = mtraces.reindex(chi_tracts).dropna()

# Keep only the columns for the surviving row tracts, in the same order.
mtraces = mtraces[list(mtraces.index)]

# Scale every column so that it sums to 1 (a column summing to 0 becomes NaN).
mtraces = mtraces.div(mtraces.sum(axis=0), axis=1)

# Shorten labels to the last six digits of the geoid.
# NOTE(review): this drops the state/county part of the id, so labels would
# collide if the same tract number occurs in more than one county - confirm.
mtraces.index   = mtraces.index % 1000000
mtraces.columns = mtraces.columns % 1000000

In [4]:
# Export one JSON file per row tract, traces/<tract>.json, holding the log link
# weight and its z-score for every column tract with a usable weight.

# The loop used to fail with FileNotFoundError when the output directory was
# missing (e.g. on a fresh checkout).
os.makedirs("traces", exist_ok = True)

for A in mtraces.index:

    # Row A as a two-column frame: the column label ("B") and the weight.
    place = mtraces.loc[A].reset_index().rename(columns = {"B" : "GEOID", A : "link"})

    # Keep only positive, finite weights before taking the log. These are the
    # same rows that survived the earlier log -> replace(inf) -> dropna sequence,
    # without computing log(0) or mutating the frame in place.
    usable = np.isfinite(place.link) & (place.link > 0)
    place = place[usable].dropna()

    place = place.assign(loglink = np.log(place.link))
    # z-score is taken over this row's remaining log weights only.
    place["zlink"] = zscore(place.loglink)

    place = place[["GEOID", "loglink", "zlink"]].set_index("GEOID")

    with open("traces/" + str(A) + ".json", "w") as out:
        out.write(place.to_json())



In [5]:
# print(sorted(list(mtraces.index)))