In [2]:
import pandas as pd
import geopandas as gpd
from collections import defaultdict, OrderedDict
import json

In [3]:
def shared_boundaries(gdf, id1, id2):
    g1 = gdf[gdf["dauid"] == str(id1)].geometry.iloc[0]
    g2 = gdf[gdf["dauid"] == str(id2)].geometry.iloc[0]
    return g1.length, g2.length, g1.boundary.intersection(g2.boundary).length

def get_boundary_length(gdf, id1):
    g1 = gdf[gdf["dauid"] == str(id1)].geometry.iloc[0]
    return g1.boundary.length

In [4]:
df = pd.read_csv("data/DA Ontario Clean.csv")  # General information (id, population, area...)
df_adj = pd.read_csv("data/DA Ontario Adjacency.csv")  # Pair of adjacent territories
gdf_ontario = gpd.read_file("data/DA Ontario.gpkg")  # GeoDataFrame with the territories poligons

In [5]:
df.head()

Unnamed: 0,DAuid,DApop_2016,DAtdwell_2016,DAurdwell_2016,DAarea,CSDcode
0,35010155,457.0,209.0,189.0,65.4355,50
1,35010156,448.0,223.0,201.0,53.704,50
2,35010157,469.0,214.0,190.0,66.4776,50
3,35010158,492.0,229.0,200.0,39.0021,50
4,35010159,517.0,227.0,213.0,35.9957,5


In [6]:
df_adj.head()

Unnamed: 0,dauid,Neighbor_dauid
0,35061341,35060145
1,35060170,35060145
2,35060148,35060145
3,35060135,35060145
4,35060144,35060145


In [7]:
gdf_ontario.head()

Unnamed: 0,dauid,csduid,DApop_2016,geometry
0,35060145,3506008,647,"MULTIPOLYGON (((7476534.349 1196783.794, 74767..."
1,35060146,3506008,535,"MULTIPOLYGON (((7477527.120 1195489.051, 74776..."
2,35060147,3506008,482,"MULTIPOLYGON (((7477129.526 1195060.197, 74770..."
3,35060148,3506008,444,"MULTIPOLYGON (((7477220.320 1195056.371, 74772..."
4,35060149,3506008,968,"MULTIPOLYGON (((7477774.920 1195083.109, 74778..."


In [8]:
template = json.loads(open("scenario.json", "r").read())

In [9]:
df_adj.head()

Unnamed: 0,dauid,Neighbor_dauid
0,35061341,35060145
1,35060170,35060145
2,35060148,35060145
3,35060135,35060145
4,35060144,35060145


In [10]:
df_adj[(df_adj["dauid"] == 35600266) | (df_adj["Neighbor_dauid"] == 35600266)]

Unnamed: 0,dauid,Neighbor_dauid
124433,35600267,35600266
124434,35600401,35600266
124443,35600266,35600267
125791,35600266,35600401


In [11]:
df_adj.iloc[124433, :]["Neighbor_dauid"] in invalid_dauids

NameError: name 'invalid_dauids' is not defined

Unnamed: 0,DAuid,DApop_2016,DAtdwell_2016,DAurdwell_2016,DAarea,CSDcode
19,35010174,0.0,3.0,0.0,0.0606,7
20,35010175,0.0,8.0,0.0,0.1551,7
21,35010176,0.0,5.0,0.0,0.0237,7
22,35010177,0.0,26.0,0.0,0.1372,7
23,35010178,0.0,12.0,0.0,0.0689,7
...,...,...,...,...,...,...
20105,35600349,0.0,0.0,0.0,0.1584,21
20154,35600398,0.0,0.0,0.0,0.0227,21
20155,35600399,0.0,1.0,0.0,0.0346,21
20156,35600400,0.0,1.0,0.0,0.0241,21


In [30]:
nan_rows = df[df['DApop_2016'].isnull()]
zero_pop_rows = df[df["DApop_2016"] == 0]
invalid_dauids = list(pd.concat([nan_rows, zero_pop_rows])["DAuid"])
len(invalid_dauids), len(df)

(181, 20160)

In [31]:
adj_full = OrderedDict()  # Dictionary with the structure of the json output format

for ind, row in df_adj.iterrows():  # Iterate the different pair of adjacent territories
    if row["dauid"] in invalid_dauids:
        print("Invalid dauid found: ", row["dauid"])
        continue
    elif row["Neighbor_dauid"] in invalid_dauids:
        print("Invalid dauid found: ", row["Neighbor_dauid"])
        continue
    elif str(row["dauid"]) not in adj_full:
        rel_row = df[df["DAuid"] == row["dauid"]].iloc[0, :]
        pop = rel_row["DApop_2016"]
        area = rel_row["DAarea"]

        boundary_len = get_boundary_length(gdf_ontario, row["dauid"])
        state = {"population_density": pop/area, 
                 "age_divided_populations": [0.18, 0.07, 0.27, 0.31, 0.17],
                 "infected": [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
                 "recovered": [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]}
        adj_full[str(row["dauid"])] = {"cell_id": str(row["dauid"]), "state": state, "neighborhood": []}

    l1, l2, shared = shared_boundaries(gdf_ontario, row["dauid"], row["Neighbor_dauid"])
    correlation = (shared/l1 + shared/l2) / 2  # equation extracted from zhong paper (boundaries only, we don't have roads info for now)
    # correlation = math.e ** (-1/correlation)
    if correlation == 0:
        continue
    adj_full[str(row["dauid"])]["neighborhood"].append({"cell_id": str(row["Neighbor_dauid"]), "vicinity": {"correlation": correlation}})
    if ind % 1000 == 0:
        print(ind, "%.2f%%" % (100*ind/len(df_adj)))

template["cells"] = list(adj_full.values())

0 0.00%
1000 0.79%
2000 1.59%
3000 2.38%
4000 3.17%
5000 3.97%
6000 4.76%
7000 5.55%
8000 6.35%
10000 7.94%
11000 8.73%
12000 9.52%
13000 10.32%
14000 11.11%
15000 11.90%
18000 14.28%
19000 15.08%
20000 15.87%
21000 16.66%
22000 17.46%
23000 18.25%
24000 19.04%
25000 19.84%
26000 20.63%
27000 21.43%
28000 22.22%
29000 23.01%
30000 23.81%
31000 24.60%
32000 25.39%
33000 26.19%
36000 28.57%
37000 29.36%
38000 30.15%
40000 31.74%
41000 32.53%
42000 33.33%
43000 34.12%
44000 34.92%
45000 35.71%
46000 36.50%
47000 37.30%
48000 38.09%
49000 38.88%
50000 39.68%
51000 40.47%
52000 41.26%
53000 42.06%
54000 42.85%
55000 43.64%
56000 44.44%
57000 45.23%
58000 46.02%
59000 46.82%
60000 47.61%
61000 48.41%
62000 49.20%
63000 49.99%
64000 50.79%
65000 51.58%
67000 53.17%
68000 53.96%
70000 55.55%
71000 56.34%
72000 57.13%
73000 57.93%
74000 58.72%
76000 60.31%
77000 61.10%
78000 61.89%
79000 62.69%
80000 63.48%
81000 64.28%
84000 66.66%
85000 67.45%
86000 68.24%
87000 69.04%
88000 69.83%
89000 70.6

KeyboardInterrupt: 

In [None]:
adj_full_json = json.dumps(template, indent=4, sort_keys=False)  # Dictionary to string (with indentation=4 for better formatting)

In [None]:
with open("ontario_cadmium_w6.json", "w") as f:
    f.write(adj_full_json)