In [1]:
import pandas as pd
import geopandas
import pickle
db_dir = '/Users/chizhang/Desktop/didi/data/didi_cd'
tile_id_column = 'tile_ID'
tile_geometry = 'geometry'
oa_id_column = 'GEOID'
oa_geometry = 'geometry'
flow_origin_column = 'geoid_o'
flow_destination_column = 'geoid_d'
flow_flows_column = 'pop_flows'
from ast import literal_eval

In [2]:
print('Generating the processed files - it may take a while....')
print('Reading tessellation....')
try: 
    tessellation = geopandas.read_file(db_dir+'/tessellation.shp', dtype={tile_id_column:str})
except:
    tessellation = geopandas.read_file(db_dir+'/tessellation.geojson', dtype={tile_id_column:str})
tessellation = tessellation[[tile_id_column, tile_geometry]]
print('Reading output areas....')
try: 
    output_areas = geopandas.read_file(db_dir+'/output_areas.shp', dtype={oa_id_column:str})
except:
    output_areas = geopandas.read_file(db_dir+'/output_areas.geojson', dtype={oa_id_column:str})
output_areas = output_areas[[oa_id_column, oa_geometry]]
print('Reading features....')
try:
    features = pd.read_csv(db_dir+'/features.csv')
    if not oa_id_column in list(features.columns):
        raise ValueError('Features must be associated with an output area. Please add a column '++' to features.csv')
except:
    features = None
    print('Running without features. features.csv not found....')
    
print('Mapping output areas with tessellation....')    
output_areas['centroid'] = output_areas[oa_geometry].centroid
# prepare and write  oa_gdf.csv.gz
output_areas["area_km2"] = output_areas[oa_geometry].area/ 10**6
output_areas['x'] = output_areas['centroid'].x
output_areas['y'] = output_areas['centroid'].y
output_areas['ctrs'] = '[' + output_areas['x'].astype(str) + ',' + output_areas['y'].astype(str) + ']' 

temp_out = output_areas[[oa_id_column, 'ctrs','area_km2']]
temp_out.rename(columns={oa_id_column:'geo_code', 'ctrs':'centroid'},inplace=True)

temp_out.to_csv(db_dir+'/procession/oa_gdf.csv.gz')

oa2centroid = {}
for i,row in temp_out.iterrows():
    row['centroid'] = literal_eval(row['centroid'])
    oa2centroid[str(row['geo_code'])] = row['centroid']
    
with open(db_dir+'/procession/oa2centroid.pkl', 'wb') as handle:
    pickle.dump(oa2centroid, handle)

output_areas.drop(columns=[oa_geometry], inplace=True)
output_areas.rename(columns={'centroid':oa_geometry},inplace=True)

mapping = geopandas.sjoin(output_areas, tessellation, how="inner", op="within")
try:
    mapping.drop(columns=['index_right'],inplace=True)
except:
    pass

flows = pd.read_csv(db_dir+'/flows.csv', dtype={flow_origin_column:str, flow_destination_column:str, flow_flows_column:int})
flows = flows[[flow_origin_column, flow_destination_column, flow_flows_column]]

flows.rename(columns={flow_origin_column:'residence', flow_destination_column:'workplace', flow_flows_column:'commuters'},inplace=True)
flows.to_csv(db_dir+'/procession/flows_oa.csv.zip')

od2flow = {}
for i,row in flows.iterrows():
    od2flow[(row['residence'],row['workplace'])] = row['commuters']
    
with open(db_dir+'/procession/od2flow.pkl', 'wb') as handle:
    pickle.dump(od2flow, handle)

features = pd.read_csv(db_dir+'/features.csv', dtype={oa_id_column:str})

oa2features = {}
for i,row in features.iterrows():
    oa2features[row[1]]=list(row[2:].values)
with open(db_dir+'/procession/oa2features.pkl', 'wb') as handle:
    pickle.dump(oa2features, handle)

Generating the processed files - it may take a while....
Reading tessellation....
Reading output areas....
Reading features....
Mapping output areas with tessellation....



  output_areas['centroid'] = output_areas[oa_geometry].centroid

  output_areas["area_km2"] = output_areas[oa_geometry].area/ 10**6
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_out.rename(columns={oa_id_column:'geo_code', 'ctrs':'centroid'},inplace=True)
  if await self.run_code(code, result, async_=asy):


In [3]:
mapping

Unnamed: 0,GEOID,geometry,area_km2,x,y,ctrs,tile_ID
0,0,POINT (104.20210 30.67063),3.473192e-11,104.202099,30.670626,"[104.20209939461299,30.670626000618658]",169
9,9,POINT (104.18750 30.66250),6.944444e-11,104.187500,30.662500,"[104.18749999999896,30.662499999999568]",169
20,20,POINT (104.17083 30.64583),6.944444e-11,104.170833,30.645833,"[104.17083333333234,30.645833333332977]",169
36,36,POINT (104.17917 30.67917),6.944444e-11,104.179167,30.679167,"[104.17916666666568,30.679166666666333]",169
52,52,POINT (104.19583 30.69583),6.944444e-11,104.195833,30.695833,"[104.19583333333227,30.695833333332953]",169
...,...,...,...,...,...,...,...
581,581,POINT (103.95417 30.75417),6.944444e-11,103.954167,30.754167,"[103.95416666666566,30.754166666666265]",128
605,605,POINT (103.97083 30.71250),6.944444e-11,103.970833,30.712500,"[103.97083333333228,30.712499999999523]",128
687,687,POINT (103.96250 30.75417),6.944444e-11,103.962500,30.754167,"[103.96249999999903,30.754166666666265]",128
691,691,POINT (103.97083 30.72917),6.944444e-11,103.970833,30.729167,"[103.97083333333227,30.729166666666316]",128


In [51]:
tileid2oa2handmade_features = dict()
for i,row in mapping.iterrows():
    if row[tile_id_column] not in tileid2oa2handmade_features:
        tileid2oa2handmade_features[row[tile_id_column]] = dict()
        tileid2oa2handmade_features[row[tile_id_column]][row[oa_id_column]]=dict()
    else:
        tileid2oa2handmade_features[row[tile_id_column]][row[oa_id_column]]=dict()

In [52]:
tileid2oa2handmade_features

{'169': {0: {},
  9: {},
  20: {},
  36: {},
  52: {},
  82: {},
  124: {},
  128: {},
  135: {},
  154: {},
  188: {},
  190: {},
  201: {},
  240: {},
  249: {},
  250: {},
  253: {},
  264: {},
  266: {},
  289: {},
  290: {},
  302: {},
  325: {},
  343: {},
  380: {},
  385: {},
  397: {},
  406: {},
  413: {},
  415: {},
  421: {},
  438: {},
  449: {},
  454: {},
  462: {},
  474: {},
  479: {},
  513: {},
  519: {},
  526: {},
  529: {},
  530: {},
  539: {},
  567: {},
  571: {},
  585: {},
  613: {},
  622: {},
  625: {},
  631: {},
  660: {},
  662: {},
  683: {},
  732: {},
  735: {}},
 '157': {1: {},
  8: {},
  10: {},
  13: {},
  14: {},
  28: {},
  29: {},
  42: {},
  43: {},
  59: {},
  70: {},
  78: {},
  85: {},
  88: {},
  97: {},
  101: {},
  119: {},
  121: {},
  127: {},
  145: {},
  146: {},
  150: {},
  160: {},
  166: {},
  183: {},
  185: {},
  194: {},
  211: {},
  214: {},
  224: {},
  225: {},
  226: {},
  235: {},
  237: {},
  242: {},
  260: {},
  297: {}

In [12]:
oa2features

{'0': [3488, 5023, 8.50044202, 24.48, 2.88, 11.05227999],
 '1': [536, 772, 3.726960986, 5.76, 2.88, 11.9834046],
 '2': [1024, 1475, 6.506792709, 11.52, 10.08, 13.62580569],
 '3': [1486, 2140, 16.50708118, 34.56, 264.96, 9.213950356],
 '4': [2319, 3339, 5.091070684, 4.32, 7.2, 13.23396983],
 '5': [904, 1302, 2.176419975, 1.44, 1.44, 11.08884779],
 '6': [7961, 11464, 9.48177991, 27.36, 427.68, 16.93943868],
 '7': [7960, 1566, 19.11932394, 71.01691453, 271.0839563, 25.03574041],
 '8': [1607, 2314, 8.227274078, 21.6, 30.24, 11.7819944],
 '9': [2170, 3125, 2.660940368, 14.4, 17.28, 11.4895207],
 '10': [9659, 13909, 12.95360221, 36.0, 99.36, 13.65802679],
 '11': [9799, 14111, 4.164187242, 7.2, 0.0, 11.44560631],
 '12': [1268, 1826, 0.69264973, 0.0, 17.28, 9.319336222],
 '13': [23262, 33497, 10.75716318, 28.8, 1081.44, 12.94628237],
 '14': [8210, 11822, 15.7065033, 51.84, 48.96, 21.18743407],
 '15': [1936, 2788, 3.476765835, 1.44, 8.64, 12.16883066],
 '16': [10989, 15824, 9.763084547, 21.6, 2

In [6]:
features

Unnamed: 0.1,Unnamed: 0,GEOID,0,1,2,3,4,5
0,0,0,3488,5023,8.500442,24.48,2.88,11.052280
1,1,1,536,772,3.726961,5.76,2.88,11.983405
2,2,2,1024,1475,6.506793,11.52,10.08,13.625806
3,3,3,1486,2140,16.507081,34.56,264.96,9.213950
4,4,4,2319,3339,5.091071,4.32,7.20,13.233970
...,...,...,...,...,...,...,...,...
746,746,746,602,867,9.124699,31.68,18.72,11.796066
747,747,747,6537,9413,8.847477,30.24,25.92,25.567458
748,748,748,569,819,7.061171,28.80,21.60,12.002166
749,749,749,2730,3931,7.732232,21.60,48.96,10.534842


In [7]:
features = features[['GEOID', '0', '1', '2', '3', '4', '5']]

In [8]:
features

Unnamed: 0,GEOID,0,1,2,3,4,5
0,0,3488,5023,8.500442,24.48,2.88,11.052280
1,1,536,772,3.726961,5.76,2.88,11.983405
2,2,1024,1475,6.506793,11.52,10.08,13.625806
3,3,1486,2140,16.507081,34.56,264.96,9.213950
4,4,2319,3339,5.091071,4.32,7.20,13.233970
...,...,...,...,...,...,...,...
746,746,602,867,9.124699,31.68,18.72,11.796066
747,747,6537,9413,8.847477,30.24,25.92,25.567458
748,748,569,819,7.061171,28.80,21.60,12.002166
749,749,2730,3931,7.732232,21.60,48.96,10.534842


In [33]:
for i, row in features.iterrows():
    print(i, '--\n', row)

0 --
 GEOID           0
0            3488
1            5023
2        8.500442
3           24.48
4            2.88
5        11.05228
Name: 0, dtype: object
1 --
 GEOID            1
0              536
1              772
2         3.726961
3             5.76
4             2.88
5        11.983405
Name: 1, dtype: object
2 --
 GEOID            2
0             1024
1             1475
2         6.506793
3            11.52
4            10.08
5        13.625806
Name: 2, dtype: object
3 --
 GEOID            3
0             1486
1             2140
2        16.507081
3            34.56
4           264.96
5          9.21395
Name: 3, dtype: object
4 --
 GEOID           4
0            2319
1            3339
2        5.091071
3            4.32
4             7.2
5        13.23397
Name: 4, dtype: object
5 --
 GEOID            5
0              904
1             1302
2          2.17642
3             1.44
4             1.44
5        11.088848
Name: 5, dtype: object
6 --
 GEOID            6
0             796

In [56]:
for i,row in features.iterrows():
    for item in zip(list(row.keys()),row.values):
        for j, rows in mapping.iterrows():
            tileid2oa2handmade_features[rows[tile_id_column]][rows['GEOID']][item[0]] = [item[1]]

In [57]:
tileid2oa2handmade_features

{'169': {0: {'GEOID': ['750'],
   '0': [771],
   '1': [1110],
   '2': [17.67913702],
   '3': [36.0],
   '4': [4.32],
   '5': [11.2462076]},
  9: {'GEOID': ['750'],
   '0': [771],
   '1': [1110],
   '2': [17.67913702],
   '3': [36.0],
   '4': [4.32],
   '5': [11.2462076]},
  20: {'GEOID': ['750'],
   '0': [771],
   '1': [1110],
   '2': [17.67913702],
   '3': [36.0],
   '4': [4.32],
   '5': [11.2462076]},
  36: {'GEOID': ['750'],
   '0': [771],
   '1': [1110],
   '2': [17.67913702],
   '3': [36.0],
   '4': [4.32],
   '5': [11.2462076]},
  52: {'GEOID': ['750'],
   '0': [771],
   '1': [1110],
   '2': [17.67913702],
   '3': [36.0],
   '4': [4.32],
   '5': [11.2462076]},
  82: {'GEOID': ['750'],
   '0': [771],
   '1': [1110],
   '2': [17.67913702],
   '3': [36.0],
   '4': [4.32],
   '5': [11.2462076]},
  124: {'GEOID': ['750'],
   '0': [771],
   '1': [1110],
   '2': [17.67913702],
   '3': [36.0],
   '4': [4.32],
   '5': [11.2462076]},
  128: {'GEOID': ['750'],
   '0': [771],
   '1': [1110],

In [55]:
for i , row in mapping.iterrows():
    print(i, row[tile_id_column], row['GEOID'])

0 169 0
9 169 9
20 169 20
36 169 36
52 169 52
82 169 82
124 169 124
128 169 128
135 169 135
154 169 154
188 169 188
190 169 190
201 169 201
240 169 240
249 169 249
250 169 250
253 169 253
264 169 264
266 169 266
289 169 289
290 169 290
302 169 302
325 169 325
343 169 343
380 169 380
385 169 385
397 169 397
406 169 406
413 169 413
415 169 415
421 169 421
438 169 438
449 169 449
454 169 454
462 169 462
474 169 474
479 169 479
513 169 513
519 169 519
526 169 526
529 169 529
530 169 530
539 169 539
567 169 567
571 169 571
585 169 585
613 169 613
622 169 622
625 169 625
631 169 631
660 169 660
662 169 662
683 169 683
732 169 732
735 169 735
1 157 1
8 157 8
10 157 10
13 157 13
14 157 14
28 157 28
29 157 29
42 157 42
43 157 43
59 157 59
70 157 70
78 157 78
85 157 85
88 157 88
97 157 97
101 157 101
119 157 119
121 157 121
127 157 127
145 157 145
146 157 146
150 157 150
160 157 160
166 157 166
183 157 183
185 157 185
194 157 194
211 157 211
214 157 214
224 157 224
225 157 225
226 157 226
235 15

In [45]:
for i, row in mapping.iterrows():
    print(row[tile_id_column])

169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
169
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
157
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158
158


In [43]:
tile_id_column

AttributeError: 'str' object has no attribute 'value'

In [41]:
tileid2oa2handmade_features.keys()

dict_keys(['169', '157', '158', '168', '144', '142', '170', '156', '127', '143', '126', '128'])

In [28]:
for i,row in features.iterrows():
    print(row.keys())

Index(['GEOID', '0', '1', '2', '3', '4', '5'], dtype='object')
Index(['GEOID', '0', '1', '2', '3', '4', '5'], dtype='object')
Index(['GEOID', '0', '1', '2', '3', '4', '5'], dtype='object')
Index(['GEOID', '0', '1', '2', '3', '4', '5'], dtype='object')
Index(['GEOID', '0', '1', '2', '3', '4', '5'], dtype='object')
Index(['GEOID', '0', '1', '2', '3', '4', '5'], dtype='object')
Index(['GEOID', '0', '1', '2', '3', '4', '5'], dtype='object')
Index(['GEOID', '0', '1', '2', '3', '4', '5'], dtype='object')
Index(['GEOID', '0', '1', '2', '3', '4', '5'], dtype='object')
Index(['GEOID', '0', '1', '2', '3', '4', '5'], dtype='object')
Index(['GEOID', '0', '1', '2', '3', '4', '5'], dtype='object')
Index(['GEOID', '0', '1', '2', '3', '4', '5'], dtype='object')
Index(['GEOID', '0', '1', '2', '3', '4', '5'], dtype='object')
Index(['GEOID', '0', '1', '2', '3', '4', '5'], dtype='object')
Index(['GEOID', '0', '1', '2', '3', '4', '5'], dtype='object')
Index(['GEOID', '0', '1', '2', '3', '4', '5'], dtype='o

In [13]:
tileid2oa2handmade_features

{'169': {0: {},
  9: {},
  20: {},
  36: {},
  52: {},
  82: {},
  124: {},
  128: {},
  135: {},
  154: {},
  188: {},
  190: {},
  201: {},
  240: {},
  249: {},
  250: {},
  253: {},
  264: {},
  266: {},
  289: {},
  290: {},
  302: {},
  325: {},
  343: {},
  380: {},
  385: {},
  397: {},
  406: {},
  413: {},
  415: {},
  421: {},
  438: {},
  449: {},
  454: {},
  462: {},
  474: {},
  479: {},
  513: {},
  519: {},
  526: {},
  529: {},
  530: {},
  539: {},
  567: {},
  571: {},
  585: {},
  613: {},
  622: {},
  625: {},
  631: {},
  660: {},
  662: {},
  683: {},
  732: {},
  735: {}},
 '157': {1: {},
  8: {},
  10: {},
  13: {},
  14: {},
  28: {},
  29: {},
  42: {},
  43: {},
  59: {},
  70: {},
  78: {},
  85: {},
  88: {},
  97: {},
  101: {},
  119: {},
  121: {},
  127: {},
  145: {},
  146: {},
  150: {},
  160: {},
  166: {},
  183: {},
  185: {},
  194: {},
  211: {},
  214: {},
  224: {},
  225: {},
  226: {},
  235: {},
  237: {},
  242: {},
  260: {},
  297: {}

In [20]:
import json
with open('tileid2oa2handmade_features.json', 'w') as f:
    json.dump(tileid2oa2handmade_features, f)

In [None]:
json.

In [37]:
import json
with open('/Users/chizhang/Desktop/didi/data/didi_cd/processed/tileid2oa2handmade_features.json', 'w') as f:
    tileid2oa2handmade_features = json.load(f)

UnsupportedOperation: not readable

In [6]:
import json
with open('/Users/chizhang/Desktop/didi/data/didi_cd/processed/tileid2oa2handmade_features.json') as f:
    tileid2s = json.load(f)

In [7]:
tileid2s

{'13896': {'0': {'0': [3488],
   '1': [5023],
   '2': [8.50044202],
   '3': [24.48],
   '4': [2.88],
   '5': [11.05227999]},
  '735': {'0': [2732],
   '1': [3934],
   '2': [2.521167659],
   '3': [4.32],
   '4': [10.08],
   '5': [12.28818924]}},
 '13379': {'1': {'0': [536],
   '1': [772],
   '2': [3.726960986],
   '3': [5.76],
   '4': [2.88],
   '5': [11.9834046]}},
 '13279': {'2': {'0': [1024],
   '1': [1475],
   '2': [6.506792709],
   '3': [11.52],
   '4': [10.08],
   '5': [13.62580569]}},
 '13184': {'3': {'0': [1486],
   '1': [2140],
   '2': [16.50708118],
   '3': [34.56],
   '4': [264.96],
   '5': [9.213950356]}},
 '13648': {'4': {'0': [2319],
   '1': [3339],
   '2': [5.091070684],
   '3': [4.32],
   '4': [7.2],
   '5': [13.23396983]},
  '110': {'0': [3153],
   '1': [4540],
   '2': [8.577651416],
   '3': [18.72],
   '4': [2.88],
   '5': [13.85357979]}},
 '11657': {'5': {'0': [904],
   '1': [1302],
   '2': [2.176419975],
   '3': [1.44],
   '4': [1.44],
   '5': [11.08884779]},
  '566'

In [8]:
tileid2s.keys()

dict_keys(['13896', '13379', '13279', '13184', '13648', '11657', '11392', '12318', '13478', '13817', '12963', '12116', '11654', '12752', '13166', '13903', '12759', '12766', '12745', '11037', '13655', '13734', '11883', '10917', '11888', '13810', '11638', '13470', '12544', '12751', '11408', '11411', '10794', '11873', '13057', '11871', '13742', '11879', '13577', '10802', '12105', '13750', '13272', '13477', '13575', '12322', '12637', '11402', '11648', '13078', '11531', '12660', '13899', '10805', '12003', '13159', '11765', '11518', '12327', '13370', '12439', '13826', '11391', '11998', '12096', '13074', '13054', '12436', '11526', '12955', '12749', '13653', '11052', '11640', '12948', '11523', '11163', '10919', '12542', '12429', '11752', '13733', '13657', '10798', '11290', '13473', '12861', '11400', '12965', '12526', '12336', '12970', '11522', '12969', '13283', '11151', '11275', '13275', '11870', '12338', '10925', '12958', '12007', '11277', '12009', '11519', '11520', '11170', '13162', '11763',

In [10]:
for i in range(20021):
    if str(i) not in list(tileid2s.keys()):
        tileid2s[str(i)] = dict()

In [11]:
tileid2s

{'13896': {'0': {'0': [3488],
   '1': [5023],
   '2': [8.50044202],
   '3': [24.48],
   '4': [2.88],
   '5': [11.05227999]},
  '735': {'0': [2732],
   '1': [3934],
   '2': [2.521167659],
   '3': [4.32],
   '4': [10.08],
   '5': [12.28818924]}},
 '13379': {'1': {'0': [536],
   '1': [772],
   '2': [3.726960986],
   '3': [5.76],
   '4': [2.88],
   '5': [11.9834046]}},
 '13279': {'2': {'0': [1024],
   '1': [1475],
   '2': [6.506792709],
   '3': [11.52],
   '4': [10.08],
   '5': [13.62580569]}},
 '13184': {'3': {'0': [1486],
   '1': [2140],
   '2': [16.50708118],
   '3': [34.56],
   '4': [264.96],
   '5': [9.213950356]}},
 '13648': {'4': {'0': [2319],
   '1': [3339],
   '2': [5.091070684],
   '3': [4.32],
   '4': [7.2],
   '5': [13.23396983]},
  '110': {'0': [3153],
   '1': [4540],
   '2': [8.577651416],
   '3': [18.72],
   '4': [2.88],
   '5': [13.85357979]}},
 '11657': {'5': {'0': [904],
   '1': [1302],
   '2': [2.176419975],
   '3': [1.44],
   '4': [1.44],
   '5': [11.08884779]},
  '566'

In [60]:
import json
with open('tileid2oa2handmade_features2.json', 'w') as f:
    json.dump(tileid2oa2handmade_features, f)