In [1]:
import geopandas
import pickle
import pandas as pd
db = '/Users/chizhang/Desktop/didi/data/didi_cd'
tile_id_column = 'tile_ID'
tile_geometry = 'geometry'
oa_id_column = 'GEOID'
oa_geometry = 'geometry'
flow_origin_column = 'geoid_o'
flow_destination_column = 'geoid_d'
flow_flows_column = 'pop_flows'

In [2]:
try:
    tessellation = geopandas.read_file(db + '/tessellation.shp', dtype={tile_id_column: str})
except:
    tessellation = geopandas.read_file(db + '/tessellation.geojson', dtype={tile_id_column: str})
tessellation = tessellation[[tile_id_column, tile_geometry]]
print('Reading output areas....')
try:
    output_areas = geopandas.read_file(db + '/output_areas.shp', dtype={oa_id_column: str})
except:
    output_areas = geopandas.read_file(db + '/output_areas.geojson', dtype={oa_id_column: str})
output_areas = output_areas[[oa_id_column, oa_geometry]]
print('Reading features....')
try:
    features = pd.read_csv(db + '/features.csv')
    # 确定让features里面的主键geo_id和output_areas里的主键geo_id一一对应
    if not oa_id_column in list(features.columns):
        raise ValueError(
            'Features must be associated with an output area. Please add a column ' + ' to features.csv')
except:
    features = None
    print('Running without features. features.csv not found....')

print('Mapping output areas with tessellation....')
output_areas['centroid'] = output_areas[oa_geometry].centroid
# prepare and write  oa_gdf.csv.gz
output_areas["area_km2"] = output_areas[oa_geometry].area / 10 ** 6
output_areas['x'] = output_areas['centroid'].x
output_areas['y'] = output_areas['centroid'].y
output_areas['ctrs'] = '[' + output_areas['x'].astype(str) + ',' + output_areas['y'].astype(str) + ']'

temp_out = output_areas[[oa_id_column, 'ctrs', 'area_km2']]
temp_out.rename(columns={oa_id_column: 'geo_code', 'ctrs': 'centroid'}, inplace=True)

temp_out.to_csv(db + '/processed/oa_gdf.csv.gz')

oa2centroid = {}
for i, row in temp_out.iterrows():
    oa2centroid[row['geo_code']] = row['centroid']

with open(db + '/processed/oa2centroid.pkl', 'wb') as handle:
    pickle.dump(oa2centroid, handle)

output_areas.drop(columns=[oa_geometry], inplace=True)
output_areas.rename(columns={'centroid': oa_geometry}, inplace=True)

mapping = geopandas.sjoin(output_areas, tessellation, how="inner", op="within")
try:
    mapping.drop(columns=['index_right'], inplace=True)
except:
    pass

Reading output areas....
Reading features....
Mapping output areas with tessellation....



  output_areas['centroid'] = output_areas[oa_geometry].centroid

  output_areas["area_km2"] = output_areas[oa_geometry].area / 10 ** 6
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_out.rename(columns={oa_id_column: 'geo_code', 'ctrs': 'centroid'}, inplace=True)
  if await self.run_code(code, result, async_=asy):


In [5]:
db_dir = db
flows = pd.read_csv(db_dir+'/flows.csv', dtype={flow_origin_column:str, flow_destination_column:str, flow_flows_column:int})
flows = flows[[flow_origin_column, flow_destination_column, flow_flows_column]]

flows.rename(columns={flow_origin_column:'residence', flow_destination_column:'workplace', flow_flows_column:'commuters'},inplace=True)
flows.to_csv(db_dir+'/processed/flows_oa.csv.zip')

od2flow = {}
for i,row in flows.iterrows():
    od2flow[(row['residence'],row['workplace'])] = row['commuters']
    
with open(db_dir+'/processed/od2flow.pkl', 'wb') as handle:
    pickle.dump(od2flow, handle)

features = pd.read_csv(db_dir+'/features.csv', dtype={oa_id_column:str})

In [7]:
features = features[['GEOID', '0','1','2','3','4','5']]

In [8]:
features

Unnamed: 0,GEOID,0,1,2,3,4,5
0,0,3488,5023,8.500442,24.48,2.88,11.052280
1,1,536,772,3.726961,5.76,2.88,11.983405
2,2,1024,1475,6.506793,11.52,10.08,13.625806
3,3,1486,2140,16.507081,34.56,264.96,9.213950
4,4,2319,3339,5.091071,4.32,7.20,13.233970
...,...,...,...,...,...,...,...
746,746,602,867,9.124699,31.68,18.72,11.796066
747,747,6537,9413,8.847477,30.24,25.92,25.567458
748,748,569,819,7.061171,28.80,21.60,12.002166
749,749,2730,3931,7.732232,21.60,48.96,10.534842


In [10]:
oa2features = {}
for i,row in features.iterrows():
    oa2features[row[0]]=list(row[1:].values)
with open(db_dir+'/processed/oa2features.pkl', 'wb') as handle:
    pickle.dump(oa2features, handle)

In [60]:
tileid2oa2handmade_features = dict()
for i, row in mapping.iterrows():
    if row[tile_id_column] not in tileid2oa2handmade_features:
        tileid2oa2handmade_features[row[tile_id_column]] = dict()
        tileid2oa2handmade_features[row[tile_id_column]][row[oa_id_column]] = dict()
    else:
        tileid2oa2handmade_features[row[tile_id_column]][row[oa_id_column]] = dict()

In [11]:
mapping

Unnamed: 0,GEOID,geometry,area_km2,x,y,ctrs,tile_ID
0,0,POINT (104.20210 30.67063),3.473192e-11,104.202099,30.670626,"[104.20209939461299,30.670626000618658]",13896
735,735,POINT (104.19583 30.67083),6.944444e-11,104.195833,30.670833,"[104.19583333333227,30.670833333332897]",13896
1,1,POINT (104.14583 30.70417),6.944444e-11,104.145833,30.704167,"[104.14583333333222,30.7041666666662]",13379
2,2,POINT (104.13750 30.72083),6.944444e-11,104.137500,30.720833,"[104.13749999999897,30.720833333332966]",13279
3,3,POINT (104.12820 30.77652),2.181471e-11,104.128198,30.776521,"[104.1281981791558,30.776521027747595]",13184
...,...,...,...,...,...,...,...
745,745,POINT (104.05417 30.72917),6.944444e-11,104.054167,30.729167,"[104.05416666666571,30.72916666666632]",12334
746,746,POINT (104.16250 30.73750),6.944444e-11,104.162500,30.737500,"[104.16249999999889,30.73749999999958]",13578
747,747,POINT (104.12083 30.62917),6.944444e-11,104.120833,30.629167,"[104.12083333333226,30.629166666666265]",13060
748,748,POINT (104.18750 30.72083),6.944444e-11,104.187500,30.720833,"[104.18749999999895,30.720833333332962]",13825


In [4]:
tileid2oa2handmade_features

{'13896': {0: {}, 735: {}},
 '13379': {1: {}},
 '13279': {2: {}},
 '13184': {3: {}},
 '13648': {4: {}, 110: {}},
 '11657': {5: {}, 566: {}},
 '11392': {6: {}},
 '12318': {7: {}},
 '13478': {8: {}},
 '13817': {9: {}},
 '12963': {10: {}, 183: {}},
 '12116': {11: {}},
 '11654': {12: {}},
 '12752': {13: {}},
 '13166': {14: {}},
 '13903': {15: {}},
 '12759': {16: {}},
 '12766': {17: {}},
 '12745': {18: {}},
 '11037': {19: {}},
 '13655': {20: {}},
 '13734': {21: {}},
 '11883': {22: {}},
 '10917': {23: {}},
 '11888': {24: {}},
 '13810': {25: {}, 322: {}},
 '11638': {26: {}},
 '13470': {27: {}},
 '12544': {28: {}},
 '12751': {29: {}},
 '11408': {30: {}},
 '11411': {31: {}},
 '10794': {32: {}},
 '11873': {33: {}},
 '13057': {34: {}},
 '11871': {35: {}},
 '13742': {36: {}},
 '11879': {37: {}},
 '13577': {38: {}},
 '10802': {39: {}},
 '12105': {40: {}},
 '13750': {41: {}, 553: {}},
 '13272': {42: {}},
 '13477': {43: {}},
 '13575': {44: {}},
 '12322': {45: {}},
 '12637': {46: {}},
 '11402': {47: {

In [17]:
features

Unnamed: 0,GEOID,0,1,2,3,4,5
0,0,3488,5023,8.500442,24.48,2.88,11.052280
1,1,536,772,3.726961,5.76,2.88,11.983405
2,2,1024,1475,6.506793,11.52,10.08,13.625806
3,3,1486,2140,16.507081,34.56,264.96,9.213950
4,4,2319,3339,5.091071,4.32,7.20,13.233970
...,...,...,...,...,...,...,...
746,746,602,867,9.124699,31.68,18.72,11.796066
747,747,6537,9413,8.847477,30.24,25.92,25.567458
748,748,569,819,7.061171,28.80,21.60,12.002166
749,749,2730,3931,7.732232,21.60,48.96,10.534842


In [37]:
type(tileid2oa2handmade_features)
for i, row in features.iterrows():
    for item in zip(list(row.keys()),row.values):
        print(item[0], '---', item[1])

GEOID --- 0
0 --- 3488
1 --- 5023
2 --- 8.50044202
3 --- 24.48
4 --- 2.88
5 --- 11.05227999
GEOID --- 1
0 --- 536
1 --- 772
2 --- 3.726960986
3 --- 5.76
4 --- 2.88
5 --- 11.9834046
GEOID --- 2
0 --- 1024
1 --- 1475
2 --- 6.506792709
3 --- 11.52
4 --- 10.08
5 --- 13.62580569
GEOID --- 3
0 --- 1486
1 --- 2140
2 --- 16.50708118
3 --- 34.56
4 --- 264.96
5 --- 9.213950356
GEOID --- 4
0 --- 2319
1 --- 3339
2 --- 5.091070684
3 --- 4.32
4 --- 7.2
5 --- 13.23396983
GEOID --- 5
0 --- 904
1 --- 1302
2 --- 2.176419975
3 --- 1.44
4 --- 1.44
5 --- 11.08884779
GEOID --- 6
0 --- 7961
1 --- 11464
2 --- 9.48177991
3 --- 27.36
4 --- 427.68
5 --- 16.93943868
GEOID --- 7
0 --- 7960
1 --- 1566
2 --- 19.11932394
3 --- 71.01691453
4 --- 271.0839563
5 --- 25.03574041
GEOID --- 8
0 --- 1607
1 --- 2314
2 --- 8.227274078
3 --- 21.6
4 --- 30.24
5 --- 11.7819944
GEOID --- 9
0 --- 2170
1 --- 3125
2 --- 2.660940368
3 --- 14.4
4 --- 17.28
5 --- 11.4895207
GEOID --- 10
0 --- 9659
1 --- 13909
2 --- 12.95360221
3 --- 36.

In [41]:
tileid2oa2handmade_features['13896']

{0: {}, 735: {}}

In [61]:
for id_col in tileid2oa2handmade_features.keys():
    for i,row in features.iterrows():
        for item in zip(list(row.keys()),row.values):
            try:
                tileid2oa2handmade_features[str(id_col)][i][item[0]]=[item[1]]
            except:
                KeyError

In [62]:
tileid2oa2handmade_features
        

{'13896': {0: {'GEOID': ['0'],
   '0': [3488],
   '1': [5023],
   '2': [8.50044202],
   '3': [24.48],
   '4': [2.88],
   '5': [11.05227999]},
  735: {'GEOID': ['735'],
   '0': [2732],
   '1': [3934],
   '2': [2.521167659],
   '3': [4.32],
   '4': [10.08],
   '5': [12.28818924]}},
 '13379': {1: {'GEOID': ['1'],
   '0': [536],
   '1': [772],
   '2': [3.726960986],
   '3': [5.76],
   '4': [2.88],
   '5': [11.9834046]}},
 '13279': {2: {'GEOID': ['2'],
   '0': [1024],
   '1': [1475],
   '2': [6.506792709],
   '3': [11.52],
   '4': [10.08],
   '5': [13.62580569]}},
 '13184': {3: {'GEOID': ['3'],
   '0': [1486],
   '1': [2140],
   '2': [16.50708118],
   '3': [34.56],
   '4': [264.96],
   '5': [9.213950356]}},
 '13648': {4: {'GEOID': ['4'],
   '0': [2319],
   '1': [3339],
   '2': [5.091070684],
   '3': [4.32],
   '4': [7.2],
   '5': [13.23396983]},
  110: {'GEOID': ['110'],
   '0': [3153],
   '1': [4540],
   '2': [8.577651416],
   '3': [18.72],
   '4': [2.88],
   '5': [13.85357979]}},
 '11657'

In [63]:
import json
with open('tileid2oa2handmade_features.json', 'w') as f:
    json.dump(tileid2oa2handmade_features, f)