In [1]:
import io
import json
import pickle as pk

import folium
import geojson
import jismesh.utils as ju
import pandas as pd
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon

### 原始geojson 数据读取
### 数据源 https://nlftp.mlit.go.jp/ksj/gml/datalist/KsjTmplt-N03-v2_4.html#prefecture13

### Section 1. 数据预处理

In [7]:
# Load the raw boundary GeoJSON. The encoding is passed explicitly because the
# file contains Japanese text and the platform default encoding is not UTF-8
# on every system.
with open("N03-20_200101.geojson", encoding="utf-8") as f:
    gj = geojson.load(f)

# One row per GeoJSON feature. The serialized features are wrapped in a
# file-like object because passing a literal JSON string to read_json is
# deprecated in recent pandas releases.
df = pd.read_json(io.StringIO(json.dumps(gj['features'])))

# Re-wrap every bare geometry as a property-less Feature so it can later be
# collected into a FeatureCollection. A column-level apply avoids building a
# row Series for every feature.
df["geometry"] = df["geometry"].apply(
    lambda geom: {"type": "Feature", "properties": {}, "geometry": geom}
)

#### 1.1 计算每个多边形的中心和点的数目

In [34]:
def get_center_coord_and_point_count(x):
    """Return the mean vertex position and vertex count of a feature's first ring.

    Args:
        x: Feature-like mapping; ``x['geometry']['coordinates'][0]`` must be a
            sequence of positions indexed as ``[lon, lat, ...]``.

    Returns:
        Tuple ``(mean_lat, mean_lon, point_count)``. The mean is the plain
        arithmetic mean of the vertices, not an area-weighted centroid.

    Note:
        The final position of the ring is skipped and not counted, presumably
        because a closed ring repeats its first vertex there (confirm against
        the data).
    """
    ring = x['geometry']['coordinates'][0]
    point_count = len(ring) - 1
    lat_total = 0.0
    lon_total = 0.0
    # Accumulate in ring order so the floating-point result is reproducible.
    for vertex in ring[:-1]:
        lon_total += vertex[0]
        lat_total += vertex[1]
    mean_lat = lat_total / point_count
    mean_lon = lon_total / point_count
    return (mean_lat, mean_lon, point_count)

In [40]:
# Store (mean_lat, mean_lon, point_count) for every polygon in the 'tmp' column.
df['tmp'] = df['geometry'].apply(get_center_coord_and_point_count)

#### 1.2 提取有效信息

In [41]:
# Copy the attributes used downstream out of each feature's 'properties' dict
# into flat columns.
df['pref'] = df['properties'].apply(lambda props: props['N03_001'])
df['county_name'] = df['properties'].apply(lambda props: props['N03_003'])
df['city_name'] = df['properties'].apply(lambda props: props['N03_004'])
df['city_code'] = df['properties'].apply(lambda props: props['N03_007'])

#### 1.3 计算唯一名称编码

In [45]:
def get_name_code(x):
    """Build the "<pref>-<name>" label for one row.

    The county name is prepended to the city name only when the county name is
    non-empty and either ends with "市" or the city name is exactly "泊村";
    otherwise the label uses the city name alone.

    Args:
        x: Mapping/row with 'pref', 'county_name' and 'city_name' entries.

    Returns:
        The label string.
    """
    county = x['county_name']
    include_county = bool(county) and (
        county[-1] == "市" or x['city_name'] == "泊村"
    )
    if include_county:
        return x['pref'] + "-" + county + x['city_name']
    return x['pref'] + "-" + x['city_name']
# Label every row with its "<pref>-<name>" code.
df['name_code'] = df.apply(get_name_code, axis=1)

#### 1.4 填充 所属未定地 的 city_code

In [55]:
def get_prefix_code_of_pref(city_codes):
    """Return the first two characters of the first non-empty code, as an int.

    Args:
        city_codes: Iterable of city-code strings; empty strings are skipped.

    Returns:
        ``int(code[0:2])`` for the first non-empty code, or ``None`` when every
        code is empty or the iterable is empty.
    """
    return next(
        (int(code[0:2]) for code in city_codes if code != ""),
        None,
    )

def assign_code_to_unknown(pref_name, city_code):
    """Return ``city_code``, or a synthetic negative code when it is empty.

    Args:
        pref_name: Prefecture name used as key into the module-level
            ``pref2city_code`` lookup.
        city_code: Existing city code string; "" marks a missing code.

    Returns:
        ``city_code`` unchanged when it is not "", otherwise the string form
        of the negated prefecture prefix, i.e.
        ``str(-pref2city_code[pref_name]['city_code'])``.
    """
    if city_code == "":
        prefix = pref2city_code[pref_name]['city_code']
        return str(-prefix)
    return city_code
# Numeric code prefix per prefecture, taken from the first non-empty city code
# found in each prefecture group.
tmp_df = (
    df.groupby(['pref'])['city_code']
    .apply(get_prefix_code_of_pref)
    .reset_index()
)

# Lookup of the form {pref: {'city_code': prefix}} read by assign_code_to_unknown.
pref2city_code = tmp_df.set_index('pref').T.to_dict()

# Rows with an empty city code receive the negated prefecture prefix instead.
df['city_code'] = df.apply(
    lambda row: assign_code_to_unknown(row['pref'], row['city_code']),
    axis=1,
)

In [56]:
# Inspect the frame after the preprocessing steps above.
df

Unnamed: 0,type,id,geometry,properties,pref,county_name,city_name,city_code,tmp,name_code
0,Feature,1,"{'type': 'Feature', 'properties': {}, 'geometr...","{'N03_001': '北海道', 'N03_002': '石狩振興局', 'N03_00...",北海道,札幌市,中央区,01101,"(43.03501930360105, 141.28842483878108, 1805)",北海道-札幌市中央区
1,Feature,2,"{'type': 'Feature', 'properties': {}, 'geometr...","{'N03_001': '北海道', 'N03_002': '石狩振興局', 'N03_00...",北海道,札幌市,北区,01102,"(43.13941717954423, 141.3467463752087, 1799)",北海道-札幌市北区
2,Feature,3,"{'type': 'Feature', 'properties': {}, 'geometr...","{'N03_001': '北海道', 'N03_002': '石狩振興局', 'N03_00...",北海道,札幌市,東区,01103,"(43.10852312763463, 141.39895381030442, 854)",北海道-札幌市東区
3,Feature,4,"{'type': 'Feature', 'properties': {}, 'geometr...","{'N03_001': '北海道', 'N03_002': '石狩振興局', 'N03_00...",北海道,札幌市,白石区,01104,"(43.063684942500046, 141.42413809625015, 800)",北海道-札幌市白石区
4,Feature,5,"{'type': 'Feature', 'properties': {}, 'geometr...","{'N03_001': '北海道', 'N03_002': '石狩振興局', 'N03_00...",北海道,札幌市,豊平区,01105,"(42.989584637347754, 141.38476075575105, 1478)",北海道-札幌市豊平区
...,...,...,...,...,...,...,...,...,...,...
118894,Feature,118895,"{'type': 'Feature', 'properties': {}, 'geometr...","{'N03_001': '沖縄県', 'N03_002': '', 'N03_003': '...",沖縄県,八重山郡,与那国町,47382,"(24.461987055555554, 123.04561844444444, 18)",沖縄県-与那国町
118895,Feature,118896,"{'type': 'Feature', 'properties': {}, 'geometr...","{'N03_001': '沖縄県', 'N03_002': '', 'N03_003': '...",沖縄県,八重山郡,与那国町,47382,"(24.462934066666666, 123.04352853333334, 15)",沖縄県-与那国町
118896,Feature,118897,"{'type': 'Feature', 'properties': {}, 'geometr...","{'N03_001': '沖縄県', 'N03_002': '', 'N03_003': '...",沖縄県,八重山郡,与那国町,47382,"(24.463521636363634, 123.0432830909091, 11)",沖縄県-与那国町
118897,Feature,118898,"{'type': 'Feature', 'properties': {}, 'geometr...","{'N03_001': '沖縄県', 'N03_002': '', 'N03_003': '...",沖縄県,八重山郡,与那国町,47382,"(24.4605611, 123.0433505, 10)",沖縄県-与那国町


### Section 2. 计算代表的中心点

In [93]:
def get_representative_center(tmps):
    """Pick the center of the entry with the largest point count.

    Args:
        tmps: Iterable of ``(lat, lon, point_count)`` tuples.

    Returns:
        ``(lat, lon)`` of the entry whose third element is largest. When
        several entries share the largest count, the one appearing last in
        ``tmps`` is returned (the sort is stable and the last item is taken).

    Raises:
        IndexError: If ``tmps`` is empty.
    """
    candidates = list(tmps)
    candidates.sort(key=lambda entry: entry[2])
    winner = candidates[-1]
    lat, lon = winner[0], winner[1]
    return lat, lon
# One representative (lat, lon) per city code, chosen by get_representative_center.
city_center_df = (
    df.groupby('city_code')['tmp']
    .apply(get_representative_center)
    .reset_index()
)

In [94]:
# Split the (lat, lon) tuples held in 'tmp' into separate columns, then keep
# only the code and the two coordinates.
city_center_df['lat_c'] = city_center_df['tmp'].apply(lambda center: center[0])
city_center_df['lon_c'] = city_center_df['tmp'].apply(lambda center: center[1])
city_center_df = city_center_df[['city_code', 'lat_c', "lon_c"]]
city_center_df

Unnamed: 0,city_code,lat_c,lon_c
0,-12,35.670829,139.965640
1,-13,30.486176,140.299082
2,-23,35.002576,136.810843
3,-30,33.606212,135.961814
4,-40,33.827809,131.008663
...,...,...,...
1904,47361,26.350004,126.759016
1905,47362,26.119702,127.734803
1906,47375,24.655470,124.698142
1907,47381,24.330345,123.780024


### Section 3. 合并每个城市所有的 polygon

In [84]:
def get_city_border(small_regions):
    """Bundle a group of Feature dicts into one FeatureCollection.

    Args:
        small_regions: pandas Series (anything exposing ``to_list()``) of
            GeoJSON Feature dicts.

    Returns:
        A one-element list holding the FeatureCollection dict. Consumers read
        it with ``[0]``; presumably the list wrapper keeps groupby.apply from
        unpacking the dict (confirm).
    """
    collection = {
        "type": "FeatureCollection",
        "features": small_regions.to_list(),
    }
    return [collection]
# Collect all polygons sharing a city code into one FeatureCollection per city.
city_border_df = (
    df.groupby('city_code')['geometry']
    .apply(get_city_border)
    .reset_index()
)

In [85]:
# Inspect the per-city border collections.
city_border_df

Unnamed: 0,city_code,geometry
0,-12,"[{'type': 'FeatureCollection', 'features': [{'..."
1,-13,"[{'type': 'FeatureCollection', 'features': [{'..."
2,-23,"[{'type': 'FeatureCollection', 'features': [{'..."
3,-30,"[{'type': 'FeatureCollection', 'features': [{'..."
4,-40,"[{'type': 'FeatureCollection', 'features': [{'..."
...,...,...
1904,47361,"[{'type': 'FeatureCollection', 'features': [{'..."
1905,47362,"[{'type': 'FeatureCollection', 'features': [{'..."
1906,47375,"[{'type': 'FeatureCollection', 'features': [{'..."
1907,47381,"[{'type': 'FeatureCollection', 'features': [{'..."


In [117]:
# Join each city's representative center with its border collection, drop the
# row whose city code is "-30", renumber the rows, and keep the output columns.
# NOTE(review): "-30" has the form of the synthetic negative codes given to
# rows that had an empty city code; why this one is excluded is not documented
# here - confirm.
res_df = (
    pd.merge(city_center_df, city_border_df, how='left', on=['city_code'])
    .loc[lambda merged: merged['city_code'] != "-30"]
    .reset_index()
    [['city_code', 'lat_c', "lon_c", 'geometry']]
)
res_df

Unnamed: 0,city_code,lat_c,lon_c,geometry
0,-12,35.670829,139.965640,"[{'type': 'FeatureCollection', 'features': [{'..."
1,-13,30.486176,140.299082,"[{'type': 'FeatureCollection', 'features': [{'..."
2,-23,35.002576,136.810843,"[{'type': 'FeatureCollection', 'features': [{'..."
3,-40,33.827809,131.008663,"[{'type': 'FeatureCollection', 'features': [{'..."
4,-46,31.449095,129.733040,"[{'type': 'FeatureCollection', 'features': [{'..."
...,...,...,...,...
1903,47361,26.350004,126.759016,"[{'type': 'FeatureCollection', 'features': [{'..."
1904,47362,26.119702,127.734803,"[{'type': 'FeatureCollection', 'features': [{'..."
1905,47375,24.655470,124.698142,"[{'type': 'FeatureCollection', 'features': [{'..."
1906,47381,24.330345,123.780024,"[{'type': 'FeatureCollection', 'features': [{'..."


In [118]:
# Persist the per-city centers and borders. The context manager closes the
# file handle as soon as the dump finishes instead of leaving it open.
with open("./city_geometry_and_center_df.pk", "wb") as pickle_file:
    pk.dump(res_df, pickle_file)

### Section 4. 结果可视化

In [119]:
import folium
def draw_city_border(data):
    """Render one city's border and representative center on a folium map.

    Args:
        data: Row-like object with 'lat_c', 'lon_c' and 'geometry' entries;
            ``data['geometry'][0]`` is handed to ``folium.GeoJson``.

    Returns:
        The ``folium.Map`` centered on (lat_c, lon_c) with the GeoJSON layer,
        a marker at the center, and a layer control added.
    """
    lat, lon = data['lat_c'], data['lon_c']
    m = folium.Map(location=[lat, lon], zoom_start=10)

    border_layer = folium.GeoJson(data['geometry'][0], name='geojson')
    border_layer.add_to(m)

    # NOTE(review): fill_color / radius look like CircleMarker options; confirm
    # whether they have any visible effect on a plain Marker.
    center_marker = folium.Marker(
        location=[lat, lon],
        fill_color='#43d9de',
        radius=8,
    )
    center_marker.add_to(m)

    folium.LayerControl().add_to(m)
    return m

In [120]:
# Example: draw the city stored under row label 1100 of res_df.
draw_city_border(res_df.loc[1100])