In [1]:
import pandas as pd
import geopandas as gp

# 连接房地产信息和坐标

In [2]:
# Load the scraped community records, flatten the nested "info" dict into
# one column per key, keep only rows whose "info" is not an empty dict, and
# finally drop the now-redundant nested column.
community_info = (
    pd.read_json("../community_info.jsonl", lines=True)
    .pipe(lambda raw: raw.join(pd.json_normalize(raw["info"])))
    .loc[lambda joined: joined["info"] != {}, :]
    .drop(columns="info")
)
community_info.info()

<class 'pandas.core.frame.DataFrame'>
Index: 777 entries, 0 to 776
Data columns (total 69 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   name      777 non-null    object
 1   link      777 non-null    object
 2   district  777 non-null    object
 3   物业类别      10 non-null     object
 4   项目特色      56 non-null     object
 5   建筑类别      10 non-null     object
 6   装修状况      56 non-null     object
 7   产权年限      10 non-null     object
 8   环线位置      766 non-null    object
 9   开发商       777 non-null    object
 10  楼盘地址      10 non-null     object
 11  销售状态      10 non-null     object
 12  开盘时间      10 non-null     object
 13  交房时间      10 non-null     object
 14  售楼地址      298 non-null    object
 15  咨询电话      10 non-null     object
 16  主力户型      10 non-null     object
 17  预售许可证     10 non-null     object
 18  占地面积      777 non-null    object
 19  建筑面积      777 non-null    object
 20  容积率       777 non-null    object
 21  绿化率       777 non-nul

In [3]:
# Community master list; the first CSV column is used as the row index.
community_id = pd.read_csv(
    "../community.csv",
    index_col=0,
)
community_id.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4794 entries, 0 to 4794
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   uuid     4794 non-null   object
 1   name     4794 non-null   object
 2   address  4794 non-null   object
dtypes: object(3)
memory usage: 149.8+ KB


In [4]:
# Geocoding results: discard the bookkeeping columns so that only the uuid
# key and the remaining coordinate columns are carried forward.
community_loc = (
    pd.read_json("../community_geolocation.jsonl", lines=True)
    .drop(columns=["index", "name", "address", "waiting"])
)
community_loc.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4300 entries, 0 to 4299
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   uuid    4300 non-null   object 
 1   lon     4300 non-null   float64
 2   lat     4300 non-null   float64
dtypes: float64(2), object(1)
memory usage: 100.9+ KB


In [5]:
# Attach coordinates to the master list; the inner join keeps only the
# communities whose uuid appears in the geocoding results.
community = community_id.merge(community_loc, how="inner", on="uuid")
community.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4300 entries, 0 to 4299
Data columns (total 5 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   uuid     4300 non-null   object 
 1   name     4300 non-null   object 
 2   address  4300 non-null   object 
 3   lon      4300 non-null   float64
 4   lat      4300 non-null   float64
dtypes: float64(2), object(3)
memory usage: 168.1+ KB


# 将房地产转换为空间矢量数据

In [6]:
# First, the AMap (Gaode) coordinates must be converted to the WGS84 coordinate system.
from coords_trans import CoordinateTransform

In [7]:
# Converter instance; its gcj02_to_wgs84 method is applied to every lon/lat pair further down.
transer = CoordinateTransform()

In [10]:
# Convert each GCJ-02 (lon, lat) pair to WGS84 and store the results in two
# new columns alongside the original coordinates.
wgs84_pairs = [
    transer.gcj02_to_wgs84(lon_gcj, lat_gcj)
    for lon_gcj, lat_gcj in zip(community["lon"], community["lat"])
]
community[["lon_wgs", "lat_wgs"]] = wgs84_pairs
community.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4300 entries, 0 to 4299
Data columns (total 7 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   uuid     4300 non-null   object 
 1   name     4300 non-null   object 
 2   address  4300 non-null   object 
 3   lon      4300 non-null   float64
 4   lat      4300 non-null   float64
 5   lon_wgs  4300 non-null   float64
 6   lat_wgs  4300 non-null   float64
dtypes: float64(4), object(3)
memory usage: 235.3+ KB


In [11]:
# Build point geometries from the WGS84 columns and wrap the table in a
# GeoDataFrame tagged with EPSG:4326.
community_geo = gp.points_from_xy(
    x=community["lon_wgs"],
    y=community["lat_wgs"],
)
community_gdf = gp.GeoDataFrame(
    data=community,
    geometry=community_geo,
    crs=4326,
)
community_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 4300 entries, 0 to 4299
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   uuid      4300 non-null   object  
 1   name      4300 non-null   object  
 2   address   4300 non-null   object  
 3   lon       4300 non-null   float64 
 4   lat       4300 non-null   float64 
 5   lon_wgs   4300 non-null   float64 
 6   lat_wgs   4300 non-null   float64 
 7   geometry  4300 non-null   geometry
dtypes: float64(4), geometry(1), object(3)
memory usage: 268.9+ KB


In [12]:
# Write the community points as an ESRI Shapefile.
# The name/address columns hold Chinese text, so pin the attribute-table (.dbf)
# encoding explicitly instead of relying on the engine/locale default, which
# can produce garbled attributes when the file is opened elsewhere.
community_gdf.to_file("../shp/community_locations.shp", encoding="utf-8")

# 查找没有编码出坐标的小区

In [36]:
# Communities present in the master list but absent from the geocoding results.
# `Series.isin` performs a hashed, vectorised membership test, replacing the
# per-row linear scan of the uuid list that a Python `not in` check performs.
uuid_found = community_loc["uuid"].tolist()
community_noloc = community_id.loc[~community_id["uuid"].isin(uuid_found), :]
community_noloc

Unnamed: 0,uuid,name,address
23,8de2570c-4739-45fe-bf49-affaae932dcb,绿地海珀兰轩,经开南三环与机场快速路交汇处
80,35871bae-b1eb-4e57-b1f7-cbb61274edde,河南省水利第二工程局家属院,经开
81,4df3f0a7-4c02-42dd-876e-954a1e29afcc,经济开发区公安分局家属院,经开
94,ee9d6c55-603d-4554-924e-4eb38c898898,经开管委会家属院,经开航海路三大街交汇处东南角
99,4349bc80-44ed-4f15-ba0a-2677ccedae45,航海路第二大街29号院,经开
...,...,...,...
4790,97abfe39-25dc-4b4f-b285-65690e68f7c1,瀚海·光辉城市,航空港区志洋路孙武路交会处东南
4791,610777e5-7ff7-4a77-b122-464c3d59724d,浩创梧桐花语,航空港区薛店神州路与枣园路交会处
4792,63110159-4347-4096-8665-ea89b6c8a3f6,中绘·集云筑,航空港区华夏大道与领航路交汇处
4793,00eb2a8b-4a9e-4097-8fe0-de19de519503,旭辉·空港时代,航空港区G107与志洋路交叉口东南角


In [37]:
# Save the communities that have no geocoded coordinates to a CSV file.
community_noloc.to_csv("../community_noloc.csv")