# 本文档提取上海主城区小区 AOI 及房价数据
- 数据准备
    - 上海主城区行政区划数据
    - 上海全城房价数据
    - 上海全城小区数据

In [1]:
# 初始化依赖包与qgis
import os
import numpy as np
import pandas as pd


import geopandas as gpd
import matplotlib.pyplot as plt

# Import PyQGIS. The wildcard import is kept on purpose: later cells use
# names such as QgsVectorLayer unqualified.
from qgis.core import *

# Supply path to qgis install location
QgsApplication.setPrefixPath('/Applications/QGIS-LTR.app/Contents/MacOS', True)

# Create a reference to the QgsApplication.  Setting the
# second argument to False disables the GUI.
qgs = QgsApplication([], False)

# Load providers
qgs.initQgis()

# Initialise the Processing framework and register the native algorithm
# provider so that processing.run(...) can be called from later cells.
from qgis.analysis import QgsNativeAlgorithms
import processing
from processing.core.Processing import Processing
Processing.initialize()
QgsApplication.processingRegistry().addProvider(QgsNativeAlgorithms())

# NOTE: qgs.exitQgis() removes the provider and layer registries from memory,
# so it must NOT be called here: every later cell still loads layers and runs
# processing algorithms. Call qgs.exitQgis() exactly once, after the last
# cell that uses QGIS has finished.


def getAttributeTable(vlayer):
    """Return the attribute table of a vector layer as a DataFrame.

    Args:
        vlayer: QgsVectorLayer instance whose attributes are read.

    Returns:
        pandas.DataFrame: one column per layer field (named after the
        field) and one row per feature, in the order the layer yields them.
    """
    # Resolve the field names once instead of once per feature; they do not
    # change while the features are being read.
    field_names = [field.name() for field in vlayer.fields()]

    # Start every column as an empty list so that a layer without features
    # still yields a DataFrame carrying the expected columns.
    columns = {name: [] for name in field_names}

    for feature in vlayer.getFeatures():
        for name in field_names:
            columns[name].append(feature[name])

    return pd.DataFrame(columns)



In [3]:
# Locations of the source shapefiles: the main-town boundary, the house-price
# layer and the AOI layer.
input_path = '/Users/oo/Desktop/5.Learning/esri-ces-contest/Codes/input_data'
sh_main_town_filepath = r'{}/市中心-面/center_city1.shp'.format(input_path)
sh_house_price_filepath = r'{}/shanghai_house_price.shp'.format(input_path)
sh_aoi_price_filepath = r'{}/上海市小区和写字楼/上海市AOI数据.shp'.format(input_path)


# Load each shapefile through the OGR provider.
sh_main_town = QgsVectorLayer(sh_main_town_filepath, "Shanghai main town", "ogr")
sh_house_price = QgsVectorLayer(sh_house_price_filepath, "Shanghai house price", "ogr")
sh_aoi_layer = QgsVectorLayer(sh_aoi_price_filepath, "Shanghai aoi", "ogr")

# QgsVectorLayer does not raise when a source cannot be opened; it hands back
# an invalid layer instead. Fail fast here so that later cells never work on
# silently empty data.
for _path, _layer in (
    (sh_main_town_filepath, sh_main_town),
    (sh_house_price_filepath, sh_house_price),
    (sh_aoi_price_filepath, sh_aoi_layer),
):
    if not _layer.isValid():
        raise IOError('Failed to load vector layer: {}'.format(_path))


In [9]:
# Preview the first rows of the house-price attribute table.
display(getAttributeTable(sh_house_price).head())

# Preview the first row of the AOI attribute table.
display(getAttributeTable(sh_aoi_layer).head(1))

Unnamed: 0,name,price,county,town,address,lng_wgs84,lat_wgs84
0,上海玫瑰园商贸城,5442,奉贤,奉城,(奉贤奉城)南奉公路999弄,121.617538,30.915823
1,平安家园,5523,奉贤,四团,(奉贤四团)平福路1625弄,121.739173,30.898313
2,燎原农场,6034,奉贤,海湾,"(奉贤海湾)燎原农场运输楼, 燎原农场供销楼, 燎原农场燎原设备楼, 燎原农场医院楼, 燎原农场场部楼",121.652068,30.861294
3,褚聚街4号,6090,奉贤,奉城,(奉贤奉城)褚聚街4号,121.612381,30.902529
4,泊兰花苑,6412,松江,泗泾,(松江泗泾)张泾路1339弄,121.275846,31.104524


Unnamed: 0,name,province,city,area,address,uid,detail_url,tag,wgs84_lng,wgs84_lat,行业大类,行业子类,Get_time
0,盛世年华小区,上海市,上海市,浦东新区,上海市浦东新区洋泾街道,1e0f0deb63876ada1d1d1148,"https://map.baidu.com/poi/怡家园/@13530401.154588437,3640824.8874104554,18.43z?uid=1e0f0deb63876ada1d1d1148&info_merge=1&isBizPoi=false&ugc_type=3&ugc_ver=1&device_ratio=2&compat=1&pcevaname=pc4.1&querytype=detailConInfo&da_src=shareurl",房地产;住宅区,,,房地产,住宅区,2021-10-25 09:56:15


# 裁切矢量数据

In [10]:
# Directory used for intermediate outputs.
temp_path = '/Users/oo/Desktop/5.Learning/esri-ces-contest/Codes/temp'

# Parameters for clipping the house-price layer with the main-town layer;
# the result is written to sh_main_house_price.shp inside temp_path.
clip_params = {"INPUT":sh_house_price,"OVERLAY":sh_main_town,"OUTPUT":r'{}/sh_main_house_price.shp'.format(temp_path)}
# The run itself is left disabled; re-enable the line below to regenerate
# the clipped shapefile.
#processing.run('qgis:clip',clip_params)



{'OUTPUT': '/Users/oo/Desktop/5.Learning/esri-ces-contest/Codes/temp/sh_main_house_price.shp'}

In [11]:
# Parameters for clipping the AOI layer with the main-town layer; the result
# is written to sh_main_aoi.shp inside temp_path. Note that this rebinds
# clip_params.
clip_params = {"INPUT":sh_aoi_layer,"OVERLAY":sh_main_town,"OUTPUT":r'{}/sh_main_aoi.shp'.format(temp_path)}
# The run itself is left disabled; re-enable the line below to regenerate
# the clipped shapefile.
#processing.run('qgis:clip',clip_params)



{'OUTPUT': '/Users/oo/Desktop/5.Learning/esri-ces-contest/Codes/temp/sh_main_aoi.shp'}

In [13]:
# Shapefiles restricted to the main town, read from the sh_main sub-directory
# of input_path.
# NOTE(review): the clip steps write to temp_path, not to input_path/sh_main;
# presumably the clipped files were copied over manually -- confirm.
sh_main_house_price_filepath = r'{}/sh_main/sh_main_house_price.shp'.format(input_path)
sh_main_aoi_price_filepath = r'{}/sh_main/sh_main_aoi.shp'.format(input_path)
# Rebinds sh_main_town_filepath (previously the center_city1.shp path).
sh_main_town_filepath = r'{}/sh_main/sh_main_town.shp'.format(input_path)

# Rebinds sh_main_town as well; from here on it refers to sh_main_town.shp.
sh_main_town = QgsVectorLayer(sh_main_town_filepath, "Shanghai main town", "ogr")
sh_main_house_price = QgsVectorLayer(sh_main_house_price_filepath, "Shanghai house price", "ogr")
sh_main_aoi = QgsVectorLayer(sh_main_aoi_price_filepath, "Shanghai aoi", "ogr")

# QgsVectorLayer does not raise when a source cannot be opened; it hands back
# an invalid layer instead. Fail fast so the attribute tables below are never
# built from silently empty layers.
for _path, _layer in (
    (sh_main_town_filepath, sh_main_town),
    (sh_main_house_price_filepath, sh_main_house_price),
    (sh_main_aoi_price_filepath, sh_main_aoi),
):
    if not _layer.isValid():
        raise IOError('Failed to load vector layer: {}'.format(_path))

# Materialise each layer's attribute table as a pandas DataFrame.
sh_main_town_attr = getAttributeTable(sh_main_town)

sh_main_house_price_attr = getAttributeTable(sh_main_house_price)

sh_main_aoi_attr = getAttributeTable(sh_main_aoi)

In [15]:
# Print column dtypes and non-null counts of the main-town house-price table.
sh_main_house_price_attr.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9510 entries, 0 to 9509
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   name       9510 non-null   object 
 1   price      9510 non-null   int64  
 2   county     9510 non-null   object 
 3   town       9510 non-null   object 
 4   address    9510 non-null   object 
 5   lng_wgs84  9510 non-null   float64
 6   lat_wgs84  9510 non-null   float64
dtypes: float64(2), int64(1), object(4)
memory usage: 520.2+ KB


In [16]:
# Print column dtypes and non-null counts of the main-town AOI table.
sh_main_aoi_attr.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12636 entries, 0 to 12635
Data columns (total 13 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   name        12636 non-null  object
 1   province    12636 non-null  object
 2   city        12636 non-null  object
 3   area        12636 non-null  object
 4   address     12636 non-null  object
 5   uid         12636 non-null  object
 6   detail_url  12636 non-null  object
 7   tag         12636 non-null  object
 8   wgs84_lng   12636 non-null  object
 9   wgs84_lat   12636 non-null  object
 10  行业大�        12636 non-null  object
 11  行业子�        12636 non-null  object
 12  Get_time    12636 non-null  object
dtypes: object(13)
memory usage: 1.3+ MB


In [18]:
# Show the main-town house-price table (bare expression, rendered as the
# cell's output).
sh_main_house_price_attr

Unnamed: 0,name,price,county,town,address,lng_wgs84,lat_wgs84
0,中海万锦城(四期),6885,闸北,不夜城,(闸北不夜城)永兴路928弄,121.455056,31.252708
1,美隆大厦,8406,宝山,大华,(宝山大华)大华三路81号,121.414087,31.278280
2,爱久家园,10987,杨浦,中原,(杨浦中原)国伟路300号,121.528517,31.333909
3,亚新生活广场,11440,普陀,长寿路,(普陀长寿路)长寿路401、401甲号,121.432953,31.240889
4,香樟小筑,12276,闵行,老闵行,(闵行老闵行)江川路633弄,121.399647,31.002328
...,...,...,...,...,...,...,...
9505,中邦晶座,98291,浦东,联洋,(浦东联洋)丁香路1066弄,121.551680,31.229577
9506,霍山路66弄,98507,虹口,北外滩,(虹口北外滩)霍山路66弄,121.505928,31.255162
9507,东方丰甸苑(别墅),98582,浦东,南码头,(浦东南码头)东方路2995弄,121.515086,31.195889
9508,东昌新村,99917,浦东,陆家嘴,"(浦东陆家嘴)东昌新村, 东宁路348号, 东宁路350号, 东宁路360号",121.507249,31.234651


In [20]:
# Show the first row of the main-town AOI table.
sh_main_aoi_attr.head(1)

Unnamed: 0,name,province,city,area,address,uid,detail_url,tag,wgs84_lng,wgs84_lat,行业大�,行业子�,Get_time
0,盛世年华小区,上海市,上海市,浦东新区,上海市浦东新区洋泾街道,1e0f0deb63876ada1d1d1148,"https://map.baidu.com/poi/怡家园/@13530401.154588437,3640824.8874104554,18.43z?uid=1e0f0deb63876ada1d1d1148&info_merge=1&isBizPoi=false&ugc_type=3&ugc_ver=1&device_ratio=2&compat=1&pcevaname=pc4.1&querytype=detailConInfo&da_src=shareurl",房地产;住宅区,,,房地产,住宅区,2021-10-25 09:56:15


In [21]:
# Inner-join house prices with AOI records on exactly matching `name` values
# and print a summary of the result; the merged frame itself is not kept.
sh_main_house_price_attr.merge(sh_main_aoi_attr, on='name', how='inner').info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4397 entries, 0 to 4396
Data columns (total 19 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   name        4397 non-null   object 
 1   price       4397 non-null   int64  
 2   county      4397 non-null   object 
 3   town        4397 non-null   object 
 4   address_x   4397 non-null   object 
 5   lng_wgs84   4397 non-null   float64
 6   lat_wgs84   4397 non-null   float64
 7   province    4397 non-null   object 
 8   city        4397 non-null   object 
 9   area        4397 non-null   object 
 10  address_y   4397 non-null   object 
 11  uid         4397 non-null   object 
 12  detail_url  4397 non-null   object 
 13  tag         4397 non-null   object 
 14  wgs84_lng   4397 non-null   object 
 15  wgs84_lat   4397 non-null   object 
 16  行业大�        4397 non-null   object 
 17  行业子�        4397 non-null   object 
 18  Get_time    4397 non-null   object 
dtypes: float64(2), int64(1), object(16)

In [24]:
# Add a new, empty field named "exa_pri" to the main-town AOI layer and write
# the result to a shapefile in temp_path.
# NOTE(review): the field is presumably meant to hold the price of the
# house-price record whose name matches exactly -- confirm.
# NOTE(review): FIELD_TYPE 0 is presumably the integer type -- confirm against
# the algorithm documentation for the QGIS version in use.
# NOTE(review): the input is the AOI layer, yet the output file is named
# sh_main_house_price_add_exa.shp; the name is misleading but is kept because
# a later cell reads the file by this exact path.
add_fields_param = {"INPUT":sh_main_aoi, "FIELD_NAME":"exa_pri", "FIELD_TYPE":0, "OUTPUT":r'{}/sh_main_house_price_add_exa.shp'.format(temp_path)}
processing.run('native:addfieldtoattributestable', add_fields_param)

{'OUTPUT': '/Users/oo/Desktop/5.Learning/esri-ces-contest/Codes/temp/sh_main_house_price_add_exa.shp'}

In [25]:
# Load the shapefile written by the add-field step. The path is hard-coded
# here; it is the same location as the OUTPUT built from temp_path.
sh_main_aoi_price_add_exa = QgsVectorLayer(r'/Users/oo/Desktop/5.Learning/esri-ces-contest/Codes/temp/sh_main_house_price_add_exa.shp',
                                           'sh main aoi temp', 'ogr')
# Show its attribute table (bare expression, rendered as the cell's output).
getAttributeTable(sh_main_aoi_price_add_exa)

Unnamed: 0,name,province,city,area,address,uid,detail_url,tag,wgs84_lng,wgs84_lat,行业大�,行业子�,Get_time,exa_pri
0,盛世年华小区,上海市,上海市,浦东新区,上海市浦东新区洋泾街道,1e0f0deb63876ada1d1d1148,"https://map.baidu.com/poi/怡家园/@13530401.154588437,3640824.8874104554,18.43z?uid=1e0f0deb63876ada1d1d1148&info_merge=1&isBizPoi=false&ugc_type=3&ugc_ver=1&device_ratio=2&compat=1&pcevaname=pc4.1&querytype=detailConInfo&da_src=shareurl",房地产;住宅区,,,房地产,住宅区,2021-10-25 09:56:15,
1,上海财经大学-研究生公寓,上海市,上海市,宝山区,上海市宝山区三门路409号,00f1c7fb5384611ec156acab,http://api.map.baidu.com/place/detail?uid=00f1c7fb5384611ec156acab&output=html&source=placeapi_v2,房地产;宿舍,121.490871,31.314259,房地产,宿舍,2022-01-10 20:20:26,
2,交大昂立学生公寓,上海市,上海市,徐汇区,上海市徐汇区吴中东路500弄5号,00fff6407a281014b5147f1f,http://api.map.baidu.com/place/detail?uid=00fff6407a281014b5147f1f&output=html&source=placeapi_v2,房地产;宿舍,121.418847,31.192359,房地产,宿舍,2022-01-10 20:20:26,
3,上海中医药大学学生公寓,上海市,上海市,浦东新区,上海市浦东新区华佗路280号,0284ab6af1097d61ca1b950a,http://api.map.baidu.com/place/detail?uid=0284ab6af1097d61ca1b950a&output=html&source=placeapi_v2,房地产;宿舍,121.588238,31.194564,房地产,宿舍,2022-01-10 20:20:26,
4,机关大院,上海市,上海市,宝山区,上海市宝山区泰和路245号,03a6319405f7980b01294f02,http://api.map.baidu.com/place/detail?uid=03a6319405f7980b01294f02&output=html&source=placeapi_v2,房地产;宿舍,121.489963,31.376962,房地产,宿舍,2022-01-10 20:20:26,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12631,华宏商务中心,上海市,上海市,普陀区,上海市普陀区大渡河路388弄5号12层,ff2d6d6babe4f1821576435f,http://api.map.baidu.com/place/detail?uid=ff2d6d6babe4f1821576435f&output=html&source=placeapi_v2,房地产;写字楼,121.388662,31.226074,房地产,写字楼,2021-10-25 01:43:08,
12632,宝华中心,上海市,上海市,静安区,上海市静安区广中西路355号,ff9c5ede621ce31e49fd3f6d,http://api.map.baidu.com/place/detail?uid=ff9c5ede621ce31e49fd3f6d&output=html&source=placeapi_v2,房地产;写字楼,121.441097,31.281619,房地产,写字楼,2021-10-25 01:43:08,
12633,迈睿中国中心大楼,上海市,上海市,徐汇区,上海市徐汇区平福路218号,ffa54ca9c2a3ade64c6d465d,http://api.map.baidu.com/place/detail?uid=ffa54ca9c2a3ade64c6d465d&output=html&source=placeapi_v2,房地产;写字楼,121.444477,31.138355,房地产,写字楼,2021-10-25 01:43:08,
12634,南泰大厦,上海市,上海市,静安区,上海市静安区南京西路920号,ffd6d4034da9b78e16b54b8a,http://api.map.baidu.com/place/detail?uid=ffd6d4034da9b78e16b54b8a&output=html&source=placeapi_v2,房地产;写字楼,121.454809,31.232381,房地产,写字楼,2021-10-25 01:43:08,
