# 本文件用于基础数据准备
- 上海乡镇矢量数据:要素属性匹配
- 第六次人口普查数据:清洗与整理


In [2]:
# 初始化依赖包与qgis
import os
import numpy as np
import pandas as pd

# import pyqgis
from qgis.core import *

# Supply path to qgis install location
# NOTE(review): this prefix is hard-coded and machine-specific; confirm it
# points at the actual QGIS installation on the machine running the notebook.
QgsApplication.setPrefixPath("/Applications", True)

# Create a reference to the QgsApplication.  Setting the
# second argument to False disables the GUI.
qgs = QgsApplication([], False)

# Load providers
qgs.initQgis()

# Write your code here to load some layers, use processing
# algorithms, etc.

# Finally, exitQgis() is called to remove the
# provider and layer registries from memory
# NOTE(review): later cells in this notebook still construct QgsVectorLayer
# objects after this call has run. Confirm that tearing QGIS down here is
# intended; it looks like this call belongs at the very end of the notebook.
qgs.exitQgis()

# 1. 上海乡镇矢量数据属性整理
   
* 本部分整理上海乡镇矢量数据 `上海市_乡镇边界.shp`(来源于`百度地图`)的属性,并将其与上海市第六次人口普查数据(来源于`中华人民共和国政区大典`)融合,得到项目所需的基础数据。

## 1.1 加载矢量图层

In [52]:
# Town-level and county-level boundary shapefiles for Shanghai.
shanghai_town_filepath = r'/Users/oo/Desktop/5.Learning/urban-green-esri-contest/Codes/input_data/shanghai/上海市_乡镇边界.shp'
shanghai_county_filepath = r'/Users/oo/Desktop/5.Learning/urban-green-esri-contest/Codes/input_data/shanghai/上海市_县界.shp'
sh_town_layer = QgsVectorLayer(shanghai_town_filepath, "Shanghai Town", "ogr")
sh_county_layer = QgsVectorLayer(shanghai_county_filepath, "Shanghai County", "ogr")

# QgsVectorLayer does not raise when a data source cannot be opened; it hands
# back an invalid layer instead. Fail here with a clear message rather than
# letting a later cell trip over a missing attribute column.
for _layer, _path in ((sh_town_layer, shanghai_town_filepath),
                      (sh_county_layer, shanghai_county_filepath)):
    if not _layer.isValid():
        raise RuntimeError("Failed to load vector layer: {}".format(_path))

## 1.2 转换编码为GBK,避免属性表乱码

In [54]:
# Read both attribute tables as GBK so the Chinese text is not garbled.
for _gbk_layer in (sh_town_layer, sh_county_layer):
    _gbk_layer.setProviderEncoding(u'GBK')

## 1.3 建立函数获取矢量图层属性表,转换为DataFrame

In [46]:
def getAttributeTable(vlayer):
    """Return the attribute table of a vector layer as a DataFrame.

    Args:
        vlayer: QgsVectorLayer instance (any object whose ``fields()`` yields
            items with a ``name()`` method and whose ``getFeatures()`` yields
            features indexable by field name works the same way).

    Returns:
        pandas.DataFrame: one column per field name, in the order returned by
        ``vlayer.fields()``, and one row per feature, in the order returned by
        ``vlayer.getFeatures()``.
    """
    # The field list is the same for every feature, so query it once instead
    # of once per feature.
    field_names = [field.name() for field in vlayer.fields()]

    # Start every column as an empty list.
    columns = {name: [] for name in field_names}

    # Walk the features and append each attribute value to its column.
    for feature in vlayer.getFeatures():
        for name in field_names:
            columns[name].append(feature[name])

    return pd.DataFrame(columns)

## 1.4 合并区县与乡镇属性表,使得乡镇属性表包括区县字段

In [83]:
# Pull the attribute tables of both layers into DataFrames.
shangtown_attribute_table = getAttributeTable(sh_town_layer)
shangcounty_attribute_table = getAttributeTable(sh_county_layer)

# Join key: the first six characters of a town code are matched against the
# first six characters of the county code.
shangtown_attribute_table['code_front6'] = shangtown_attribute_table['code'].apply(lambda code: code[:6])
shangcounty_attribute_table['code_front6'] = shangcounty_attribute_table['code'].apply(lambda code: code[:6])

# Left-join the county name and code onto every town row; columns present in
# both tables get the suffixes '_town' and '_county'.
shangtown_merge = pd.merge(
    shangtown_attribute_table,
    shangcounty_attribute_table[['Name', 'code', 'code_front6']],
    how='left',
    on='code_front6',
    suffixes=('_town', '_county'),
)
display(shangtown_merge)


Unnamed: 0,gml_id,Name_town,layer,code_town,grade,code_front6,Name_county,code_county
0,layer_township_pg.1,江桥镇,乡镇,310114118000,4,310114,嘉定区,310114000000
1,layer_township_pg.2,菊园新区管委会,乡镇,310114003000,4,310114,嘉定区,310114000000
2,layer_township_pg.3,外冈镇,乡镇,310114114000,4,310114,嘉定区,310114000000
3,layer_township_pg.4,南翔镇,乡镇,310114102000,4,310114,嘉定区,310114000000
4,layer_township_pg.5,新成路街道,乡镇,310114001000,4,310114,嘉定区,310114000000
...,...,...,...,...,...,...,...,...
246,layer_township_pg.27567,姚庄镇,乡镇,330421111000,4,330421,嘉善县,330421000000
247,layer_township_pg.27571,新仓镇,乡镇,330482103000,4,330482,平湖市,330482000000
248,layer_township_pg.27574,广陈镇,乡镇,330482106000,4,330482,平湖市,330482000000
249,layer_township_pg.27575,新埭镇,乡镇,330482102000,4,330482,平湖市,330482000000


In [86]:
# 输出结果
# shangtown_merge.to_csv('output_data/上海乡镇单元人口普查统计.csv', index=False, encoding='utf_8_sig')