In [1]:
import geopandas as gpd
import pyogrio
from shapely.geometry import Point

# Report the library versions that produced the outputs in this notebook.
print(
    f'pyogrio: {pyogrio.__version__}',
    f'geopandas: {gpd.__version__}',
)

pyogrio: v0.4.0 geopandas: 0.11.0


# 2 详解geopandas中的pyogrio读写引擎

## 2.1 基于pyogrio的矢量文件读取

### 2.1.1 利用columns参数指定需要读入的字段

In [2]:
# No `engine` is passed in this call; note that the output below still
# contains every attribute field, i.e. `columns` had no visible effect.
counties = gpd.read_file(
    './示例数据/县.shp',
    columns=['市代码', '市', 'geometry'],
)
counties.head()

Unnamed: 0,PAC,NAME,省代码,省,市代码,市,类型,geometry
0,110101,东城区,110000,北京市,110000,北京市,市辖区,"POLYGON ((116.40581 39.96245, 116.40783 39.960..."
1,110102,西城区,110000,北京市,110000,北京市,市辖区,"POLYGON ((116.38139 39.96006, 116.38053 39.956..."
2,110105,朝阳区,110000,北京市,110000,北京市,市辖区,"MULTIPOLYGON (((116.48030 40.07965, 116.48970 ..."
3,110106,丰台区,110000,北京市,110000,北京市,市辖区,"POLYGON ((116.31980 39.89578, 116.31978 39.894..."
4,110107,石景山区,110000,北京市,110000,北京市,市辖区,"POLYGON ((116.14485 39.99233, 116.14568 39.991..."


In [3]:
# Same call with engine='pyogrio': the output below contains only the
# requested fields plus the geometry column.
counties = gpd.read_file(
    './示例数据/县.shp',
    engine='pyogrio',
    columns=['市代码', '市', 'geometry'],
)
counties.head()

Unnamed: 0,市,市代码,geometry
0,北京市,110000,"POLYGON ((116.40581 39.96245, 116.40783 39.960..."
1,北京市,110000,"POLYGON ((116.38139 39.96006, 116.38053 39.956..."
2,北京市,110000,"MULTIPOLYGON (((116.48030 40.07965, 116.48970 ..."
3,北京市,110000,"POLYGON ((116.31980 39.89578, 116.31978 39.894..."
4,北京市,110000,"POLYGON ((116.14485 39.99233, 116.14568 39.991..."


### 2.1.2 利用read_geometry参数设置是否忽略矢量列

In [4]:
# read_geometry=False: the output below has the attribute fields only,
# with no geometry column.
counties = gpd.read_file(
    './示例数据/县.shp',
    engine='pyogrio',
    read_geometry=False,
)
counties.head()

Unnamed: 0,PAC,NAME,省代码,省,市代码,市,类型
0,110101,东城区,110000,北京市,110000,北京市,市辖区
1,110102,西城区,110000,北京市,110000,北京市,市辖区
2,110105,朝阳区,110000,北京市,110000,北京市,市辖区
3,110106,丰台区,110000,北京市,110000,北京市,市辖区
4,110107,石景山区,110000,北京市,110000,北京市,市辖区


In [5]:
# Check the container type of the geometry-less read above; the output shows
# a plain pandas DataFrame.
type(counties)

pandas.core.frame.DataFrame

### 2.1.3 利用force_2d参数强制忽略z轴信息

In [6]:
# Build a three-row demo GeoDataFrame whose points carry an explicit z
# coordinate (always 0), used by the force_2d and write examples.
# `Point` is imported together with the other dependencies in the first cell.
demo_gdf = gpd.GeoDataFrame({
    'id': range(3),
    'geometry': [Point(i, i, 0) for i in range(3)]
})
demo_gdf

Unnamed: 0,id,geometry
0,0,POINT Z (0.00000 0.00000 0.00000)
1,1,POINT Z (1.00000 1.00000 0.00000)
2,2,POINT Z (2.00000 2.00000 0.00000)


In [7]:
# Persist the z-aware demo frame as GeoJSON so it can be read back in the
# following cells.
demo_gdf.to_file('./带z轴矢量示例.geojson')

In [8]:
# Plain read without extra options: the output keeps the z coordinate
# (POINT Z geometries).
gpd.read_file('./带z轴矢量示例.geojson')

Unnamed: 0,id,geometry
0,0,POINT Z (0.00000 0.00000 0.00000)
1,1,POINT Z (1.00000 1.00000 0.00000)
2,2,POINT Z (2.00000 2.00000 0.00000)


In [9]:
# force_2d=True: the output shows 2D POINT geometries, the z value is dropped.
gpd.read_file(
    './带z轴矢量示例.geojson',
    engine='pyogrio',
    force_2d=True,
)

Unnamed: 0,id,geometry
0,0,POINT (0.00000 0.00000)
1,1,POINT (1.00000 1.00000)
2,2,POINT (2.00000 2.00000)


### 2.1.4 利用skip_features与max_features参数控制读入数据规模

In [10]:
# Baseline: first rows of the full layer, for comparison with the
# skip_features example that follows.
gpd.read_file(
    './示例数据/县.shp',
    engine='pyogrio',
).head()

Unnamed: 0,PAC,NAME,省代码,省,市代码,市,类型,geometry
0,110101,东城区,110000,北京市,110000,北京市,市辖区,"POLYGON ((116.40581 39.96245, 116.40783 39.960..."
1,110102,西城区,110000,北京市,110000,北京市,市辖区,"POLYGON ((116.38139 39.96006, 116.38053 39.956..."
2,110105,朝阳区,110000,北京市,110000,北京市,市辖区,"MULTIPOLYGON (((116.48030 40.07965, 116.48970 ..."
3,110106,丰台区,110000,北京市,110000,北京市,市辖区,"POLYGON ((116.31980 39.89578, 116.31978 39.894..."
4,110107,石景山区,110000,北京市,110000,北京市,市辖区,"POLYGON ((116.14485 39.99233, 116.14568 39.991..."


In [11]:
# skip_features=3: the output starts at what was the fourth feature of the
# baseline read.
gpd.read_file(
    './示例数据/县.shp',
    engine='pyogrio',
    skip_features=3,
).head()

Unnamed: 0,PAC,NAME,省代码,省,市代码,市,类型,geometry
0,110106,丰台区,110000,北京市,110000,北京市,市辖区,"POLYGON ((116.31980 39.89578, 116.31978 39.894..."
1,110107,石景山区,110000,北京市,110000,北京市,市辖区,"POLYGON ((116.14485 39.99233, 116.14568 39.991..."
2,110108,海淀区,110000,北京市,110000,北京市,市辖区,"POLYGON ((116.20369 40.13931, 116.20469 40.139..."
3,110109,门头沟区,110000,北京市,110000,北京市,市辖区,"POLYGON ((115.80080 40.15195, 115.80129 40.151..."
4,110111,房山区,110000,北京市,110000,北京市,市辖区,"POLYGON ((116.04840 39.84461, 116.05077 39.841..."


In [12]:
# Size of the complete layer (rows, columns), as a reference for the
# max_features example that follows.
gpd.read_file(
    './示例数据/县.shp',
    engine='pyogrio',
).shape

(2900, 8)

In [13]:
# max_features=3: only three features are returned in the output.
gpd.read_file(
    './示例数据/县.shp',
    engine='pyogrio',
    max_features=3,
)

Unnamed: 0,PAC,NAME,省代码,省,市代码,市,类型,geometry
0,110101,东城区,110000,北京市,110000,北京市,市辖区,"POLYGON ((116.40581 39.96245, 116.40783 39.960..."
1,110102,西城区,110000,北京市,110000,北京市,市辖区,"POLYGON ((116.38139 39.96006, 116.38053 39.956..."
2,110105,朝阳区,110000,北京市,110000,北京市,市辖区,"MULTIPOLYGON (((116.48030 40.07965, 116.48970 ..."


### 2.1.5 利用参数where对矢量文件进行条件过滤

In [14]:
# Filter features while reading, using an SQL-style condition on NAME.
gpd.read_file(
    './示例数据/县.shp',
    engine='pyogrio',
    where="NAME IN ('东城区', '西城区', '海淀区')",
)

Unnamed: 0,PAC,NAME,省代码,省,市代码,市,类型,geometry
0,110101,东城区,110000,北京市,110000,北京市,市辖区,"POLYGON ((116.40581 39.96245, 116.40783 39.960..."
1,110102,西城区,110000,北京市,110000,北京市,市辖区,"POLYGON ((116.38139 39.96006, 116.38053 39.956..."
2,110108,海淀区,110000,北京市,110000,北京市,市辖区,"POLYGON ((116.20369 40.13931, 116.20469 40.139..."


In [15]:
# Field names written in Chinese must be wrapped in double quotes inside the
# filter expression.
where = '''
"省" = '北京市' AND NAME NOT IN ('东城区', '西城区', '海淀区')
'''
gpd.read_file(
    './示例数据/县.shp',
    engine='pyogrio',
    where=where,
)

Unnamed: 0,PAC,NAME,省代码,省,市代码,市,类型,geometry
0,110105,朝阳区,110000,北京市,110000,北京市,市辖区,"MULTIPOLYGON (((116.48030 40.07965, 116.48970 ..."
1,110106,丰台区,110000,北京市,110000,北京市,市辖区,"POLYGON ((116.31980 39.89578, 116.31978 39.894..."
2,110107,石景山区,110000,北京市,110000,北京市,市辖区,"POLYGON ((116.14485 39.99233, 116.14568 39.991..."
3,110109,门头沟区,110000,北京市,110000,北京市,市辖区,"POLYGON ((115.80080 40.15195, 115.80129 40.151..."
4,110111,房山区,110000,北京市,110000,北京市,市辖区,"POLYGON ((116.04840 39.84461, 116.05077 39.841..."
5,110112,通州区,110000,北京市,110000,北京市,市辖区,"POLYGON ((116.62105 40.02026, 116.63179 40.020..."
6,110113,顺义区,110000,北京市,110000,北京市,市辖区,"POLYGON ((116.86578 40.28082, 116.87218 40.263..."
7,110114,昌平区,110000,北京市,110000,北京市,市辖区,"POLYGON ((116.28879 40.38977, 116.29643 40.386..."
8,110115,大兴区,110000,北京市,110000,北京市,市辖区,"POLYGON ((116.52907 39.82348, 116.53390 39.822..."
9,110116,怀柔区,110000,北京市,110000,北京市,市辖区,"POLYGON ((116.67527 41.04010, 116.67616 41.040..."


### 2.1.6 利用sql参数在原数据上直接进行sql查询

In [16]:
# Inspect the layer names. For single-layer files such as shapefile or
# GeoJSON, use the first layer as the table name in SQL queries.
# For multi-layer files such as gdb or gpkg, pick the layer that is actually
# needed and use its name as the table name.
pyogrio.list_layers('带z轴矢量示例.geojson')

array([['带z轴矢量示例', '2.5D Point']], dtype=object)

In [17]:
# Run an SQL query directly against the file; the table name is the layer
# name listed by pyogrio.list_layers.
gpd.read_file(
    '带z轴矢量示例.geojson',
    engine='pyogrio',
    sql='SELECT * FROM "带z轴矢量示例" WHERE id > 0',
)

Unnamed: 0,id,geometry
0,1,POINT Z (1.00000 1.00000 0.00000)
1,2,POINT Z (2.00000 2.00000 0.00000)


In [18]:
# The sql parameter is applied before the other filtering parameters.
gpd.read_file(
    '带z轴矢量示例.geojson',
    engine='pyogrio',
    sql='SELECT * FROM "带z轴矢量示例" WHERE id > 0',
    max_features=999,
)

Unnamed: 0,id,geometry
0,1,POINT Z (1.00000 1.00000 0.00000)
1,2,POINT Z (2.00000 2.00000 0.00000)


In [19]:
# Export three districts (selected with a where filter) to a GeoJSON file
# that has Chinese field names and a Chinese layer name.
gpd.read_file(
    './示例数据/县.shp',
    engine='pyogrio',
    where="NAME IN ('东城区', '西城区', '海淀区')",
).to_file('./中文geojson测试.geojson')

# Reading the GeoJSON file through an SQL query works normally.
gpd.read_file(
    './中文geojson测试.geojson',
    engine='pyogrio',
    sql='SELECT "省", "市", PAC, NAME FROM "中文geojson测试"',
    encoding='utf-8',
)

Unnamed: 0,省,市,PAC,NAME,geometry
0,北京市,北京市,110101,东城区,"POLYGON ((116.40581 39.96245, 116.40783 39.960..."
1,北京市,北京市,110102,西城区,"POLYGON ((116.38139 39.96006, 116.38053 39.956..."
2,北京市,北京市,110108,海淀区,"POLYGON ((116.20369 40.13931, 116.20469 40.139..."


In [20]:
# Export the same three districts to a GeoPackage, into a layer with a
# Chinese name.
gpd.read_file(
    './示例数据/县.shp',
    engine='pyogrio',
    where="NAME IN ('东城区', '西城区', '海淀区')",
).to_file('./中文geojson测试.gpkg', layer='测试图层')

# Reading the gpkg file through an SQL query works normally; note that the
# `geom` field corresponds to the geometry column.
gpd.read_file(
    './中文geojson测试.gpkg',
    engine='pyogrio',
    sql='SELECT "省", "市", PAC, NAME, geom FROM "测试图层"',
)

Unnamed: 0,省,市,PAC,NAME,geometry
0,北京市,北京市,110101,东城区,"POLYGON ((116.40581 39.96245, 116.40783 39.960..."
1,北京市,北京市,110102,西城区,"POLYGON ((116.38139 39.96006, 116.38053 39.956..."
2,北京市,北京市,110108,海淀区,"POLYGON ((116.20369 40.13931, 116.20469 40.139..."


In [21]:
# Running the same kind of SQL query against a shapefile exposes an encoding
# problem: the Chinese text in the output below is garbled.
gpd.read_file(
    './示例数据/县.shp',
    engine='pyogrio',
    sql='SELECT "省", "市", PAC, NAME FROM "县"',
)

Unnamed: 0,ç,å¸,PAC,NAME,geometry
0,åäº¬å¸,åäº¬å¸,110101,ä¸ååº,"POLYGON ((116.40581 39.96245, 116.40783 39.960..."
1,åäº¬å¸,åäº¬å¸,110102,è¥¿ååº,"POLYGON ((116.38139 39.96006, 116.38053 39.956..."
2,åäº¬å¸,åäº¬å¸,110105,æé³åº,"MULTIPOLYGON (((116.48030 40.07965, 116.48970 ..."
3,åäº¬å¸,åäº¬å¸,110106,ä¸°å°åº,"POLYGON ((116.31980 39.89578, 116.31978 39.894..."
4,åäº¬å¸,åäº¬å¸,110107,ç³æ¯å±±åº,"POLYGON ((116.14485 39.99233, 116.14568 39.991..."
...,...,...,...,...,...
2895,å°æ¹¾ç,å°åå¸,710210,åéåº,"POLYGON ((120.32254 22.61569, 120.32342 22.613..."
2896,å°æ¹¾ç,å°åå¸,710211,ææ´¥åº,"POLYGON ((120.27170 22.61347, 120.27464 22.611..."
2897,å°æ¹¾ç,å°åå¸,710212,å°æ¸¯åº,"POLYGON ((120.36048 22.50767, 120.35556 22.510..."
2898,é¦æ¸¯ç¹å«è¡æ¿åº,é¦æ¸¯ç¹å«è¡æ¿åº,810000,é¦æ¸¯ç¹å«è¡æ¿åº,"MULTIPOLYGON (((113.91455 22.16283, 113.91516 ..."


## 2.2 基于pyogrio的矢量文件写出

In [22]:
# Display the demo frame again before writing it out; its geometries are
# single POINT Z values.
demo_gdf

Unnamed: 0,id,geometry
0,0,POINT Z (0.00000 0.00000 0.00000)
1,1,POINT Z (1.00000 1.00000 0.00000)
2,2,POINT Z (2.00000 2.00000 0.00000)


In [23]:
# Write through the pyogrio engine with promote_to_multi=True, then read the
# shapefile back: the output shows MULTIPOINT geometries instead of POINT.
demo_gdf.set_crs('EPSG:4326').to_file(
    './promote_to_multi参数测试.shp',
    promote_to_multi=True,
    engine='pyogrio',
)
gpd.read_file('./promote_to_multi参数测试.shp')

Unnamed: 0,id,geometry
0,0,MULTIPOINT (0.00000 0.00000)
1,1,MULTIPOINT (1.00000 1.00000)
2,2,MULTIPOINT (2.00000 2.00000)


## 2.3 pyogrio引擎支持的所有矢量文件类型

In [24]:
# List the drivers known to pyogrio; in the output each driver name maps to a
# capability string such as 'r', 'rw' or '?'.
pyogrio.list_drivers()

{'ESRIC': '?',
 'FITS': '?',
 'PCIDSK': '?',
 'netCDF': '?',
 'PDS4': '?',
 'VICAR': '?',
 'JP2OpenJPEG': '?',
 'PDF': '?',
 'MBTiles': '?',
 'BAG': '?',
 'EEDA': '?',
 'OGCAPI': '?',
 'ESRI Shapefile': 'rw',
 'MapInfo File': '?',
 'UK .NTF': '?',
 'LVBAG': '?',
 'OGR_SDTS': '?',
 'S57': '?',
 'DGN': '?',
 'OGR_VRT': '?',
 'Memory': '?',
 'CSV': '?',
 'NAS': '?',
 'GML': 'rw',
 'GPX': '?',
 'LIBKML': '?',
 'KML': '?',
 'GeoJSON': 'rw',
 'GeoJSONSeq': 'rw',
 'ESRIJSON': '?',
 'TopoJSON': 'r',
 'OGR_GMT': '?',
 'GPKG': 'rw',
 'SQLite': '?',
 'ODBC': '?',
 'WAsP': '?',
 'PGeo': '?',
 'MSSQLSpatial': '?',
 'PostgreSQL': '?',
 'OpenFileGDB': 'r',
 'DXF': '?',
 'CAD': '?',
 'FlatGeobuf': 'rw',
 'Geoconcept': '?',
 'GeoRSS': '?',
 'VFK': '?',
 'PGDUMP': '?',
 'OSM': '?',
 'GPSBabel': '?',
 'OGR_PDS': '?',
 'WFS': '?',
 'OAPIF': 'r',
 'EDIGEO': '?',
 'SVG': '?',
 'Idrisi': '?',
 'XLS': '?',
 'ODS': '?',
 'XLSX': '?',
 'Elasticsearch': '?',
 'Carto': '?',
 'AmigoCloud': '?',
 'SXF': '?',
 'Sela