Carga de librerías

In [1]:
import os
os.environ['USE_PYGEOS'] = '0'

import geopandas as gpd

Definición de rutas relativas a archivos de datos

In [2]:
# Project root: the parent of the current working directory
ROOT = os.path.dirname(os.getcwd())

# Folder that holds the input data files
data_path = os.path.join(ROOT, 'data', 'Data pt1')

# Folder for the files generated by this notebook
out_path = os.path.join(ROOT, 'data', 'outputs', 'chp04')

# Geopandas

_Geopandas_ es una extensión de `pandas` que incorpora funciones para soportar las estructuras de datos necesarias para trabajar con datos espaciales. Dentro de las funciones centrales de _Geopandas_ se encuentran:

* [Lectura y escritura de datos](#Lectura-y-escritura-de-datos)
* Estructuras de datos espaciales
* Gestión de proyecciones
* Visualización de datos espaciales
* Manipulación y transformación de datos
* [Geocoding](#Geocoding)

## Lectura y escritura de datos

Geopandas soporta de forma nativa la lectura y escritura de casi cualquier formato de datos tipo vector, ya sean _ESRI shapefile_ o _GeoJSON_. Para leer se utilizan los siguientes comandos:

In [3]:
# Read a local shapefile. The format is inferred from the ".shp" extension,
# so no `driver` argument is passed: 'shapefile' is not a GDAL driver name
# (the shapefile driver is called 'ESRI Shapefile').
# NOTE(review): despite its name, `capitals` holds the 10-degree graticule
# lines; the name is kept in case other cells reference it.
capitals = gpd.read_file(
    filename=os.path.join(data_path, 'Graticule', 'ne_110m_graticules_10.shp'))

capitals.head(3)

Unnamed: 0,degrees,direction,display,scalerank,dd,geometry
0,80,N,80 N,4,80,"LINESTRING (180.00000 79.99848, 179.99664 79.9..."
1,70,N,70 N,4,70,"LINESTRING (180.00000 69.99847, 179.99664 69.9..."
2,60,N,60 N,4,60,"LINESTRING (180.00000 59.99866, 179.99664 59.9..."


También tiene soporte para leer archivos en línea:

In [4]:
# Address of a remote GeoJSON file
url = 'https://d2ad6b4ur7yvpq.cloudfront.net/naturalearth-3.3.0/ne_110m_admin_1_states_provinces_shp.geojson'

# read_file takes the URL in place of a local path
us_states = gpd.read_file(filename=url)

us_states.head(n=3)

Unnamed: 0,scalerank,adm1_code,diss_me,adm1_cod_1,iso_3166_2,wikipedia,sr_sov_a3,sr_adm0_a3,iso_a2,adm0_sr,...,area_sqkm,sameascity,labelrank,featurec_1,admin,name_len,mapcolor9,mapcolor13,featureclass,geometry
0,2,USA-3514,3514,USA-3514,US-MN,http://en.wikipedia.org/wiki/Minnesota,US1,USA,US,1,...,0,-99,0,Admin-1 scale rank,United States of America,9,1,1,Admin-1 scale rank,"POLYGON ((-89.59941 48.01027, -89.48888 48.013..."
1,2,USA-3515,3515,USA-3515,US-MT,http://en.wikipedia.org/wiki/Montana,US1,USA,US,1,...,0,-99,0,Admin-1 scale rank,United States of America,7,1,1,Admin-1 scale rank,"POLYGON ((-111.19419 44.56116, -111.29155 44.7..."
2,2,USA-3516,3516,USA-3516,US-ND,http://en.wikipedia.org/wiki/North_Dakota,US1,USA,US,1,...,0,-99,0,Admin-1 scale rank,United States of America,12,1,1,Admin-1 scale rank,"POLYGON ((-96.60136 46.35136, -96.53891 46.199..."


De igual manera, soporta leer archivos comprimidos sin necesidad de descomprimirlos explícitamente:

In [5]:
# A zipped shapefile can be read directly, without extracting it first
cbsa_zip = os.path.join(data_path, 'tl_2021_us_cbsa.zip')
us_cbsas = gpd.read_file(cbsa_zip)

us_cbsas.head(3)

Unnamed: 0,CSAFP,CBSAFP,GEOID,NAME,NAMELSAD,LSAD,MEMI,MTFCC,ALAND,AWATER,INTPTLAT,INTPTLON,geometry
0,122,12020,12020,"Athens-Clarke County, GA","Athens-Clarke County, GA Metro Area",M1,1,G3110,2654607902,26109459,33.943984,-83.2138965,"POLYGON ((-83.36003 34.04057, -83.36757 34.043..."
1,122,12060,12060,"Atlanta-Sandy Springs-Alpharetta, GA","Atlanta-Sandy Springs-Alpharetta, GA Metro Area",M1,1,G3110,22495873026,386782308,33.693728,-84.3999113,"POLYGON ((-84.27014 32.99101, -84.27084 32.991..."
2,428,12100,12100,"Atlantic City-Hammonton, NJ","Atlantic City-Hammonton, NJ Metro Area",M1,1,G3110,1438775279,301270067,39.4693555,-74.6337591,"POLYGON ((-74.58640 39.30989, -74.58665 39.309..."


### Filtering using the mask parameter

In [None]:
# Read the US states layer from the zipped Census TIGER file.
# os.path.join is needed here: data_path has no trailing separator, so plain
# string concatenation would point at ".../Data pt1tl_2021_us_state.zip".
us_states = gpd.read_file(os.path.join(data_path, 'tl_2021_us_state.zip'))

# Keep only the row(s) whose NAME is California
california = us_states[us_states['NAME'] == "California"]

# Show the dataframe
california.head()

In [None]:
# Read only the CBSAs that intersect California by passing the `california`
# GeoDataFrame as the `mask` argument of read_file.
# os.path.join is needed here: data_path has no trailing separator, so plain
# string concatenation would produce a path to a file that does not exist.
ca_cbsas = gpd.read_file(os.path.join(data_path, 'tl_2021_us_cbsa.zip'), mask=california)

# Show the first 5 records of the ca_cbsas GeoDataFrame
ca_cbsas.head()

In [None]:
# Number of rows in ca_cbsas
ca_cbsas.shape[0]

### Filtering using the bounding box parameter

In [None]:
# Bounding box in the order read_file expects: (minx, miny, maxx, maxy).
# The smaller longitude/latitude must come first, otherwise the box is malformed.
bounding_box = (-123.82246, 38.7, -123.82239, 42.15933)

# Read only the CBSAs that intersect the bounding box.
# os.path.join is needed here: data_path has no trailing separator.
cbsas_bbox = gpd.read_file(os.path.join(data_path, 'tl_2021_us_cbsa.zip'), bbox=bounding_box)

# Show the first 5 rows of the filtered dataframe
cbsas_bbox.head()

## Writing geospatial data with GeoPandas

In [None]:
# Make sure the output folder exists before writing into it
os.makedirs(out_path, exist_ok=True)

# Write the data as a shapefile. os.path.join is needed here: out_path has no
# trailing separator, so concatenation would create ".../chp04ca_cbsas.shp"
# next to the output folder instead of inside it.
ca_cbsas.to_file(os.path.join(out_path, "ca_cbsas.shp"))

In [None]:
# Make sure the output folder exists before writing into it
os.makedirs(out_path, exist_ok=True)

# Write the data as GeoJSON. os.path.join is needed here: out_path has no
# trailing separator, so concatenation would create ".../chp04cbsas_bbox.geojson"
# next to the output folder instead of inside it.
cbsas_bbox.to_file(os.path.join(out_path, "cbsas_bbox.geojson"), driver="GeoJSON")

## Geometric Manipulations

In [None]:
from shapely.geometry import Point, LineString, Polygon

# One sample geometry of each basic type: point, line and polygon
geometries = [
    Point(1, 5),
    LineString([(2, -1), (3, 0), (5, 0), (4, 2)]),
    Polygon([(5, -2), (3, 5), (5, 2)]),
]
s = gpd.GeoSeries(geometries)

# Display the second element (the line)
s.loc[1]

### Performing Buffer

In [None]:
# Buffer every geometry by a distance of 0.4, then display the buffered line
s_b = s.buffer(distance=0.4)
s_b.loc[1]

### Performing a dissolve operation

In [None]:
# Read the US states layer. os.path.join is needed here: data_path has no
# trailing separator, so plain string concatenation would produce a path to a
# file that does not exist.
us_states = gpd.read_file(os.path.join(data_path, 'tl_2021_us_state.zip'))

# Filtering out territories (selected by their STUSPS code)
us_states = us_states[~us_states['STUSPS'].isin(['MP','VI','GU','AS','PR'])]

# Reprojecting to the ESRI:54032 coordinate reference system
us_states = us_states.to_crs('ESRI:54032')
us_states.plot()

In [None]:
# With no grouping column, dissolve merges all rows into a single feature
us_states_d = us_states.dissolve(by=None)
us_states_d.plot()

### Simplifying the geometry

In [None]:
# Select the row(s) whose STUSPS code is 'LA'
is_la = us_states['STUSPS'] == 'LA'
LA = us_states[is_la]
LA.plot()

In [None]:
# Simplify the geometry; the tolerance is expressed in the units of the layer's CRS
simplify_tolerance = 10000
LA_s = LA.simplify(simplify_tolerance)
LA_s.plot()

## Plotting Geospatial Data

In [None]:
# Basic map of the California CBSAs, drawn with the default plot settings
ca_cbsas.plot()

In [None]:
# Choropleth map of the world's population

# Locate the Natural Earth low-resolution dataset bundled with GeoPandas.
# NOTE(review): `gpd.datasets` is deprecated in recent GeoPandas releases —
# confirm it is still available in the installed version.
world_path = gpd.datasets.get_path('naturalearth_lowres')
world = gpd.read_file(world_path)

# Colour each feature by its value in the pop_est column
world.plot(column='pop_est')

In [None]:
# Preview the first rows of the world GeoDataFrame
world.head()

In [None]:
# Merge the features that share the same value in the "continent" column
continents = world.dissolve("continent")
continents.head(5)

In [None]:
# Map of the dissolved continents, drawn with the default plot settings
continents.plot()

## Geocoding

In [None]:
import pandas as pd

# Read the table of DC attractions. os.path.join is needed here: data_path has
# no trailing separator, so plain string concatenation would produce a path to
# a file that does not exist.
dc_attractions = pd.read_csv(os.path.join(data_path, 'DC_Attractions.csv'))
dc_attractions.head()

In [None]:
from geopandas.tools import geocode

# Geocode the addresses with the OpenMapQuest provider, which needs an API key.
# The key is read from the OPENMAPQUEST_API_KEY environment variable so it does
# not have to be written into the notebook; the "API KEY" placeholder is only a
# fallback and must be replaced by a real key for the request to succeed.
api_key = os.environ.get('OPENMAPQUEST_API_KEY', "API KEY")
dc_attractions_gpd = geocode(dc_attractions['Address'], provider='openmapquest', timeout=4, api_key=api_key)
dc_attractions_gpd.head()

In [None]:
# Map of the geocoded attractions, drawn with the default plot settings
dc_attractions_gpd.plot()