In [None]:
# default_exp geoloader

In [None]:
#export
import chardet
import pandas as pd
from pathlib import Path
import shapefile
from shapely.geometry import Polygon, MultiPolygon
from shapely.geometry import shape
from os import listdir
from os.path import join

# geoloader

> This module provides tools to load geospatial data into a dataframe similar to the dataframes of the geopandas library. Geopandas itself is not used because it is difficult to build and package into an executable. Two file types are supported: ESRI shapefiles and GeoJSON files.

## Load shapefiles

The pyshp library (imported as `shapefile`) can load and parse shapefiles, but it has a few problems for our use case.

In [None]:
# Names of the .shp files available for each country's sample data set.
SHP_SENEGAL = [file for file in listdir("geospatial_data/Senegal") if file.endswith(".shp")]
SHP_GUYANA = [file for file in listdir("geospatial_data/Guyana") if file.endswith(".shp")]
SHP_ECUADOR = [file for file in listdir("geospatial_data/Ecuador") if file.endswith(".shp")]

# Every shapefile name, all countries combined.
# (The per-country lists are upper-case constants; the lower-case names used
# previously were never defined and raised a NameError.)
list_shp = SHP_SENEGAL + SHP_GUYANA + SHP_ECUADOR

# Open every shapefile without specifying an encoding. This is a deliberate
# demonstration: some attribute tables are not utf-8 and make the reader raise.
for shp in SHP_SENEGAL:
    shapefile.Reader("./geospatial_data/Senegal/"+shp)

for shp in SHP_ECUADOR:
    shapefile.Reader("./geospatial_data/Ecuador/"+shp)

for shp in SHP_GUYANA:
    shapefile.Reader("./geospatial_data/Guyana/"+shp)

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe9 in position 9: unexpected end of data

>The attribute tables of some shapefiles cannot be opened with the default encoding (utf-8), so the encoding of the .dbf file needs to be detected automatically.

This function can detect the encoding used in a shapefile by opening the .dbf file (containing the attribute table) and using the chardet library to detect the encoding.

In [None]:
#export
def detect_shapefile_encoding(path) -> str:
    """Guess the text encoding of the attribute table of a shapefile.

    The file with the same name as `path` but with its suffix set to ".dbf"
    is read in full and handed to chardet. When chardet reports no encoding,
    "utf-8" is returned instead.
    """
    dbf_bytes = Path(path).with_suffix(".dbf").read_bytes()
    guessed_encoding = chardet.detect(dbf_bytes)['encoding']

    # chardet may report no encoding at all; fall back to utf-8 in that case.
    return guessed_encoding if guessed_encoding else "utf-8"

In [None]:
# Known shapefiles (given without suffix) and the encoding expected for each.
expected_encodings = {
    "./geospatial_data/Senegal/2018 06 27 pm Thille boubacar": "ascii",
    "./geospatial_data/Ecuador/2013 07 30 pm all Pacayacu": "ISO-8859-1",
}

for shapefile_stem, expected_encoding in expected_encodings.items():
    assert detect_shapefile_encoding(shapefile_stem) == expected_encoding

In [None]:
# Try to open every sample shapefile with its detected encoding.
# Each file is attempted independently so that one unreadable shapefile does
# not hide the status of the files that come after it.
failed_paths = []

for folder, shp_names in (
    ("./geospatial_data/Senegal/", SHP_SENEGAL),
    ("./geospatial_data/Ecuador/", SHP_ECUADOR),
    ("./geospatial_data/Guyana/", SHP_GUYANA),
):
    for shp in shp_names:
        path = folder + shp
        try:
            # Opening the reader is the whole check; the context manager
            # closes the underlying files straight away.
            with shapefile.Reader(path, encoding=detect_shapefile_encoding(path)):
                pass
        except Exception as error:
            # Report which file failed and why instead of silently stopping.
            failed_paths.append(path)
            print(shp, repr(error))

> The only failing load is on what seems to be a faulty shapefile. (The attribute table is empty)

pyshp represents the shapefile in this form:

In [None]:
# `path` still holds the value left by the loading cell above.
# The encoding helper is named detect_shapefile_encoding (detect_shp_encoding
# does not exist and raised a NameError).
geofile = shapefile.Reader(path, encoding=detect_shapefile_encoding(path))
print(geofile)

shapefile Reader
    9 shapes (type 'POLYGON')
    9 records (6 fields)


The column names are in the `fields` attribute:

In [None]:
# Field descriptors of the attribute table. As the output shows, the first
# entry is 'DeletionFlag'; the remaining entries carry the column names.
geofile.fields

[('DeletionFlag', 'C', 1, 0),
 ['Id', 'N', 6, 0],
 ['ZADA', 'C', 60, 0],
 ['LAND_USE', 'C', 50, 0],
 ['ETHNIES', 'C', 50, 0],
 ['LAND COVER', 'C', 200, 0]]

The shapes are accessible via the `shapes()` method:

In [None]:
# Displays the geometries as a collection of shapefile.Shape objects.
geofile.shapes()

Shapes: [<shapefile.Shape object at 0x7f56b507f550>, <shapefile.Shape object at 0x7f56b4180040>, <shapefile.Shape object at 0x7f56b4003490>, <shapefile.Shape object at 0x7f56b5ed1df0>, <shapefile.Shape object at 0x7f56b4889700>, <shapefile.Shape object at 0x7f56b48892e0>, <shapefile.Shape object at 0x7f56b4308fd0>, <shapefile.Shape object at 0x7f56b4308490>, <shapefile.Shape object at 0x7f56b4715b20>]

The attribute values are accessible via the `records()` method:

In [None]:
# Displays the attribute rows; each record lists its values in field order
# (without a value for 'DeletionFlag', as the output shows).
geofile.records()

[Record #0: [0, 'Zone brésilienne et de métissage', 'Habitations', 'Brésilien+Amérindien', 'Forêt+Habitations'],
 Record #1: [0, 'Zone à relief -  Zone des teko', 'Habitations', 'Teko', 'Forêt+Habitations'],
 Record #2: [0, 'Décharge brésilienne', 'Décharge', 'Brésilien', 'Berge'],
 Record #3: [0, "Zone pas peuplée, présence d'abattis", 'Forêt+Abattis', 'Inhabité', 'Forêt+Abattis'],
 Record #4: [0, 'Zone wayapi', 'Habitations', 'Wayapi', 'Forêt+Habitations'],
 Record #5: [0, 'Baignade en été', 'Baignade', 'Inhabité', 'Berge'],
 Record #6: [0, "Pas d'information", 'Forêt', 'NSP', 'Forêt'],
 Record #7: [0, 'Baignade en été', 'Baignade', 'Inhabité', 'Berge'],
 Record #8: [0, 'Fleuve Oyapock', 'Transit', 'Inhabité', 'Berge']]

This function can load any ESRI shapefile given the path to it and put it in a pandas dataframe. The path must be the path to the .shp file.

In [None]:
#export
def _to_multipolygon(shp_shape):
    """Convert a pyshp shape to a shapely MultiPolygon, or None when it holds no polygon with a positive area."""
    # Null shapes carry no geometry at all, so there is nothing to convert.
    if shp_shape.shapeType == shapefile.NULL:
        return None

    # Convert shapefile.Shape to a shapely geometry to make the conversion
    # to multipolygons easier.
    geometry = shape(shp_shape)

    if not geometry.area > 0:
        return None
    if geometry.geom_type == 'MultiPolygon':
        # Already in the target form: keep it instead of discarding it.
        return geometry
    if geometry.geom_type == 'Polygon':
        return MultiPolygon([geometry])

    # Anything else cannot be represented as a MultiPolygon.
    return None


def read_file_shp(path) -> pd.DataFrame:
    """Read an ESRI shapefile and convert it to a dataframe.

    Parameters
    ----------
    path : path to the .shp file. The attribute table encoding is detected
        from the .dbf file sitting next to it.

    Returns
    -------
    pd.DataFrame with one column per attribute field plus a 'geometry' column
    holding shapely MultiPolygons. Rows whose shape is not a polygon with a
    positive area are dropped; the index of the remaining rows is kept as is.

    Raises
    ------
    Whatever exception the shapefile reader raises when the file cannot be
    read; the offending path is printed before the exception is re-raised.
    """
    character_encoding = detect_shapefile_encoding(path)
    try:
        # Read everything inside the context manager so the underlying
        # .shp/.shx/.dbf files are closed as soon as the data is in memory.
        with shapefile.Reader(path, encoding=character_encoding) as sf:
            # The first field is the 'DeletionFlag' bookkeeping entry.
            col = [field[0] for field in sf.fields[1:]]
            records = sf.records()
            # Read the shapes once; calling sf.shapes() per index re-reads
            # the whole file every time.
            shapes = sf.shapes()
    except Exception:
        print("This shapefile is unreadable")
        print(path)
        # Nothing useful can be returned, so let the caller see the real
        # error rather than failing later on an undefined reader.
        raise

    df = pd.DataFrame(data=records, columns=col)
    df['geometry'] = [_to_multipolygon(shp_shape) for shp_shape in shapes]

    # `!= None` is evaluated element-wise by pandas and does not filter out
    # missing values; notna() does.
    df = df[df['geometry'].notna()]

    return df

Here is an example :

In [None]:
read_file_shp("./geospatial_data/Ecuador/2013 07 30 pm all Pacayacu.shp")

  result[:] = values
  iter(obj)  # Can iterate over it.
  len(obj)  # Has a length associated with it.
  s = iter(seq)
  for i in range(min(nitems, len(seq)))
  if nitems < len(seq):
  iter(obj)  # Can iterate over it.
  len(obj)  # Has a length associated with it.
  s = iter(seq)
  for i in range(min(nitems, len(seq)))
  if nitems < len(seq):


Unnamed: 0,Id,name,prod_ptrol,oil_palm,indigenos,oil_confli,geometry
0,0,A,1,0,0,1,"(POLYGON ((887406.5009568967 10006111.9820256,..."
1,0,B2,0,0,1,0,(POLYGON ((953068.8517980967 9926649.261819638...
2,0,B1,0,1,0,0,(POLYGON ((927786.0690071248 9981384.029080968...


## Load geojsons