In [None]:
# default_exp geoloader

In [None]:
#export
import chardet
import pandas as pd
import shapefile
from shapely.geometry import Polygon, MultiPolygon
from shapely.geometry import shape
from os import listdir
from os.path import join

# geoloader

> This module provides tools to load geospatial data into a DataFrame similar to a geopandas GeoDataFrame. Geopandas itself is not used because it is difficult to build and to bundle into an executable. Two file types are supported: ESRI shapefiles and GeoJSON files.

## Load shapefiles
> The `shapefile` library (pyshp) can load and parse shapefiles.

In [None]:
# Collect the .shp file names available for each country.
shp_senegal = [name for name in listdir("geospatial_data/Senegal") if name.endswith(".shp")]
shp_guyana = [name for name in listdir("geospatial_data/Guyana") if name.endswith(".shp")]
shp_ecuador = [name for name in listdir("geospatial_data/Ecuador") if name.endswith(".shp")]

# Open every shapefile with pyshp's default encoding, country by country
# (Senegal, then Ecuador, then Guyana).
for folder, shp_files in (
    ("./geospatial_data/Senegal/", shp_senegal),
    ("./geospatial_data/Ecuador/", shp_ecuador),
    ("./geospatial_data/Guyana/", shp_guyana),
):
    for shp in shp_files:
        shapefile.Reader(folder + shp)

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe9 in position 9: unexpected end of data

> Some attribute tables in the shapefiles cannot be opened with the default encoding (utf-8), so the encoding of the .dbf file has to be detected automatically.

In [None]:
#export

def detect_shp_encoding(path):
    """Guess the text encoding of the attribute table attached to a shapefile.

    The ``.dbf`` file sharing the base name of ``path`` is read as raw bytes
    and handed to ``chardet.detect``.

    Parameters
    ----------
    path : str or os.PathLike
        Path to the ``.shp`` file. Its extension (if any) is replaced by
        ``.dbf``; a path without extension simply gets ``.dbf`` appended.

    Returns
    -------
    str
        The encoding name reported by chardet, or ``"utf-8"`` when chardet
        reports no encoding.

    Raises
    ------
    OSError
        If the ``.dbf`` file cannot be opened or read.
    """
    # Local import: only ``join`` is imported from os.path at file level.
    from os.path import splitext

    # splitext keeps working whatever the extension length is, unlike path[:-3].
    infile = splitext(path)[0] + ".dbf"

    # The context manager closes the handle as soon as the bytes are read.
    with open(infile, 'rb') as dbf_file:
        rawdata = dbf_file.read()

    charenc = chardet.detect(rawdata)['encoding']
    return charenc if charenc is not None else "utf-8"

In [None]:
# Open every shapefile again, this time with the encoding detected from its
# .dbf file. Each file gets its own try/except so that one unreadable file is
# reported without skipping the files that come after it.
# `path` keeps the last path tried; the inspection cell further down reuses it.
for folder, shp_files in (
    ("./geospatial_data/Senegal/", shp_senegal),
    ("./geospatial_data/Ecuador/", shp_ecuador),
    ("./geospatial_data/Guyana/", shp_guyana),
):
    for shp in shp_files:
        path = folder + shp
        try:
            shapefile.Reader(path, encoding=detect_shp_encoding(path))
        except Exception as error:
            # `Exception` (not a bare except) lets Ctrl-C still interrupt the cell.
            print(shp, "->", repr(error))

> The only load that fails is on what seems to be a faulty shapefile (its attribute table is empty).

> pyshp represents a shapefile in this form:

In [None]:
# Inspect what pyshp exposes for one shapefile: the reader summary, the shape
# objects and the attribute records, each separated by a blank line.
# `path` is not set in this cell; it is whatever an earlier cell assigned last.
geofile = shapefile.Reader(path, encoding=detect_shp_encoding(path))
print(geofile, end="\n\n")
print(geofile.shapes(), end="\n\n")
print(geofile.records())

shapefile Reader
    9 shapes (type 'POLYGON')
    9 records (6 fields)

Shapes: [<shapefile.Shape object at 0x7f57f18e16a0>, <shapefile.Shape object at 0x7f57f0d284f0>, <shapefile.Shape object at 0x7f57f0d9bd90>, <shapefile.Shape object at 0x7f57f0d9b280>, <shapefile.Shape object at 0x7f57f0d9beb0>, <shapefile.Shape object at 0x7f57f0d9b100>, <shapefile.Shape object at 0x7f57f0d9b7f0>, <shapefile.Shape object at 0x7f57f0d9b490>, <shapefile.Shape object at 0x7f57f0d9b400>]

[Record #0: [0, 'Zone brésilienne et de métissage', 'Habitations', 'Brésilien+Amérindien', 'Forêt+Habitations'], Record #1: [0, 'Zone à relief -  Zone des teko', 'Habitations', 'Teko', 'Forêt+Habitations'], Record #2: [0, 'Décharge brésilienne', 'Décharge', 'Brésilien', 'Berge'], Record #3: [0, "Zone pas peuplée, présence d'abattis", 'Forêt+Abattis', 'Inhabité', 'Forêt+Abattis'], Record #4: [0, 'Zone wayapi', 'Habitations', 'Wayapi', 'Forêt+Habitations'], Record #5: [0, 'Baignade en été', 'Baignade', 'Inhabité', 'Be

In [None]:
#export
def read_file_shp(path):
    """Load an ESRI shapefile into a pandas DataFrame with a ``geometry`` column.

    The attribute table becomes the regular columns of the frame; each shape
    is converted to a shapely geometry and stored in ``geometry``.

    Parameters
    ----------
    path : str
        Path to the ``.shp`` file.

    Returns
    -------
    pandas.DataFrame
        One row per shape whose geometry has a strictly positive area.
        Single polygons are wrapped into a ``MultiPolygon``; geometries that
        already are ``MultiPolygon`` are kept as they are. Rows whose
        geometry has no area are dropped. The index is not reset.

    Raises
    ------
    Exception
        Whatever ``shapefile.Reader`` raised on the utf-8 retry when the file
        can be opened neither with the detected encoding nor with utf-8.
    """
    charenc = detect_shp_encoding(path)
    try:
        sf = shapefile.Reader(path, encoding=charenc)
    except Exception:
        # The detected encoding may be wrong: retry once with utf-8.
        try:
            sf = shapefile.Reader(path, encoding="utf-8")
        except Exception:
            print("Ce fichier est illisible")
            print(path)
            # Re-raise the real error; carrying on would only fail a few lines
            # below with an unrelated UnboundLocalError on `sf`.
            raise

    # The first entry of sf.fields is skipped: pyshp lists its DeletionFlag
    # pseudo-field there, which has no matching value in the records.
    col = [field[0] for field in sf.fields[1:]]
    shp = pd.DataFrame(data=sf.records(), columns=col)

    geometries = []
    # sf.shapes() is called once only; every call re-reads all the shapes.
    for raw_shape in sf.shapes():
        # Convert shapefile.Shape to a shapely geometry to ease the
        # normalisation to MultiPolygon.
        geom = shape(raw_shape)
        if geom.area > 0:
            if geom.geom_type != 'MultiPolygon':
                geom = MultiPolygon([geom])
            geometries.append(geom)
        else:
            # No surface (empty or non-polygon geometry): mark for removal.
            geometries.append(None)

    shp['geometry'] = geometries

    # `!= None` is evaluated element-wise by pandas and is True for every row,
    # so missing geometries have to be filtered with notna().
    return shp[shp['geometry'].notna()]

This function can load any ESRI shapefile given the path to it. The path must be the path to the .shp file.

In [None]:
# Smoke test: load every shapefile of every country with read_file_shp
# (Senegal, then Ecuador, then Guyana).
for folder, shp_files in (
    ("./geospatial_data/Senegal/", shp_senegal),
    ("./geospatial_data/Ecuador/", shp_ecuador),
    ("./geospatial_data/Guyana/", shp_guyana),
):
    for shp in shp_files:
        read_file_shp(folder + shp)

## Load geojsons