In [1]:
from osgeo import ogr
import json
import pyproj
import pandas as pd
from shapely.geometry import shape
from shapely.geometry.polygon import Polygon
from shapely.ops import transform

Constantes

In [2]:
# Input GeoJSON-style file with the landslide points (read with json.load downstream).
PATH = "./data-insivumeh/data_train.json"

# Destination CSV for the generated table.
RES_DATA = "./results/data_insivumeh.csv"

# Shapefiles of the thematic map layers; each is opened with OGR downstream.
# The trailing labels are read off the directory names only.
MAPS_PATHS = [
    "./data-insivumeh/MapasGuatemala/Fisiografia/FISIOGRAFIAFINAL_GTM.shp",  # physiography
    "./data-insivumeh/MapasGuatemala/Geología/GEOLOGIA_GTM.shp",  # geology
    "./data-insivumeh/MapasGuatemala/Suelo Nivel Reconocimiento/SSUELOS_GTM.shp",  # soil, reconnaissance level
    "./data-insivumeh/MapasGuatemala/Taxonomia Suelos/Shapefile Taxonomia a nivel de Orden Nacional/TAXONOMIA_ORDENES_GTM.shp",  # soil taxonomy orders
    "./data-insivumeh/MapasGuatemala/Uso de Suelo/USDA_GTM.shp",  # land use
]

In [3]:
class DataGenerator:
    """Enrich landslide points with attributes of the map polygons that contain them.

    The points come from a GeoJSON-like file (a dict with a ``features`` list whose
    items carry ``properties`` and ``geometry``). Each map is a vector file opened
    with OGR; its polygons are reprojected to EPSG:4326 when the layer CRS differs.
    The point geometries are used as-is, so they are assumed to already be
    EPSG:4326 lon/lat -- TODO confirm against the data source.

    Attributes:
        data: the parsed points file, with geometries converted to shapely objects.
        maps: layer name -> list of feature dicts (``properties`` + shapely ``geometry``).
        count: layer name -> number of points that received no attributes from that
            layer during the most recent ``getData`` call.
    """

    # Map attribute name -> output column name. Only attributes listed here are
    # copied into the result.
    # NOTE(review): 'soil_rec_c__tex_sub' has a double underscore; it is kept as-is
    # because it is an emitted column name that downstream consumers may rely on.
    properties = {
        'GEO': 'rock_geo',
        'EDADGEOGP': 'rock_period',
        'NOMR': 'fis_nomr',
        'TIPO_ROCA': 'rock_type',
        'SIMBOLO': 'soil_rec_symbol',
        'MAT_ORIG': 'soil_rec_mat_origin',
        'CLAS_DRE': 'soil_rec_sew_class',
        'TEX_SUP': 'soil_rec_tex_sup',
        'C_TEXSUP': 'soil_rec_c_tex_sup',
        'TEX_SUB': 'soil_rec_tex_sub',
        'C_TEXSUB': 'soil_rec_c__tex_sub',
        'RIES_ERO': 'soil_rec_risk_eros',
        'ORDEN': 'soil_tax',
        'CAPACIDAD': 'soil_use'
    }

    def __init__(self, points_directory, maps_directories):
        """Load the points file and every map layer.

        Args:
            points_directory: path of the JSON file holding the point features.
            maps_directories: iterable of paths to vector files readable by OGR.

        Raises:
            OSError: if OGR cannot open one of the map files.
            ValueError: if a map layer carries no spatial reference.
        """
        with open(points_directory, 'r', encoding="utf8") as f:
            self.data: dict = json.load(f)

        for feature in self.data['features']:
            feature['properties']['landslide_id'] = int(feature['properties']['landslide_id'])
            feature['geometry'] = shape(feature['geometry'])

        self.maps = dict()
        self.count = dict()
        target = pyproj.CRS('EPSG:4326')
        for directory in maps_directories:
            # Without ogr.UseExceptions(), ogr.Open signals failure by returning None.
            datasource = ogr.Open(directory)
            if datasource is None:
                raise OSError('Could not open map file: {}'.format(directory))
            layer = datasource.GetLayer()

            spatial_ref = layer.GetSpatialRef()
            if spatial_ref is None:
                raise ValueError('Map layer has no spatial reference: {}'.format(directory))
            # Build the CRS from the full WKT definition rather than from its display
            # name: a name lookup is heuristic and may fail or pick a different CRS.
            source = pyproj.CRS.from_wkt(spatial_ref.ExportToWkt())
            project = (
                pyproj.Transformer.from_crs(source, target, always_xy=True).transform
                if source != target else None
            )

            features = []
            # Iterate the layer instead of GetFeature(i): feature ids are not
            # guaranteed to be the contiguous range 0..count-1 for every driver.
            for ogr_feature in layer:
                feature = json.loads(ogr_feature.ExportToJson())
                if feature['geometry'] is None:
                    # A feature without geometry can never contain a point.
                    continue
                geometry = shape(feature['geometry'])
                feature['geometry'] = geometry if project is None else transform(project, geometry)
                features.append(feature)

            self.maps[layer.GetName()] = features
            self.count[layer.GetName()] = 0

    def __getitem__(self, index: int):
        """Return ``(properties, geometry)`` of the point feature at ``index``."""
        feature = self.data['features'][index]
        return (feature['properties'], feature['geometry'])

    def __getmapproperties__(self, map_name: str, point):
        """Return the renamed attributes of the first polygon of ``map_name`` containing ``point``.

        Only attributes listed in ``DataGenerator.properties`` are returned, keyed by
        their output column name. Returns ``None`` when no polygon contains the point.
        Containment uses shapely's ``contains``, so a point lying exactly on a polygon
        boundary is not matched.
        """
        for feature in self.maps[map_name]:
            polygon: Polygon = feature['geometry']
            if polygon.contains(point):
                return {
                    self.properties[k]: v
                    for k, v in feature['properties'].items()
                    if k in self.properties
                }
        return None

    def getData(self):
        """Build one row per point with its own properties plus the matched map attributes.

        Prints, per map, how many points received no attributes from it, and leaves
        those numbers in ``self.count``.

        Returns:
            pandas.DataFrame indexed by ``landslide_id`` (which also remains a column).
        """
        # Reset the counters so repeated calls report per-call numbers instead of a
        # running total accumulated across calls.
        self.count = dict.fromkeys(self.maps, 0)

        records = []
        index = []
        for i in range(len(self.data['features'])):
            point_properties, point = self[i]
            record = dict()

            # Point information
            if point_properties:
                record.update(point_properties)

            # Maps information
            for map_name in self.maps:
                properties = self.__getmapproperties__(map_name, point)
                if properties:
                    record.update(properties)
                else:
                    self.count[map_name] += 1

            records.append(record)
            index.append(point_properties['landslide_id'])

        # Build the frame once; concatenating row by row inside the loop is quadratic.
        output = pd.DataFrame(records, index=index)

        for map_name in self.maps:
            print('Cantidad de puntos con propiedades del mapa "{}" faltantes: {}'.format(map_name, self.count[map_name]))
        return output

In [4]:
# Load the landslide points from PATH and every map layer listed in MAPS_PATHS.
dg = DataGenerator(PATH, MAPS_PATHS)
# Build the per-point table; this also prints, for each map, how many points
# received no attributes from it.
data = dg.getData()

Cantidad de puntos con propiedades del mapa "FISIOGRAFIAFINAL_GTM" faltantes: 22
Cantidad de puntos con propiedades del mapa "GEOLOGIA_GTM" faltantes: 22
Cantidad de puntos con propiedades del mapa "SSUELOS_GTM" faltantes: 22
Cantidad de puntos con propiedades del mapa "TAXONOMIA_ORDENES_GTM" faltantes: 22
Cantidad de puntos con propiedades del mapa "USDA_GTM" faltantes: 22


In [5]:
# Write the generated table to RES_DATA as CSV (the DataFrame index is written too,
# since to_csv is called with its defaults).
# NOTE(review): presumably the ./results directory must already exist -- confirm.
data.to_csv(RES_DATA)  