In [82]:
import numpy as np
import geopandas as gpd
import rasterio
from pathlib import Path
from rasterio.features import rasterize
from concurrent.futures import ProcessPoolExecutor

In [83]:
class GeoSpatialDataProcessor:
    """
    A class to process and visualize geospatial data from a local GeoPackage file.
    """

    def __init__(self, geopackage_path: str, layer_name: str, output_dir: str, code_column: str):
        """
        Initialize the GeoSpatialDataProcessor with the GeoPackage details and output directory.
        """
        self.geopackage_path = Path(geopackage_path)
        self.layer_name = layer_name
        self.output_dir = Path(output_dir)
        self.code_column = code_column

        if not self.geopackage_path.exists():
            raise FileNotFoundError(f"GeoPackage path {geopackage_path} does not exist.")
        self.output_dir.mkdir(parents=True, exist_ok=True)

        self.land_use_code_mapping = {
            1: [211, 212, 213, 221, 222, 223, 241, 242, 243, 244],  # agriculture
            2: [111, 112],  # urban
            3: [121, 122, 123, 124, 131, 132, 133],  # industry
            4: [523],  # water_bodies
            5: [311, 312, 313],  # nature_forest
            6: [231]  # meadow
        }

        self.category_names = {
            1: 'agriculture',
            2: 'urban',
            3: 'industry',
            4: 'water_bodies',
            5: 'nature_forest',
            6: 'meadow'
        }

    def read_data(self) -> gpd.GeoDataFrame:
        """
        Reads the data from the GeoPackage file.
        """
        try:
            data = gpd.read_file(self.geopackage_path, layer=self.layer_name)
            if data.empty:
                raise ValueError("The data is empty.")
            print("First few rows of the data:")
            display(data.head())
            return data
        except Exception as e:
            print(f"An error occurred while reading the data: {e}")
            return None

    def reclassify_data(self, data):
        """
        Reclassifies the raster data into broader land use categories.
        """
        print("Reclassifying data")

        # Create a mapping from the original land use codes to the new categories
        reverse_mapping = {code: category for category, codes in self.land_use_code_mapping.items() for code in codes}

        # Create a new column for the reclassified codes
        data['reclassified'] = data[self.code_column].map(reverse_mapping).fillna(-1).astype(int)

        # Display the unique categories after reclassification
        unique, counts = np.unique(data['reclassified'], return_counts=True)
        print(f"Unique categories after reclassification: {dict(zip(unique, counts))}")

        return data['reclassified'], self.category_names

    def export_data(self, reclassified, data, category):
        """
        Exports the data of each land use type as a separate tiff file.
        """
        output_path = self.output_dir / f"{self.category_names[category]}.tif"
        if output_path.exists():
            print(f"File {output_path} already exists, skipping export.")
            return
        print(f"Exporting data to {output_path}")

        # Only include the geometries that match the current category
        shapes = [(geom, 1) for geom, value in zip(data.geometry, reclassified) if value == category]

        # TODO: Adjust the out_shape parameter based on your data
        rasterized = rasterize(shapes, out_shape=(1000, 1000))

        with rasterio.open(output_path, 'w', driver='GTiff', height=rasterized.shape[0],
                        width=rasterized.shape[1], count=1, dtype='float32',
                        crs='EPSG:3857') as dst:
            dst.write(rasterized.astype(rasterio.float32), 1)

    def process_data(self):
        """
        Main method to process the data.
        """
        data = self.read_data()
        if data is not None:
            reclassified, category_names = self.reclassify_data(data)
            categories = [category for category in np.unique(reclassified) if category in category_names]

            with ProcessPoolExecutor() as executor:
                executor.map(self.export_data, [(reclassified, data, category) for category in categories])

In [84]:
if __name__ == "__main__":
    processor = GeoSpatialDataProcessor(
        geopackage_path='corine_data_landcover/zuid-holland/Results/U2018_CLC2018_V2020_20u1.gpkg',
        layer_name='U2018_CLC2018_V2020_20u1',
        output_dir='corine_reclassify_GTiff',
        code_column='Code_18'
    )
    processor.process_data()

First few rows of the data:


Unnamed: 0,OBJECTID,Code_18,Remark,Area_Ha,Shape_Length,Shape_Area,geometry
0,1570275,112,,159.046461,6514.551351,1590465.0,"POLYGON ((468651.184 6745659.977, 468786.427 6..."
1,1570318,112,,37.40135,3300.250382,374013.5,"POLYGON ((477542.762 6749182.121, 477509.903 6..."
2,1570324,112,,62.038357,3990.132619,620383.6,"POLYGON ((455634.865 6747345.528, 455315.104 6..."
3,1570325,112,,52.610017,3237.477859,526100.2,"POLYGON ((464041.012 6749066.793, 464221.592 6..."
4,1570351,112,,174.041492,6552.864183,1740415.0,"POLYGON ((494883.960 6753496.606, 494864.230 6..."


Reclassifying data
Unique categories after reclassification: {-1: 2873}
