In [4]:
import os
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
from matplotlib.colors import ListedColormap

In [5]:
class DataProcessor:
    """A class to process and visualize geospatial data from a local GeoPackage file."""

    def __init__(self, geopackage_path, layer_name, output_dir):
        """Initialize the DataProcessor with the GeoPackage details and output directory."""
        self.geopackage_path = geopackage_path
        self.layer_name = layer_name
        self.output_dir = output_dir

    def read_data(self):
        """Reads the data from the GeoPackage file."""
        data = gpd.read_file(self.geopackage_path, layer=self.layer_name)
        return data

    def reclassify_data(self, data):
        """Reclassifies the raster data into broader land use categories."""
        display("Reclassifying data")
        if data is None or data.size == 0:
            raise ValueError("The data is empty or not correctly loaded.")
        reclassification_rules = {
            1: [211, 212, 213, 221, 222, 223, 241, 242, 243, 244],  # agriculture
            2: [111, 112],  # urban
            3: [121, 122, 123, 124, 131, 132, 133],  # industry
            4: [523],  # water_bodies
            5: [311, 312, 313],  # nature_forest
            6: [231]  # meadow
        }
        category_names = {
            1: 'agriculture',
            2: 'urban',
            3: 'industry',
            4: 'water_bodies',
            5: 'nature_forest',
            6: 'meadow'
        }
        reclassified = np.empty(data.shape, dtype=np.float32)
        for category, codes in reclassification_rules.items():
            for code in codes:
                reclassified[data == code] = category
        return reclassified, category_names

    def export_data(self, reclassified, category, category_names):
        """Exports the data of each land use type as a separate tiff file."""
        output_path = os.path.join(self.output_dir, f"{category_names[category]}.tif")
        display(f"Exporting data to {output_path}")
        values = reclassified == category
        with rasterio.open(output_path, 'w', driver='GTiff', height=values.shape[0],
                        width=values.shape[1], count=1, dtype='float32',
                        crs='EPSG:3857', transform=from_origin(0, 0, 1, 1)) as dst:
            dst.write(values.astype(rasterio.float32), 1)

    def visualize_data(self, reclassified, category_names):
        """Visualizes the reclassified data."""
        color_map = ListedColormap([
            'red',  # agriculture
            'blue',  # urban
            'darkgrey',  # industry
            'lightblue',  # water_bodies
            'green',  # nature_forest
            'yellow',  # meadow
            'white'  # other
        ])
        fig, ax = plt.subplots(1, 1)
        for category in np.unique(reclassified):
            if category > 0:
                ax.imshow((reclassified == category), cmap=color_map, alpha=0.5)
        plt.show()

    def process_data(self):
        """Main method to process the data."""
        try:
            # Check if the files already exist
            files_exist = all(os.path.exists(os.path.join(self.output_dir, f"{category}.tif")) for category in ['agriculture', 'urban', 'industry', 'water_bodies', 'nature_forest', 'meadow'])
            
            if not files_exist:
                data = self.read_data()
                reclassified, category_names = self.reclassify_data(data)
                for category in np.unique(reclassified):
                    if category in category_names:
                        self.export_data(reclassified, category, category_names)
                self.visualize_data(reclassified, category_names)
            else:
                print("Files already exist, skipping processing.")
        except Exception as e:
            print(f"Failed to process data: {e}")

In [6]:
if __name__ == "__main__":
    processor = DataProcessor(
        geopackage_path='path_to_your_geopackage_file.gpkg',
        output_dir='corine_reclassify_GTiff'
    )
    processor.process_data()

'Downloading data from https://image.discomap.eea.europa.eu/arcgis/services/Corine/CLC2018_WM/MapServer/WMSServer?service=WMS&request=GetCapabilities&version=1.3.0'

Failed to process data: The GeoTIFF data is not correctly georeferenced.
