# Angepasster Code zur Verarbeitung des Copernicus Corine Landcover Datensatzes zu den CAMELS-DE Daten zur Landnutzung

In [None]:
import rasterio
import geopandas as gpd
import pandas as pd
from rasterstats import zonal_stats
from simpledbf import Dbf5
import numpy as np
from catchments import get_catchment_gdf
import os

Festlegen der Gebiets-ID

In [None]:
# Catchment ID to process: passed to get_catchment_gdf() further down and
# written to the output table as the gauge_id column.
ID = 3

Einlesen und Verarbeiten der Daten

In [None]:
# Open the CORINE land cover raster and read its class attribute table.
corine_raster_path = ".../input_data/U2018_CLC2018_V2020_20u1.tif"
corine_classes_path = ".../input_data/U2018_CLC2018_V2020_20u1.tif.vat.dbf"

print("Loading CORINE raster")
# The dataset handle stays open: its band data, CRS and transform are read
# by the cells below.
corine = rasterio.open(corine_raster_path)

print("Loading CORINE classes")
# The .vat.dbf table is converted to a DataFrame (one row per raster value).
dbf = Dbf5(corine_classes_path)
corine_classes = dbf.to_dataframe()

In [None]:
# Map every CORINE raster value to its main class (expected to be 1-5):
# the main class is the first character of the CODE_18 class code.
corine_classes['mainclass'] = corine_classes['CODE_18'].astype(str).str[:1].astype(int)
corine_mapping = dict(zip(corine_classes['Value'], corine_classes['mainclass']))

print(corine_mapping)

# List the values that actually occur in band 1 of the raster.
# ravel() is used instead of flatten(): flatten() always copies the whole
# raster, while ravel() returns a view when the array is contiguous. The
# resulting unique values (and their order) are the same.
data = corine.read(1)
unique_values = pd.unique(data.ravel())
print(unique_values)

In [None]:
# Reclassifying the raster
def reclassify_raster(raster, mapping, nodata_value=-128):
    """Translate the values of band 1 of ``raster`` through ``mapping``.

    Parameters
    ----------
    raster : object with a ``read(band)`` method returning a numpy array
        Only ``raster.read(1)`` is used.
    mapping : dict
        Source raster value -> replacement value.
    nodata_value : number, default -128
        Value that marks "no data" in the source band.

    Returns
    -------
    numpy.ndarray
        float32 array with the shape of the band. Cells whose source value
        is a key of ``mapping`` hold the mapped value, cells whose source
        value equals ``nodata_value`` are NaN (even if that value is also a
        key of ``mapping``), and every other cell keeps ``nodata_value``.
    """
    band = raster.read(1)

    # Start from an array filled with the nodata marker; only cells with a
    # mapped source value are overwritten below.
    result = np.full(band.shape, nodata_value, dtype=np.float32)

    for source_value in np.unique(band):
        if source_value not in mapping:
            continue
        hits = band == source_value
        result[hits] = mapping[source_value]

    # Source nodata cells become NaN; this runs last so it wins over the mapping.
    nodata_cells = band == nodata_value
    result[nodata_cells] = np.nan
    return result

# Reclassify the CORINE raster values to their main classes and show the result.
print("Reclassifying the raster to main classes ...")
reclassified_data = reclassify_raster(raster=corine, mapping=corine_mapping)
print(reclassified_data)

# Load the catchment(s) for the configured ID and reproject them to the
# coordinate reference system of the raster.
catchments = get_catchment_gdf(ID).to_crs(corine.crs)

In [None]:
# Per catchment feature, count the raster cells of each class value.
# The reclassified array carries no georeferencing of its own, so the
# transform of the source raster is passed along.
print("Extracting the raster data...")
stats = zonal_stats(
    catchments,
    reclassified_data,
    affine=corine.transform,
    categorical=True,
)

In [None]:
# Convert the stats to a DataFrame and calculate the percentage of each class.
# Every row holds the per-class pixel counts of one catchment feature; the
# percentages refer to all pixels counted for that feature.
stats_df = pd.DataFrame(stats).fillna(0)
stats_df = stats_df.div(stats_df.sum(axis=1), axis=0) * 100
stats_df = stats_df.round(2)

# Check the columns in catchments
print(catchments.columns)

# Check the columns in stats_df
print("Columns in stats_df:", stats_df.columns)
print("Number of columns in stats_df:", len(stats_df.columns))

# Define the main class names, ordered by main class code 1..5
main_class_names = ["artificial_surfaces", "agricultural_areas", "forests_and_seminatural_areas", "wetlands", "water_bodies"]

# Name every column by the class code it holds, NOT by its position: a
# catchment that lacks one class (e.g. no wetlands) would otherwise shift
# all following classes onto the wrong name.
main_class_by_code = dict(enumerate(main_class_names, start=1))
stats_df = stats_df.rename(columns=main_class_by_code)

# Make sure that all main class names are in the stats_df (0 if the class
# does not occur) and that the columns always have the same order, because
# the results are appended to an existing CSV without a header. Columns that
# are not one of the five main classes are dropped here.
stats_df = stats_df.reindex(columns=main_class_names, fill_value=0)

# Add the gauge_id to the stats_df
stats_df.insert(0, 'gauge_id', ID)

# Edit the column names to include the percentage
stats_df = stats_df.rename(columns={name: f"{name}_perc" for name in main_class_names})

Abspeichern der Daten

In [None]:
# Append the results to the attribute file. The header row is written only
# when the file does not exist yet, i.e. when this call creates it.
print("Saving the extracted data")
file_path = ".../output_data/camels_de/CAMELS_DE_landcover_attributes.csv"
stats_df.to_csv(
    file_path,
    mode='a',
    header=not os.path.exists(file_path),
    index=False,
)

Leere Zellen werden mit 0.00 gefüllt, damit die Datei nur gültige Werte enthält

In [None]:
# Read the complete attribute file back, replace every missing value with
# 0.00 and overwrite the file with the filled table.
df = pd.read_csv(file_path).fillna(0.00)
df.to_csv(file_path, index=False)

print("Empty cells filled and data saved.")