In [1]:
import os


This notebook prepares the data for the rural beauty model, by reprojecting, cropping and or resampling of raw datasets.

To do so, it loads paths from the rural_beauty/rural_beauty/config.py. 

It then either calls python functions from the same module or calls bash and R scripts from the scripts folder. 
To do the latter, variable are passed to the OS. 

In [1]:
%load_ext rpy2.ipython

# Beauty/Uniqueness/Diversity

Transform the beauty shapefiles to raster data.


In [3]:
# import paths from config.py
from rural_beauty.config import bild_vector_dir, bild_raster_dir # load absolute paths as defined in config.py

# pass them to the OS
os.environ['bild_vector_dir'] = str(bild_vector_dir)
os.environ['bild_raster_dir'] = str(bild_raster_dir)


This is the rural_beauty module


Take every .shp file in the -i directory and rasterizes it into the -o direcory and adjusts the naming. 


In [4]:
!Rscript ../scripts/rasterize_ratings.R -i $bild_vector_dir -o $bild_raster_dir

Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE
^C

Execution halted


In [None]:
import json
import rasterio
import numpy as np
from shapely.geometry import Point, mapping

# find the low valued points, as they are underrepresented 

from rural_beauty.config import beauty_raster, unique_raster, diverse_raster


def save_ugly(input_raster_path):

    output_points_path =  input_raster_path.with_name(f"lowvalue_{input_raster_path.stem}.geojson")
    # Open the raster and find points where data == 1
    with rasterio.open(input_raster_path) as src:
        data = src.read(1)
        transform = src.transform
        rows, cols = np.where(data == 1)
        coordinates = [rasterio.transform.xy(transform, row, col) for row, col in zip(rows, cols)]

    # Create GeoJSON Feature Collection
    features = []
    for x, y in coordinates:
        point = Point(x, y)
        features.append({
            "type": "Feature",
            "geometry": mapping(point),
            "properties": {}
        })

    geojson = {
        "type": "FeatureCollection",
        "features": features
    }

    with open(output_points_path, "w") as f:
        json.dump(geojson, f, indent=2)


save_ugly(beauty_raster)
save_ugly(unique_raster)
save_ugly(diverse_raster)



This is the rural_beauty module


# UK Scenic data

In [3]:
from rural_beauty.config import UK_scenic_raw, UK_scenic_points

import pandas as pd
import geopandas as gpd
from shapely import Point

# Load the TSV file into a DataFrame
df = pd.read_csv(UK_scenic_raw, sep='\t')

# Create a geometry column using Lat and Lon for points
df['geometry'] = df.apply(lambda row: Point(row['Lon'], row['Lat']), axis=1)

# Convert the DataFrame to a GeoDataFrame
gdf = gpd.GeoDataFrame(df, geometry='geometry')

# Set a Coordinate Reference System (CRS) if needed (e.g., WGS84)
gdf.set_crs("EPSG:4326", inplace=True)

# transform it to 4346 so we can use meters based grids. 
gdf.to_crs("EPSG:4647", inplace=True)

# Display the GeoDataFrame
print(gdf.head())

   ID      Lat      Lon  Average  Variance              Votes  \
0   1  51.7026 -2.20985   4.1111    1.8765  4,5,3,5,1,4,4,5,6   
1   2  51.7026 -2.19538   4.0000    0.5000    4,4,3,5,4,3,5,4   
2   3  51.7116 -2.18094   4.2222    2.1728  5,4,6,5,3,4,1,4,6   
3   4  53.3110 -2.51786   3.8000    7.7600          2,4,1,9,3   
4   5  53.3021 -2.50274   4.1667    3.4722        8,4,2,4,3,4   

                          Geograph URI                          geometry  
0   http://www.geograph.org.uk/photo/7  POINT (31726519.445 5787681.796)  
1   http://www.geograph.org.uk/photo/8  POINT (31727514.781 5787527.082)  
2  http://www.geograph.org.uk/photo/11  POINT (31728662.641 5788369.252)  
3  http://www.geograph.org.uk/photo/20  POINT (31734104.502 5968958.066)  
4  http://www.geograph.org.uk/photo/22  POINT (31734945.288 5967810.331)  


In [4]:
# write to disc
gdf.to_file(UK_scenic_points)

In [6]:
import numpy as np
import pandas as pd
import geopandas as gpd
from scipy.spatial import cKDTree
from rasterio.transform import from_origin
import rasterio
from rural_beauty.config import UK_scenic_raster, NUTS_UK

# Define IDW function
def idw_interpolation(points, values, xi, yi, power=2):
    tree = cKDTree(points)
    dist, idx = tree.query(np.array([xi.ravel(), yi.ravel()]).T, k=5)  # Use the 5 nearest neighbors
    weights = 1 / dist**power
    weights[dist == 0] = 0  # Avoid division by zero for exact points
    z = np.sum(weights * values[idx], axis=1) / np.sum(weights, axis=1)
    return z.reshape(xi.shape)

# Load point data with values
points = np.array([(x, y) for x, y in zip(gdf.geometry.x, gdf.geometry.y)])
values = gdf['Average'].to_numpy()  # Replace with your actual column

# Define the raster grid with desired extent and resolution
minx, miny, maxx, maxy = gdf.total_bounds
resolution = 1000  # 1 km resolution
x_coords = np.arange(minx, maxx, resolution)
y_coords = np.arange(miny, maxy, resolution)
xi, yi = np.meshgrid(x_coords, y_coords)

# Interpolate values using IDW
interpolated_values = idw_interpolation(points, values, xi, yi)

# mask values outside the UK
from rasterio.features import geometry_mask

gdf_boundary = gpd.read_file(NUTS_UK)
# Create the mask using geometries instead of shapes
transform = from_origin(minx, maxy, resolution, resolution)
mask = geometry_mask(
    geometries=gdf_boundary.geometry,  # Note: 'geometries' is used here instead of 'shapes'
    out_shape=xi.shape,
    transform=transform,
    invert=True  # Set cells inside geometries to True, outside to False
)


flipped_values = np.flipud(interpolated_values)

nodata_value = -99
masked_values = np.where(mask, flipped_values, nodata_value)



# Write the interpolated raster to a GeoTIFF file
transform = from_origin(minx, maxy, resolution, resolution)
with rasterio.open(
    UK_scenic_raster,
    "w",
    driver="GTiff",
    height=interpolated_values.shape[0],
    width=interpolated_values.shape[1],
    count=1,
    nodata = -99,
    dtype='float32',
    crs=gdf.crs,
    transform=transform
) as dst:
    dst.write(masked_values.astype('float32'), 1)


# DEM

Take the digital elevation model and calculate the range of elevation in the 1x1 km grid. 

In [2]:
# import paths from config.py
from rural_beauty.config import DEM_EU, DEM_EU_range, DEM_EU_range_scaled

# pass them to the OS
os.environ['DEM_EU'] = str(DEM_EU)
os.environ['DEM_range'] = str(DEM_EU_range)
os.environ['DEM_range_scaled'] = str(DEM_EU_range_scaled)

This is the rural_beauty module


In [9]:
!../scripts/process_dem_data_minmax_EU.sh $DEM_EU $DEM_range $DEM_range_scaled

/h/u145/hofer/MyDocuments/Granular/beauty/data/processed/dem/EU_DEM_EU_max.tif already exists. Skipping generation.
/h/u145/hofer/MyDocuments/Granular/beauty/data/processed/dem/EU_DEM_EU_min.tif already exists. Skipping generation.
/h/u145/hofer/MyDocuments/Granular/beauty/data/processed/dem/DEM_EU_range.tif already exists. Skipping generation.
Maximum Range: 1869.229
Generating scaled (0,1) output to /h/u145/hofer/MyDocuments/Granular/beauty/data/cleaned/dem/DEM_EU_range_scaled.tif
Input file size is 6566, 4060
0...10...20...30...40...50...60...70...80...90...100 - done.
Process completed. The final output is normalized between 0 and 1 at /h/u145/hofer/MyDocuments/Granular/beauty/data/cleaned/dem/DEM_EU_range_scaled.tif


# Process the OSM data. 

We downloaded the whole osm data from geofabrik on July 3rd from:

https://download.geofabrik.de/europe-latest.osm.pbf

https://download.geofabrik.de/germany-latest.osm.pbf


In [56]:
# import paths from config.py
from rural_beauty.config import OSM_full_EU
from rural_beauty.config import powerlines_EU_vector, powerlines_EU_raster, powerlines_EU_raster_scaled
from rural_beauty.config import streets_EU_vector, streets_EU_raster, streets_EU_raster_scaled
from rural_beauty.config import windpower_EU_vector, windpower_EU_raster, windpower_EU_raster_scaled

import os
# pass them to the OS
os.environ['powerlines_vector'] = str(powerlines_EU_vector)
os.environ['powerlines_raster'] = str(powerlines_EU_raster)
os.environ['powerlines_raster_scaled'] = str(powerlines_EU_raster_scaled)

os.environ['streets_vector'] = str(streets_EU_vector)
os.environ['streets_raster'] = str(streets_EU_raster)
os.environ['streets_raster_scaled'] = str(streets_EU_raster_scaled)

os.environ['windpower_vector'] = str(windpower_EU_vector)
os.environ['windpower_raster'] = str(windpower_EU_raster)
os.environ['windpower_raster_scaled'] = str(windpower_EU_raster_scaled)

Subset the big .pbf file to windpower, streets, and powerlines

In [None]:
!../scripts/osm_extract_streets.sh    $OSM_full_EU $streets_vector
!../scripts/osm_extract_powerlines.sh $OSM_full_EU $powerlines_vector 
!../scripts/osm_extract_windpower.sh  $OSM_full_EU $windpower_vector

In [58]:
!echo $windpower_EU_raster_scaled

/h/u145/hofer/MyDocuments/Granular/beauty/data/cleaned/osm/freq_scaled_windpowerplants_EU_4647.tif


In [None]:
!Rscript ../scripts/rasterize_OSM_line_geom_EU.R $powerlines_vector $powerlines_raster $powerlines_raster_scaled
!Rscript ../scripts/rasterize_OSM_line_geom_EU.R $streets_vector    $streets_raster    $streets_raster_scaled


Usage: ../scripts/rasterize_OSM_point_geom_EU.sh <input_vector_data> <amounts_raster> <output_raster>
../scripts/rasterize_OSM_point_geom_EU.sh
/h/u145/hofer/MyDocuments/Granular/beauty/data/processed/osm/windpowerplants_EU_4647.geojson
/h/u145/hofer/MyDocuments/Granular/beauty/data/processed/osm/freq_windpowerplants_EU_4647.tif



In [69]:
# This lien below didn't work. The last argument never gets correctly passed to the function. Unsure why. 
# !../scripts/rasterize_OSM_point_geom_EU.sh $windpower_vector $windpower_raster $windpower_EU_raster_scaled

command = f"../scripts/rasterize_OSM_point_geom_EU.sh {windpower_EU_vector} {windpower_EU_raster} {windpower_EU_raster_scaled}"

# Execute the command
os.system(command)

/h/u145/hofer/MyDocuments/Granular/beauty/data/processed/osm/freq_windpowerplants_EU_4647.tif already exists. Skipping generation.
Maximum Range: 71.000
Scaling OSM raster to 0,1
Input file size is 6893, 5218
0...10...20...30...40...50...60...70...80...90...100 - done.


0

# Corine Landcover (CLC)

In [1]:
# import paths from config.py
from rural_beauty.config import CLC_EU, CLC_boolean_layers_dir, CLC_coverage_EU_dir
from rural_beauty import split_CLC_layers

split_CLC_layers.main(CLC_EU, CLC_boolean_layers_dir)

os.listdir(CLC_boolean_layers_dir)


This is the rural_beauty module


['code_acker.tif',
 'code_heide.tif',
 'code_natgru.tif',
 'code_noveg.tif',
 'code_obst.tif',
 'code_seemee.tif',
 'code_spfr.tif',
 'code_stoer.tif',
 'code_wald.tif']

In [2]:
# pass the coverage output folder and the boolean layers folder to the OS for the next step
os.environ['CLC_boolean_layers_dir'] = str(CLC_boolean_layers_dir)
os.environ['CLC_coverage_dir'] = str(CLC_coverage_EU_dir)


In [4]:
!../scripts/resample_CLC_DE.sh $CLC_boolean_layers_dir $CLC_coverage_EU_dir

File /media/sf_Granular/beauty/data/cleaned/clc/layer_coverage_DE/code_acker.tif already exists, skipping...
File /media/sf_Granular/beauty/data/cleaned/clc/layer_coverage_DE/code_heide.tif already exists, skipping...
File /media/sf_Granular/beauty/data/cleaned/clc/layer_coverage_DE/code_natgru.tif already exists, skipping...
File /media/sf_Granular/beauty/data/cleaned/clc/layer_coverage_DE/code_noveg.tif already exists, skipping...
File /media/sf_Granular/beauty/data/cleaned/clc/layer_coverage_DE/code_obst.tif already exists, skipping...
File /media/sf_Granular/beauty/data/cleaned/clc/layer_coverage_DE/code_seemee.tif already exists, skipping...
File /media/sf_Granular/beauty/data/cleaned/clc/layer_coverage_DE/code_spfr.tif already exists, skipping...
File /media/sf_Granular/beauty/data/cleaned/clc/layer_coverage_DE/code_stoer.tif already exists, skipping...
File /media/sf_Granular/beauty/data/cleaned/clc/layer_coverage_DE/code_wald.tif already exists, skipping...
Resampling complete.

# Hemerobieindex


In [3]:
from rural_beauty.config import heme2012_DE, heme2012_DE_repojected

os.environ['heme2012_DE'] = str(heme2012_DE)
os.environ['heme2012_DE_repojected'] = str(heme2012_DE_repojected)

In [6]:
%%bash 
echo "Input: $heme2012_DE"
echo "Output: $heme2012_DE_repojected"

../scripts/reproject_heme.sh $heme2012_DE $heme2012_DE_reprojected

Input: /media/sf_Granular/beauty/data/raw/hemerobie_IOER/heme2012.tif
Output: /media/sf_Granular/beauty/data/cleaned/hemerobie_IOER/heme2012.tif


Creating output file that is 642P x 867L.
Processing /media/sf_Granular/beauty/data/raw/hemerobie_IOER/heme2012.tif [1/1] : 0Using internal nodata values (e.g. -9998) for image /media/sf_Granular/beauty/data/raw/hemerobie_IOER/heme2012.tif.
Copying nodata values from source /media/sf_Granular/beauty/data/raw/hemerobie_IOER/heme2012.tif to destination /media/sf_Granular/beauty/data/cleaned/hemerobie_IOER/heme2012.tif.
...10...20...30...40...50...60...70...80...90...100 - done.


# Protected Areas

In [None]:
from rural_beauty.config import protected0, protected1, protected2, protected_EU, protected_raster, protected_raster_scaled

os.environ['protected0'] = str(protected0)
os.environ['protected1'] = str(protected1)
os.environ['protected2'] = str(protected2)

# intermediary stack of EU based polygons of protected areas
os.environ['protected_EU'] = str(protected_EU)

# raster of 'protected area within px'
os.environ['protected_raster'] = str(protected_raster)

# scaled to 0/1
os.environ['protected_raster_scaled'] = str(protected_raster_scaled)

This is the rural_beauty module


In [None]:
%%bash 
echo "Output: $protected_EU"

../scripts/WDPA_subset_reproject.sh $protected0 $protected1 $protected2 $protected_EU

In [None]:
%%bash
echo "Rasterizing... Output: $protected_raster"

Rscript ../scripts/rasterize_protected_poly_geom_EU.R $protected_EU $protected_raster $protected_raster_scaled

Rasterizing... Output: 


terra 1.7.83
Error: args: <input_vector> <intermediary_area_raster> <scaled_area_raster>
Execution halted


CalledProcessError: Command 'b'echo "Rasterizing... Output: $protected_raster"\n\nRscript ../scripts/rasterize_protected_poly_geom_EU.R $protected_EU $protected_raster\n'' returned non-zero exit status 1.