In [3]:
import os,sys
os.environ['USE_PYGEOS'] = '0'
import geopandas as gpd
import shapely
import pandas as pd
import xarray as xr
import dask_geopandas
from tqdm import tqdm
sys.path.append('c://projects//osm-flex/src') 

import osm_flex.download as dl
import osm_flex.extract as ex
import osm_flex.config
import osm_flex.clip as cp

import rasterio
from rasterio.enums import Resampling

from rasterstats import point_query

In [4]:
# Root folder of the CEED project data; every other location is derived from it.
data_path = 'c://data//CEED'

# Project inputs live in a sub-folder of the root.
input_data = os.path.join(data_path, 'input_data')

# The building (EUBUCCO) and infrastructure (CIS_EU) folders sit next to the
# root folder, hence the '..' step.
bucco_path, osm_path = (
    os.path.join(data_path, '..', sibling) for sibling in ('EUBUCCO', 'CIS_EU')
)

In [5]:
# File name of the Europe mask raster, resolved relative to input_data.
mask_filename = 'mask_europe.tif'
# Open the mask with xarray; it is later handed to raster_to_vector, which
# reads its `band_data` variable.
mask_merit = xr.open_dataset(os.path.join(input_data,mask_filename))

In [14]:
def resample_raster(input_file, output_file, resample_factor,resampling_method='bilinear'):
    """
    Resample the first band of a raster and write it to a new GeoTIFF.

    The output keeps the coordinate reference system and nodata value of the
    input, and its affine transform is rescaled to the new grid size.

    Args:
        input_file (str): Path to the input raster file.
        output_file (str): Path to the output resampled raster file.
        resample_factor (float): Multiplier applied to the raster height and
            width (0.1 yields a grid with a tenth of the rows and columns).
        resampling_method (str): Name of a ``rasterio.enums.Resampling``
            member (for example 'bilinear' or 'nearest'). Defaults to
            'bilinear'.

    Returns:
        None

    Raises:
        ValueError: If ``resample_factor`` would produce a grid with no rows
            or no columns.
        KeyError: If ``resampling_method`` is not a ``Resampling`` member name.
    """

    with rasterio.open(input_file) as dataset:
        new_height = int(dataset.height * resample_factor)
        new_width = int(dataset.width * resample_factor)
        if new_height < 1 or new_width < 1:
            raise ValueError(
                'resample_factor {} gives an empty {}x{} output grid'.format(
                    resample_factor, new_height, new_width
                )
            )

        # Only band 1 is written below, so only band 1 is read and resampled.
        data = dataset.read(
            indexes=[1],
            out_shape=(1, new_height, new_width),
            resampling=Resampling[resampling_method]
        )

        # Scale the pixel size so the resampled grid covers the same extent
        transform = dataset.transform * dataset.transform.scale(
            (dataset.width / data.shape[-1]),
            (dataset.height / data.shape[-2])
        )

        # Capture the georeferencing metadata while the dataset is still open,
        # otherwise the output file is written without CRS / nodata.
        crs = dataset.crs
        nodata = dataset.nodata

    # Extract the (single) resampled band as a 2-D array
    Z = data[0, :, :]

    with rasterio.open(output_file,
        'w',
        driver='GTiff',
        height=Z.shape[0],
        width=Z.shape[1],
        count=1,
        dtype=data.dtype,
        crs=crs,
        nodata=nodata,
        compress='LZW',
        transform=transform,
        ) as dst:
        # Write the resampled data to the output file
        dst.write(Z, 1)
                
def raster_to_vector(xr_raster):
    """
    Convert the cells of a mask raster that equal 1 into square polygons.

    Each selected cell centre is buffered by half the grid spacing with
    square caps, which gives one square polygon per raster cell.

    Args:
        xr_raster (xarray.Dataset): Raster opened with ``xr.open_dataset``.
            It must provide a ``band_data`` variable with ``x`` and ``y``
            coordinates; ``band`` and ``spatial_ref`` must also be present
            because they are dropped from the result.

    Returns:
        gpd.GeoDataFrame: One row per cell where ``band_data == 1``. No CRS
        is set on the result; the caller is expected to assign it.

    Raises:
        IndexError: If the raster has fewer than two ``x`` coordinates, since
            the cell size is taken from the first two.
    """

    # Convert xarray raster to pandas DataFrame
    df = xr_raster.to_dataframe()

    # Keep only the cells flagged with 1 and turn the index levels into columns
    cells = df.loc[df.band_data == 1].reset_index()

    # Cell size from the first two x coordinates (assumes a regular grid with
    # square cells). abs() guards against a descending axis: a negative buffer
    # distance around a point would produce empty polygons.
    resolution = abs(xr_raster.x[1].values - xr_raster.x[0].values)

    # Build the cell centres and buffer them into squares in one vectorised
    # pass on plain arrays, without storing the intermediate points.
    centres = shapely.points(cells.x.values, cells.y.values)
    squares = shapely.buffer(centres, distance=resolution/2, cap_style='square')

    # Remove the coordinate/bookkeeping columns and attach the polygons
    cells = cells.drop(['x', 'y', 'band', 'spatial_ref'], axis=1)
    cells['geometry'] = squares

    # Convert the DataFrame to a GeoDataFrame
    return gpd.GeoDataFrame(cells)

In [15]:
%%time
input_file = os.path.join(input_data,mask_filename)
output_file = os.path.join(input_data,'mask_europe_00083.tif')
resample_factor = 0.1

# Resampling the full mask takes about a minute, so an existing result is
# reused (same skip-if-present convention as the export loops further down).
# Delete the output file to force it to be regenerated.
if not os.path.exists(output_file):
    resample_raster(input_file,output_file,resample_factor)

CPU times: total: 1min 7s
Wall time: 1min 7s


In [16]:
%%time
# output_file is already a complete path (it was built from input_data when
# the resampled raster was written), so it is opened as-is rather than being
# joined to input_data a second time.
mask_eu = xr.open_dataset(output_file)

# NOTE(review): the polygons are built from the full-resolution mask_merit;
# the resampled mask_eu loaded above is not used in this cell - confirm that
# this is intended.
vector_mask = raster_to_vector(mask_merit)

# raster_to_vector returns geometries without a CRS: declare them as
# EPSG:4326, then reproject to EPSG:3035.
vector_mask = vector_mask.set_crs(4326).to_crs(3035)

In [61]:
# Spatial index over the mask polygons; the export loops query it with
# predicate='intersects' to find features that touch a mask cell.
tree = shapely.STRtree(vector_mask.geometry.values)

In [74]:
%%time

# Country codes are the part of each parquet file name before the first '_'
country_codes = [x.split('_')[0] for x in os.listdir(bucco_path) if x.endswith('.parquet')]

# Make sure the output folder exists before the first file is written
out_dir = os.path.join(input_data,'..','coastal_bucco_exact')
os.makedirs(os.path.normpath(out_dir), exist_ok=True)

# NOTE(review): `tree` comes from an earlier cell. The query is only
# meaningful if the tree geometries and the building geometries share a CRS -
# confirm before re-running after the tree has been rebuilt elsewhere.
for country_code in tqdm(country_codes,total=len(country_codes)):
    file_name = '{}_bucco.parquet'.format(country_code)
    file_path = os.path.join(bucco_path,file_name)
    out_path = os.path.join(out_dir,file_name)

    # Countries that were already exported are skipped
    if os.path.exists(out_path):
        continue

    bucco = gpd.read_parquet(file_path)

    # query() returns one (building index, mask cell index) pair per
    # intersection, so a building that touches several mask cells shows up
    # several times. Keep each building once.
    coastal_overlay = tree.query(bucco.geometry.values,predicate='intersects')
    coastal_rows = pd.unique(coastal_overlay[0])

    coastal_bucco = bucco.iloc[coastal_rows].reset_index(drop=True)
    coastal_bucco.to_parquet(out_path)

100%|███████████████████████████████████████████████████████████████████████████████| 19/19 [1:24:38<00:00, 267.30s/it]

CPU times: total: 1h 2min
Wall time: 1h 24min 38s





In [76]:
%%time

# Country codes are the part of each parquet file name before the first '_'
country_codes = [x.split('_')[0] for x in os.listdir(osm_path) if x.endswith('.parquet')]

# Rebuild the mask polygons and the spatial index in EPSG:4326 (no
# reprojection here). This overwrites the notebook-level `vector_mask` and
# `tree` used by other cells.
# NOTE(review): presumably the OSM parquet geometries are in EPSG:4326 - confirm.
vector_mask = raster_to_vector(mask_merit)
vector_mask = vector_mask.set_crs(4326)
tree = shapely.STRtree(vector_mask.geometry.values)

# Make sure the output folder exists before the first file is written
out_dir = os.path.join(input_data,'..','coastal_osm_exact')
os.makedirs(os.path.normpath(out_dir), exist_ok=True)

for country_code in tqdm(country_codes,total=len(country_codes)):
    file_name = '{}_cis.parquet'.format(country_code)
    file_path = os.path.join(osm_path,file_name)
    out_path = os.path.join(out_dir,file_name)

    # Countries that were already exported are skipped
    if os.path.exists(out_path):
        continue

    osm = gpd.read_parquet(file_path)

    # Filter each first-index-level group separately and collect the results
    # under the group key.
    collect_all = {}
    for infra_type, infra_rows in osm.groupby(level=0):
        uniq_infra = infra_rows.reset_index(drop=True)

        # query() returns one (feature index, mask cell index) pair per
        # intersection; de-duplicate so a feature touching several mask cells
        # is kept only once.
        coastal_overlay = tree.query(uniq_infra.geometry.values,predicate='intersects')
        coastal_rows = pd.unique(coastal_overlay[0])
        collect_all[infra_type] = uniq_infra.iloc[coastal_rows].reset_index(drop=True)

    # pd.concat raises on an empty mapping, so a file without any groups is
    # skipped instead of aborting the whole loop.
    if not collect_all:
        continue

    gpd.GeoDataFrame(pd.concat(collect_all)).to_parquet(out_path)

100%|█████████████████████████████████████████████████████████████████████████████████| 25/25 [00:00<00:00, 925.91it/s]

CPU times: total: 21.9 s
Wall time: 22 s



