# City cube processing loop - get CLC Water classes in buffered city
Get land cover data from CLC 2018 in and around city, by creating a buffer around city geometry.
This notebook focuses on Water classes (CLC4**).

In [12]:
%matplotlib inline

import numpy as np
import fiona
import shapely
from shapely.geometry import MultiLineString, MultiPolygon, Polygon, box, shape
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import os
from pathlib import Path
import rasterio
import IPython.display
import time  
import datetime
from contextlib import redirect_stdout
import rasterio
import rasterio.features

# load utils functions
from src import utils

# Sentinel Hub
from sentinelhub import (
    CRS,
    BBox,
    BBoxSplitter,
    ByocCollection,
    ByocCollectionAdditionalData,
    ByocCollectionBand,
    ByocTile,
    DataCollection,
    DownloadFailedException,
    SentinelHubDownloadClient,
    MimeType,
    SentinelHubBYOC,
    SentinelHubRequest,
    SHConfig,
    bbox_to_dimensions,
    os_utils,
    Geometry
)

config = SHConfig()
config.instance_id = os.environ.get("SH_INSTANCE_ID")
config.sh_client_id = os.environ.get("SH_CLIENT_ID")
config.sh_client_secret = os.environ.get("SH_CLIENT_SECRET")
config.aws_access_key_id = os.environ.get("username")
config.aws_secret_access_key = os.environ.get("password")

In [19]:
# read in the city polygons
from src import db_connect, utils
from sqlalchemy import text
import geopandas as gpd

home_dir = os.environ.get('HOME')
home_dir = 'C:/Users/MariaRicci/Projects_Cdrive/FAIRiCube'
engine_postgresql = db_connect.create_engine(db_config = f"{home_dir}/uc1-urban-climate/database.ini")

with engine_postgresql.begin() as conn:
    query = text("""
              SELECT urau_code, urau_name, geometry
              FROM lut.l_city_urau2021
              """)
    gdf = gpd.read_postgis(query, conn, geom_col='geometry')

resolution = 100

row = gdf[gdf.urau_name == 'Verona']
geometry = row.geometry
print(row.bounds.values[0])

def buffer_bounds(row, buffer_multiplier, crs, resolution):
    bbox_coords = tuple(row.bounds.values[0])
    bbox = BBox(bbox=bbox_coords, crs=crs)
    bbox_size = bbox_to_dimensions(bbox, resolution=resolution)
    bbox_size
    # Compute the parameters of the georeference
    dirx = (bbox_coords[2] - bbox_coords[0]) / bbox_size[1] # pixel size in the x-direction in map units/pixel
    diry = -(bbox_coords[3] - bbox_coords[1]) / bbox_size[0] # pixel size in the y-direction in map units, almost always negative
    bbox_coords_b = [bbox_coords[0]-dirx,
                    bbox_coords[1]+buffer_multiplier*diry,
                    bbox_coords[2]+dirx,
                    bbox_coords[3]-buffer_multiplier*diry]
    bbox_b = BBox(bbox=bbox_coords_b, crs=crs)
    bbox_size_b = bbox_to_dimensions(bbox_b, resolution=resolution)
    bbox_size_b
    return bbox_b, bbox_size_b

def polygon_to_mask(coords, geometry, crs):
    bbox = BBox(bbox=coords, crs=crs)
    bbox_size = bbox_to_dimensions(bbox, resolution=resolution)
    # Compute the parameters of the georeference
    dirx = (coords[2] - coords[0]) / bbox_size[1] # pixel size in the x-direction in map units/pixel
    diry = -(coords[3] - coords[1]) / bbox_size[0] # pixel size in the y-direction in map units, almost always negative
    x0 = coords[0] # x-coordinate of the center of the upper left pixel
    y0 = coords[3] # y-coordinate of the center of the upper left pixel
    transform = rasterio.Affine(dirx, 0, x0, 
                                0, diry, y0)
    rasterized = rasterio.features.rasterize(
            [geometry],
            out_shape=bbox_size,
            fill=0,
            default_value=1,
            transform=transform,
            all_touched=True,
            dtype='float64'
        )
    return rasterized
bbox,bbox_size = buffer_bounds(row, 2, CRS.WGS84, 100)
coords = bbox.min_x, bbox.min_y, bbox.max_x, bbox.max_y
rasterized = polygon_to_mask(coords, row.geometry.item(), CRS.WGS84)
# utils.plot_image(rasterized)

[10.8768365 45.349396  11.1243715 45.5417995]


In [20]:
# plot bounding box
print(f"Image shape at {100} m resolution: {bbox_size} pixels") 
IPython.display.GeoJSON(shapely.geometry.box(*bbox).__geo_interface__)

Image shape at 100 m resolution: (190, 223) pixels


<IPython.display.GeoJSON object>

## Collections

In [4]:
# copy CollectionId from FAIRiCube catalog https://catalog.fairicube.eu/
collection_id_clc = "cbdba844-f86d-41dc-95ad-b3f7f12535e9"
collection_name_clc="CLC"
end_point = "https://creodias.sentinel-hub.com"
# define collection
data_collection_clc = DataCollection.define_byoc(collection_id_clc, name=collection_name_clc, service_url = end_point)
data_collection_clc

<DataCollection.CLC: DataCollectionDefinition(
  api_id: byoc-cbdba844-f86d-41dc-95ad-b3f7f12535e9
  catalog_id: byoc-cbdba844-f86d-41dc-95ad-b3f7f12535e9
  wfs_id: byoc-cbdba844-f86d-41dc-95ad-b3f7f12535e9
  service_url: https://creodias.sentinel-hub.com
  collection_type: BYOC
  collection_id: cbdba844-f86d-41dc-95ad-b3f7f12535e9
  is_timeless: False
  has_cloud_coverage: False
)>

## Prepare request
Define evalscript and an helper function to get the SentinelHub requests

In [5]:
evalscript = """
//VERSION=3
function setup() {
  return {
    input: ["CLC", "dataMask"],
    output: {
      bands: 1,
      sampleType: "UINT16"
    }
  }
}

function evaluatePixel(sample) {
    if(sample.CLC < 45 && sample.CLC > 39) {
        return [sample.CLC*0];
    } else {
        return [sample.CLC/sample.CLC]
    }
}

"""
input_data = [
        SentinelHubRequest.input_data(
            data_collection=DataCollection.CLC,
            time_interval=("2017-01-01", "2019-01-01") # select only CLC 2018
        ),
    ]
def sentinelhub_request(evalscript, input_data, geometry, bbox, bbox_size, config):
    request = SentinelHubRequest(
        evalscript=evalscript,
        input_data=input_data,
        responses=[SentinelHubRequest.output_response("default", MimeType.PNG)],
        bbox=bbox,
        size=bbox_size,
        geometry = geometry,
        config=config,
        )
    return request

In [8]:
from scipy import ndimage
# data processing function
def avg_distance_to_water(data_array,mask_array):
    res = ndimage.distance_transform_edt(data_array)
    return res, np.mean(res, where=mask_array)

In [None]:
bbox,bbox_size = buffer_bounds(row, 2, CRS.WGS84, 100)
coords = bbox.min_x, bbox.min_y, bbox.max_x, bbox.max_y
mask_array = polygon_to_mask(coords, row.geometry.item(), CRS.WGS84)
request = sentinelhub_request(evalscript,input_data, bbox, bbox_size, config)
try:
    data = request.get_data()[0]
    res, avg = avg_distance_to_water(data, mask_array)
    utils.plot_image(res)
except:
    print("an error occurred")

In [14]:
# create temporary df
df_all = pd.DataFrame(columns=['URAU_CODE', 'CLC', 'count'])
# skipped Oulu/Uleåborg
for row in gdf_city.itertuples():
    print("Downloading " + row.URAU_NAME + " data")
    
    #------------------------------------------
    geometry_gdf = row.geometry
    # get only buffer zone
    geometry_b, bbox_b, bbox_size_b = utils.buffer_geometry(geometry_gdf, CRS.WGS84, buffer_size=1, resolution=resolution)
    
    bbox_subsize_b = utils.bbox_optimal_subsize(bbox_size_b)
    if(bbox_subsize_b == 1 ):
        request = sentinelhub_request(evalscript,input_data, bbox_b, bbox_size_b, config)
        try:
            data = request.get_data()[0]
        except:
            print("an error occurred")
            break
        # do something with the data
        mask_array = polygon_to_mask(bbox_b, geometry)
        res, avg = avg_distance_to_water(data, mask_array)
    else:
        print("Splitting bounding box in", (bbox_subsize_b,bbox_subsize_b), " subgrid")
        bbox_split = BBoxSplitter([bbox_b], CRS.WGS84, bbox_subsize_b)
        # create a list of requests
        bbox_list = bbox_split.get_bbox_list()
        sh_requests = [sentinelhub_request(evalscript,input_data, subbbox, bbox_to_dimensions(subbbox, resolution=resolution), config) for 
                       subbbox in bbox_list]
        tiles = []
        error=False
        for req,subbbox in zip(sh_requests,bbox_list):
            try:
                tile = req.get_data()[0]
            except:
                print("an error occurred")
                error=True
                break
            # do something with the data
            tiles.append(tile)
        if(error):
            break
        # TODO 
        data = combine_tiles(bbox_list, tiles)
        mask_array = polygon_to_mask(bbox_b, geometry)
        res, avg = avg_distance_to_water(data, mask_array)
    print("---------------------------")

Downloading Mons data
  URAU_CODE CLC  count
1    BE009C  41  25656
---------------------------
Downloading Genève data
  URAU_CODE CLC count
1    CH003C  41  6682
---------------------------
Downloading Basel data
  URAU_CODE CLC  count
0    CH004C  40  25084
---------------------------
Downloading Lausanne data
  URAU_CODE CLC  count
0    CH005C  41  15687
---------------------------
Downloading La Rochelle data
  URAU_CODE CLC  count
1    FR054C  44  10560
---------------------------
Downloading Lorient data
  URAU_CODE CLC  count
0    FR055C  43  22642
---------------------------
Downloading Cherbourg-en-Cotentin data
  URAU_CODE CLC  count
0    FR056C  44  11911
---------------------------
Downloading Béziers data
No water features
  URAU_CODE CLC  count
0    FR056C  44  11911
---------------------------
Downloading Kortrijk data
No water features
  URAU_CODE CLC  count
0    FR056C  44  11911
---------------------------
Downloading Oostende data
  URAU_CODE CLC count
1    BE011C  

In [59]:
# post processing

# optionally retrieve data from temporary csv
# df = pd.read_csv("clc400.csv", dtype={'Unnamed: 0': 'float64', 'URAU_CODE': 'str', 'CLC': 'str', 'count': 'float64'})
# df_all = df[["URAU_CODE","CLC","count"]]
df_sum = df_all.groupby(["URAU_CODE","CLC"]).sum(numeric_only=True).reset_index()
norm = df_sum.groupby(["CLC"])[['count']].transform(lambda x: (x/x.max())).rename(columns={'count': 'norm'})
df_sum['norm'] = norm
df_sum

Unnamed: 0,URAU_CODE,CLC,count,norm
0,AT001C,40,75590.0,0.463359
1,AT001C,41,64261.0,0.177205
2,AT003C,40,56961.0,0.349165
3,AT003C,41,11289.0,0.031130
4,AT004C,40,16114.0,0.098777
...,...,...,...,...
608,SK004C,40,3820.0,0.023416
609,SK006C,40,8301.0,0.050884
610,SK006C,41,27642.0,0.076225
611,SK007C,41,2782.0,0.007672


In [63]:
# save to sqlite
conn = sqlite3.connect(dbfile) 
df_sum.to_sql(table_name,conn,if_exists='replace',index=False)
conn.commit()
conn.close()

In [66]:
#check database
query = """ 
  SELECT *
         FROM  c_urban_cube_clc
         LIMIT 10
         """
con = sqlite3.connect(dbfile)
df= pd.read_sql(query, con)
# Be sure to close the connection
con.close()
print (df)
print ("END")

  URAU_CODE CLC    count      norm
0    AT001C  40  75590.0  0.463359
1    AT001C  41  64261.0  0.177205
2    AT003C  40  56961.0  0.349165
3    AT003C  41  11289.0  0.031130
4    AT004C  40  16114.0  0.098777
5    AT005C  40  10913.0  0.066896
6    AT006C  41  14378.0  0.039648
7    BE001C  40   3150.0  0.019309
8    BE003C  40  47219.0  0.289447
9    BE003C  41  17110.0  0.047182
END
