# Get city statistics from climate data downloaded from CDS
Workflow to get day- and nighttime utci statistics

In [5]:
import sqlite3
import pandas as pd
import xarray as xr
from src import utils
import time
input_folder = "../../../data/"


## Download hourly data

In [6]:
year = '1992'


In [7]:

import cdsapi
c = cdsapi.Client()
args = {
    "months": ['01', '02', '03',
            '04', '05', '06',
            '07', '08', '09',
            '10', '11', '12',],
    "days":   ['01', '02', '03',
            '04', '05', '06',
            '07', '08', '09',
            '10', '11', '12',
            '13', '14', '15',
            '16', '17', '18',
            '19', '20', '21',
            '22', '23', '24',
            '25', '26', '27',
            '28', '29', '30',
            '31'],
    }
print(year)
c.retrieve(
        'derived-utci-historical', 
    {
        'version': '1_1',
        'format': 'zip',
        'day': args["days"],
        'month': args["months"],
        'year': year,
        'product_type': 'consolidated_dataset',
        'variable': 'universal_thermal_climate_index',
    },
    input_folder+f'utci_hourly_{year}.zip')


1992


2023-07-27 17:07:12,447 INFO Welcome to the CDS
2023-07-27 17:07:12,448 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/derived-utci-historical
2023-07-27 17:07:12,801 INFO Request is queued
2023-07-27 17:07:13,853 INFO Request is running
2023-07-27 17:11:32,389 INFO Request is completed
2023-07-27 17:11:32,391 INFO Downloading https://download-0011-clone.copernicus-climate.eu/cache-compute-0011/cache/data2/dataset-derived-utci-historical-09cfa0f8-b96c-4cd6-a9d3-10aa27924540.zip to ../../../data/utci_hourly_1992.zip (14.4G)
2023-07-27 17:38:18,208 INFO Download rate 9.2M/s     


Result(content_length=15416926517,content_type=application/zip,location=https://download-0011-clone.copernicus-climate.eu/cache-compute-0011/cache/data2/dataset-derived-utci-historical-09cfa0f8-b96c-4cd6-a9d3-10aa27924540.zip)

In [8]:

output_folder = utils.unzip_to_folder(input_folder, f'utci_hourly_{year}')

File Name                                             Modified             Size
ECMWF_utci_19920401_v1.1_con.nc                2020-06-26 09:28:22     42276982
ECMWF_utci_19921104_v1.1_con.nc                2020-06-26 09:31:30     42362898
ECMWF_utci_19921203_v1.1_con.nc                2020-06-26 09:33:26     42442964
ECMWF_utci_19920609_v1.1_con.nc                2020-06-26 09:25:42     41875415
ECMWF_utci_19920718_v1.1_con.nc                2020-06-26 09:27:36     41833324
ECMWF_utci_19921016_v1.1_con.nc                2020-06-26 09:28:22     42167837
ECMWF_utci_19920422_v1.1_con.nc                2020-06-26 09:21:00     42206356
ECMWF_utci_19920727_v1.1_con.nc                2020-06-26 09:24:54     41684724
ECMWF_utci_19920911_v1.1_con.nc                2020-06-26 09:29:20     41993965
ECMWF_utci_19920617_v1.1_con.nc                2020-06-26 09:23:12     41741528
ECMWF_utci_19920415_v1.1_con.nc                2020-06-26 09:23:42     42267928
ECMWF_utci_19920824_v1.1_con.nc         

## Get city coordinates

In [9]:
%time
# path to databases
city_geom  = input_folder+'/city_pts_urban_audit2021.sqlite'
con = sqlite3.connect(city_geom)
# read full table
city_all = pd.read_sql_query("SELECT _wgs84x, _wgs84y, city_code FROM urau_lb_2021_3035_cities_center_points_4", con)
con.close()
# get city coordinates
# lonlat_list =[["NL005C", 4.640960, 52.113299], ["NL006C", 5.384670, 52.173656], ["NL007C", 5.921886, 52.189884]]
lon_list = city_all["_wgs84x"].values.tolist()
lat_list = city_all["_wgs84y"].values.tolist()
city_list = city_all["city_code"].values.tolist()
target_lon = xr.DataArray(lon_list, dims="city", coords={"city": city_list})
target_lat = xr.DataArray(lat_list, dims="city", coords={"city": city_list})

CPU times: total: 0 ns
Wall time: 0 ns


## Read the downloaded .nc file with xarray

In [11]:
output_folder = "C:/Users/MariaRicci/Projects_Cdrive/FAIRiCube/data/"
climate_path = output_folder+"utci_hourly_1992/ECMWF_utci*_v1.1_con.nc"
data = xr.open_mfdataset(climate_path, engine="netcdf4", parallel=True)

In [12]:
data

Unnamed: 0,Array,Chunk
Bytes,28.32 GiB,79.23 MiB
Shape,"(8784, 601, 1440)","(24, 601, 1440)"
Dask graph,366 chunks in 733 graph layers,366 chunks in 733 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 28.32 GiB 79.23 MiB Shape (8784, 601, 1440) (24, 601, 1440) Dask graph 366 chunks in 733 graph layers Data type float32 numpy.ndarray",1440  601  8784,

Unnamed: 0,Array,Chunk
Bytes,28.32 GiB,79.23 MiB
Shape,"(8784, 601, 1440)","(24, 601, 1440)"
Dask graph,366 chunks in 733 graph layers,366 chunks in 733 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


## Compute statistics

In [13]:
data_cities = data["utci"].sel(lon=target_lon, lat=target_lat, method="ffill")
data_cities_daytime = data_cities.resample(time="12H", base = 7)
utci_mean = data_cities_daytime.mean()
utci_min = data_cities_daytime.min()
utci_max = data_cities_daytime.max()


In [14]:
data_cities

Unnamed: 0,Array,Chunk
Bytes,24.43 MiB,68.34 kiB
Shape,"(8784, 729)","(24, 729)"
Dask graph,366 chunks in 735 graph layers,366 chunks in 735 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 24.43 MiB 68.34 kiB Shape (8784, 729) (24, 729) Dask graph 366 chunks in 735 graph layers Data type float32 numpy.ndarray",729  8784,

Unnamed: 0,Array,Chunk
Bytes,24.43 MiB,68.34 kiB
Shape,"(8784, 729)","(24, 729)"
Dask graph,366 chunks in 735 graph layers,366 chunks in 735 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [15]:

stats = xr.merge([utci_mean.rename("utci_mean"), utci_min.rename("utci_min"), utci_max.rename("utci_max")])
stats

Unnamed: 0,Array,Chunk
Bytes,2.04 MiB,2.85 kiB
Shape,"(733, 729)","(1, 729)"
Dask graph,733 chunks in 2935 graph layers,733 chunks in 2935 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.04 MiB 2.85 kiB Shape (733, 729) (1, 729) Dask graph 733 chunks in 2935 graph layers Data type float32 numpy.ndarray",729  733,

Unnamed: 0,Array,Chunk
Bytes,2.04 MiB,2.85 kiB
Shape,"(733, 729)","(1, 729)"
Dask graph,733 chunks in 2935 graph layers,733 chunks in 2935 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.04 MiB,2.85 kiB
Shape,"(733, 729)","(1, 729)"
Dask graph,733 chunks in 2935 graph layers,733 chunks in 2935 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.04 MiB 2.85 kiB Shape (733, 729) (1, 729) Dask graph 733 chunks in 2935 graph layers Data type float32 numpy.ndarray",729  733,

Unnamed: 0,Array,Chunk
Bytes,2.04 MiB,2.85 kiB
Shape,"(733, 729)","(1, 729)"
Dask graph,733 chunks in 2935 graph layers,733 chunks in 2935 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.04 MiB,2.85 kiB
Shape,"(733, 729)","(1, 729)"
Dask graph,733 chunks in 2935 graph layers,733 chunks in 2935 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.04 MiB 2.85 kiB Shape (733, 729) (1, 729) Dask graph 733 chunks in 2935 graph layers Data type float32 numpy.ndarray",729  733,

Unnamed: 0,Array,Chunk
Bytes,2.04 MiB,2.85 kiB
Shape,"(733, 729)","(1, 729)"
Dask graph,733 chunks in 2935 graph layers,733 chunks in 2935 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [16]:
stats

Unnamed: 0,Array,Chunk
Bytes,2.04 MiB,2.85 kiB
Shape,"(733, 729)","(1, 729)"
Dask graph,733 chunks in 2935 graph layers,733 chunks in 2935 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.04 MiB 2.85 kiB Shape (733, 729) (1, 729) Dask graph 733 chunks in 2935 graph layers Data type float32 numpy.ndarray",729  733,

Unnamed: 0,Array,Chunk
Bytes,2.04 MiB,2.85 kiB
Shape,"(733, 729)","(1, 729)"
Dask graph,733 chunks in 2935 graph layers,733 chunks in 2935 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.04 MiB,2.85 kiB
Shape,"(733, 729)","(1, 729)"
Dask graph,733 chunks in 2935 graph layers,733 chunks in 2935 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.04 MiB 2.85 kiB Shape (733, 729) (1, 729) Dask graph 733 chunks in 2935 graph layers Data type float32 numpy.ndarray",729  733,

Unnamed: 0,Array,Chunk
Bytes,2.04 MiB,2.85 kiB
Shape,"(733, 729)","(1, 729)"
Dask graph,733 chunks in 2935 graph layers,733 chunks in 2935 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.04 MiB,2.85 kiB
Shape,"(733, 729)","(1, 729)"
Dask graph,733 chunks in 2935 graph layers,733 chunks in 2935 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.04 MiB 2.85 kiB Shape (733, 729) (1, 729) Dask graph 733 chunks in 2935 graph layers Data type float32 numpy.ndarray",729  733,

Unnamed: 0,Array,Chunk
Bytes,2.04 MiB,2.85 kiB
Shape,"(733, 729)","(1, 729)"
Dask graph,733 chunks in 2935 graph layers,733 chunks in 2935 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [17]:
from dask.distributed import Client
client = Client()  # Connect to distributed cluster and override default

2023-07-28 08:02:18,223 INFO To route to workers diagnostics web server please install jupyter-server-proxy: python -m pip install jupyter-server-proxy
2023-07-28 08:02:18,317 INFO State start
2023-07-28 08:02:18,347 INFO   Scheduler at:     tcp://127.0.0.1:62401
2023-07-28 08:02:18,348 INFO   dashboard at:            127.0.0.1:8787
2023-07-28 08:02:18,449 INFO         Start Nanny at: 'tcp://127.0.0.1:62405'
2023-07-28 08:02:18,456 INFO         Start Nanny at: 'tcp://127.0.0.1:62406'
2023-07-28 08:02:18,459 INFO         Start Nanny at: 'tcp://127.0.0.1:62404'
2023-07-28 08:02:18,462 INFO         Start Nanny at: 'tcp://127.0.0.1:62407'
2023-07-28 08:02:37,614 INFO Register worker <WorkerState 'tcp://127.0.0.1:62427', name: 3, status: init, memory: 0, processing: 0>
2023-07-28 08:02:37,621 INFO Starting worker compute stream, tcp://127.0.0.1:62427
2023-07-28 08:02:37,622 INFO Starting established connection to tcp://127.0.0.1:62432
2023-07-28 08:02:37,625 INFO Register worker <WorkerStat

In [18]:
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 4
Total threads: 8,Total memory: 23.94 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:62401,Workers: 4
Dashboard: http://127.0.0.1:8787/status,Total threads: 8
Started: Just now,Total memory: 23.94 GiB

0,1
Comm: tcp://127.0.0.1:62426,Total threads: 2
Dashboard: http://127.0.0.1:62430/status,Memory: 5.99 GiB
Nanny: tcp://127.0.0.1:62404,
Local directory: C:\Users\MARIAR~1\AppData\Local\Temp\dask-worker-space\worker-8y9vafeq,Local directory: C:\Users\MARIAR~1\AppData\Local\Temp\dask-worker-space\worker-8y9vafeq

0,1
Comm: tcp://127.0.0.1:62425,Total threads: 2
Dashboard: http://127.0.0.1:62431/status,Memory: 5.99 GiB
Nanny: tcp://127.0.0.1:62405,
Local directory: C:\Users\MARIAR~1\AppData\Local\Temp\dask-worker-space\worker-j7rkh3wk,Local directory: C:\Users\MARIAR~1\AppData\Local\Temp\dask-worker-space\worker-j7rkh3wk

0,1
Comm: tcp://127.0.0.1:62424,Total threads: 2
Dashboard: http://127.0.0.1:62429/status,Memory: 5.99 GiB
Nanny: tcp://127.0.0.1:62406,
Local directory: C:\Users\MARIAR~1\AppData\Local\Temp\dask-worker-space\worker-09zpz0yx,Local directory: C:\Users\MARIAR~1\AppData\Local\Temp\dask-worker-space\worker-09zpz0yx

0,1
Comm: tcp://127.0.0.1:62427,Total threads: 2
Dashboard: http://127.0.0.1:62428/status,Memory: 5.99 GiB
Nanny: tcp://127.0.0.1:62407,
Local directory: C:\Users\MARIAR~1\AppData\Local\Temp\dask-worker-space\worker-dsdbbo5m,Local directory: C:\Users\MARIAR~1\AppData\Local\Temp\dask-worker-space\worker-dsdbbo5m


In [19]:
stats_df = stats.to_dataframe()
stats_df

2023-07-28 08:07:14,827 INFO Event loop was unresponsive in Scheduler for 4.44s.  This is often caused by long-running GIL-holding functions or moving large chunks of data. This can cause timeouts and instability.
2023-07-28 08:07:14,829 INFO Event loop was unresponsive in Nanny for 4.44s.  This is often caused by long-running GIL-holding functions or moving large chunks of data. This can cause timeouts and instability.
2023-07-28 08:07:14,830 INFO Event loop was unresponsive in Nanny for 4.44s.  This is often caused by long-running GIL-holding functions or moving large chunks of data. This can cause timeouts and instability.
2023-07-28 08:07:14,831 INFO Event loop was unresponsive in Nanny for 4.44s.  This is often caused by long-running GIL-holding functions or moving large chunks of data. This can cause timeouts and instability.
2023-07-28 08:07:14,832 INFO Event loop was unresponsive in Nanny for 4.44s.  This is often caused by long-running GIL-holding functions or moving large chu

Unnamed: 0_level_0,Unnamed: 1_level_0,lon,lat,utci_mean,utci_min,utci_max
city,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
NL005C,1991-12-31 19:00:00,4.5,52.25,260.830750,258.663971,262.960938
NL005C,1992-01-01 07:00:00,4.5,52.25,262.342682,258.436615,266.392426
NL005C,1992-01-01 19:00:00,4.5,52.25,261.008209,259.498749,262.231140
NL005C,1992-01-02 07:00:00,4.5,52.25,263.137421,261.100433,266.184937
NL005C,1992-01-02 19:00:00,4.5,52.25,258.725220,256.209717,261.414185
...,...,...,...,...,...,...
NL004C,1992-12-29 19:00:00,5.0,52.50,260.779266,259.389832,261.772888
NL004C,1992-12-30 07:00:00,5.0,52.50,267.024323,260.125183,274.717346
NL004C,1992-12-30 19:00:00,5.0,52.50,261.214172,259.204529,261.857452
NL004C,1992-12-31 07:00:00,5.0,52.50,269.872040,257.206787,282.333679


In [20]:
stats_df = stats_df.reset_index()
stats_df

Unnamed: 0,city,time,lon,lat,utci_mean,utci_min,utci_max
0,NL005C,1991-12-31 19:00:00,4.5,52.25,260.830750,258.663971,262.960938
1,NL005C,1992-01-01 07:00:00,4.5,52.25,262.342682,258.436615,266.392426
2,NL005C,1992-01-01 19:00:00,4.5,52.25,261.008209,259.498749,262.231140
3,NL005C,1992-01-02 07:00:00,4.5,52.25,263.137421,261.100433,266.184937
4,NL005C,1992-01-02 19:00:00,4.5,52.25,258.725220,256.209717,261.414185
...,...,...,...,...,...,...,...
534352,NL004C,1992-12-29 19:00:00,5.0,52.50,260.779266,259.389832,261.772888
534353,NL004C,1992-12-30 07:00:00,5.0,52.50,267.024323,260.125183,274.717346
534354,NL004C,1992-12-30 19:00:00,5.0,52.50,261.214172,259.204529,261.857452
534355,NL004C,1992-12-31 07:00:00,5.0,52.50,269.872040,257.206787,282.333679


In [21]:
import geopandas as gpd
gdf = gpd.GeoDataFrame(
    stats_df[["city", "time", "utci_mean", "utci_min", "utci_max"]], geometry=gpd.points_from_xy(stats_df.lon,stats_df.lat), crs="EPSG:4326")

In [22]:
gdf.to_file(f"stats_{year}.shp", driver="GeoJSON")

2023-07-28 08:11:29,937 - tornado.application - ERROR - Exception in callback <bound method BokehTornado._keep_alive of <bokeh.server.tornado.BokehTornado object at 0x0000021FD15D2790>>
Traceback (most recent call last):
  File "c:\Users\MariaRicci\anaconda3\envs\fairicube_env\lib\site-packages\tornado\ioloop.py", line 921, in _run
    val = self.callback()
  File "c:\Users\MariaRicci\anaconda3\envs\fairicube_env\lib\site-packages\bokeh\server\tornado.py", line 760, in _keep_alive
    c.send_ping()
  File "c:\Users\MariaRicci\anaconda3\envs\fairicube_env\lib\site-packages\bokeh\server\connection.py", line 93, in send_ping
    self._socket.ping(str(self._ping_count).encode("utf-8"))
  File "c:\Users\MariaRicci\anaconda3\envs\fairicube_env\lib\site-packages\tornado\websocket.py", line 444, in ping
    raise WebSocketClosedError()
tornado.websocket.WebSocketClosedError
2023-07-28 08:11:29,937 ERROR Exception in callback <bound method BokehTornado._keep_alive of <bokeh.server.tornado.Bokeh

In [23]:
collections = {
        "c_city_utci":
        {
            "crs": 4326,
            "properties":
            {
                "id": "integer",
                "timestamp": "timestamp",
                "utci_mean": "float",
                "utci_min": "float",
                "utci_max": "float",
                "urau_code": "text",
            }
        }
    }

In [27]:
import shutil
del data
del data_cities
del data_cities_daytime
client.close()


NameError: name 'data' is not defined

In [28]:
del data_cities
del data_cities_daytime

In [29]:

shutil.rmtree(input_folder+f"utci_hourly_{year}", ignore_errors=False, onerror=None)

PermissionError: [WinError 32] The process cannot access the file because it is being used by another process: '../../../data/utci_hourly_1992\\ECMWF_utci_19920113_v1.1_con.nc'