# Get city statistics from climate data downloaded from CDS
Workflow to compute daytime and nighttime UTCI (Universal Thermal Climate Index) statistics for each city.

In [None]:
import sqlite3
import pandas as pd
import xarray as xr
from src import utils
import time
# Base folder for the city database and the downloaded climate data.
# The trailing slash matters: file paths in this notebook are built from it
# by plain string concatenation.
input_folder = "../../../data/"


## Download hourly data

In [None]:
# Year to process. Kept as a string because it is passed verbatim to the CDS
# request and interpolated into file and folder names.
year = "1992"


In [None]:

import cdsapi

# Client for the Copernicus Climate Data Store (CDS) API.
c = cdsapi.Client()

# Zero-padded month ("01".."12") and day ("01".."31") strings for the request.
args = {
    "months": [f"{month:02d}" for month in range(1, 13)],
    "days": [f"{day:02d}" for day in range(1, 32)],
}

print(year)

# Request every day of every month of `year` from the hourly UTCI dataset and
# store the result as a zip archive in the input folder.
c.retrieve(
    'derived-utci-historical',
    {
        'version': '1_1',
        'format': 'zip',
        'day': args["days"],
        'month': args["months"],
        'year': year,
        'product_type': 'consolidated_dataset',
        'variable': 'universal_thermal_climate_index',
    },
    input_folder + f'utci_hourly_{year}.zip',
)


In [None]:

# Unpack the archive downloaded for `year`.
# NOTE(review): presumably returns the folder the files were extracted to;
# confirm against src.utils.unzip_to_folder.
output_folder = utils.unzip_to_folder(
    input_folder,
    f'utci_hourly_{year}',
)

## Get city coordinates

In [None]:
%time
# Load the city centre points (WGS84 longitude/latitude plus city code) and
# turn them into xarray indexers that share a "city" dimension.
from contextlib import closing

# Path to the city database. input_folder already ends with "/", and every
# other path in this notebook is built as input_folder + name, so no extra
# separator is inserted here.
city_geom = input_folder + 'city_pts_urban_audit2021.sqlite'

# closing() releases the connection even if the query raises. A bare sqlite3
# connection used as a context manager only wraps the transaction; it does
# not close the connection.
with closing(sqlite3.connect(city_geom)) as con:
    # Read only the columns needed for the point lookup.
    city_all = pd.read_sql_query(
        "SELECT _wgs84x, _wgs84y, city_code FROM urau_lb_2021_3035_cities_center_points_4",
        con,
    )

# Example of the values involved (city code, lon, lat):
# lonlat_list =[["NL005C", 4.640960, 52.113299], ["NL006C", 5.384670, 52.173656], ["NL007C", 5.921886, 52.189884]]
lon_list = city_all["_wgs84x"].tolist()
lat_list = city_all["_wgs84y"].tolist()
city_list = city_all["city_code"].tolist()

# Both indexers use the same "city" dimension and labels so that selecting
# with them yields one value per city instead of a lon x lat grid.
target_lon = xr.DataArray(lon_list, dims="city", coords={"city": city_list})
target_lat = xr.DataArray(lat_list, dims="city", coords={"city": city_list})

## Read the downloaded .nc files with xarray

In [None]:
# Open every NetCDF file extracted for the selected year as a single dataset.
# The folder name is derived from `year` (it was previously hard-coded to
# 1992), so changing `year` at the top of the notebook reads the matching
# download instead of silently re-reading the 1992 data.
climate_path = input_folder + f"utci_hourly_{year}/ECMWF_utci*_v1.1_con.nc"
data = xr.open_mfdataset(climate_path, engine="netcdf4", parallel=True)

In [None]:
# Show the dataset summary (dimensions, coordinates, variables) as cell output.
data

## Compute statistics

In [None]:
# Pick one UTCI time series per city from the gridded data.
# NOTE(review): method="ffill" selects the grid coordinate on one side of each
# city point rather than the closest one; method="nearest" is the usual choice
# for point sampling. Confirm which is intended before changing it.
data_cities = data["utci"].sel(lon=target_lon, lat=target_lat, method="ffill")

# Group the hourly values into 12-hour bins that start at 07:00 and 19:00.
# offset="7h" replaces the deprecated `base=7` argument and yields the same
# bin edges. Despite its name, this object holds both the 07:00-19:00 and the
# 19:00-07:00 bins.
# NOTE(review): the bin edges follow the dataset's time axis (presumably UTC),
# not local city time. Confirm this is acceptable.
data_cities_daytime = data_cities.resample(time="12h", offset="7h")

# Mean, minimum and maximum UTCI per city and 12-hour bin.
utci_mean = data_cities_daytime.mean()
utci_min = data_cities_daytime.min()
utci_max = data_cities_daytime.max()


In [None]:
# Inspect the per-city UTCI selection as cell output.
data_cities

In [None]:

# Combine the three statistics into one dataset, naming each variable after
# the statistic it holds.
stats = xr.merge(
    [
        array.rename(label)
        for label, array in (
            ("utci_mean", utci_mean),
            ("utci_min", utci_min),
            ("utci_max", utci_max),
        )
    ]
)
stats

In [None]:
# Display the merged statistics dataset as cell output.
stats

In [None]:
# Create a dask.distributed client. No scheduler address is passed, so this
# presumably starts a local cluster; confirm if a remote one is intended.
# NOTE(review): xr.open_mfdataset(..., parallel=True) runs in an earlier cell,
# before this client exists. Confirm that ordering is intended.
from dask.distributed import Client
client = Client()  # Connect to distributed cluster and override default

In [None]:
# Show the dask client summary as cell output.
client

## Convert to GeoDataFrame

In [None]:
# Convert the statistics dataset into a pandas DataFrame and display it.
stats_df = stats.to_dataframe()
stats_df

In [None]:
# Move the index levels into ordinary columns so they can be selected by name
# in the GeoDataFrame step below, then display the result.
stats_df = stats_df.reset_index()
stats_df

In [None]:
import geopandas as gpd

# Build a point GeoDataFrame: one row per city and time bin, carrying the
# three statistics, with the point geometry taken from the lon/lat columns
# (WGS84, EPSG:4326).
gdf = gpd.GeoDataFrame(
    stats_df[["city", "time", "utci_mean", "utci_min", "utci_max"]],
    geometry=gpd.points_from_xy(stats_df["lon"], stats_df["lat"]),
    crs="EPSG:4326",
)

In [None]:
# Write the per-city statistics as GeoJSON. The file extension now matches the
# driver: the file was previously named *.shp although its content was GeoJSON,
# which misleads any tool that picks the format from the extension.
gdf.to_file(f"stats_{year}.geojson", driver="GeoJSON")

## Save to database

In [None]:
# Target schema for the database collection holding the city UTCI statistics:
# the coordinate reference system (EPSG code) and the type of every property.
# NOTE(review): the property names here ("timestamp", "urau_code") differ from
# the GeoDataFrame columns ("time", "city"); presumably they are mapped when
# the data is written. Confirm against the code that saves it.
collections = dict(
    c_city_utci=dict(
        crs=4326,
        properties=dict(
            id="integer",
            timestamp="timestamp",
            utci_mean="float",
            utci_min="float",
            utci_max="float",
            urau_code="text",
        ),
    ),
)

In [None]:
# shutil is only needed by the (currently commented-out) folder removal at the
# end of the notebook.
import shutil

# Drop the references to the large climate arrays so their memory can be
# reclaimed.
del data, data_cities, data_cities_daytime

# Shut down the dask client created earlier.
client.close()


In [None]:
# Make sure the intermediate arrays are gone. This removal is idempotent: the
# names may already have been deleted by the previous cell, and a plain `del`
# would then raise NameError and stop a top-to-bottom run of the notebook.
globals().pop("data_cities", None)
globals().pop("data_cities_daytime", None)

## Delete original data to save space

In [None]:

# shutil.rmtree(input_folder+f"utci_hourly_{year}", ignore_errors=False, onerror=None)