# Collection of UC1 datasets for cities (2018)
CLMS datasets
- Urban Atlas
- CLC Water classes (4xx)
- Environmental zones
- Tree cover density
- Imperviousness
- DEM

Eurostat
- Number of deaths per year under 65 due to diseases of the circulatory or respiratory systems [SA2013V]
- Average disposable annual household income - EUR [EC3040V]
- Economically active population, 20-64, total [EC1174V]
- Persons unemployed, total [EC1010V]
- Population on the 1st of January, total [DE1001V]
- Population on the 1st of January, 65-74 years, total [DE1028V]
- Population on the 1st of January, 75 years and over, total [DE1055V]
- Average length of journey to work by private car - km [TT1020V]
- People commuting into the city [TT1064V]

Climate data (ERA5)
- total_cloud_cover	
- 2m_temperature		
- total_precipitation

In [1]:
import pandas as pd
import sqlite3

# path to base data
base_path = "./../../../../s3/data"
# path to city LUT
cities = base_path+"/d001_administration/urban_audit_city_2021/URAU_RG_100K_2021_4326_CITIES/city_working_lat_lon_list.csv"
# path to eurostat LUT
lookuptable = base_path + '/d000_lookuptables/lookuptables.gpkg'

## CLMS datasets

In [3]:
# CLMS

# urban atlas, environmental zones and CLC Water classes are in sqlite database
city_cube_path  = base_path + '/c001_city_cube/C_urban_cube_sh.sqlite'

con = sqlite3.connect(city_cube_path)

urban_atlas = pd.read_sql_query("SELECT urau_code, ua18, pixel_count FROM c_urban_cube_ua", con)
environmental_zones = pd.read_sql_query("SELECT * FROM c_city_env_zones", con)
clc400 = pd.read_sql_query("SELECT * FROM c_urban_cube_clc", con)

con.close()

# other datasets not yet in database
tcd_path = base_path+"/c001_city_cube/tcd_v4.csv"
imperviousness_path = base_path+"/c001_city_cube/imd.csv"
dem_path = base_path+"/c001_city_cube/DEM_COPERNICUS_30.csv"

tcd = pd.read_csv(tcd_path)
imperviousness = pd.read_csv(imperviousness_path)
dem = pd.read_csv(dem_path)

In [24]:
imperviousness

Unnamed: 0.1,Unnamed: 0,URAU_CODE,tot_areaSqm,noDataCount,imd_areaSqm
0,0,CH011C,90505200,487801,8957238
1,0,CH011C,90505200,487801,8957238
2,0,CH012C,127837600,764084,6658803
3,0,BE003C,431215200,2793972,52153848
4,0,BE004C,223914000,964616,42091248
...,...,...,...,...,...
709,0,RO009C,605097600,4821136,15451737
710,0,RO028C,68068800,410581,6236380
711,0,SK001C,709637400,3440399,52184875
712,0,RO010C,102282500,572220,8803204


In [29]:
tmp = urban_atlas.merge(environmental_zones, left_on="urau_code", right_on="urau_name")
tmp = tmp.merge(clc400, left_on="urau_code", right_on="URAU_CODE")
tmp = tmp.merge(tcd)
tmp = tmp.merge(imperviousness, left_on="URAU_CODE", right_on="URAU_CODE")
tmp = tmp.merge(dem)
tmp.columns

Index(['urau_code', 'ua18', 'pixel_count', 'urau_name', 'env_zone',
       'URAU_CODE', 'CLC', 'count', 'norm', 'Unnamed: 0_x', 'tot_areaSqm_x',
       'noDataCount_x', 'treecover_areaSqm', 'Unnamed: 0_y', 'tot_areaSqm_y',
       'noDataCount_y', 'imd_areaSqm', 'Unnamed: 0', 'dem_min', 'dem_max',
       'dem_mean', 'dem_std', 'dem_count'],
      dtype='object')

In [30]:
clms_cube = tmp[['urau_code', 'ua18', 'pixel_count', 'env_zone',
       'CLC', 'count', 'norm', 'tot_areaSqm_x',
       'noDataCount_x', 'treecover_areaSqm', 'tot_areaSqm_y',
       'noDataCount_y', 'imd_areaSqm', 'dem_min', 'dem_max',
       'dem_mean', 'dem_std', 'dem_count']]
clms_cube

Unnamed: 0,urau_code,ua18,pixel_count,env_zone,CLC,count,norm,tot_areaSqm_x,noDataCount_x,treecover_areaSqm,tot_areaSqm_y,noDataCount_y,imd_areaSqm,dem_min,dem_max,dem_mean,dem_std,dem_count
0,AT001C,11100,549560,8,40,75590.0,0.463359,664218800,2691539,82456218,664218800,2691539,120065676,0.0,547.0,129.32441,122.267294,6642188
1,AT001C,11100,549560,8,41,64261.0,0.177205,664218800,2691539,82456218,664218800,2691539,120065676,0.0,547.0,129.32441,122.267294,6642188
2,AT001C,11210,743312,8,40,75590.0,0.463359,664218800,2691539,82456218,664218800,2691539,120065676,0.0,547.0,129.32441,122.267294,6642188
3,AT001C,11210,743312,8,41,64261.0,0.177205,664218800,2691539,82456218,664218800,2691539,120065676,0.0,547.0,129.32441,122.267294,6642188
4,AT001C,11220,675268,8,40,75590.0,0.463359,664218800,2691539,82456218,664218800,2691539,120065676,0.0,547.0,129.32441,122.267294,6642188
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16608,SK008C,21000,147220,6,40,11154.0,0.068373,138924400,713334,23592521,138924400,713334,6046643,0.0,709.0,147.36460,168.309065,788
16609,SK008C,23000,65839,6,40,11154.0,0.068373,138924400,713334,23592521,138924400,713334,6046643,0.0,709.0,147.36460,168.309065,788
16610,SK008C,31000,280810,6,40,11154.0,0.068373,138924400,713334,23592521,138924400,713334,6046643,0.0,709.0,147.36460,168.309065,788
16611,SK008C,32000,8386,6,40,11154.0,0.068373,138924400,713334,23592521,138924400,713334,6046643,0.0,709.0,147.36460,168.309065,788


## EUROSTAT datasets

In [34]:
indicator_list = ('SA2013V', 'EC3040V', 'EC1174V', 'EC1010V', 'DE1001V', 'DE1028V', 'DE1055V', 'TT1020V', 'TT1064V')
# read eurostat LUT
con_l = sqlite3.connect(lookuptable)
eurostat_lookup = pd.read_sql_query("SELECT * FROM L_eurostat", con_l)
con_l.close()

# query database
con_e = sqlite3.connect(city_cube_path)
eurostat_cube = pd.read_sql_query(f"SELECT indic_code, urau_code, [2016], [2017], [2018], [2019], [2020] \
                               FROM c_urban_cube_eurostat WHERE indic_code in {indicator_list}", con_e)
con_e.close()

In [35]:
eurostat_cube

Unnamed: 0,indic_code,urau_code,2016,2017,2018,2019,2020
0,SA2013V,AT,,,,,
1,SA2013V,AT001C,,,,,
2,SA2013V,AT002C,,,,,
3,SA2013V,AT003C,,,,,
4,SA2013V,AT004C,,,,,
...,...,...,...,...,...,...,...
6563,TT1064V,UK026C,,,,,
6564,TT1064V,UK027C,,,,,
6565,TT1064V,UK028C,,,,,
6566,TT1064V,UK029C,,,,,


## Climate data

In [37]:
era5_path2018 = base_path+"/d003_climate/cl_arco-era5/city_monthly_avg_2018.csv"

era5_cube = pd.read_csv(era5_path2018)
era5_cube

Unnamed: 0,time,city,2m_temperature,total_precipitation,total_cloud_cover,latitude,longitude
0,2018-01-31,PL001C,273.65247,0.000062,0.808280,52.25,21.00
1,2018-01-31,PL002C,274.25180,0.000075,0.780520,52.00,19.25
2,2018-01-31,PL003C,274.43280,0.000045,0.754245,50.25,19.75
3,2018-01-31,PL004C,276.13280,0.000064,0.810248,51.25,16.75
4,2018-01-31,PL005C,275.23077,0.000095,0.795826,52.50,16.75
...,...,...,...,...,...,...,...
12163,2019-01-31,NO002C1,277.59888,0.000376,0.591055,60.50,5.25
12164,2019-01-31,NO003C1,275.85715,0.000335,0.817897,63.50,10.25
12165,2019-01-31,NO004C1,278.91160,0.000167,0.431574,59.00,5.50
12166,2019-01-31,NO005C1,277.86720,0.000027,0.193381,58.25,8.00
