# Calculate tree visibility statistics per district

[![colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ac-willeke/urban-climate/blob/main/notebooks/01_FROST_extract_climate_data.ipynb) [![github](https://img.shields.io/badge/GitHub-View%20on%20GitHub-blue?logo=github)](https://github.com/ac-willeke/)

**Author**: Willeke A'Campo

**Description:** This notebook shows how to calculate the Ecosystem Service statistics for tree visibility and impact per district using DuckDB. The results are stored in a new table in the database and exported to GeoJSON.

**Documentation:** 

### Setup

In [None]:
import geopandas as gpd
from shapely.geometry import Point
from shapely.wkb import loads
import pyarrow
import os
import leafmap
import os
import duckdb
import pandas as pd

# Set the temp dir to a network drive to avoid local disk space issues.
os.environ['TMPDIR'] = r"/home/NINA.NO/willeke.acampo/Mounts/P-Prosjekter2/152022_itree_eco_ifront_synliggjore_trars_rolle_i_okosyst/TEMP"

# TODO move to kedro pipeline
municipality = "oslo"
TEMP_DIR = os.environ['TMPDIR']

# The data folders are derived from `municipality`, so changing the
# municipality above also switches the folders that are read and written.
raw_dir = os.path.join(TEMP_DIR, municipality, "01_raw")
interim_dir = os.path.join(TEMP_DIR, municipality, "02_intermediate")
reporting_dir = os.path.join(TEMP_DIR, municipality, "08_reporting")


# Table names used inside the database.
table_names = [
    "study_area", "districts", "bldg", "res_bldg", "green_space",
    "open_space", "public_open_space", "private_open_space", "tree_crowns"
    ]

# File names on disk: the table name prefixed with the municipality.
# Built from table_names so both lists always have the same length and order
# (other cells zip them together).
file_names = [f"{municipality}_{table}" for table in table_names]

district_geojson = os.path.join(interim_dir, f"{municipality}_districts.geojson")

### Data conversion | GeoJSON to GeoParquet   

In [None]:
# Map every expected file name to its GeoParquet path in the interim folder.
parquet_dict = {
    name: os.path.join(interim_dir, f"{name}.parquet")
    for name in file_names}

# For each expected parquet file: if it exists, check its CRS and clean it,
# otherwise convert the raw .geojson/.shp files to parquet.
for key, parquet_path in parquet_dict.items():
    if os.path.exists(parquet_path):
        gdf = gpd.read_parquet(parquet_path)

        print(f"CRS of {key} is {gdf.crs}")
        # Reproject BEFORE filtering on area, so that the 1 m2 threshold
        # below is evaluated in the target CRS (EPSG:25832) and not in the
        # units of whatever CRS the file was stored in.
        needs_reprojection = gdf.crs.to_epsg() != 25832
        if needs_reprojection:
            print(f"Reprojecting {key} to epsg:25832")
            gdf = gdf.to_crs(epsg=25832)

        # Remove areas smaller than 1 m2 for all files except the tree crowns.
        # The keys carry the municipality prefix (e.g. "oslo_tree_crowns"),
        # so the suffix is tested; comparing with the bare "tree_crowns"
        # never matches and would filter the tree crowns as well.
        if not key.endswith("tree_crowns"):
            print(f"Removing areas smaller than 1 m2 from {key}")
            len_before = len(gdf)
            gdf = gdf[gdf.area > 1]
            len_after = len(gdf)
            print(f"Removed {len_before - len_after} rows")

        # Only files that had to be reprojected are written back (overwrite).
        if needs_reprojection:
            gdf.to_parquet(
                path=parquet_path,
                index=None,
                compression="snappy"
            )

        # if geojson does not exist in reporting dir, export
        # NOTE: the actual export call is currently disabled (commented out),
        # so only the message is printed.
        if not os.path.exists(os.path.join(reporting_dir, f"{key}.geojson")):
            print(f"Exporting {key} to geojson")
            #gdf.to_file(os.path.join(reporting_dir, f"{key}.geojson"), driver="GeoJSON")
    else:
        # Convert all .geojson and .shp files in the raw folder to .parquet.
        # After this ran once, the remaining keys normally find their parquet
        # file and take the branch above.
        for file in os.listdir(raw_dir):
            if file.endswith((".geojson", ".shp")):
                print(f"Converting {file} to parquet")
                gdf = gpd.read_file(os.path.join(raw_dir, file))
                gdf.to_parquet(
                    path=os.path.join(interim_dir, file.split(".")[0] + ".parquet"),
                    index=None,
                    compression="snappy"
                )

### Load District Data 

In [None]:
# Read the raw district polygons and bring them into EPSG:25832.
districts = gpd.read_file(
    os.path.join(raw_dir, f"{municipality}_districts.geojson")
).to_crs(epsg=25832)

# Sorted list of the unique district (delomrade) numbers;
# one parquet file per district is exported further down.
district_list = sorted(districts['delomradenummer'].unique())

# Report how many districts were found and which ones.
print(
    f"Number of districts: {len(district_list)} \n",
    f"Districts: {district_list}",
    sep="\n",
)

**get district list**

In [None]:
# function
def export_by_district(gdf_dict, file_names, district_list, col_district):
    """Export district subsets of GeoDataFrames to GeoParquet and Shapefile.

    The three inputs are consumed in lockstep: the i-th GeoDataFrame is
    reduced to the rows whose ``col_district`` value equals the i-th district
    number and is written under the i-th file name to
    ``<interim_dir>/parquet/<file_name>.parquet`` and
    ``<interim_dir>/shp/<file_name>.shp``.

    NOTE(review): because of the lockstep pairing each GeoDataFrame is only
    exported for ONE district. If every table should be exported for every
    district, this needs a nested loop and a district-specific file name --
    confirm the intent before using this function.

    Args:
        gdf_dict: The GeoDataFrames to export. A dict is accepted (its values
            are used, in insertion order) as well as a plain iterable of
            GeoDataFrames.
        file_names: Output file names without extension.
        district_list: District numbers to filter on.
        col_district: Name of the column that holds the district number.

    Relies on the notebook-level ``interim_dir`` and on IPython's ``display``.
    """
    # Iterating a dict directly yields its keys (strings), which cannot be
    # filtered like a GeoDataFrame, so use the values instead.
    gdfs = gdf_dict.values() if isinstance(gdf_dict, dict) else gdf_dict

    # Make sure the output folders exist before writing into them.
    parquet_dir = os.path.join(interim_dir, "parquet")
    shp_dir = os.path.join(interim_dir, "shp")
    os.makedirs(parquet_dir, exist_ok=True)
    os.makedirs(shp_dir, exist_ok=True)

    for gdf, file_name, number in zip(gdfs, file_names, district_list):
        print(f"Exporting {file_name} for district {number}")
        subset = gdf[gdf[col_district] == number]
        display(subset)

        subset.to_parquet(
            path=os.path.join(parquet_dir, file_name + ".parquet"),
            index=None,
            compression="snappy"
        )

        # save to shp
        subset.to_file(
            os.path.join(shp_dir, file_name + ".shp"),
            driver='ESRI Shapefile'
            )

    for number in district_list:
        print(f"Exporting {number}")
    


In [None]:
# Write one GeoParquet file per district (delomrade):
# <interim_dir>/district_<number>.parquet
for district_number in district_list:
    district = districts.loc[districts['delomradenummer'] == district_number]

    # A .loc selection is never None -- it is a (possibly empty) frame.
    # Testing "is not None" therefore always skipped the export below;
    # only skip districts for which no rows were selected.
    if district.empty:
        print(f"District {district_number} is None")
        continue

    district.to_parquet(
        path=os.path.join(interim_dir, f"district_{district_number}.parquet"),
        index=None,
        compression="snappy"
    )

### Load all other tables and split based on DISTRICT NUMBER value (not geometry)

In [None]:
# assumes that all tables have an attribute field populated with the district number!
# Load every .geojson / .shp file from the raw folder.
# The folder that is listed must be the folder that is read: listing
# interim_dir while reading from raw_dir fails for every file that does not
# exist under the same name in both folders.
# NOTE(review): raw_dir is used because that is where the conversion cell
# reads the .geojson/.shp sources from -- confirm this is the intended folder.
# NOTE(review): gdf is overwritten on every iteration and not used yet; the
# split by district number still has to be implemented.
for file in os.listdir(raw_dir):
    if file.endswith((".geojson", ".shp")):
        gdf = gpd.read_file(os.path.join(raw_dir, file))



### Load all other tables and SPATIALLY split by district

In [None]:
# DO NOT RUN THIS CELL AUTOMATICALLY
# The SystemExit below stops execution right here, so the code after it only
# runs when this guard is deliberately removed or commented out.
raise SystemExit("Stop right there!")

# For every district: open an in-memory DuckDB database, load the district
# and all other tables from parquet, select the rows that intersect the
# district and export them to one parquet file per table and district.

for district_number in district_list:
    # load district {district_number} from parquet
    # A new in-memory database is opened per district and closed at the end of
    # the iteration, so no tables carry over from one district to the next.
    con = duckdb.connect(database=":memory:", read_only=False)
    # the spatial extension provides the ST_* functions used in the SQL below
    con.install_extension("spatial")
    con.load_extension("spatial")

    # create a duckdb table for the district
    district_path = os.path.join(interim_dir, f"district_{district_number}.parquet")
    print(f"District number: {district_number}")

    # The table holds all parquet columns plus one extra column with the
    # result of ST_GeomFromWKB(geometry).
    con.execute(
        f"""
        CREATE TABLE district_{district_number}
        AS SELECT *, ST_GeomFromWKB(geometry) 
        FROM parquet_scan('{district_path}')
        """
    )

    # Load all other tables
    # parquet_dict keys and table_names are paired by position, so both must
    # be in the same order. The full tables are reloaded for every district
    # because the connection is new in each iteration.
    for key,table in zip(parquet_dict.keys(), table_names):
        if table != 'districts':
            con.execute(
                f"""
                CREATE TABLE {table} 
                AS SELECT *, ST_GeomFromWKB(geometry) 
                FROM parquet_scan('{parquet_dict[key]}')
                """
            )

    # Spatially clip all other tables to geometry of 'district_{district_number}' and create a new table {table}_{district_number} and export it to {table}_{district_number}.parquet
    # NOTE: the query only FILTERS rows with ST_Intersects; features that
    # cross the district boundary are kept whole, geometries are not cut.
    # NOTE(review): the subquery is used as a single value, which presumably
    # requires district_{district_number} to contain exactly one row -- confirm.
    # NOTE(review): "SELECT *" already contains the source columns and the
    # converted geometry is added again "as geometry" -- check which column
    # names end up in the exported parquet file.
    for table in table_names:
        if table != 'districts':
            con.execute(
                f"""
                CREATE TABLE {table}_{district_number} 
                AS SELECT *, ST_GeomFromWKB(geometry) as geometry
                FROM {table} 
                WHERE ST_Intersects(ST_GeomFromWKB(geometry), (SELECT ST_GeomFromWKB(geometry) FROM district_{district_number}))
                """
            )
            # export the selection to <interim_dir>/<table>_<district_number>.parquet
            con.execute(
                f"""
                COPY (SELECT * FROM {table}_{district_number}) TO '{interim_dir}/{table}_{district_number}.parquet' (FORMAT 'parquet')
                """
            )
            
    # NOTE(review): the connection is only closed when the iteration finishes
    # without an exception.
    con.close()