# Caclulate tree visibility statistics per district

[![colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ac-willeke/urban-climate/blob/main/notebooks/01_FROST_extract_climate_data.ipynb) [![github](https://img.shields.io/badge/GitHub-View%20on%20GitHub-blue?logo=github)](https://github.com/ac-willeke/)

**Author**: Willeke A'Campo

**Description:** This notebooks shows how to calculate the Ecosystem Service statistics for tree visibility and impact per district using DuckDB. The results are stored in a new table in the database and exported to GeoJSON.

**Documentation:** 

### Data conversion | GeoJSON to GeoParquet   

In [35]:
import geopandas as gpd
from shapely.geometry import Point
from shapely.wkb import loads
import pyarrow
import os
import leafmap
import os
import duckdb
import pandas as pd

# set temp dir to network drivve to avoid disk space issues
os.environ['TMPDIR'] = r"/home/NINA.NO/willeke.acampo/Mounts/P-Prosjekter2/152022_itree_eco_ifront_synliggjore_trars_rolle_i_okosyst/TEMP"

# TODO move to kedro pipeline
municipality = "oslo"

#[30151 30160 30150 30152 30146 30145 30149 30159 30144 30148 30147 30141
# 30110 30109 30115 30155 30154 30153 30139 30117 30140 30118 30116 30111
# 30143 30106 30157 30156 30132 30158 30114 30119 30112 30108 30104 30130
# 30133 30137 30121 30120 30103 30129 30131 30138 30142 30113 30123 30105
 #30101 30134 30107 30128 30136 30122 30102 30135 30127 30125 30124 30126]



district_number = 30151
TEMP_DIR = os.environ['TMPDIR']
raw_dir = os.path.join(TEMP_DIR, "oslo", "01_raw")
interim_dir = os.path.join(TEMP_DIR, "oslo", "02_intermediate")
reporting_dir = os.path.join(TEMP_DIR, "oslo", "08_reporting")

# Define the table names
file_names = [
    f"{municipality}_study_area", 
    f"{municipality}_districts",
    f"{municipality}_bldg",
    f"{municipality}_res_bldg",
    f"{municipality}_green_space",
    f"{municipality}_open_space",
    f"{municipality}_public_open_space",
    f"{municipality}_private_open_space",
    f"{municipality}_tree_crowns"
    ]

table_names = [
    "study_area", "districts", "bldg", "res_bldg", "green_space",
    "open_space", "public_open_space", "private_open_space", "tree_crowns"
    ]

district_parquet = os.path.join(interim_dir, f"{municipality}_districts.parquet")

In [36]:
# Define the table names
file_names = [
    f"study_area_{district_number}", 
    f"districts_{district_number}",
    f"bldg_{district_number}",
    f"res_bldg_{district_number}",
    f"green_space_{district_number}",
    f"open_space_{district_number}",
    f"public_open_space_{district_number}",
    f"private_open_space_{district_number}",
    f"tree_crowns_{district_number}"
    ]

table_names = [
    "study_area", "districts", "bldg", "res_bldg", "green_space",
    "open_space", "public_open_space", "private_open_space", "tree_crowns"
    ]

# Define the parquet_dict
parquet_dict = {
    name: os.path.join(interim_dir, f"{name}.parquet") 
    for name in file_names}

In [39]:
# load all parquet files to duckdb
con = duckdb.connect(database=":memory:", read_only=False)
con.install_extension("spatial")
con.load_extension("spatial")

# Create a table for each parquet file or GeoDataFrame
for key,table in zip(parquet_dict.keys(), table_names):
    con.execute(
        f"""
        CREATE TABLE {table} 
        AS SELECT *, ST_GeomFromWKB(geometry) 
        FROM parquet_scan('{parquet_dict[key]}')
        """
        )
    

# Fetch and print all table names
result = con.execute(
    """
    SELECT table_name 
    FROM information_schema.tables 
    WHERE table_schema = 'main'
    """
    )

# Print first row of tree_crowns
result = con.execute(
    """
    SELECT * FROM tree_crowns
    """
    )
print(result.fetchone())

print(result.fetchall())

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

(126159, None, None, '120069', 0, 0, None, None, None, None, None, 29.7499905, 7.17550659, None, None, None, 7.90569305, None, 31.9999943, 117.173882, 0.592424273, 0.780327857, None, '30107', None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 31.999994732439518, 29.749990201556, b'\x01\x03\x00\x00\x00\x01\x00\x00\x00)\x00\x00\x00(\x0843x\x10"A$\xf2\xff/x^YA(\x0843x\x10"Ar\xf1\xffox^YAh\x0543y\x10"Ar\xf1\xffox^YAh\x0543y\x10"A\x1a\xf1\xff\x8fx^YA(\x0843x\x10"A\x1a\xf1\xff\x8fx^YA(\x0843x\x10"A\xc2\xf0\xff\xafx^YA\xa0\x0243z\x10"A\xc2\xf0\xff\xafx^YA\xa0\x0243z\x10"A\x1a\xf1\xff\x8fx^YA\x98\xf733~\x10"A\x1a\xf1\xff\x8fx^YA\x98\xf733~\x10"A\xc2\xf0\xff\xafx^YA\xd0\xf433\x7f\x10"A\xc2\xf0\xff\xafx^YA\xd0\xf433\x7f\x10"Aj\xf0\xff\xcfx^YA\x98\xf733~\x10"Aj\xf0\xff\xcfx^YA\x98\xf733~\x10"A\xb8\xef\xff\x0fy^YAX\xfa33}\x10"A\xb8\xef\xff\x0fy^YAX\xfa33}\x10"A\x08\xef\xffOy^YA \xfd33|\x10"A\x08\xef\xffOy^YA \xfd33|\x10"A\xfe\xed\xff\xafy^YA\xe0\xff33{\x10"A\

**Create a new Table with Tree Crown Center Points**

In [40]:
# Check if the 'crowns' table exists
table_exists = "tree_crowns" in [
    row[0]
    for row in con.execute(
        """
        SELECT table_name 
        FROM information_schema.tables 
        WHERE table_schema = 'main'
        """
    ).fetchall()
]

if table_exists:
    # convert dtype to DuckDB GEOMETRY
    result = con.execute(
        """
        SELECT 
        ST_X(ST_Centroid(ST_GeomFromWKB(geometry))), 
        ST_Y(ST_Centroid(ST_GeomFromWKB(geometry))) 
        FROM tree_crowns"""
        )
    # xy_crowns to pd
    df = pd.DataFrame(result.fetchall(), columns=["X", "Y"])
    
    xy_crowns = gpd.GeoDataFrame(
        df,
        geometry= gpd.points_from_xy(df.X, df.Y)
        )
    xy_crowns.crs = "EPSG:25832"


    # Create a new table in DuckDB
    con.execute(
        """
        CREATE TABLE tree_crowns_xy AS
        SELECT 
        ST_X(ST_Centroid(ST_GeomFromWKB(geometry))) AS X, 
        ST_Y(ST_Centroid(ST_GeomFromWKB(geometry))) AS Y,
        ST_Point(ST_X(ST_Centroid(ST_GeomFromWKB(geometry))), ST_Y(ST_Centroid(ST_GeomFromWKB(geometry)))) AS geometry
        FROM tree_crowns"""
    )

**Map study area and Tree Crown Center Points (10%-sample)**

In [43]:
# add layers to gdf for mapping
gdf_study_area = leafmap.read_parquet(
    parquet_dict[f"study_area_{district_number}"], 
    return_type='gdf', 
    src_crs="EPSG:25832", 
    dst_crs="EPSG:4326"
    )

# convert xy_crowns to wgs84
xy_crowns_sample = xy_crowns.sample(frac=0.05)
trees_xy = xy_crowns_sample.to_crs("EPSG:4326")
points_geojson = trees_xy.__geo_interface__
print("Map the tree crown center points (10% sample).")
print(trees_xy.head(2))

# Calculate the center of the study_area GeoDataFrame
center = gdf_study_area.geometry.unary_union.centroid

# --------------------------------------------------
# INIT MAP
# --------------------------------------------------
map = leafmap.Map()
# center
map.set_center(center.x, center.y, zoom=13)
# add Basemap
map.add_basemap("CartoDB.Positron")

# add study area as vector layer
map.add_gdf(
        gdf_study_area, 
        layer_name="study_area", 
        get_fill_color=[0, 0, 255, 128]
        )

map.add_gdf(
    trees_xy,
    layer_name ="trees",
    color= "black"
)

map

Map the tree crown center points (10% sample).
                 X             Y                   geometry
40   591875.631459  6.650265e+06  POINT (10.64627 59.97944)
221  591761.137588  6.649762e+06  POINT (10.64399 59.97495)


Map(center=[59.90952583025032, 10.776238876732235], controls=(ZoomControl(options=['position', 'zoom_in_text',…

### Database Connection | DuckDB

In [44]:
%%capture
columns = con.execute("PRAGMA table_info(districts)").fetchall()
for column in columns:
    print(column[1])


**Split open space, buildings and tree crowns by district**

```python
%%capture
# create new table split_open_space 
# with open space split by district boundaries


In [52]:
con.execute(
    """
    CREATE TABLE split_open_space AS 
    SELECT
        districts.grunnkretsnummer,
        ST_Intersection(ST_GeomFromWKB(districts.geometry), ST_GeomFromWKB(open_space.geometry)) AS geom
    FROM 
        districts, open_space
    WHERE
        ST_Intersects(ST_GeomFromWKB(districts.geometry), ST_GeomFromWKB(open_space.geometry));
    """
    )

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

<duckdb.duckdb.DuckDBPyConnection at 0x7fed71a22770>

In [45]:
%%capture
# create new table split_private_space
# with public space split by district boundaries
con.execute(
    """
    CREATE TABLE split_private_space AS 
    SELECT
        districts.grunnkretsnummer,
        ST_Intersection(ST_GeomFromWKB(districts.geometry), ST_GeomFromWKB(private_open_space.geometry)) AS geom
    FROM 
        districts, private_open_space
    WHERE
        ST_Intersects(ST_GeomFromWKB(districts.geometry), ST_GeomFromWKB(private_open_space.geometry));
    """
    )

In [46]:
%%capture
# create new table split_public_space
# with private space split by district boundaries
con.execute(
    """
    CREATE TABLE split_public_space AS 
    SELECT
        districts.grunnkretsnummer,
        ST_Intersection(ST_GeomFromWKB(districts.geometry), ST_GeomFromWKB(public_open_space.geometry)) AS geom
    FROM 
        districts, public_open_space
    WHERE
        ST_Intersects(ST_GeomFromWKB(districts.geometry), ST_GeomFromWKB(public_open_space.geometry));
    """
    )

In [47]:
%%capture
# create new table split_buildings
# with buildings split by district boundaries
con.execute(
    """
    CREATE TABLE split_bldg AS 
    SELECT
        districts.grunnkretsnummer,
        ST_Intersection(ST_GeomFromWKB(districts.geometry), ST_GeomFromWKB(bldg.geometry)) AS geom
    FROM 
        districts, bldg
    WHERE
        ST_Intersects(ST_GeomFromWKB(districts.geometry), ST_GeomFromWKB(bldg.geometry));
    """
    )

In [48]:
%%capture
# create new table split_res_bldg
# with buildings split by district boundaries
con.execute(
    """
    CREATE TABLE split_res_bldg AS 
    SELECT
        districts.grunnkretsnummer,
        ST_Intersection(ST_GeomFromWKB(districts.geometry), ST_GeomFromWKB(res_bldg.geometry)) AS geom
    FROM 
        districts, res_bldg
    WHERE
        ST_Intersects(ST_GeomFromWKB(districts.geometry), ST_GeomFromWKB(res_bldg.geometry));
    """
    )

In [49]:
%%capture
# create new table split_tree_crowns
# with tree crowns split by district boundaries
con.execute(
    """
    CREATE TABLE split_tree_crowns AS 
    SELECT
        districts.grunnkretsnummer,
        ST_Intersection(ST_GeomFromWKB(districts.geometry), ST_GeomFromWKB(tree_crowns.geometry)) AS geom
    FROM 
        districts, tree_crowns
    WHERE
        ST_Intersects(ST_GeomFromWKB(districts.geometry), ST_GeomFromWKB(tree_crowns.geometry));
    """
    )

In [53]:
%%capture
# print columns split_open_space
columns = con.execute("PRAGMA table_info(split_open_space)").fetchall()
for column in columns:
    print(column[1])
    
# Query to fetch all table names
tables = con.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()

# Print the first row of each table
for table in tables:
    first_row = con.execute(f"SELECT * FROM {table[0]} LIMIT 1").fetchone()
    print(f"First row of {table[0]}: {first_row}")

### Generate Columns with Count Statistics

| Name | Alias | Description | Type |  Unit | 
| --- | --- | --- | --- | --- |
| n_trees | Antall trær | Number of trees in the district | INT |
| n_bldg | Antall bygninger | Number of buildings in the district | INT |
| n_res_bldg | Antall boliger | Number of residential buildings in the district | INT |
| n_res_bldg_near_gs | Antall boliger nær grøntområde (300 m) | Number of residential buildings near green space (300 m) | INT |
| n_trees_near_rbldg | Antall trær nær boliger (15 m) | Number of trees near residential buildings (15 m) | INT |
| n_viewshed | Antall viewshed piksler | Number of viewshed pixels that intersect with the building edge | INT |



**Add columns to districts table**

In [54]:
# update districts
# add count columns: n_trees, n_bldg, n_res_bldg, n_res_bldg_near_gs, n_trees_near_rbldg
con.execute("ALTER TABLE districts ADD COLUMN n_trees INTEGER")
con.execute("ALTER TABLE districts ADD COLUMN n_bldg INTEGER")
con.execute("ALTER TABLE districts ADD COLUMN n_res_bldg INTEGER")
con.execute("ALTER TABLE districts ADD COLUMN n_res_bldg_near_gs INTEGER")
con.execute("ALTER TABLE districts ADD COLUMN perc_near_gs INTEGER")
con.execute("ALTER TABLE districts ADD COLUMN n_trees_near_rbldg INTEGER")
con.execute("ALTER TABLE districts ADD COLUMN perc_near_rbldg INTEGER")

<duckdb.duckdb.DuckDBPyConnection at 0x7fed71a22770>

In [55]:
%%capture
# print columns districts
columns = con.execute("PRAGMA table_info(split_bldg)").fetchall()
for column in columns:
    print(column[1])

**Calculate COUNT attributes**

In [57]:
# COUNT number of trees per district
n_trees = con.execute(
    """
    SELECT districts.grunnkretsnummer, COUNT(*) 
    FROM tree_crowns_xy 
    JOIN districts ON ST_Within(tree_crowns_xy.geometry, ST_GeomFromWKB(districts.geometry))
    GROUP BY districts.grunnkretsnummer
    """
    ).fetchall()

# COUNT number of buildings per district
n_bldg = con.execute(
    """
    SELECT districts.grunnkretsnummer, COUNT(*) 
    FROM split_bldg 
    JOIN districts ON ST_Within(split_bldg.geom, ST_GeomFromWKB(districts.geometry))
    GROUP BY districts.grunnkretsnummer
    """
    ).fetchall()

# COUNT number of residential buildings per district 
n_res_bldg = con.execute(
    """
    SELECT districts.grunnkretsnummer, COUNT(*) 
    FROM split_res_bldg 
    JOIN districts ON ST_Within(split_res_bldg.geom, ST_GeomFromWKB(districts.geometry))
    GROUP BY districts.grunnkretsnummer
    """
    ).fetchall()

# COUNT number of res buildings WITHIN 300m distance of green space
n_res_bldg_near_gs = con.execute(
    """
    SELECT districts.grunnkretsnummer, COUNT(*) 
    FROM res_bldg 
    JOIN districts ON ST_Within(ST_GeomFromWKB(res_bldg.geometry), ST_GeomFromWKB(districts.geometry))
    WHERE res_bldg.geometry IS NOT NULL AND districts.geometry IS NOT NULL AND EXISTS (
        SELECT 1
        FROM green_space
        WHERE green_space.geometry IS NOT NULL AND ST_DWithin(ST_GeomFromWKB(res_bldg.geometry), ST_GeomFromWKB(green_space.geometry), 300)
    )
    GROUP BY districts.grunnkretsnummer
    """
    ).fetchall()

# COUNT number of trees crowns WITHIN 15m distance of res buildings
n_trees_near_rbldg = con.execute(
    """
    SELECT districts.grunnkretsnummer, COUNT(*) 
    FROM tree_crowns
    JOIN districts ON ST_Within(ST_GeomFromWKB(tree_crowns.geometry), ST_GeomFromWKB(districts.geometry))
    WHERE EXISTS (
        SELECT 1
        FROM res_bldg
        WHERE ST_DWithin(ST_GeomFromWKB(tree_crowns.geometry), ST_GeomFromWKB(res_bldg.geometry), 15)
        AND ST_Within(ST_GeomFromWKB(res_bldg.geometry), ST_GeomFromWKB(districts.geometry))
    )
    GROUP BY districts.grunnkretsnummer
    """
    ).fetchall()

# Update districts table
for id, count in n_trees:
    con.execute(f"UPDATE districts SET n_trees = {count} WHERE grunnkretsnummer = {id}")

for id, count in n_bldg:
    con.execute(f"UPDATE districts SET n_bldg = {count} WHERE grunnkretsnummer = {id}")

for id, count in n_res_bldg:
    con.execute(f"UPDATE districts SET n_res_bldg = {count} WHERE grunnkretsnummer = {id}")

for id, count in n_res_bldg_near_gs:
    con.execute(f"UPDATE districts SET n_res_bldg_near_gs = {count} WHERE grunnkretsnummer = {id}")

for id, count in n_trees_near_rbldg:
    con.execute(f"UPDATE districts SET n_trees_near_rbldg = {count} WHERE grunnkretsnummer = {id}")
    
# print columns districts
columns = con.execute("PRAGMA table_info(districts)").fetchall()
for column in columns:
    print(column[1])

OBJECTID
fylkesnummer
fylkesnavn
kommunenummer
kommunenavn
delomradenummer
delomradenavn
grunnkretsnummer
grunnkretsnavn
kilde_admin
kilde_befolkning
id_befolkning
year_pop_stat
pop_total
pop_elderly
a_district
a_unit
a_clipped
SHAPE_Length
SHAPE_Area
geometry
st_geomfromwkb(geometry)
st_geomfromwkb(geometry):1
n_trees
n_bldg
n_res_bldg
n_res_bldg_near_gs
perc_near_gs
n_trees_near_rbldg
perc_near_rbldg


In [58]:
# if NAN set to 0
con.execute(
    """
    UPDATE districts
    SET n_res_bldg_near_gs = COALESCE(n_res_bldg_near_gs, 0)
    """
)

con.execute(
    """
    UPDATE districts
    SET n_bldg = COALESCE(n_bldg, 0)
    """
)

con.execute(
    """
    UPDATE districts
    SET n_res_bldg = COALESCE(n_res_bldg, 0)
    """
)

con.execute(
    """
    UPDATE districts
    SET n_trees_near_rbldg = COALESCE(n_trees_near_rbldg, 0)
    """
)
# normalize perc_near_gs
con.execute(
    """
    UPDATE districts
    SET perc_near_gs = (n_res_bldg_near_gs / n_res_bldg) * 100
    """
)

# normalize perc_near_rbldg
con.execute(
    """
    UPDATE districts
    SET perc_near_rbldg = (n_trees_near_rbldg / n_trees) * 100
    """
)

<duckdb.duckdb.DuckDBPyConnection at 0x7fed71a22770>

### Generate Columns with Area Statistics

| Name | Alias | Description | Type |  Unit |
| --- | --- | --- | --- | --- |
| a_district | Grunnkretsareal | Area of the district | FLOAT | m2 |
| a_open_space | Åpent område | Area of open space | FLOAT | m2 |
| a_private_space | Privat område | Area of private space | FLOAT | m2 |
| a_public_space | Offentlig område | Area of public space | FLOAT | m2 |
| a_crown | Kroneareal | Crown coverage area within the district | FLOAT | m2 |
| perc_crown | Trekronedekningsgrad (%) | Tree crown coverage % | FLOAT | % |


**Add columns to districts table**

In [59]:
# update districts
# add area columns: a_open_space, a_private_space, a_public_space, a_green_space, a_crown, a_crown_public, a_crown_private
con.execute("ALTER TABLE districts ADD COLUMN a_open_space FLOAT")
con.execute("ALTER TABLE districts ADD COLUMN a_private_space FLOAT")
con.execute("ALTER TABLE districts ADD COLUMN a_public_space FLOAT")
con.execute("ALTER TABLE districts ADD COLUMN a_crown FLOAT")
con.execute("ALTER TABLE districts ADD COLUMN perc_crown FLOAT")

<duckdb.duckdb.DuckDBPyConnection at 0x7fed71a22770>

**Caluclate AREA attributes**

In [60]:
# AREA of open space per district
# calculate the overlapping area of open space with district X, A etc. 
con.execute(
    """
    UPDATE districts
    SET a_open_space = (
        SELECT SUM(ST_Area(ST_Intersection(ST_GeomFromWKB(districts.geometry), split_open_space.geom)))
        FROM split_open_space
        WHERE ST_Intersects(ST_GeomFromWKB(districts.geometry), split_open_space.geom)
    )
    """
)

# AREA of private open space per district
# calculate the overlapping area of private open space with district X, A etc.
con.execute(
    """
    UPDATE districts
    SET a_private_space = (
        SELECT SUM(ST_Area(ST_Intersection(ST_GeomFromWKB(districts.geometry), split_private_space.geom)))
        FROM split_private_space
        WHERE ST_Intersects(ST_GeomFromWKB(districts.geometry), split_private_space.geom)
    )
    """
)

# AREA of public open space per district
# calculate the overlapping area of public open space with district X, A etc.
con.execute(
    """
    UPDATE districts
    SET a_public_space = (
        SELECT SUM(ST_Area(ST_Intersection(ST_GeomFromWKB(districts.geometry), split_public_space.geom)))
        FROM split_public_space
        WHERE ST_Intersects(ST_GeomFromWKB(districts.geometry), split_public_space.geom)
    )
    """
)

# AREA of tree crowns per district
# calculate the overlapping area of tree crowns with district X, A etc.
con.execute(
    """
    UPDATE districts
    SET a_crown = (
        SELECT SUM(ST_Area(ST_Intersection(ST_GeomFromWKB(districts.geometry), split_tree_crowns.geom)))
        FROM split_tree_crowns
        WHERE ST_Intersects(ST_GeomFromWKB(districts.geometry), split_tree_crowns.geom)
    )
    """
)

# PERCENTAGE of tree crown coverage per district 
con.execute(
    """
    UPDATE districts
    SET perc_crown = (a_crown / a_clipped) * 100
    """
)

<duckdb.duckdb.DuckDBPyConnection at 0x7fed71a22770>

**Export to dataframe**

In [61]:
# Export districts to DataFrame
df = pd.read_sql("SELECT * FROM districts", con)

# Convert geometry column from WKB to shapely geometry
df['geometry'] = df['geometry'].apply(loads, hex=True)

# Convert DataFrame to GeoDataFrame
gdf = gpd.GeoDataFrame(df, geometry='geometry')
gdf_sorted = gdf.sort_values(by='grunnkretsnummer', ascending=True)
gdf_sorted = gdf_sorted.round(2)

# Define the %-bins and labels
labels = ["no data", "0-25%", "25-50%", "50-75%", "75-100%"]
bins = pd.IntervalIndex.from_tuples([(-0.01, 25), (25, 50), (50, 75), (75, 100)])
dict = {
    "nan":"no data", 
    "(-0.01, 25.0]": "0-25%", 
    "(25.0, 50.0]": "25-50%", 
    "(50.0, 75.0]": "50-75%", 
    "(75.0, 100.0]": "75-100%"
    }

# Near Residential Buildings % Categories
gdf_sorted['bldg_bins'] = pd.cut(gdf_sorted['perc_near_rbldg'], bins)
gdf_sorted['bldg_bins'] = gdf_sorted['bldg_bins'].astype(str)
gdf_sorted['labels_near_rbldg'] = gdf_sorted['bldg_bins'].map(dict)

# Near Green Space % Categories
gdf_sorted['gs_bins'] = pd.cut(gdf_sorted['perc_near_gs'], bins)
gdf_sorted['gs_bins'] = gdf_sorted['gs_bins'].astype(str)
gdf_sorted['labels_near_gs'] = gdf_sorted['gs_bins'].map(dict)

# Crown Coverate % Categories
gdf_sorted['crown_bins'] = pd.cut(gdf_sorted['perc_crown'], bins)
gdf_sorted['crown_bins'] = gdf_sorted['crown_bins'].astype(str)
gdf_sorted['labels_perc_crown'] = gdf_sorted['crown_bins'].map(dict)

gdf_sorted.drop(columns=['bldg_bins', 'gs_bins', 'crown_bins'], inplace=True)

# display col "kommunenavn","grunnkrestnummer", "grunnkretsnavn" n_bldg, n_res_bldg, n_res_bldg_near_gs, n_trees_near_rbldg, a_open_space, a_private_space, a_public_space, a_crown, perc_crown
display(gdf_sorted[[
    "kommunenavn","grunnkretsnummer", "grunnkretsnavn", 
    "n_trees", "n_bldg", "n_res_bldg", 
    "n_res_bldg_near_gs", "perc_near_gs","labels_near_gs",
    "n_trees_near_rbldg", "perc_near_rbldg", "labels_near_rbldg",
    "a_open_space", "a_private_space", "a_public_space", "a_crown", 
    "perc_crown", "labels_perc_crown"
    ]]
        )
gdf.crs = "EPSG:25832"
gdf_sorted.crs = "EPSG:25832"


gdf_mapping = gdf.to_crs("EPSG:4326")

Unnamed: 0,kommunenavn,grunnkretsnummer,grunnkretsnavn,n_trees,n_bldg,n_res_bldg,n_res_bldg_near_gs,perc_near_gs,labels_near_gs,n_trees_near_rbldg,perc_near_rbldg,labels_near_rbldg,a_open_space,a_private_space,a_public_space,a_crown,perc_crown,labels_perc_crown
0,Oslo,3015103,Sørkedalen Øst,269,0,0,0,,no data,0,0,0-25%,5671.41,118.12,5570.37,23651.01,40.03,25-50%


In [62]:
%%capture
print(gdf_sorted.columns)
#drop duckdb geom col
gdf_sorted.drop(columns=['st_geomfromwkb(geometry)'], inplace=True)

# sort columns 
new_order = ['OBJECTID', 'fylkesnummer', 'fylkesnavn', 'kommunenummer',
       'kommunenavn', 'delomradenummer', 'delomradenavn', 'grunnkretsnummer',
       'grunnkretsnavn', 'kilde_admin', 'kilde_befolkning', 'id_befolkning',
       'year_pop_stat', 'pop_total', 'pop_elderly', 'a_district', 'a_unit',
       'a_clipped', 'n_trees', 'n_bldg', 'n_res_bldg',
       'n_res_bldg_near_gs', 'perc_near_gs', 'labels_near_gs',
       'n_trees_near_rbldg', 'perc_near_rbldg', 'labels_near_rbldg',
       'a_open_space', 'a_private_space', 'a_public_space',
       'a_crown', 'perc_crown','labels_perc_crown',  
       'SHAPE_Length', 'SHAPE_Area', 'geometry']

# Reorder the columns
gdf_sorted = gdf_sorted[new_order]
print(gdf_sorted.columns)

**Export to Parquet, GeoJSON and CSV format**

In [64]:
# Export GDF to file 
filepath = os.path.join(reporting_dir, f"{municipality}_district_treeVis_stat")

# Write to .parquet
gdf_sorted.to_parquet(os.path.join(filepath + '.parquet'))

# Write to .geojson
gdf_sorted.to_file(os.path.join(filepath + '.geojson'), driver='GeoJSON')

# Write to .csv
gdf_sorted.to_csv(os.path.join(filepath + '.csv'))