
# Create a mixed woodland data model asset by combining dense and sparse woodland assets

Miles Clement (miles.clement@defra.gov.uk)

Last Updated 17/03/25

## Setup
#### Packages

In [0]:
import pandas as pd
import os
from pathlib import Path
from functools import reduce

In [0]:
from sedona.spark import *
from pyspark.sql.functions import expr, when, col, lit, sum
from pyspark.sql import functions as F

# Build the Sedona context on top of the notebook's existing Spark session
sedona = SedonaContext.create(spark)
# Clear anything cached in the SQL context before this run
sqlContext.clearCache()

# Look up the user name from the notebook context exposed through dbutils
_notebook_context = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
username = _notebook_context.userName().get()

#### Point to directory containing asset tables

In [0]:
# Asset table directory, addressed through the local /dbfs mount path
table_dir = Path(
    "/dbfs/mnt/lab-res-a1001005/esd_project/Defra_Land/Final/Asset_Tables"
)
# Same location as a string with "/dbfs" swapped for the "dbfs:" prefix;
# this is the form used for the Spark reads and writes
alt_table_dir = os.fspath(table_dir).replace("/dbfs", "dbfs:")

In [0]:
# Read the dense woodland asset table (parquet) from the asset directory
dense = sedona.read.format("parquet").load(
    f"{alt_table_dir}/10m_x_assets_combined_dense_woodland.parquet"
)

# Read the sparse woodland asset table and strip the columns that would
# otherwise appear twice once it is joined onto the dense table
sparse = (
    sedona.read.format("parquet")
    .load(f"{alt_table_dir}/10m_x_assets_combined_sparse_woodland.parquet")
    .drop("geometry", "dgl_fh", "dgl_lh")
)

# Left join on "id": every dense row is kept, with the sparse columns
# attached wherever a matching id exists
mixed = dense.join(sparse, "id", "left")

In [0]:
# Save the joined table as parquet, overwriting any existing output at this path
mixed.write.mode("overwrite").format("parquet").save(
    f"{alt_table_dir}/10m_x_assets_combined_mixed_woodland.parquet"
)