# Setup

Imports

In [None]:
import os
import sys
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt

Fix directories, define default variables

In [None]:
# Make sure the notebook is run from the src directory, so the relative
# paths below resolve the same way whether it was started from src or from
# the project root.
cwd = os.getcwd()
# os.path.basename understands the platform's path separator; splitting on
# '/' never matches on Windows, where getcwd() returns backslash paths.
if os.path.basename(cwd) != 'src':
    try:
        os.chdir('src')
    except FileNotFoundError:
        print('Error: please run from src dir or project root')
        sys.exit(1)

# define paths (relative to src)
data_path = '../data/'
plot_path = '../plots/'
# exist_ok avoids the check-then-create race and makes re-running the cell
# a no-op; makedirs also creates any missing parent directories.
os.makedirs(plot_path, exist_ok=True)

# projection in which trees data is stored: "Amersfoort / RD New"
proj = 28992

Read in the trees data (converted to CSV). The original TIF file was obtained from [Nationaal Georegister](https://www.nationaalgeoregister.nl/geonetwork/srv/dut/catalog.search#/metadata/89611780-75d6-4163-935f-9bc0a738f7ca).

In [None]:
# Load the trees data from the CSV file; build the path from data_path so
# the data directory is configured in a single place.
df = pd.read_csv(data_path + 'bomenkaart_raw.csv')
df.shape

Read in shapefiles, obtained from [EarthWorks](https://earthworks.stanford.edu/catalog/stanford-gp502yc4422).

In [None]:
# Load the shapefile and reproject it to the projection of the trees data
# (proj), so both can be drawn on the same axes. The path is built from
# data_path so the data directory is configured in a single place.
nl_base = gpd.read_file(data_path + 'shapefiles/NLD_adm0.shp').to_crs(epsg=proj)
nl_base.shape

Visualize the data

In [None]:
fig, ax = plt.subplots(figsize=(5, 5))

# draw the shapefile outline as a backdrop for the scatter points
nl_base.plot(ax=ax, color='white', edgecolor='black')

# Plot a random subsample instead of every row. The sample size is capped at
# the number of rows, because DataFrame.sample raises ValueError when asked
# for more rows than exist (sampling without replacement).
df_sample = df.sample(min(len(df), 100000))
points = ax.scatter(
    'x',
    'y',
    c='z',
    data=df_sample,
    s=0.1,
    cmap='viridis',
)
ax.set_axis_off()

cbar = fig.colorbar(points, ax=ax, location='bottom', shrink=0.5, pad=0.05)
# Raw string: '\:' (mathtext spacing) is not a valid Python escape sequence
# and triggers a warning in a regular string literal.
cbar.set_label(r'number of trees per $100 \: m^2$')
fig.suptitle('Tree density in the Netherlands')
fig.tight_layout()
# NOTE(review): the file name says "height" while the title and labels say
# "density" - confirm which is intended before renaming the output file.
fig.savefig(plot_path + 'tree_height.png', dpi=300)

In [None]:
# Histogram of the 'z' column over the full data set (no subsampling).
fig, ax = plt.subplots(figsize=(3, 3))
df['z'].hist(ax=ax, bins=100)
ax.set_title('Distribution of tree density')
# Raw string: '\:' (mathtext spacing) is not a valid Python escape sequence
# and triggers a warning in a regular string literal.
ax.set_xlabel(r'number of trees per $100 \: m^2$')
ax.set_ylabel('number of observations')
fig.tight_layout()