## Notebook for producing plots showing habitat occurrence on Defra Group Land

Miles Clement, Mar 2025

Script produces three plots for the DGL Habitat Extents report

- Figure 2 - Area of habitats found on DEFRA land. Stacked bar chart with tenure splits.
- Figure 3 - Areas of habitats found on DEFRA land, split by tenure. Logarithmic y-axis. Bar chart.
- Figure 5 - Habitat breakdown for the three largest landowners within the DEFRA group, shown as a percentage of the total land owned by each organisation. Bar chart.

**Habitat Notes**
- Saltmarsh is a subset of Coastal Margins, and included in the extent of the latter (beware of double counting)
- Upland Bog is a subset of Moorland & Heath, and included in the extent of the latter (beware of double counting)
- Dense and Sparse Woodland will also be reported as combined/mixed woodland


---------
###SETUP
####Load Packages

In [0]:
import numpy as np
import pandas as pd
from pathlib import Path

import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.ticker as mtick


In [0]:
from matplotlib.font_manager import FontEntry, fontManager
from matplotlib.pyplot import rcParams

# Register the Arial TrueType file with matplotlib's font manager and make
# "Arial" the sans-serif family used by the figures below.
font_entry = FontEntry(
    name="Arial",
    size="scalable",
    fname="/dbfs/mnt/lab/unrestricted/james.kenyon@defra.gov.uk/font/Arial.ttf",
)
# Put the entry at the front of the manager's list of known fonts.
fontManager.ttflist[:0] = [font_entry]
rcParams["font.sans-serif"] = ["Arial"]

#### Load in Core Datasets

In [0]:
# Directory holding the summary statistics CSVs that the next cell reads.
data_in_dir = Path('/dbfs/mnt/lab-res-a1001005/esd_project/Defra_Land/Final/Stats')

In [0]:
# Load the two summary tables used throughout the notebook.
# data_in_dir is already a pathlib.Path, so join file names with "/" rather
# than formatting the directory into a string.

# Habitat areas split by tenure (feeds Figures 2 and 3).
summary_tenure_df = pd.read_csv(data_in_dir / 'dgl_summary_tenure.csv')

# Habitat areas split by organisation (feeds Figure 5).
summary_org_df = pd.read_csv(data_in_dir / 'dgl_summary_organisation.csv')

In [0]:
# Show the tenure summary table as the cell output.
summary_tenure_df

In [0]:
# Show the organisation summary table as the cell output.
summary_org_df

-----
###FIGURE 2
Area of habitats found on DEFRA land. Stacked bar chart with tenure splits.

####Data Prep for Figures 2 & 3
- Remove subset habitats 
- Rename habitats in tenure dataset 

In [0]:
# Habitats that are subsets of (or components of) another reported habitat.
# They are dropped here so their area is not counted twice in Figures 2 and 3.
subset_habs = ["dense_woodland", "sparse_woodland", "saltmarsh", "upland_bog"]

# Raw habitat code -> display name used on the plots.
habitat_rename_mapping = {
    'mixed_woodland': 'Woodland',
    'moorland': 'Mountain, Moorland & Heath',
    'grassland': 'Semi-Natural Grassland',
    'coastal': 'Marine & Coastal Margins',
    'arable': 'Enclosed Farmland',
    'water': 'Freshwater & Wetlands',
    'urban': 'Urban'
}

# Raw column name -> plot-friendly column name. The key order is also the
# column order of the resulting table.
tenure_column_labels = {
    'habitat': 'Habitat',
    'freehold_ha': 'Freehold Area (ha)',
    'leasehold_ha': 'Leasehold Area (ha)',
    'mixed_tenure_ha': 'Mixed Tenure Areas (ha)'
}

# Keep only the rows whose habitat is not one of the subset habitats, and only
# the columns needed for plotting, then apply the friendlier column names.
is_top_level_habitat = ~summary_tenure_df["habitat"].isin(subset_habs)
fig_2a3_df = summary_tenure_df.loc[
    is_top_level_habitat, list(tenure_column_labels)
].rename(columns=tenure_column_labels)

# Swap the raw habitat codes for their display names.
fig_2a3_df['Habitat'] = fig_2a3_df['Habitat'].replace(habitat_rename_mapping)

In [0]:
# Show the prepared Figure 2/3 table as the cell output.
fig_2a3_df

In [0]:
# One integer x position per habitat row; reused by the Figure 2 and Figure 3 cells.
x = np.arange(len(fig_2a3_df["Habitat"]))
# Bar width for the stacked chart (Figure 2). The Figure 3 section reassigns it.
width = 0.7

In [0]:
# Hex colour for each habitat, keyed by the habitat's display name.
colors = {
    "Woodland": "#28A197",
    "Mountain, Moorland & Heath": "#A285D1",
    "Semi-Natural Grassland": "#F46A25",
    "Marine & Coastal Margins": "#12436D",
    "Enclosed Farmland": "#801650",
    "Freshwater & Wetlands": "#2073BC",
    "Urban": "#3D3D3D",
}

# One colour per row of the plotting table, in row order. Despite the name,
# each entry is the colour of that row's habitat.
tenure_colours = [colors[name] for name in fig_2a3_df["Habitat"].tolist()]

In [0]:
# Figure 2: stacked bar chart with one bar per habitat. Freehold, leasehold
# and mixed-tenure areas are stacked on top of each other and told apart by
# fill style (solid / dotted hatch / outline only), all in the habitat's colour.
fig, ax = plt.subplots(figsize=(13, 9))

# Bottom level (freehold), filled bars
bar1 = ax.bar(
    x,
    fig_2a3_df["Freehold Area (ha)"],
    width,
    label="Freehold",
    color=tenure_colours,
    edgecolor=tenure_colours,
    zorder=3,  # draw above the grid lines (zorder=0)
)
# Level 2 (leasehold), pattern fill bars stacked on the freehold bars
bar2 = ax.bar(
    x,
    fig_2a3_df["Leasehold Area (ha)"],
    width,
    bottom=fig_2a3_df["Freehold Area (ha)"],
    label="Leasehold",
    color="white",
    edgecolor=tenure_colours,
    hatch="..",
    zorder=3,
)
# Level 3 (mixed), transparent bars stacked on freehold + leasehold
bar3 = ax.bar(
    x,
    fig_2a3_df["Mixed Tenure Areas (ha)"],
    width,
    bottom=fig_2a3_df["Freehold Area (ha)"] + fig_2a3_df["Leasehold Area (ha)"],
    label="Mixed Tenure",
    color="none",
    edgecolor=tenure_colours,
    zorder=3,
)

# Multi-line versions of the longer habitat names. The tick labels are looked
# up from the table's own Habitat column so that each label always matches the
# bar drawn at that position, whatever the row order of the input data. A name
# without an entry here is shown unwrapped.
wrapped_habitat_labels = {
    "Woodland": "Woodland",
    "Mountain, Moorland & Heath": "Mountain,\nMoorland\n& Heath",
    "Semi-Natural Grassland": "Semi-Natural\nGrassland",
    "Marine & Coastal Margins": "Marine &\nCoastal\nMargins",
    "Enclosed Farmland": "Enclosed\nFarmland",
    "Freshwater & Wetlands": "Freshwater &\nWetlands",
    "Urban": "Urban",
}
split_labels = [wrapped_habitat_labels.get(name, name) for name in fig_2a3_df["Habitat"]]

# y-axis
ax.set_ylabel("Area (ha)", fontsize=18)
ax.tick_params(axis='y', labelsize=15)
# x-axis
ax.set_xticks(x)
ax.set_xticklabels(split_labels, fontsize=18)
# Grid lines: a major tick every 50,000 ha, labelled with thousands separators
ax.yaxis.set_major_locator(mtick.MultipleLocator(50000))
ax.yaxis.set_major_formatter(mtick.FuncFormatter(lambda value, _pos: f"{value:,.0f}"))
ax.grid(axis='y', linestyle='--', alpha=0.7, zorder=0)

# Show the plot
plt.tight_layout()
plt.show()

-----
###FIGURE 3
Areas of habitats found on DEFRA land, split by tenure. Logarithmic y-axis. Bar chart.

In [0]:
# Narrower bars for the grouped chart: the Figure 3 cell draws three bars per habitat at x - width, x and x + width.
width = 0.25 

In [0]:
# Figure 3: grouped bar chart with three side-by-side bars per habitat
# (freehold, leasehold, mixed tenure) on a logarithmic y-axis.
fig, ax = plt.subplots(figsize=(13, 9))

# (x offset, data column, legend label, colour) for each tenure type.
tenure_series = [
    (-width, "Freehold Area (ha)", "Freehold", "#28A197"),
    (0, "Leasehold Area (ha)", "Leasehold", "#F46A25"),
    (width, "Mixed Tenure Areas (ha)", "Mixed Tenure", "#801650"),
]
for offset, column, tenure_label, colour in tenure_series:
    ax.bar(
        x + offset,
        fig_2a3_df[column],
        width,
        label=tenure_label,
        color=colour,
        zorder=3,  # draw above the grid lines (zorder=0)
    )

# Multi-line versions of the longer habitat names. The tick labels are looked
# up from the table's own Habitat column so that each label always matches the
# bars drawn at that position, whatever the row order of the input data. A
# name without an entry here is shown unwrapped.
wrapped_habitat_labels = {
    "Woodland": "Woodland",
    "Mountain, Moorland & Heath": "Mountain,\nMoorland\n& Heath",
    "Semi-Natural Grassland": "Semi-Natural\nGrassland",
    "Marine & Coastal Margins": "Marine &\nCoastal\nMargins",
    "Enclosed Farmland": "Enclosed\nFarmland",
    "Freshwater & Wetlands": "Freshwater &\nWetlands",
    "Urban": "Urban",
}
split_labels = [wrapped_habitat_labels.get(name, name) for name in fig_2a3_df["Habitat"]]

# y-axis
ax.set_yscale("log")  # Logarithmic scale for y-axis
ax.set_ylabel("Area (ha, Logarithmic)", fontsize=18)
ax.tick_params(axis='y', labelsize=15)
# x-axis
ax.set_xticks(x)
ax.set_xticklabels(split_labels, fontsize=18)
# Grid lines
ax.yaxis.set_major_formatter(mtick.FuncFormatter(lambda value, _pos: f'{int(value):,}'))  # Format tick labels with commas
ax.set_yticks([10, 100, 1000, 10000, 100000, 200000])  # Custom tick marks
ax.grid(axis='y', linestyle='--', alpha=0.7, zorder=0)  # Grid lines

# Legend
ax.legend(fontsize=18)

# Show the plot
plt.tight_layout()
plt.show()


-----
###FIGURE 5
Habitat breakdown for the three largest landowners within the DEFRA group, shown as a percentage of the total land owned by each organisation. Bar chart.

####Data Prep for Figure 5
- Rename "Department for Environment, Food and Rural Affairs (Forestry Commission or Forestry England)" to "DEFRA (FC/FE)"
- Calculate organisational totals

In [0]:
# Replace the long Forestry Commission / Forestry England organisation name
# with a short label that fits on the Figure 5 axis.
organisation_short_names = {
    "Department for Environment, Food and Rural Affairs (Forestry Commission or Forestry England)": "DEFRA (FC/FE)",
}
summary_org_df["organisation"] = summary_org_df["organisation"].replace(organisation_short_names)

In [0]:
# Total area per organisation. Subset habitats are left out first so that
# their area is not counted twice in the totals.
area_columns = ["freehold_ha", "leasehold_ha", "mixed_tenure_ha"]
not_subset_habitat = ~summary_org_df["habitat"].isin(subset_habs)

# Sum each tenure column within every organisation.
fig_5_filtered_df = (
    summary_org_df.loc[not_subset_habitat]
    .groupby("organisation", as_index=False)
    .agg(dict.fromkeys(area_columns, "sum"))
)

# Overall area per organisation across the three tenure types.
fig_5_filtered_df["total_ha"] = (
    fig_5_filtered_df["freehold_ha"]
    + fig_5_filtered_df["leasehold_ha"]
    + fig_5_filtered_df["mixed_tenure_ha"]
)

# Largest organisation first, with a fresh 0..n-1 index.
summary_df = fig_5_filtered_df.sort_values(by="total_ha", ascending=False).reset_index(drop=True)

summary_df

In [0]:
# Names of the three organisations with the largest total area
# (summary_df is already sorted largest first).
top_3_organisations = summary_df["organisation"].head(3).tolist()

In [0]:
# Per-habitat areas for the three largest organisations. Unlike the totals
# calculation, the subset habitats are kept here.
in_top_3 = summary_org_df["organisation"].isin(top_3_organisations)

# One row per (organisation, habitat) with each tenure column summed.
fig_5_top3_df = (
    summary_org_df.loc[in_top_3]
    .groupby(["organisation", "habitat"], as_index=False)
    .agg(dict.fromkeys(["freehold_ha", "leasehold_ha", "mixed_tenure_ha"], "sum"))
)

# Area of each habitat across the three tenure types.
fig_5_top3_df["total_ha"] = (
    fig_5_top3_df["freehold_ha"] + fig_5_top3_df["leasehold_ha"] + fig_5_top3_df["mixed_tenure_ha"]
)

In [0]:
# Attach each organisation's overall total to its habitat rows. The incoming
# total_ha column gets the "_org_total" suffix because the habitat rows
# already have a total_ha column of their own.
organisation_totals = fig_5_filtered_df[["organisation", "total_ha"]]
fig_5_top3_df = pd.merge(
    fig_5_top3_df,
    organisation_totals,
    on="organisation",
    suffixes=("", "_org_total"),
)

# Habitat area as a percentage of the organisation's total. The totals exclude
# the subset habitats while the rows include them, so an organisation's
# percentages can add up to more than 100.
habitat_share = fig_5_top3_df["total_ha"] / fig_5_top3_df["total_ha_org_total"]
fig_5_top3_df["percentage_of_total"] = habitat_share * 100

# Keep just the columns needed for plotting.
fig_5_top3_df = fig_5_top3_df.loc[:, ["organisation", "habitat", "percentage_of_total"]]

fig_5_top3_df


In [0]:
# Rename habitats (make prettier): raw habitat code -> display name used in
# the Figure 5 legend. A single mapping replaces one .replace() call per
# habitat and mirrors how the Figure 2/3 table is renamed.
fig_5_habitat_labels = {
    "arable": "Enclosed Farmland",
    "coastal": "Marine & Coastal Margins",
    "dense_woodland": "Woodland (Dense)",
    "grassland": "Semi-Natural Grassland",
    "mixed_woodland": "Woodland (Mixed)",
    "moorland": "Mountain, Moorland & Heath",
    "saltmarsh": "Saltmarsh",
    "sparse_woodland": "Woodland (Sparse)",
    "upland_bog": "Upland Bog",
    "urban": "Urban",
    "water": "Freshwater & Wetlands",
}

# Values without an entry in the mapping are left unchanged.
fig_5_top3_df["habitat"] = fig_5_top3_df["habitat"].replace(fig_5_habitat_labels)

fig_5_top3_df

In [0]:
# Reorder organisations (high to low)
# NOTE(review): the following cell begins with this same assignment, so this cell is redundant.
organisation_order = ['DEFRA (FC/FE)', 'Natural England', 'Environment Agency']


In [0]:
# Left-to-right order of the organisations on the Figure 5 x-axis.
# NOTE(review): hard-coded; presumably matches the top-3 ranking computed
# earlier (largest first) - confirm if the input data changes.
organisation_order = ['DEFRA (FC/FE)', 'Natural England', 'Environment Agency']

# Make organisation an ordered categorical so that sorting follows the order
# above rather than alphabetical order, then sort by organisation and habitat.
fig_5_top3_df['organisation'] = pd.Categorical(
    fig_5_top3_df['organisation'],
    categories=organisation_order,
    ordered=True,
)
fig_5_top3_df = fig_5_top3_df.sort_values(by=['organisation', 'habitat'])

# One row per habitat, in plotting order:
# (display name, fill colour, edge colour, hatch pattern or None).
# Subset habitats use a grey fill, a hatch, and their parent habitat's colour
# as the edge colour.
_habitat_style_rows = [
    ("Woodland (Mixed)", "#28A197", "#28A197", None),
    ("Woodland (Dense)", "#BFBFBF", "#28A197", "//"),
    ("Woodland (Sparse)", "#BFBFBF", "#28A197", "\\"),
    ("Mountain, Moorland & Heath", '#A285D1', "#A285D1", None),
    ("Upland Bog", '#BFBFBF', "#A285D1", "O"),
    ("Semi-Natural Grassland", '#F46A25', "#F46A25", None),
    ("Marine & Coastal Margins", '#12436D', "#12436D", None),
    ("Saltmarsh", '#BFBFBF', "#12436D", "."),
    ("Enclosed Farmland", '#801650', "#801650", None),
    ("Freshwater & Wetlands", '#2073BC', "#2073BC", None),
    ("Urban", '#3D3D3D', "#3D3D3D", None),
]

# Plotting order for habitats.
plot_habs = [row[0] for row in _habitat_style_rows]

# Habitat display name -> fill colour, edge colour and hatch pattern.
habitat_colors = {
    name: {"fill": fill, "edge": edge, "pattern": pattern}
    for name, fill, edge, pattern in _habitat_style_rows
}

# Fill colours alone, in plotting order.
habitat_fill_colors = [habitat_colors[hab]["fill"] for hab in plot_habs]

In [0]:
# Layout constants for the grouped bars in Figure 5.
# bar_width is the width of a single habitat bar; the plotting cell offsets the
# i-th habitat by i * bar_width from the group's starting position.
bar_width = 0.07  # Adjust the width of the bars
# Starting x position of each organisation's group of bars (one per organisation).
x_positions = np.arange(len(organisation_order))  

In [0]:
# Figure 5: grouped bar chart with one group per organisation and one bar per
# habitat, showing each habitat as a percentage of the organisation's total.
plt.figure(figsize=(18, 11))

# Horizontal centre of each organisation's group of bars. Bars are centred on
# x_positions + i * bar_width for i = 0 .. len(plot_habs) - 1, so the middle of
# the group is (len(plot_habs) - 1) / 2 bar widths from the first bar.
group_centres = x_positions + (len(plot_habs) - 1) * bar_width / 2

# Loop through each habitat and plot one bar per organisation
for i, habitat in enumerate(plot_habs):
    # Rows for the current habitat
    habitat_data = fig_5_top3_df[fig_5_top3_df['habitat'] == habitat]

    # Align the percentages to organisation_order explicitly, not by row
    # order. An organisation with no row for this habitat gets a zero-height
    # bar; without this the heights would be shorter than the positions.
    pct_by_org = dict(zip(habitat_data['organisation'], habitat_data['percentage_of_total']))
    heights = [pct_by_org.get(org, 0.0) for org in organisation_order]

    # Fill colour, edge colour and hatch pattern (None = no hatch) for this habitat
    style = habitat_colors[habitat]

    plt.bar(
        x_positions + i * bar_width,  # offset each habitat's bars within the group
        heights,
        width=bar_width,
        label=habitat,
        color=style["fill"],
        edgecolor=style["edge"],
        hatch=style["pattern"],
        zorder=3,  # draw above the grid lines (zorder=0)
    )

# Total area (ha) of each organisation, taken from the organisation totals
# table built earlier, in the same order as the bar groups.
total_ha_values = [
    fig_5_filtered_df.loc[fig_5_filtered_df['organisation'] == org, 'total_ha'].values[0]
    for org in organisation_order
]

# Formatted totals (thousands separators, one decimal place) shown under each
# organisation name.
x_labels = [f"{total_ha:,.1f} ha" for total_ha in total_ha_values]

# Organisation names as tick labels, centred under each group of bars
plt.xticks(group_centres, organisation_order, fontsize=21)

# Total-area labels, placed below the organisation names. The y value is in
# data units (percent), i.e. just below the bottom of the axis.
for centre, label in zip(group_centres, x_labels):
    plt.text(
        centre,
        -4.4,
        label,
        ha='center',  # Horizontal alignment
        fontsize=21,
        color='black',
    )

# y-axis
plt.yticks(fontsize=18)
plt.ylabel("Percentage of Organisation Total (%)", fontsize=21)
# legend
plt.legend(fontsize=20)
# grid
plt.grid(axis='y', zorder=0)

# Show the plot
plt.tight_layout()
plt.show()