# Forest Trends
Source: https://www.forest-trends.org/project-list/

In [1]:
import json
import requests
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from bs4 import BeautifulSoup
from tqdm import tqdm

Helper functions

In [2]:
def get_text(soup, class_name, default=""):
    """Return the text of the first element with ``class_name``, minus its label.

    The label is derived from the class name by dropping its leading "p" and
    capitalizing the remainder, followed by a colon ("psize" -> "Size:").

    Args:
        soup: Parsed document, or any object exposing ``find(class_=...)``.
        class_name: CSS class of the element to read.
        default: Value returned when no matching element is found.

    Returns:
        The element text with the derived label removed and surrounding
        whitespace stripped, or ``default`` when the element is missing.
    """
    element = soup.find(class_=class_name)
    if not element:
        return default

    # Drop only the leading "p". Removing every "p" turned "pdescription"
    # into the label "Descrition:", so "Description:" was never stripped.
    field_name = class_name[1:] if class_name.startswith("p") else class_name
    label = f"{field_name.capitalize()}:"
    return element.get_text(strip=True).replace(label, "").strip()

def get_subcontact_text(soup, label):
    """Return the value of a "subcontact" div whose string contains ``label``.

    Looks up the first ``div`` with class ``subcontact`` whose string includes
    ``label`` (for example "Name:" or "Organization:"), removes the label from
    its text and strips whitespace. Returns "" when no such div is found.
    """
    def mentions_label(text):
        # Same predicate as before: falsy for missing text, else a substring test.
        return text and label in text

    match = soup.find("div", class_="subcontact", string=mentions_label)
    if not match:
        return ""

    raw_text = match.get_text(strip=True)
    return raw_text.replace(label, "").strip()


Get project list

In [3]:
# Project-list endpoint; `ids` is sent empty (presumably meaning "all projects" - confirm).
url = "https://www.forest-trends.org/wp-content/themes/foresttrends/map_tools/project_fetch.php?ids="

# A timeout keeps a stalled connection from hanging the notebook, and
# raise_for_status surfaces HTTP errors before the body is parsed as JSON.
project_list_response = requests.get(url, timeout=60)
project_list_response.raise_for_status()

# `response` holds the decoded JSON payload (read via response['markers']).
response = project_list_response.json()

In [4]:
# Flatten the marker records into a table; nested keys are joined with "_".
markers = response['markers']
projects = pd.json_normalize(markers, sep='_')

Get additional descriptions

In [5]:
# Scrape the detail endpoint once per unique project id.
# Rows are collected in a list and converted to a DataFrame once at the end,
# instead of re-copying a growing frame with pd.concat on every iteration.
records = []

# One session lets the requests reuse pooled connections.
with requests.Session() as session:
    for site in tqdm(projects.pid.unique().tolist()):
        url = f'https://www.forest-trends.org/wp-content/themes/foresttrends/map_tools/project_fetch_single.php?pid={site}'
        # Dedicated name: `response` still holds the project-list JSON that the
        # cell building `projects` reads, so it must not be overwritten here.
        site_response = session.get(url, timeout=60)

        # Best effort: projects whose detail request does not return 200 are
        # skipped and end up with missing detail columns after the left merge.
        if site_response.status_code != 200:
            continue

        soup = BeautifulSoup(site_response.json()['html'], 'html.parser')

        # Look each element up once and reuse the result below.
        intervention_items = soup.select('.pinterventions ul li')
        email_link = soup.find("a", href=lambda h: h and h.startswith("mailto:"))
        web_link = soup.find("a", href=lambda h: h and h.startswith("http"))

        records.append({
            "Size": get_text(soup, "psize"),
            "Status": get_text(soup, "pstatus"),
            "Description": get_text(soup, "pdescription"),
            "Objective": get_text(soup, "pobjective"),
            # Joining an empty list already yields "", so no extra guard is needed.
            "Interventions": ", ".join(li.get_text(strip=True) for li in intervention_items),
            "Market Mechanism": get_text(soup, "pmarketmechanism"),
            "Contact Name": get_subcontact_text(soup, "Name:"),
            "Contact Organization": get_subcontact_text(soup, "Organization:"),
            "Contact Email": email_link.get_text(strip=True) if email_link else "",
            "Contact URL": web_link.get_text(strip=True) if web_link else "",
            # Join key for merging the details back onto `projects`.
            "pid": site,
        })

sites = pd.DataFrame(records)

100%|██████████| 4587/4587 [34:24<00:00,  2.22it/s]   


In [6]:
# Left join on the project id keeps every row of `projects`, including
# those for which no detail record was scraped.
df = pd.merge(projects, sites, how='left', on='pid')

In [7]:
# Strip the " ha" suffix, parse the remainder as a number (unparseable values
# become NaN), then convert hectares to square kilometres (100 ha = 1 km²).
HECTARES_PER_SQKM = 100
size_hectares = pd.to_numeric(
    df["Size"].str.replace(" ha", "", regex=False),
    errors="coerce",
)
df["site_sqkm_reported"] = size_hectares / HECTARES_PER_SQKM

In [8]:
# The country is taken as the text after the last comma of `location`.
# Non-string values (e.g. missing locations) pass through unchanged; casting
# the column to str first turned NaN into the literal string "nan" and made
# the isinstance guard unreachable.
df['country'] = df['location'].apply(
    lambda loc: loc.split(",")[-1].strip() if isinstance(loc, str) else loc
)

Limit to af- and reforestation projects

In [9]:
# Keep only projects whose intervention list mentions afforestation/reforestation.
# - na=False treats rows with no scraped interventions (NaN after the left
#   merge) as non-matching instead of producing a non-boolean mask.
# - regex=False matches the phrase literally.
# - .copy() gives later column assignments an independent frame rather than a
#   slice of the unfiltered one.
is_reforestation = df['Interventions'].str.contains(
    'Afforestation or reforestation', na=False, regex=False
)
df = df[is_reforestation].copy()

### Fix geometries

In [10]:
# Build point geometries from the longitude/latitude columns in one vectorized
# call (x = lng, y = lat). Unlike a row-wise apply, this also works when the
# filtered frame is empty.
df['geometry'] = gpd.points_from_xy(df['lng'], df['lat'])

In [11]:
# Promote the table to a GeoDataFrame, declaring the coordinates as EPSG:4326.
gdf = gpd.GeoDataFrame(
    df,
    geometry='geometry',
    crs='EPSG:4326',
)

In [12]:
# Flag whether at least one geometry is currently invalid.
invalid_geom = bool((~gdf.geometry.is_valid).any())

In [13]:
# Replace the geometry column with its repaired (valid) counterpart.
repaired_geometry = gdf.geometry.make_valid()
gdf['geometry'] = repaired_geometry

### Harmonize nomenclature

In [14]:
# Source column -> harmonized column name (naming format from the paper's
# columns section). Only the columns listed here are kept, in this order.
columns_rename_mapping = dict(
    pid='project_id_reported',
    geometry='geometry',
    site_sqkm_reported='site_sqkm_reported',
    Description='project_description_reported',
    country='country',
)
kept_columns = list(columns_rename_mapping)
gdf = gdf[kept_columns].rename(columns=columns_rename_mapping)

In [15]:
# Attach the constant host name, a per-project detail URL built from the
# project id, and the invalid-geometry flag computed earlier.
detail_url_prefix = 'https://www.forest-trends.org/wp-content/themes/foresttrends/map_tools/project_fetch_single.php?pid='
gdf = gdf.assign(
    host_name='Forest Trends',
    url=detail_url_prefix + gdf.project_id_reported,
    project_geometries_invalid=invalid_geom,
)

In [16]:
# Columns added as all-None placeholders, in this order.
placeholder_columns = [
    'site_sqkm',
    'species_count_reported',
    'species_planted_reported',
    'survival_rate_reported',
    'trees_planted_reported',
    'site_id_reported',
    'planting_date_reported',
]
gdf = gdf.assign(**{column: None for column in placeholder_columns})
gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 277 entries, 0 to 4557
Data columns (total 15 columns):
 #   Column                        Non-Null Count  Dtype   
---  ------                        --------------  -----   
 0   project_id_reported           277 non-null    object  
 1   geometry                      277 non-null    geometry
 2   site_sqkm_reported            246 non-null    float64 
 3   project_description_reported  277 non-null    object  
 4   country                       277 non-null    object  
 5   host_name                     277 non-null    object  
 6   url                           277 non-null    object  
 7   project_geometries_invalid    277 non-null    bool    
 8   site_sqkm                     0 non-null      object  
 9   species_count_reported        0 non-null      object  
 10  species_planted_reported      0 non-null      object  
 11  survival_rate_reported        0 non-null      object  
 12  trees_planted_reported        0 non-null      

### Save it

In [None]:
# Write the harmonized layer; the path is relative to the working directory.
output_path = '../midsave/forest_trends.gpkg'
gdf.to_file(output_path)