In [112]:
import requests
import pandas as pd
import geopandas as gpd
from bs4 import BeautifulSoup
from shapely.ops import transform
from shapely.geometry import MultiPolygon, Polygon
from io import BytesIO
import itertools
from tqdm import tqdm
import time
from random import uniform

### Important functions

In [None]:
# Function to download a KML file (parsing happens in parse_kml)
def fetch_kml(uri, timeout=60):
    """Download the raw bytes of a KML document.

    Parameters
    ----------
    uri : str
        Address of the KML file.
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout a
        single stalled connection would block the whole scraping run.

    Returns
    -------
    bytes or None
        The response body when the server answers with status 200, otherwise
        ``None`` (a message is printed in that case).
    """
    try:
        response = requests.get(uri, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors and timeouts are reported like any other failed
        # download so the caller can simply skip this file.
        print(f"Failed to download KML file from {uri} ({exc})")
        return None

    if response.status_code == 200:
        return response.content

    print(f"Failed to download KML file from {uri}")
    return None

# Helpers and function to parse KML and convert to geometries
def _parse_coordinates(coords_tag):
    """Convert one KML <coordinates> element into a list of float tuples."""
    # get_text() also works when the element contains several text nodes,
    # a case in which .string would be None.
    return [tuple(map(float, c.split(','))) for c in coords_tag.get_text().split()]


def _polygon_from_kml(polygon):
    """Build a shapely Polygon (including holes) from a KML <Polygon> element.

    Returns ``None`` when the element has no usable outer ring or its
    coordinates are malformed, so that a single broken polygon does not abort
    the processing of the whole file.
    """
    # Prefer the explicit outer boundary; fall back to the first <coordinates>
    # element of the polygon when no <outerBoundaryIs> is present.
    outer = polygon.find('outerBoundaryIs')
    shell_tag = (outer if outer is not None else polygon).find('coordinates')
    if shell_tag is None:
        return None

    try:
        shell = _parse_coordinates(shell_tag)
        if not shell:
            return None
        # Every ring below <innerBoundaryIs> is a hole in the polygon
        holes = [
            _parse_coordinates(coords_tag)
            for inner in polygon.find_all('innerBoundaryIs')
            for coords_tag in inner.find_all('coordinates')
        ]
        return Polygon(shell, [hole for hole in holes if hole])
    except ValueError as exc:
        # Raised for non-numeric coordinates or rings with too few points
        print(f"Skipping malformed KML polygon: {exc}")
        return None


def parse_kml(content):
    """Parse KML content into shapely geometries.

    Parameters
    ----------
    content : bytes or str
        Raw KML document.

    Returns
    -------
    list
        One ``MultiPolygon`` per outermost ``<MultiGeometry>`` element that
        contains at least one usable polygon, followed by one ``Polygon`` per
        ``<Polygon>`` element that is not part of any ``<MultiGeometry>``.
    """
    soup = BeautifulSoup(content, 'xml')
    geometries = []

    # Find all MultiGeometry elements, which can host multiple Polygons
    for multi_geom in soup.find_all('MultiGeometry'):
        # A nested MultiGeometry is already covered by its outermost ancestor
        # because find_all() searches recursively.
        if multi_geom.find_parent('MultiGeometry') is not None:
            continue
        polygons = [
            parsed
            for parsed in map(_polygon_from_kml, multi_geom.find_all('Polygon'))
            if parsed is not None
        ]
        if polygons:
            geometries.append(MultiPolygon(polygons))

    # Also check for individual Polygons that are not part of MultiGeometry
    for polygon in soup.find_all('Polygon'):
        # Check every ancestor, not only the direct parent, to avoid
        # reprocessing polygons already collected above.
        if polygon.find_parent('MultiGeometry') is None:
            parsed = _polygon_from_kml(polygon)
            if parsed is not None:
                geometries.append(parsed)

    return geometries

# Download and parse a batch of KML files
def process_kml_uris(kml_uris):
    """Fetch every URI in ``kml_uris`` and return all parsed geometries as one flat list.

    URIs whose download yields no content are skipped.
    """
    collected = []
    # map() is lazy, so each file is downloaded and parsed before the next one is requested
    for content in map(fetch_kml, kml_uris):
        if not content:
            continue
        collected += parse_kml(content)
    return collected

### Read project list

Project lists were acquired from the Verra registry on July 8 and 9, 2024, using the registry's export-to-Excel functionality for bulk download:

- Verified Carbon Standard (VCS): https://registry.verra.org/app/search/VCS/All%20Projects
- Climate, Community & Biodiversity Standards (CCB): https://registry.verra.org/app/search/CCB/All%20Projects
- Sustainable Development Verified Impact Standard (VISta): https://registry.verra.org/app/search/SDVISTA/All%20Projects

In [36]:
# Directory holding the Excel exports downloaded from the Verra registry.
# Adjust this single path when running the notebook on another machine.
VERRA_DIR = '/Users/tillkoebe/Documents/GitHub/Forest_Monitoring/input/Verra'

# One export per Verra standard (VCS, CCB, VISta)
projects_vcs = pd.read_excel(f'{VERRA_DIR}/allprojects_vcs.xlsx')
projects_ccb = pd.read_excel(f'{VERRA_DIR}/allprojects_ccb.xlsx')
projects_vista = pd.read_excel(f'{VERRA_DIR}/allprojects_vista.xlsx')

  warn("Workbook contains no default style, apply openpyxl's default")
  warn("Workbook contains no default style, apply openpyxl's default")
  warn("Workbook contains no default style, apply openpyxl's default")


In [37]:
# Discard rows whose activity / project-type column is empty; the filters
# applied to these columns afterwards need string values.
projects_vcs = projects_vcs.dropna(subset=['AFOLU Activities'])
projects_ccb = projects_ccb.dropna(subset=['CCB Project Type'])
projects_vista = projects_vista.dropna(subset=['Project Type'])

In [39]:
# Row masks selecting the relevant projects in each registry export
is_arr_vcs = projects_vcs['AFOLU Activities'].str.contains("ARR")
is_arr_ccb = projects_ccb['CCB Project Type'].str.contains("Afforestation, Reforestation and Revegetation")
is_afolu_vista = projects_vista['Project Type'].str.contains("Agriculture Forestry and Other Land Use")

# Project IDs of the selected rows
project_list_vcs = projects_vcs.loc[is_arr_vcs, 'ID'].tolist()
project_list_ccb = projects_ccb.loc[is_arr_ccb, 'ID'].tolist()
project_list_vista = projects_vista.loc[is_afolu_vista, 'ID'].tolist()

In [44]:
# Merge the three ID lists; the set removes projects listed under more than one standard
project_list = list(set().union(project_list_vcs, project_list_ccb, project_list_vista))

In [72]:
# Peek at the first four project IDs
project_list[:4]

[4107, 2070, 4120, 4123]

### Extract geometries per project

In [136]:
# Seconds to wait for the registry before giving up on a request; without a
# timeout a single stalled connection would block the whole run.
REQUEST_TIMEOUT = 60

# Latitudes in degrees never exceed 90, so centroid y-values beyond this
# threshold indicate coordinates stored in projected metres.
MAX_DEGREE_LIKE_Y = 200


def _fetch_project_summary(project_id):
    """Return the registry's JSON resource summary for a project, or None on failure."""
    url = f'https://registry.verra.org/uiapi/resource/resourceSummary/{project_id}'
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print(f"Request for project {project_id} failed: {exc}")
        return None

    if response.status_code != 200:
        print(f"Request failed with status code: {response.status_code}")
        return None
    return response.json()


def _extract_kml_uris(data):
    """Collect the URIs of all KML documents listed in a project's resource summary."""
    kml_uris = []
    for group in data.get('documentGroups') or []:
        for document in group.get('documents') or []:
            # Either field may be missing or null, so fall back to an empty string
            doc_type = (document.get('documentType') or '').lower()
            doc_name = (document.get('documentName') or '').lower()
            is_kml = doc_type == 'kml file' or doc_name.endswith('.kml')
            if is_kml and document.get('uri'):
                kml_uris.append(document['uri'])
    return kml_uris


def _build_project_sites(project_id, geometries):
    """Convert one project's geometries into a GeoDataFrame in EPSG:4326.

    The result has one row per individual 2D polygon and the columns
    ``site_id`` (running number within the project), ``geometry`` and
    ``project_id``.
    """
    temp = gpd.GeoDataFrame(geometry=geometries)

    # Assign CRS based on THIS project's geometries: values too large to be
    # degrees are treated as EPSG:3857 and reprojected to EPSG:4326.
    if temp.geometry.centroid.y.abs().max() > MAX_DEGREE_LIKE_Y:
        temp = temp.set_crs(3857).to_crs(4326)
    else:
        temp = temp.set_crs(4326)

    # Explode MultiPolygons into individual Polygons
    temp = temp.explode(index_parts=True)

    # 3D to 2D geometries (drop the optional altitude coordinate)
    temp['geometry'] = temp['geometry'].apply(
        lambda geometry: transform(lambda x, y, z=None: (x, y), geometry)
    )

    # Assign identifiers
    temp['project_id'] = project_id
    return temp.reset_index(drop=True).reset_index().rename(columns={'index': 'site_id'})


# Per-project frames are collected in a list and concatenated once at the end;
# concatenating inside the loop would copy the growing frame on every iteration.
site_frames = []

for project_id in tqdm(project_list):

    data = _fetch_project_summary(project_id)

    if data is not None:
        # Download and parse every KML file attached to the project
        geometries = process_kml_uris(_extract_kml_uris(data))

        # Projects without any usable KML geometry contribute no rows
        if geometries:
            site_frames.append(_build_project_sites(project_id, geometries))

    # Delay to avoid excess request responses
    time.sleep(uniform(0, 2.0))

if site_frames:
    project_gdf = pd.concat(site_frames, ignore_index=True)
else:
    # Keep the expected schema even when nothing could be downloaded
    project_gdf = gpd.GeoDataFrame({'site_id': [], 'project_id': []}, geometry=[], crs=4326)


 53%|████████████████████▊                  | 313/587 [1:18:33<48:13, 10.56s/it]

Failed to download KML file from https://registry.verra.org/mymodule/ProjectDoc/Project_ViewFile.asp?FileID=104824&IDKEY=3kjalskjf098234kj28098sfkjlf098098kl32lasjdflkj9093144552296


100%|███████████████████████████████████████| 587/587 [1:49:48<00:00, 11.22s/it]


In [142]:
# Number of extracted sites (one row per polygon)
project_gdf['project_id'].shape

(1355236,)

In [145]:
# Persist all extracted sites as a GeoPackage
sites_path = "/Users/tillkoebe/Documents/GitHub/Forest_Monitoring/input/Verra/verra_sites.gpkg"
project_gdf.to_file(sites_path, driver="GPKG")

In [144]:
# Display the VCS project table; rows without 'AFOLU Activities' were dropped earlier
projects_vcs

Unnamed: 0,ID,Name,Proponent,Project Type,AFOLU Activities,Methodology,Status,Country/Area,Estimated Annual Emission Reductions,Region,Project Registration Date,Crediting Period Start Date,Crediting Period End Date
0,5144,SHANDONG DEZHOU IMPROVED AGRICULTURAL LAND MAN...,Fujian Dunfeng Carbon Sequestration Agricultur...,Agriculture Forestry and Other Land Use,ALM,VM0042,Under validation,China,556100,Asia,NaT,2024-05-26,2034-05-25
1,5142,KUZA MITI AGROFORESTRY CARBON PROJECT IN KENYA,One Carbon World Ltd.,Agriculture Forestry and Other Land Use,ARR,VM0047,Under development,Kenya,84889,Africa,NaT,2023-09-19,2122-09-18
7,5123,GKF AGROFORESTRY PROJECT IN TELANGANA,Emergent Ventures India Private Limited,Agriculture Forestry and Other Land Use,ARR,VM0047,Under development,India,262253,Asia,NaT,2021-06-01,2121-05-31
11,5117,Agroforestry Corridors for Multi Cropping Syst...,Thryve.Earth Pte. Ltd.,Agriculture Forestry and Other Land Use,ARR,VM0047,Under development,Brazil,114000,Latin America,NaT,NaT,NaT
12,5118,Farm Resilience and Regeneration in France,ReGeneration,Agriculture Forestry and Other Land Use,ALM,VM0042,Under development,France,356550,Europe,NaT,2023-04-21,2043-04-20
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4234,720,Tree Planting in South African townships,Food and Trees for Africa (FTFA),Agriculture Forestry and Other Land Use,ARR,AR-AM0002,Registered,South Africa,9,Africa,2020-04-06,2011-09-01,2051-08-31
4236,1351,Planting for the Future: Financially sustainab...,Plant your Future,Agriculture Forestry and Other Land Use,ARR,AR-ACM0003,Registered,Peru,719,Latin America,2015-03-17,2012-01-16,2042-01-15
4237,1429,Recovery of degraded areas with agroforestry s...,Multiple Proponents,Agriculture Forestry and Other Land Use,ARR,AR-ACM0003,Late to verify,Colombia,746,Latin America,2015-11-04,2012-01-16,2112-01-15
4239,647,Boden Creek Ecological Preserve Forest Carbon ...,Boden Creek Ecological Preserve,Agriculture Forestry and Other Land Use,REDD,VM0007,Registered,Belize,57718,Latin America,2020-04-06,2005-01-01,2029-12-31
