In [None]:
import requests
import pandas as pd
import geopandas as gpd
from bs4 import BeautifulSoup
from shapely.ops import transform
from shapely.geometry import MultiPolygon, Polygon
from io import BytesIO
import itertools
from tqdm import tqdm
import time
from random import uniform

### Important functions

In [None]:
# Function to download a KML file
def fetch_kml(uri, timeout=30):
    """Download a KML document and return its raw bytes.

    Parameters
    ----------
    uri : str
        URL of the KML file.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout `requests.get` can block indefinitely on a
        stalled connection.

    Returns
    -------
    bytes or None
        The response body on HTTP 200; None on any other status code or
        when the request itself fails (connection error, timeout, ...).
    """
    try:
        response = requests.get(uri, timeout=timeout)
    except requests.RequestException as error:
        # A single unreachable document must not abort the whole scrape
        print(f"Failed to download KML file from {uri} ({error})")
        return None

    if response.status_code == 200:
        return response.content

    print(f"Failed to download KML file from {uri}")
    return None

# Helpers for parse_kml
def _kml_ring_points(coordinates_tag):
    """Convert one KML <coordinates> tag into a list of float tuples.

    The tag text is split on whitespace and each token on commas.
    Returns an empty list when the tag is missing.
    Raises ValueError when a token contains a non-numeric value.
    """
    if coordinates_tag is None:
        return []
    # get_text() also works when the tag has several children, where .string is None
    return [
        tuple(float(value) for value in token.split(','))
        for token in coordinates_tag.get_text().split()
    ]


def _kml_polygon(polygon_tag):
    """Build a shapely Polygon (outer ring plus holes) from a KML <Polygon> tag.

    Returns None (after printing a message) when the polygon has no usable
    outer ring or when its coordinates cannot be turned into a valid ring.
    """
    outer = polygon_tag.find('outerBoundaryIs')
    try:
        if outer is not None:
            shell = _kml_ring_points(outer.find('coordinates'))
            # Rings listed under <innerBoundaryIs> are holes in the polygon
            holes = [
                _kml_ring_points(hole_tag)
                for inner in polygon_tag.find_all('innerBoundaryIs')
                for hole_tag in inner.find_all('coordinates')
            ]
            holes = [hole for hole in holes if hole]
        else:
            # No explicit outer boundary: fall back to the first ring found
            shell = _kml_ring_points(polygon_tag.find('coordinates'))
            holes = []

        if not shell:
            print("Skipping KML polygon without coordinates")
            return None
        return Polygon(shell, holes)
    except (ValueError, TypeError) as error:
        # Non-numeric values or too few points: drop this polygon, keep the rest
        print(f"Skipping malformed KML polygon: {error}")
        return None


# Function to parse KML and convert to geometries
def parse_kml(content):
    """Parse KML content into a list of shapely geometries.

    Parameters
    ----------
    content : bytes or str
        Raw KML document.

    Returns
    -------
    list
        One MultiPolygon per outermost <MultiGeometry> element that contains
        at least one usable polygon, followed by one Polygon per <Polygon>
        element that is not inside a <MultiGeometry>. Malformed polygons
        are skipped instead of aborting the whole document.
    """
    soup = BeautifulSoup(content, 'xml')
    geometries = []

    # Find all MultiGeometry elements, which can host multiple Polygons
    for multi_geom in soup.find_all('MultiGeometry'):
        # A nested MultiGeometry is already covered by its outermost ancestor,
        # because find_all('Polygon') below searches recursively
        if multi_geom.find_parent('MultiGeometry') is not None:
            continue
        polygons = [
            polygon
            for polygon in map(_kml_polygon, multi_geom.find_all('Polygon'))
            if polygon is not None
        ]
        if polygons:
            geometries.append(MultiPolygon(polygons))

    # Also collect Polygons that are not part of any MultiGeometry
    for polygon_tag in soup.find_all('Polygon'):
        if polygon_tag.find_parent('MultiGeometry') is None:
            polygon = _kml_polygon(polygon_tag)
            if polygon is not None:
                geometries.append(polygon)

    return geometries

# Main processing
def process_kml_uris(kml_uris):
    """Fetch every KML URI in turn and pool the parsed geometries into one flat list.

    URIs whose download returns nothing (falsy content) are skipped.
    """
    # Lazy generator: each document is downloaded right before it is parsed
    downloads = (fetch_kml(uri) for uri in kml_uris)
    return [
        geometry
        for kml_content in downloads
        if kml_content
        for geometry in parse_kml(kml_content)
    ]

### Read project list

Project lists were acquired from the Verra registry on July 8th and 9th, 2024, using the registry's "export to Excel" functionality for bulk download:

- Verified Carbon Standard (VCS): https://registry.verra.org/app/search/VCS/All%20Projects
- Climate, Community & Biodiversity Standards (CCB): https://registry.verra.org/app/search/CCB/All%20Projects
- Sustainable Development Verified Impact Standard (VISta): https://registry.verra.org/app/search/SDVISTA/All%20Projects

In [None]:
# Load the three registry exports (VCS, CCB, VISta), in that order
projects_vcs, projects_ccb, projects_vista = (
    pd.read_excel(export_path)
    for export_path in (
        '/Users/tillkoebe/Documents/GitHub/Forest_Monitoring/input/Verra/allprojects_vcs.xlsx',
        '/Users/tillkoebe/Documents/GitHub/Forest_Monitoring/input/Verra/allprojects_ccb.xlsx',
        '/Users/tillkoebe/Documents/GitHub/Forest_Monitoring/input/Verra/allprojects_vista.xlsx',
    )
)

In [None]:
# Discard rows that have no value in the activity / project-type column
projects_vcs = projects_vcs.dropna(subset=['AFOLU Activities'])
projects_ccb = projects_ccb.dropna(subset=['CCB Project Type'])
projects_vista = projects_vista.dropna(subset=['Project Type'])

In [None]:
# Collect the IDs of projects whose type column contains the target category.
# NOTE(review): the VISta filter selects the whole AFOLU category rather than
# ARR specifically — confirm this is intended.
project_list_vcs = projects_vcs.loc[
    projects_vcs['AFOLU Activities'].str.contains("ARR"), 'ID'
].tolist()
project_list_ccb = projects_ccb.loc[
    projects_ccb['CCB Project Type'].str.contains("Afforestation, Reforestation and Revegetation"), 'ID'
].tolist()
project_list_vista = projects_vista.loc[
    projects_vista['Project Type'].str.contains("Agriculture Forestry and Other Land Use"), 'ID'
].tolist()

In [None]:
# Pool the three ID lists and drop IDs that occur more than once
project_list = list(
    set(
        itertools.chain.from_iterable(
            (project_list_vcs, project_list_ccb, project_list_vista)
        )
    )
)

In [None]:
# Peek at the first four project IDs
project_list[:4]

### Extract geometries per project

In [None]:
def _fetch_project_sites(project_id, timeout=30):
    """Download the KML boundaries of one project and return them as a GeoDataFrame.

    Parameters
    ----------
    project_id
        Registry ID inserted into the resource-summary URL.
    timeout : float, optional
        Seconds to wait for the registry before giving up (default 30).

    Returns
    -------
    geopandas.GeoDataFrame or None
        One row per polygon with columns `site_id`, `geometry` (2D, EPSG:4326)
        and `project_id`; None when the request fails or the project has no
        usable KML geometries.
    """
    url = f'https://registry.verra.org/uiapi/resource/resourceSummary/{project_id}'

    # Make the GET request to fetch the JSON
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as error:
        print(f"Request for project {project_id} failed: {error}")
        return None

    if response.status_code != 200:
        print(f"Request failed with status code: {response.status_code}")
        return None

    try:
        data = response.json()
    except ValueError:
        print(f"Project {project_id}: response is not valid JSON")
        return None

    # Extract KML URIs; tolerate missing or null fields in the document records
    kml_uris = []
    for group in data.get('documentGroups') or []:
        for document in group.get('documents') or []:
            document_type = (document.get('documentType') or '').lower()
            document_name = (document.get('documentName') or '').lower()
            if document_type == 'kml file' or document_name.endswith('.kml'):
                uri = document.get('uri')
                if uri:
                    kml_uris.append(uri)

    # Process the KML URIs to get geometries
    geometries = process_kml_uris(kml_uris)
    if not geometries:
        # Nothing to georeference for this project
        return None

    # Convert geometries to GeoPandas DataFrame
    sites = gpd.GeoDataFrame(geometry=geometries)

    # Assign CRS. Latitudes in degrees never exceed 90, so a centroid with
    # |y| > 200 is taken to mean projected metres (assumed to be EPSG:3857).
    if sites.geometry.centroid.y.abs().max() > 200:
        sites = sites.set_crs(3857).to_crs(4326)
    else:
        sites = sites.set_crs(4326)

    # Explode MultiPolygons into individual Polygons
    sites = sites.explode(index_parts=True)

    # 3D to 2D geometries
    sites['geometry'] = sites['geometry'].apply(
        lambda geometry: transform(lambda x, y, z=None: (x, y), geometry)
    )

    # Assign identifiers
    sites['project_id'] = project_id
    return sites.reset_index(drop=True).reset_index().rename(columns={'index': 'site_id'})


# Collect per-project frames in a list and concatenate once at the end;
# concatenating inside the loop is quadratic in the number of projects.
site_frames = []

for project_id in tqdm(project_list):
    sites = _fetch_project_sites(project_id)
    if sites is not None:
        site_frames.append(sites)

    # Delay to avoid excess request responses
    time.sleep(uniform(0, 2.0))

if site_frames:
    project_gdf = gpd.GeoDataFrame(pd.concat(site_frames, ignore_index=True), geometry='geometry')
else:
    # Keep a GeoDataFrame with the expected columns even when nothing was found
    project_gdf = gpd.GeoDataFrame({'site_id': [], 'project_id': []}, geometry=[], crs=4326)


In [None]:
# Number of extracted site polygons
project_gdf['project_id'].shape

In [None]:
# Persist the extracted sites as a GeoPackage
project_gdf.to_file(
    filename="/Users/tillkoebe/Documents/GitHub/Forest_Monitoring/input/Verra/verra_sites.gpkg",
    driver="GPKG",
)

### Some feasibility checks

In [None]:
# Reload the saved sites so the checks below can run without re-scraping
project_gdf = gpd.read_file(
    filename="/Users/tillkoebe/Documents/GitHub/Forest_Monitoring/input/Verra/verra_sites.gpkg"
)

In [None]:
# Area in square kilometres. Web Mercator (EPSG:3857) is not equal-area and
# inflates areas away from the equator, so measure in the equal-area
# EPSG:6933 projection (units: metres) instead.
project_gdf['sqkm'] = project_gdf.to_crs(6933).area / 1_000_000

In [None]:
# Summary statistics of the site areas
project_gdf['sqkm'].describe()

In [None]:
# Sites of at least 10,000 sqkm
project_gdf.loc[project_gdf['sqkm'].ge(10000)]

In [None]:
# Sites whose computed area is exactly zero
project_gdf.loc[project_gdf['sqkm'].eq(0)]