# Gold Standard registry
https://registry.goldstandard.org/projects?q=&page=3&project_types=22

In [None]:
import json
import requests
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from tqdm import tqdm
import time
import os
from random import uniform

### Load data

In [None]:
# Read the registry export (CSV) into a DataFrame and preview its first row.
registry_csv_path = "../input/Gold_Standards/GSF Registry Projects Export 2025-02-13.csv"
df = pd.read_csv(registry_csv_path)
df.head(1)

In [None]:
# Start with every project flagged as "no PDF"; the download loop sets the
# flag per project afterwards.
df = df.assign(project_pdf_available=False)

In [None]:
# Download the "Gold Standard Project Design Document" PDF of every project
# listed in `df` and record in `df['project_pdf_available']` whether a
# non-empty PDF was saved.
#
# For each GSID the public project-documents endpoint is queried; every
# document whose `documentType` matches PDD_DOCUMENT_TYPE is then downloaded
# to PDF_DIR/pd_goldstandard_<GSID>.pdf. The run is best-effort: a failing
# request is reported with `print` and skipped instead of stopping the loop.
API_ROOT = 'https://assurance-platform.goldstandard.org/api/public'
PDF_DIR = "../midsave/project_descriptions"
PDD_DOCUMENT_TYPE = 'Gold Standard Project Design Document'
REQUEST_TIMEOUT = 60  # seconds; without a timeout a stalled connection blocks forever

# Loop-invariant request headers, built once.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
    "Accept": "application/json",
}

# Make sure the target directory exists before the first write.
os.makedirs(PDF_DIR, exist_ok=True)

for project_id in tqdm(df.GSID.unique().tolist()):

    url = f'{API_ROOT}/project-documents/GS{project_id}'
    try:
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as e:
        print(f"Error with project {project_id}: {e}")
        continue

    if response.status_code == 200:
        try:
            data = response.json()
        except ValueError as e:
            # A 200 response with a non-JSON body: treat as "no documents".
            print(f"Error with project {project_id}: {e}")
            data = {}
        if not isinstance(data, dict):
            data = {}

        pdf_path = os.path.join(PDF_DIR, f"pd_goldstandard_{project_id}.pdf")

        # Collect the ids of all documents typed as Project Design Document.
        # Only the exact document type is matched; matching on the document
        # name is not applied.
        pd_uris = []
        for group in data.get('requests') or []:
            for document in group.get('documents') or []:
                if document.get('documentType') == PDD_DOCUMENT_TYPE and 'id' in document:
                    pd_uris.append(document['id'])

        pd_available = False
        for uri in pd_uris:
            try:
                pdf_response = requests.get(
                    f'{API_ROOT}/documents/{uri}/download',
                    headers=headers,
                    timeout=REQUEST_TIMEOUT,
                )
            except requests.RequestException as e:
                print(f"Error with project {project_id}: {e}")
                continue

            if pdf_response.status_code == 200 and pdf_response.content:
                # NOTE: every matching document is written to the same path,
                # so the last successful download is the one kept on disk.
                with open(pdf_path, "wb") as pdf_file:
                    pdf_file.write(pdf_response.content)
                pd_available = True

        df.loc[df.GSID == project_id, 'project_pdf_available'] = pd_available

    # Random pause between projects to avoid hammering the server.
    time.sleep(uniform(0, 5.0))

### Harmonize nomenclature

In [None]:
# Columns carried over from the registry export into the harmonized table.
filtered_columns = [
    "GSID",
    "Country",
    "Description",
    "project_pdf_available",
]
# Work on an independent copy so later edits do not touch `df`.
gdf = df.loc[:, filtered_columns].copy()

In [None]:
# Add the per-project document page URL (built from the GSID) and a constant
# host name column.
gdf = gdf.assign(
    url='https://assurance-platform.goldstandard.org/project-documents/GS' + gdf['GSID'].astype(str),
    host_name='Gold Standard',
)

In [None]:
# Display the current state of `gdf` for a visual check.
gdf

In [None]:
# Map the registry's column names onto the harmonized column names.
columns_rename_mapping = {
    'GSID': 'project_id_reported',
    'Description': 'project_description_reported',
    'Country': 'country',
}
gdf = gdf.rename(columns=columns_rename_mapping)

In [None]:
# Columns that this source does not fill; each is added as an all-None
# placeholder, in this order.
placeholder_columns = [
    'site_sqkm',
    'species_count_reported',
    'species_planted_reported',
    'survival_rate_reported',
    'trees_planted_reported',
    'planting_date_reported',
    'geometry',
    'site_id_reported',
]
# dict.fromkeys maps every name to None, matching `assign(name=None, ...)`.
gdf = gdf.assign(**dict.fromkeys(placeholder_columns))
gdf.info()

### Save it

In [None]:
# Wrap the table in a GeoDataFrame, tag it with EPSG:4326 and write it out as
# a GeoPackage.
geo_frame = gpd.GeoDataFrame(gdf)
gdf = geo_frame.set_crs('EPSG:4326', allow_override=True)
output_gpkg = '../midsave/gold_standards.gpkg'
gdf.to_file(output_gpkg, driver='GPKG')