# Gold Standards registry
https://registry.goldstandard.org/projects?q=&page=3&project_types=22

In [11]:
import json
import requests
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from tqdm import tqdm
import time
import os
from random import uniform

### Load data

In [12]:
df = pd.read_csv("../input/Gold_Standards/GSF Registry Projects Export 2025-02-13.csv")
df.head(1)

Unnamed: 0,GSID,Project Name,Project Developer Name,Status,Sustainable Development Goals,Project Type,Country,Description,Estimated Annual Credits,Methodology,Size,Programme of Activities,POA GSID
0,12587,Parry Agro Industries C Cubed Tea and Rubber A...,Murali Balan Padikkal,Listed,1131512138754,A/R,India,The C Cubed project combines tea and rubber ag...,21000,Afforestation/Reforestation GHG Emissions Redu...,Large Scale,Standalone,


In [13]:
df['project_pdf_available'] = False

In [14]:
for project_id in tqdm(df.GSID.unique().tolist()):

    url = f'https://assurance-platform.goldstandard.org/api/public/project-documents/GS{project_id}'
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "Accept": "application/json",
    }
    try:
        response = requests.get(url, headers=headers)

    except Exception as e:
        print(f"Error with project {project_id}: {e}")
        continue

    if response.status_code == 200:
        data = response.json()

        pdf_path = f"../midsave/project_descriptions/pd_goldstandard_{project_id}.pdf"
        
        pd_uris = []
        for group in data.get('requests', []):
            for document in group.get('documents', []):
                if document['documentType'] == 'Gold Standard Project Design Document': # or 'pd' in document['documentName'].lower()
                    pd_uris.append(document['id'])
        
        pd_available = False
        for uri in pd_uris:
            response = requests.get(f'https://assurance-platform.goldstandard.org/api/public/documents/{uri}/download', headers=headers)
            if response.status_code == 200:
                if response.content:
                    open(os.path.join(pdf_path), "wb").write(response.content)
                    pd_available = True

        df.loc[df.GSID == project_id, 'project_pdf_available'] = pd_available

    time.sleep(uniform(0, 5.0))

100%|██████████| 71/71 [02:46<00:00,  2.34s/it]


### Harmonize nomenclature

In [15]:
filtered_columns = ["GSID", "Country", "Description", "project_pdf_available"]
gdf = df[filtered_columns].copy()

In [16]:
gdf['url'] = 'https://assurance-platform.goldstandard.org/project-documents/GS' + gdf['GSID'].astype(str)
gdf['host_name'] = 'Gold Standard'

In [17]:
gdf

Unnamed: 0,GSID,Country,Description,project_pdf_available,url,host_name
0,12587,India,The C Cubed project combines tea and rubber ag...,False,https://assurance-platform.goldstandard.org/pr...,Gold Standard
1,13054,Brazil,The Syntropic Agroforestry Coffee Project in S...,False,https://assurance-platform.goldstandard.org/pr...,Gold Standard
2,13053,Brazil,The objective of the PoA “Global Syntropic Agr...,False,https://assurance-platform.goldstandard.org/pr...,Gold Standard
3,13106,Latvia,PATA Afforestation Project VPA 1 will implemen...,False,https://assurance-platform.goldstandard.org/pr...,Gold Standard
4,12466,Latvia,PATA Afforestation Project will implement affo...,False,https://assurance-platform.goldstandard.org/pr...,Gold Standard
...,...,...,...,...,...,...
66,3565,Colombia,The project areas are located in the municipal...,False,https://assurance-platform.goldstandard.org/pr...,Gold Standard
67,3264,Costa Rica,The VisionsWald - VisionForest is located in a...,False,https://assurance-platform.goldstandard.org/pr...,Gold Standard
68,3039,Australia,The Yarra Yarra Biodiversity Corridor is a nat...,False,https://assurance-platform.goldstandard.org/pr...,Gold Standard
69,3025,Colombia,The Project establishes a reforestation of nat...,False,https://assurance-platform.goldstandard.org/pr...,Gold Standard


In [18]:
columns_rename_mapping = {

    'GSID': 'project_id_reported',
    'Description':'project_description_reported',
    'Country':'country',
}
gdf.rename(columns=columns_rename_mapping, inplace=True)

In [19]:
gdf = gdf.assign(site_sqkm=None,species_count_reported=None, species_planted_reported=None, survival_rate_reported=None,trees_planted_reported=None,planting_date_reported=None,geometry=None,site_id_reported=None)
gdf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 71 entries, 0 to 70
Data columns (total 14 columns):
 #   Column                        Non-Null Count  Dtype 
---  ------                        --------------  ----- 
 0   project_id_reported           71 non-null     int64 
 1   country                       71 non-null     object
 2   project_description_reported  71 non-null     object
 3   project_pdf_available         71 non-null     bool  
 4   url                           71 non-null     object
 5   host_name                     71 non-null     object
 6   site_sqkm                     0 non-null      object
 7   species_count_reported        0 non-null      object
 8   species_planted_reported      0 non-null      object
 9   survival_rate_reported        0 non-null      object
 10  trees_planted_reported        0 non-null      object
 11  planting_date_reported        0 non-null      object
 12  geometry                      0 non-null      object
 13  site_id_reported      

In [20]:

gdf["planting_date_type"]= " "
gdf['project_geometries_invalid']= " "


### Save it

In [21]:
gdf = gpd.GeoDataFrame(gdf).set_crs('EPSG:4326', allow_override=True)
gdf.to_file('../midsave/gold_standards.gpkg', driver='GPKG')