In [1]:
import json
import random
import time
from urllib.parse import quote

In [2]:
!pip install selenium

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait



In [16]:
!pip install fake_useragent

from fake_useragent import UserAgent
from collections import defaultdict



In [17]:
def setup_chrome_driver(
    driver_path: str = "/usr/local/bin/chromedriver",
    apply_user_agent: bool = False,
):
    """
    Set up and return the Chrome WebDriver.

    Args:
        driver_path: Location of the chromedriver executable.
        apply_user_agent: When True, launch Chrome with the randomly chosen
            user agent. The default (False) only prints it, which is what
            this function did before the option existed.

    Returns:
        A started ``webdriver.Chrome`` instance.
    """
    # A random user agent is always generated and echoed so the run can be traced.
    user_agent = UserAgent().random
    print(user_agent)

    service = webdriver.ChromeService(executable_path=driver_path)

    options = webdriver.ChromeOptions()
    if apply_user_agent:
        options.add_argument(f"--user-agent={user_agent}")
    # NOTE(review): this is a bare path, not a "--switch", so it is probably not
    # selecting a profile (Chrome normally treats positional arguments as URLs).
    # If the profile is wanted, --user-data-dir / --profile-directory are the
    # switches for it; confirm before changing, since automating a live profile
    # can fail to start.
    options.add_argument("/home/swayam/.config/google-chrome/Default")
    options.add_argument("--start-maximized")
    # The three settings below reduce the usual "controlled by automation" markers.
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option("useAutomationExtension", False)

    return webdriver.Chrome(service=service, options=options, keep_alive=True)

In [18]:

def setup_firefox_driver(
    driver_path: str = "/usr/local/bin/geckodriver",
    apply_user_agent: bool = False,
):
    """
    Set up and return the Firefox WebDriver.

    Args:
        driver_path: Location of the geckodriver executable.
        apply_user_agent: When True, override Firefox's user agent with the
            randomly chosen one. The default (False) only prints it.

    Returns:
        A started, maximised ``webdriver.Firefox`` instance.
    """
    # A random user agent is always generated and echoed so the run can be traced.
    user_agent = UserAgent().random
    print(user_agent)

    service = webdriver.FirefoxService(executable_path=driver_path)

    options = webdriver.FirefoxOptions()
    if apply_user_agent:
        options.set_preference("general.useragent.override", user_agent)
    # The Chrome profile path, "--start-maximized" and "--disable-blink-features"
    # are Chrome/Blink-specific and are deliberately not passed to Firefox.

    # The Firefox service and options must be paired with the Firefox driver class;
    # handing them to webdriver.Chrome does not start a usable Firefox session.
    driver = webdriver.Firefox(service=service, options=options, keep_alive=True)

    # Maximise through the WebDriver API instead of a browser command-line switch.
    driver.maximize_window()

    return driver


In [11]:
# Start the Chrome session; `driver` is the shared browser handle that the
# navigation and scraping cells in this notebook operate on.
driver = setup_chrome_driver()

Mozilla/5.0 (iPhone; CPU iPhone OS 18_0_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.0.1 Mobile/15E148 Safari/604.1


In [12]:
# Project to scrape, and how long to wait for its detail page to render.
project_id = 5401
PAGE_LOAD_TIMEOUT_S = 30

url = f"https://registry.verra.org/app/projectDetail/VCS/{project_id}"
driver.get(url)

# driver.get() may return before the project card has been rendered (the page looks
# like a client-side app), so block until the header exists. Without this, running
# the scraping cells immediately afterwards can fail with NoSuchElementException.
# Raises TimeoutException if the header never appears.
WebDriverWait(driver, PAGE_LOAD_TIMEOUT_S).until(
    EC.presence_of_element_located((By.CLASS_NAME, "card-header"))
);  # trailing ";" keeps the returned WebElement out of the cell output

In [15]:
# Reference example of the nested document (id, title, description, summary,
# documents) for project 5401; it appears to be the shape the scraping functions
# in this notebook are working towards.
# NOTE(review): the crediting-period dates here are ISO (yyyy-mm-dd), while the
# summary scraped later in the notebook prints them as dd/mm/yyyy; confirm which
# format is intended.
{
  "project": {
    "id": "5401",
    "title": "Regenerating Landscapes: Smallholder Farmers Agroforestry Initiative in Tamil Nadu, India",
    "description": "The project aims to restore and revitalize 20,000 hectares of degraded agricultural land...",
    "summary": {
      "state": "Tamil Nadu",
      "vcs": {
        "proponent": "Multiple Proponents",
        "status": "Under development",
        "estimatedAnnualEmissionReductions": 424726,
        "projectType": "Agriculture Forestry and Other Land Use",
        "afoluActivity": "ARR",
        "methodology": "VM0047",
        "area": {
            "value": 20000,
            "unit": "Hectares"
        },
        "creditingPeriod": {
            "term": "1st",
            "startDate": "2024-12-19",
            "endDate": "2058-12-18"
        }
      }
    },

    "documents": {
      "pipeline": [
        {
          "name": "VCS PD DRAFT _5401 _26122024.pdf",
          "url": "https://registry.verra.org/mymodule/ProjectDoc/Project_ViewFile.asp?FileID=125128",
          "dateUpdated": "2024-12-26"
        }
      ],
      "registration": [],
      "issuance": [],
      "other": [
        {
          "name": "Verra-Registry-Communications-Agreement-multiple-PPs_5401_v1_signed by all.pdf",
          "url": "https://registry.verra.org/mymodule/ProjectDoc/Project_ViewFile.asp?FileID=126055",
          "dateUpdated": "2025-01-15"
        }
      ]
    }
  }
}

In [21]:
def get_project_title(driver: webdriver.Chrome):
    """Return the text of the ``card-header`` element on the page loaded in ``driver``."""
    return driver.find_element(By.CLASS_NAME, "card-header").text

# Show the scraped title for the currently loaded project.
print(get_project_title(driver))

REGENERATING LANDSCAPES: SMALLHOLDER FARMERS AGROFORESTRY INITIATIVE IN TAMIL NADU, INDIA


In [22]:
def get_project_description(driver: webdriver.Chrome):
    """Return the text of the ``card-text`` element on the page loaded in ``driver``."""
    return driver.find_element(By.CLASS_NAME, "card-text").text

# Show the scraped description for the currently loaded project.
print(get_project_description(driver))

The project aims to restore and revitalize 20,000 hectares of degraded agricultural land privately owned by marginal and smallholder farmers, empowering them to adopt diverse, native agroforestry systems. These systems are designed to address both ecological and economic challenges in the region, fostering long-term resilience and sustainability. Key interventions include training farmers in regenerative, climate-resilient agroforestry and agriculture practices tailored to the tropical dryland ecosystem. This involves providing access to high-value saplings of commercial fruit trees, timber species, and non-timber forest products, carefully selected to optimize carbon sequestration and enhance vital ecosystem services such as biodiversity restoration, soil health improvement, and water retention. To combat decades of soil degradation caused by unsustainable monoculture farming, the project will implement advanced soil revitalization techniques. These include the integration of biochar 

In [32]:
def get_project_summary(driver: webdriver.Chrome):
    """Scrape the summary attribute table of the page loaded in ``driver``.

    Each value is read from a fixed ``tr.attr-row:nth-child(n)`` slot, so the
    result depends on the page keeping its current row order.

    Returns:
        A JSON-serialisable dict of the form::

            {
                "state": str,
                "vcs": {
                    "proponent": str,
                    "status": str,
                    "estimatedAnnualEmissionReductions": int,
                    "projectType": str,
                    "afoluActivity": str,
                    "methodology": str,
                    "area": {"value": int, "unit": str},
                    "creditingPeriod": {"term": str, "startDate": str, "endDate": str},
                },
            }

        Dates are returned exactly as the page shows them; they are not
        normalised to another format.

    Raises:
        NoSuchElementException: if an expected table row is not on the page.
        ValueError: if a numeric cell does not contain a number.
    """

    def row_text(position: int) -> str:
        """Return the text of the attribute row at the given nth-child position."""
        return driver.find_element(
            by=By.CSS_SELECTOR, value=f"tr.attr-row:nth-child({position})"
        ).text

    def to_int(text: str) -> int:
        """Parse an integer, tolerating thousands separators such as "20,000"."""
        return int(text.replace(",", ""))

    state = row_text(4)

    vcs = {
        "proponent": row_text(3),
        "status": row_text(5),
        "estimatedAnnualEmissionReductions": to_int(row_text(7)),
        "projectType": row_text(9),
        "afoluActivity": row_text(11),
        "methodology": row_text(13),
    }

    # The area cell reads "<number> <unit>"; split on the first space only so a
    # multi-word unit stays intact.
    area_value, _, area_unit = row_text(15).partition(" ")
    vcs["area"] = {"value": to_int(area_value), "unit": area_unit}

    # The crediting-period cell reads "<term>, <start> - <end>".
    term, _, period = row_text(17).partition(",")
    start_date, _, end_date = period.partition("-")
    vcs["creditingPeriod"] = {
        "term": term.strip(),
        "startDate": start_date.strip(),
        "endDate": end_date.strip(),
    }

    return {"state": state, "vcs": vcs}

j = json.dumps(get_project_summary(driver), indent=4)
print(j)

{
    "state": "Tamil Nadu",
    "vcs": {
        "proponent": "Multiple Proponents",
        "status": "Under development",
        "estimatedAnnualEmissionReductions": 424726,
        "projectType": "Agriculture Forestry and Other Land Use",
        "afoluActivity": "ARR",
        "methodology": "VM0047",
        "area": {
            "value": 20000,
            "unit": "Hectares"
        },
        "creditingPeriod": {
            "term": "1st",
            "startDate": "19/12/2024",
            "endDate": "18/12/2058"
        }
    }
}


In [None]:
# Assemble the full project record from the individual scrapers. Each getter reads
# from the live page, so it has to be handed the shared `driver`; calling them with
# no argument raises TypeError.
project = defaultdict(str)

project["id"] = str(project_id)
project["title"] = get_project_title(driver)
project["description"] = get_project_description(driver)
project["summary"] = get_project_summary(driver)
# NOTE(review): get_project_documents is not defined in the cells visible here; it
# is assumed to take the driver like the other getters, so confirm its signature.
project["documents"] = get_project_documents(driver)