# American Carbon Registry (ACR)
https://acr2.apx.com

In [None]:
import json
import requests
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from bs4 import BeautifulSoup
from tqdm import tqdm
import time
import os
from random import uniform

### Load data

In [None]:
# Read the ACR registry export. The file is ISO-8859-1 (Latin-1) encoded,
# so the encoding is passed explicitly instead of relying on the UTF-8 default.
df = pd.read_csv(
    "../input/ACR/acr_export_02_27_2025.csv",
    encoding="ISO-8859-1",
)
df

In [None]:
# Pull the digits that follow the "ACR" prefix of the registry id into their
# own column. The values stay strings; ids that do not match the pattern
# become NaN.
df['project_id'] = df['Project ID'].str.extract(r'ACR(\d+)', expand=False)

In [None]:
# Keep only forest carbon projects; copy so later column assignments do not
# operate on a view of the full export.
is_forest_carbon = df['Project Type'].eq('Forest Carbon')
df = df[is_forest_carbon].copy()

In [None]:
# Restrict to the two afforestation/reforestation methodology labels that
# appear in the export (note the differing "Lands" / "Land" spellings).
ar_methodologies = [
    'Afforestation and Reforestation of Degraded Lands',
    'AR-ACM0001 Afforestation and Reforestation of Degraded Land',
]
uses_ar_methodology = df['Project Methodology/Protocol'].isin(ar_methodologies)
df = df.loc[uses_ar_methodology].copy()

In [None]:
# Start with every project flagged as "no document downloaded"; the download
# loop below flips the flag for projects where a file was saved.
df = df.assign(project_pdf_available=False)

In [None]:
# For every project, open its public ACR documents page, collect the links in
# table rows that mention "Validation Report", download them, and record in
# `project_pdf_available` whether a file was saved.
from urllib.parse import urljoin

base_url = 'https://acr2.apx.com/'
# Relative to the notebook, like the other input/output paths in this file,
# so the notebook also runs outside the original author's machine.
pdf_dir = '../midsave/project_descriptions'
# Seconds. Without a timeout a stalled connection blocks the loop forever.
request_timeout = 60
# Loop-invariant, so built once instead of on every iteration.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
    "Accept": "application/json",
}

os.makedirs(pdf_dir, exist_ok=True)

# Rows whose 'Project ID' did not match the ACR pattern have a NaN id and
# cannot be queried, so they are skipped.
project_ids = df['project_id'].dropna().unique().tolist()

for project_id in tqdm(project_ids):

    url = f'https://acr2.apx.com/mymodule/reg/TabDocuments.asp?r=111&ad=Prpt&act=update&type=PRO&aProj=pub&tablename=doc&id1={project_id}'

    try:
        response = requests.get(url, headers=headers, timeout=request_timeout)
    except requests.RequestException as e:
        # Do not fall through with an undefined (or stale, from the previous
        # project) response object.
        print(f"Error fetching document list for project {project_id}: {e}")
        response = None

    if response is not None and response.status_code == 200:
        soup = BeautifulSoup(response.text, "html.parser")

        sections = soup.find_all(string=lambda text: text and "Validation Report" in text)

        pd_uris = []

        for section in sections:
            parent_row = section.find_parent("tr")
            if parent_row:
                links = parent_row.find_all("a", href=True)
                pd_uris.extend([link["href"] for link in links])

        pdf_path = os.path.join(pdf_dir, f"pd_acr_{project_id}.pdf")

        pd_available = False
        for uri in pd_uris:
            # urljoin copes with relative, root-relative and absolute hrefs.
            doc_url = urljoin(base_url, uri)
            try:
                doc_response = requests.get(doc_url, headers=headers, timeout=request_timeout)
            except requests.RequestException as e:
                # One failed download must not abort the whole run.
                print(f"Error downloading {doc_url} for project {project_id}: {e}")
                continue

            if doc_response.status_code == 200 and doc_response.content:
                # All links of a project share one target path, so the last
                # successful download is the one that remains on disk.
                with open(pdf_path, "wb") as pdf_file:
                    pdf_file.write(doc_response.content)
                pd_available = True

        df.loc[df.project_id == project_id, 'project_pdf_available'] = pd_available

    # Random pause between projects to avoid hammering the registry server.
    time.sleep(uniform(0, 5.0))

In [None]:
# Use the year of the initial crediting period start as the reported
# planting year.
crediting_start = pd.to_datetime(df['Initial Crediting Period Start Date'])
df['planting_date_reported'] = crediting_start.dt.year

In [None]:
# Build the output table: one row per project with the registry id, country,
# reported planting year and the document-availability flag.
# `project_id_created` is a fresh 0..n-1 running index and `site_id_created`
# mirrors it. Attributes not derived in this notebook are added as empty
# (None) placeholder columns.
# `planting_date_reported` is deliberately NOT among the placeholders:
# assigning None to it here would wipe the year computed from the crediting
# period start date.
gdf = (df[['Project ID', 'Project Site Country', 'planting_date_reported', 'project_pdf_available']]
       .rename(columns={'Project ID': 'project_id_reported',
                        'Project Site Country': 'country'})
       .reset_index(drop=True)
       .reset_index(names=['project_id_created'])
       .assign(site_id_created=lambda x: x.project_id_created,
               site_sqkm=None,
               species_count_reported=None,
               species_planted_reported=None,
               survival_rate_reported=None,
               trees_planted_reported=None,
               geometry=None))

In [None]:
# Print column names, dtypes and non-null counts as a quick sanity check.
gdf.info()

### Save it

In [None]:
# Promote the table to a GeoDataFrame (its 'geometry' column is empty) and
# tag it with WGS84 lon/lat so the saved file carries a CRS.
gdf = gpd.GeoDataFrame(gdf)
gdf = gdf.set_crs('EPSG:4326', allow_override=True)

In [None]:
# Write the result as a GeoPackage next to the other intermediate outputs.
gdf.to_file(
    filename='../midsave/american_carbon_registry.gpkg',
    driver='GPKG',
)