# Climate Action Reserve
https://www.climateactionreserve.org

In [27]:
import json
import requests
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from shapely.ops import transform
from tqdm import tqdm
import time
import os
from random import uniform

### Load data

In [28]:
df = pd.read_csv("../input/CAR/car_export_02_28_2025.csv", encoding="ISO-8859-1")
df.head(1)

Unnamed: 0,Project ID,ARB ID,Cooperative/ Aggregate ID,Project Developer,Project Owner,Project Name,Offset Project Operator,Authorized Project Designee,Verification Body,Project Type,...,Additional Certification(s),SDG Impact,Project Notes,Total Number of Offset Credits Registered,Project Listed Date,Project Registered Date,Documents,Data,Project Website,Unnamed: 25
0,CAR1957,CAFR6957,,Sierra Pacific Industries,Sierra Pacific Industries,2021 Fire Refo,Sierra Pacific Industries,,,Reforestation - ARB Compliance,...,,,,,04/05/2024,,View,View,,


In [29]:
df['project_id'] = df['Project ID'].str.extract(r'CAR(\d+)')

In [30]:
df = (df.loc[(df['Project Type'] == 'Reforestation - ARB Compliance') | 
              (df['Project Type'] == 'Reforestation')].copy())

In [31]:
df.project_id.unique()

array(['1957', '1783', '1954', '1491', '1490', '1487', '1953', '1163',
       '1164', '1040', '1042', '1165', '1166', '1167', '1044', '1168',
       '505'], dtype=object)

Check and, if applicable, retrieve shapefiles from project websites

In [32]:
gdf = gpd.GeoDataFrame()

for project_id in tqdm(df['project_id'].unique().tolist()):

    shp_dir = f"../input/CAR/{project_id}/project_area.shp"
    
    if os.path.exists(shp_dir):
        temp = gpd.read_file(shp_dir)
      
    
        temp['geometry'] = temp['geometry'].apply(lambda geometry: transform(lambda x, y, z=None: (x, y), geometry))

        while 'MultiPolygon' in temp.geometry.geom_type.unique().tolist():
            temp = temp.explode(index_parts=False)
            temp['geometry'] = temp['geometry'].make_valid()

        temp = temp.loc[temp.geometry.geom_type.isin(['Polygon', 'Point'])].copy()
        
        temp = temp.to_crs(4326)
        
        invalid_geom = False
        if len(temp[temp.geometry.is_valid == False]) > 0:
            invalid_geom = True

        temp['project_id'] = project_id
        temp['site_sqkm'] = temp.to_crs(3857).area/1e6
        temp['project_geometries_invalid'] = invalid_geom
    
        temp = df.merge(temp, on = 'project_id', how = 'inner')
        temp = temp[['Project ID', 'Project Listed Date', 'geometry', 'site_sqkm', 'project_geometries_invalid']]
    
    else:
        temp = (df
                .loc[df.project_id == project_id, ['Project ID', 'Project Listed Date']]
                .assign(geometry=None, site_sqkm=None, project_geometries_invalid=None))
    
    temp['url'] = 'https://thereserve2.apx.com/mymodule/reg/TabDocuments.asp?r=111&ad=Prpt&act=update&type=PRO&aProj=pub&tablename=doc&id1='+project_id

    temp['project_pdf_available'] = False

    pd_dir = f"../midsave/project_descriptions/pd_car_{project_id}.pdf"
    if os.path.exists(pd_dir):
        temp['project_pdf_available'] = True

    
    gdf = pd.concat([gdf, temp], ignore_index= True)

  result = super().apply(func, convert_dtype=convert_dtype, args=args, **kwargs)
  result = super().apply(func, convert_dtype=convert_dtype, args=args, **kwargs)
  result = super().apply(func, convert_dtype=convert_dtype, args=args, **kwargs)
  result = super().apply(func, convert_dtype=convert_dtype, args=args, **kwargs)
  result = super().apply(func, convert_dtype=convert_dtype, args=args, **kwargs)
  result = super().apply(func, convert_dtype=convert_dtype, args=args, **kwargs)
  result = super().apply(func, convert_dtype=convert_dtype, args=args, **kwargs)
  gdf = pd.concat([gdf, temp], ignore_index= True)
  gdf = pd.concat([gdf, temp], ignore_index= True)
  gdf = pd.concat([gdf, temp], ignore_index= True)
  gdf = pd.concat([gdf, temp], ignore_index= True)
  result = super().apply(func, convert_dtype=convert_dtype, args=args, **kwargs)
  gdf = pd.concat([gdf, temp], ignore_index= True)
  gdf = pd.concat([gdf, temp], ignore_index= True)
  gdf = pd.concat([gdf, temp], ignore_index= T

In [33]:
gdf = gpd.GeoDataFrame(gdf, geometry = 'geometry', crs = 'EPSG:4326')

In [34]:
gdf.reset_index(names = ['site_id_created'], inplace = True)

In [35]:
gdf.rename(columns = {'Project ID':'project_id_reported','Project Listed Date':'planting_date_reported'}, inplace = True)

In [36]:
gdf = gdf.assign(species_count_reported=None,
              species_planted_reported=None,
              survival_rate_reported=None,
              trees_planted_reported=None)

In [47]:
gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 513 entries, 0 to 512
Data columns (total 14 columns):
 #   Column                      Non-Null Count  Dtype   
---  ------                      --------------  -----   
 0   site_id_created             513 non-null    int64   
 1   project_id_reported         513 non-null    object  
 2   planting_date_reported      513 non-null    object  
 3   geometry                    504 non-null    geometry
 4   site_sqkm                   504 non-null    float64 
 5   project_geometries_invalid  504 non-null    object  
 6   url                         513 non-null    object  
 7   project_pdf_available       513 non-null    bool    
 8   species_count_reported      0 non-null      object  
 9   species_planted_reported    0 non-null      object  
 10  survival_rate_reported      0 non-null      object  
 11  trees_planted_reported      0 non-null      object  
 12  host_name                   513 non-null    object  
 13  planting_dat

In [46]:
gdf['host_name'] = 'Climate Action Reserve'
gdf["planting_date_type"]="Project Listed Date"


### Save it

In [40]:
gdf.to_file('../midsave/climate_action_reserve.gpkg', driver='GPKG')