### Projects Meta Data

In [1]:
# the file downloaded from planet websites https://app.plant-for-the-planet.org/app/projects?_scope=map&currency=EUR&tenant=ten_NxJq55pm&filter[purpose]=trees,conservation&locale=en
import pandas as pd
import json
import requests
import ast
from shapely.geometry import shape, Polygon, MultiPolygon,mapping
from shapely.ops import transform
import pyproj
import numpy as np
from scipy import stats
from shapely import wkt
import geopandas as gpd

from helper_functions import remove_not_geom, count_all_polygons, split_multipolygon, convert_3d_to_2d

# Retrieve website information

In [2]:
df=pd.read_csv("../input/Plant_for_planet_shapefiles/plantforplanet_projects.csv")

In [3]:
projects_dict = df.groupby('properties.id').apply(lambda x: x.drop('properties.id', axis=1).to_dict(orient='records')).to_dict()
projects_dict

  projects_dict = df.groupby('properties.id').apply(lambda x: x.drop('properties.id', axis=1).to_dict(orient='records')).to_dict()


{'proj_08KPr5UC3XliXwWgDqNgiIHs': [{'Unnamed: 0': 65,
   'type': 'Feature',
   'geometry.type': 'Point',
   'geometry.coordinates': '[40.739686581322246, -2.317548419577491]',
   'properties._scope': 'map',
   'properties.allowDonations': True,
   'properties.classification': 'large-scale-planting',
   'properties.countPlanted': 813710.0,
   'properties.countTarget': 1800000.0,
   'properties.country': 'KE',
   'properties.currency': 'EUR',
   'properties.fixedRates': '[]',
   'properties.image': '631bb1e028d86881019932.jpg',
   'properties.isApproved': True,
   'properties.isFeatured': False,
   'properties.isPublished': True,
   'properties.isTopProject': True,
   'properties.location': 'Kenya',
   'properties.minTreeCount': 2.0,
   'properties.name': 'Eden Reforestation Projects - Kenya',
   'properties.paymentDefaults.fixedTreeCountOptions': '[10, 25, 50, 100]',
   'properties.paymentDefaults.fixedDefaultTreeCount': 5.0,
   'properties.purpose': 'trees',
   'properties.reviewScore'

In [4]:
all_projects = df['properties.id'].unique()

project_info = {}
for slug in all_projects:
    url = f"https://app.plant-for-the-planet.org/app/projects/{slug}?_scope=extended&currency=EUR&locale=en"
    response = requests.get(url)
    if response.status_code == 200:
        project_info[slug] = response.json()
    else:
        project_info[slug] = f"Failed to retrieve information for {slug}"

with open('../input/Plant_for_planet_shapefiles/project_info.json', 'w') as json_file:
    json.dump(project_info, json_file)

print("Project information saved to '../input/Plant_for_planet_shapefiles/project_info.json'")

Project information saved to '../input/Plant_for_planet_shapefiles/project_info.json'


# Preprocessing

In [5]:
# Reading the all the data in the json file and we transpose to have the right format table
json_file_path ='../input/Plant_for_planet_shapefiles/project_info.json'
raw_df = pd.read_json(json_file_path).T
raw_df.head(5)

Unnamed: 0,id,_scope,allowDonations,certificates,classification,coordinates,countDonated,countPlanted,countRegistered,countTarget,...,treeCost,unitCost,unitType,unitsContributed,unitsTargeted,videoUrl,visitorAssistance,website,yearAbandoned,yearAcquired
proj_ezpAp1POh20dBnYpx0BjhU35,proj_ezpAp1POh20dBnYpx0BjhU35,extended,True,[],agroforestry,"{'lon': -49.5903, 'lat': -9.2045}",313,313,0,2000,...,3.9,3.9,tree,{'tree': 313.0},{'tree': 2000},https://www.youtube.com/watch?v=9GrWYgJnFqM,True,https://ecosystemrestorationcamps.org/camp-far...,2012,2012.0
proj_ZCspL8JYmUu0OXcx6O73I1j0,proj_ZCspL8JYmUu0OXcx6O73I1j0,extended,True,[],other-planting,"{'lon': -43.468833, 'lat': -22.495032}",3572,3573,0,1000000,...,4.9,4.9,tree,{'tree': 3654.2},{'tree': 1000000},,True,http://www.itpa.org.br,2012,
proj_nXBzA2sbX2tm1D75p7bfJ81Z,proj_nXBzA2sbX2tm1D75p7bfJ81Z,extended,True,[],large-scale-planting,"{'lon': -1.979117, 'lat': 8.2204420012607}",48852,48852,0,10000000,...,2.0,2.0,tree,{'tree': 49371.6},{'tree': 10000000},https://youtu.be/f7koI_yKNtI?si=Wtu9dCV8mcCMZxq2,True,https://www.plant-for-the-planet.org/plant-for...,1996,2020.0
proj_wMNVTLkRIZ4TcV8oi26wlRwN,proj_wMNVTLkRIZ4TcV8oi26wlRwN,extended,True,[],agroforestry,"{'lon': 104.791856, 'lat': 20.700794}",155,155,0,100000,...,8.5,8.5,tree,{'tree': 155.0},{'tree': 100000},,True,trofaco.org,2018,2021.0
proj_vH52lGTvcjbCGKG4dINKsXJU,proj_vH52lGTvcjbCGKG4dINKsXJU,extended,False,[],agroforestry,"{'lon': -67.945206, 'lat': 6.169667}",24,30024,30000,212500,...,13.53,13.53,tree,{'tree': 24},{'tree': 212500},,False,https://www.kardianuts.com/,0,


In [6]:
raw_df.rename(columns= {"id":"project_id_reported", 
                        "description":"project_description_reported",
                        "countPlanted":"trees_planted_reported", 
                        "firstTreePlanted": "planting_date_reported",
                        "survivalRate": "survival_rate_reported"}, inplace = True)

In [7]:
raw_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 263 entries, proj_ezpAp1POh20dBnYpx0BjhU35 to proj_YeRIxbP2YYWz27zTOzZi5wFH
Data columns (total 61 columns):
 #   Column                        Non-Null Count  Dtype 
---  ------                        --------------  ----- 
 0   project_id_reported           263 non-null    object
 1   _scope                        263 non-null    object
 2   allowDonations                263 non-null    object
 3   certificates                  263 non-null    object
 4   classification                257 non-null    object
 5   coordinates                   263 non-null    object
 6   countDonated                  257 non-null    object
 7   trees_planted_reported        257 non-null    object
 8   countRegistered               257 non-null    object
 9   countTarget                   263 non-null    object
 10  country                       263 non-null    object
 11  currency                      263 non-null    object
 12  degradationCause             

### Convert nested polygons to one polygon per row

In [8]:
expanded_rows = []
for _, row in raw_df.iterrows():
    for site in row['sites']:
        
        site_data = {
            'project_id_reported': row['project_id_reported'],
            'geometry': site['geometry'],
            'site_id_reported': site['properties']['id'],
            'site_description_reported': site['properties']['description'],
            'site_status_reported': site['properties']['status']
        }
        expanded_rows.append(site_data)

df = pd.DataFrame(expanded_rows)

In [9]:
df['geometry'] = df['geometry'].apply(lambda x: shape(x) if x is not None else Polygon())

In [10]:
df[df.geometry.isna()]

Unnamed: 0,project_id_reported,geometry,site_id_reported,site_description_reported,site_status_reported


In [11]:
raw_gdf = gpd.GeoDataFrame(df, geometry='geometry', crs = 'EPSG:4326')

In [12]:
raw_gdf.head(5)

Unnamed: 0,project_id_reported,geometry,site_id_reported,site_description_reported,site_status_reported
0,proj_ezpAp1POh20dBnYpx0BjhU35,"POLYGON ((-49.95883 -9.35107, -49.95976 -9.351...",site_W97pqKxXURFOA1E,Farm for the Future demonstration plot,planting
1,proj_ZCspL8JYmUu0OXcx6O73I1j0,"POLYGON Z ((-43.47250 -22.48945 0.00000, -43.4...",site_NekKEGqkIO4rZ5C,The area to be reforested is around the Tinguá...,planting
2,proj_ZCspL8JYmUu0OXcx6O73I1j0,"POLYGON Z ((-43.46200 -22.47790 0.00000, -43.4...",site_Wl3hF91IBkei1Xy,The area to be reforested is around the Tinguá...,planting
3,proj_ZCspL8JYmUu0OXcx6O73I1j0,"POLYGON Z ((-43.46833 -22.49190 0.00000, -43.4...",site_qHUXswEmePqou5T,The area to be reforested is around the Tinguá...,planting
4,proj_nXBzA2sbX2tm1D75p7bfJ81Z,"POLYGON ((-1.99088 8.22041, -1.99354 8.23531, ...",site_2ITLGnOa3jbDUFa,Plant-for-Ghana is a pioneer reforestation pro...,planting


Check if raw geometry contains invalid geometries

Explode multipolygons to multiple simple polygons

In [13]:
gdf = raw_gdf.copy()

while 'MultiPolygon' in gdf.geometry.geom_type.unique().tolist():
    gdf = gdf.explode(index_parts=False)
    gdf['geometry'] = gdf['geometry'].make_valid()

Only keep polygons and points

In [14]:
gdf = gdf.loc[gdf.geometry.geom_type.isin(['Polygon', 'Point'])].copy()

Simplify 3D to 2D

In [15]:
gdf['geometry'] = gdf['geometry'].apply(convert_3d_to_2d)

  result = super().apply(func, convert_dtype=convert_dtype, args=args, **kwargs)


In [16]:
# Resetting the index without keeping the old index as a column
gdf.reset_index(drop=True, inplace=True)
gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 3382 entries, 0 to 3381
Data columns (total 5 columns):
 #   Column                     Non-Null Count  Dtype   
---  ------                     --------------  -----   
 0   project_id_reported        3382 non-null   object  
 1   site_id_reported           3382 non-null   object  
 2   site_description_reported  1972 non-null   object  
 3   site_status_reported       3380 non-null   object  
 4   geometry                   3382 non-null   geometry
dtypes: geometry(1), object(4)
memory usage: 132.2+ KB


Add area sizes

In [17]:
gdf['site_sqkm'] = gdf['geometry'].to_crs(3857).area / 1e6

In [18]:
gdf['site_sqkm'].describe()

count     3382.000000
mean        62.283820
std        897.704562
min          0.000000
25%          0.005371
50%          0.023971
75%          0.110689
max      37409.490953
Name: site_sqkm, dtype: float64

### Add project-level information

In [19]:
gdf = gdf.merge(raw_df[['project_id_reported', 'trees_planted_reported', 'country',"project_description_reported","planting_date_reported","survival_rate_reported"]],
                on = 'project_id_reported', how = 'left')

Fix datetime

In [20]:
gdf['planting_date_reported'] = pd.to_datetime(gdf['planting_date_reported']).dt.year

In [21]:
gdf['planting_date_reported']

0          NaN
1       2012.0
2       2012.0
3       2012.0
4       2021.0
         ...  
3377       NaN
3378       NaN
3379       NaN
3380       NaN
3381    2022.0
Name: planting_date_reported, Length: 3382, dtype: float64

In [22]:
invalid_geom = False

if len(gdf[gdf.geometry.is_valid == False]) > 0:
    invalid_geom = True  

In [23]:
gdf['host_name'] = 'Planet for the Planet'
gdf['url'] = 'https://web.plant-for-the-planet.org/en/' + gdf.project_id_reported
gdf['species_count_reported'] = None
gdf['species_planted_reported'] = None

gdf['project_geometries_invalid'] = invalid_geom

In [24]:
gdf.to_file('../midsave/plant_for_planet.gpkg')