### Projects Meta Data

In [None]:
# The input file was downloaded from the Plant-for-the-Planet website: https://app.plant-for-the-planet.org/app/projects?_scope=map&currency=EUR&tenant=ten_NxJq55pm&filter[purpose]=trees,conservation&locale=en
import pandas as pd
import json
import requests
import ast
from shapely.geometry import shape, Polygon, MultiPolygon,mapping
from shapely.ops import transform
import pyproj
import numpy as np
from scipy import stats
from shapely import wkt
import geopandas as gpd

from helper_functions import remove_not_geom, count_all_polygons, split_multipolygon, convert_3d_to_2d

# Retrieve website information

In [None]:
# Load the project overview table; its 'properties.id' column supplies the
# project ids used by the cells below.
df = pd.read_csv(
    filepath_or_buffer="../input/Plant_for_planet_shapefiles/plantforplanet_projects.csv"
)

In [None]:
# Group the overview table by project id: {project_id: [row_dict, ...]}, where
# each row dict holds every column except 'properties.id'.
# Iterating over the groupby (instead of groupby.apply) keeps the grouping
# column available inside each group on every pandas version; newer pandas
# releases stop passing grouping columns to apply(), which would make the
# drop() below raise a KeyError.
projects_dict = {
    project_id: group.drop(columns='properties.id').to_dict(orient='records')
    for project_id, group in df.groupby('properties.id')
}
projects_dict

In [None]:
# Download the extended record of every project id found in the overview
# table and cache the decoded API responses in a single JSON file.
all_projects = df['properties.id'].unique()

project_info = {}      # slug -> decoded JSON payload of a successful request
failed_projects = {}   # slug -> short reason why the download failed

# A shared session reuses the HTTP connection across the many requests.
with requests.Session() as session:
    for slug in all_projects:
        url = f"https://app.plant-for-the-planet.org/app/projects/{slug}?_scope=extended&currency=EUR&locale=en"
        try:
            # Without a timeout a stalled connection would hang the notebook.
            response = session.get(url, timeout=30)
        except requests.RequestException as error:
            # Best effort: note the failure and continue with the next project.
            failed_projects[slug] = str(error)
            continue

        if response.status_code == 200:
            project_info[slug] = response.json()
        else:
            failed_projects[slug] = f"HTTP {response.status_code}"

# Only successful payloads are written. A plain-string placeholder stored
# among the project dicts cannot be loaded into a table by the preprocessing
# cells that read this file.
with open('../input/Plant_for_planet_shapefiles/project_info.json', 'w') as json_file:
    json.dump(project_info, json_file)

print("Project information saved to '../input/Plant_for_planet_shapefiles/project_info.json'")
if failed_projects:
    print(f"Failed to retrieve information for {len(failed_projects)} project(s): {failed_projects}")

# Preprocessing

In [None]:
# Load the cached API responses. The JSON object maps project id -> project
# record, so read_json yields one column per project; transposing turns that
# into one row per project.
json_file_path = '../input/Plant_for_planet_shapefiles/project_info.json'
raw_df = pd.read_json(json_file_path).transpose()
raw_df.head()

In [None]:
# Rename the API fields to the "*_reported" column names that the following
# cells refer to.
raw_df = raw_df.rename(
    columns={
        "id": "project_id_reported",
        "description": "project_description_reported",
        "countPlanted": "trees_planted_reported",
        "firstTreePlanted": "planting_date_reported",
        "survivalRate": "survival_rate_reported",
    }
)

In [None]:
# Print a concise summary of the project table (columns, non-null counts, dtypes).
raw_df.info()

### Convert nested polygons to one polygon per row

In [None]:
# Flatten the nested 'sites' list of every project into one record per site.
site_columns = ['project_id_reported', 'geometry', 'site_id', 'site_description', 'site_status']

expanded_rows = []
for project_id, sites in zip(raw_df['project_id_reported'], raw_df['sites']):
    # A project without a site list (null / missing value) contributes no rows
    # instead of aborting the whole loop with a TypeError.
    if not isinstance(sites, (list, tuple)):
        continue
    for site in sites:
        properties = site['properties']
        expanded_rows.append({
            'project_id_reported': project_id,
            # A missing geometry is passed on as None.
            'geometry': site.get('geometry'),
            'site_id': properties['id'],
            # Optional descriptive fields fall back to None when absent.
            'site_description': properties.get('description'),
            'site_status': properties.get('status'),
        })

# Explicit columns keep the expected schema even when no site was collected.
df = pd.DataFrame(expanded_rows, columns=site_columns)

In [None]:
def _geojson_to_shape(geojson):
    """Build a shapely geometry from a GeoJSON-like mapping; None becomes an empty Polygon."""
    if geojson is None:
        return Polygon()
    return shape(geojson)


df['geometry'] = df['geometry'].apply(_geojson_to_shape)

In [None]:
# Show the sites that have no usable geometry. Missing geometries were replaced
# by an empty Polygon in the cell above, so they are never NaN and isna() would
# always return an empty selection; test for emptiness instead.
empty_geometry_mask = df['geometry'].apply(lambda geom: geom.is_empty).astype(bool)
df[empty_geometry_mask]

In [None]:
# Wrap the site table in a GeoDataFrame, declaring the coordinates as EPSG:4326.
raw_gdf = gpd.GeoDataFrame(
    df,
    crs="EPSG:4326",
    geometry="geometry",
)

In [None]:
# Preview the first five site rows (head() defaults to n=5).
raw_gdf.head()

Explode multipolygons to multiple simple polygons

In [None]:
# Split multi-part geometries into one row per part.
# NOTE(review): the index layout of the result depends on the explode()
# defaults of the installed geopandas version; the index is reset with
# reset_index(drop=True) a few cells further down.
gdf = raw_gdf.explode()

In [None]:
# Preview the first five exploded rows (head() defaults to n=5).
gdf.head()

Simplify 3D to 2D

In [None]:
# Run every geometry through the project helper convert_3d_to_2d
# (presumably it drops the Z coordinate - confirm in helper_functions).
geometries_2d = gdf['geometry'].apply(convert_3d_to_2d)
gdf['geometry'] = geometries_2d

In [None]:
# Repair invalid geometries before any measurement is taken on them.
valid_geometries = gdf['geometry'].make_valid()
gdf['geometry'] = valid_geometries

In [None]:
# Replace the index with a fresh positional one; the previous index is
# discarded rather than kept as a column.
gdf = gdf.reset_index(drop=True)
gdf.info()

Add area sizes

In [None]:
# Polygon areas in square kilometres.
# Areas have to be measured in an equal-area projection: Web Mercator
# (EPSG:3857) is not area-preserving and overstates areas more and more with
# distance from the equator. EPSG:6933 (WGS 84 / NSIDC EASE-Grid 2.0 Global)
# is a global cylindrical equal-area CRS with coordinates in metres, hence the
# division by 1e6 to convert m^2 to km^2.
gdf['polygon_areas_km'] = gdf['geometry'].to_crs("EPSG:6933").area / 1e6

In [None]:
# Summary statistics of the computed polygon areas.
gdf.loc[:, 'polygon_areas_km'].describe()

### Add project-level information

In [None]:
# Attach the project-level attributes to every site row. A left join on the
# project id keeps all site rows, including those without a matching project.
project_columns = [
    'project_id_reported',
    'trees_planted_reported',
    'country',
    "project_description_reported",
    "planting_date_reported",
    "survival_rate_reported",
]
gdf = gdf.merge(raw_df[project_columns], how='left', on='project_id_reported')

In [None]:
# Constant provenance columns shared by every row of this dataset.
# The host name is spelled like the organisation behind the URL below
# ("Plant", not "Planet").
gdf['host_name'] = 'Plant-for-the-Planet'
gdf['url'] = 'https://www.plant-for-the-planet.org/'
# No species information is extracted in this notebook, so these columns are
# created empty.
gdf['species_count_reported'] = None
gdf['species_planted_reported'] = None

In [None]:
# Print a concise summary of the final table (columns, non-null counts, dtypes)
# before it is written to disk.
gdf.info()

In [None]:
# Persist the enriched site polygons as a GeoPackage. The driver is named
# explicitly so the output format does not depend on geopandas inferring it
# from the file extension.
gdf.to_file('../input/Plant_for_planet_shapefiles/plant_planet.gpkg', driver='GPKG')