# Reforestaction
Data source: https://www.reforestaction.com/en/projects

In [1]:
import json
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import requests
from urllib.parse import urljoin
from helper_functions import fetch_page_data

In [2]:


# The listing is requested page by page; pages 1 through 11 are fetched.
FIRST_PAGE, LAST_PAGE = 1, 11

all_data = []
for page in range(FIRST_PAGE, LAST_PAGE + 1):
    print(f"Fetching page {page}...")
    page_data = fetch_page_data(page)
    # A response without a 'data' key contributes no records.
    all_data += page_data.get('data', [])

# Flatten the nested records into dotted column names (e.g. 'attributes.externalId').
df = pd.json_normalize(all_data)

Fetching page 1...
Fetching page 2...
Fetching page 3...
Fetching page 4...
Fetching page 5...
Fetching page 6...
Fetching page 7...
Fetching page 8...
Fetching page 9...
Fetching page 10...
Fetching page 11...


### Fix geometries

In [3]:
# Build one point per record from the reported centroid, in (x=longitude, y=latitude) order.
# points_from_xy creates the whole geometry array in a single vectorised call instead of
# running a Python lambda once per row.
# NOTE(review): assumes both centroid columns hold values convertible to float - confirm.
df['geometry'] = gpd.points_from_xy(
    df['attributes.centroidLongitude'],
    df['attributes.centroidLatitude'],
)

In [4]:
# Promote the plain frame to a GeoDataFrame, using the 'geometry' column as the active
# geometry and declaring its coordinates as EPSG:4326.
gdf = gpd.GeoDataFrame(df, geometry='geometry', crs='EPSG:4326')

# Overwrite the geometry column with the make_valid() result of each geometry.
gdf['geometry'] = gdf.geometry.make_valid()

### Harmonize nomenclature

In [5]:
# Source column -> harmonised column name. The keys also define which identifier
# columns are kept, so the selection and the rename cannot drift apart.
columns_rename_mapping = {
    'id': 'project_id_reported',
    'attributes.externalId': 'site_id_reported',
}

# Keep the identifier columns plus the geometry, then rename. The rename is chained
# (no inplace=True) so the column subset is never mutated in place and `gdf` ends up
# bound to a fresh frame that later cells can safely add columns to.
gdf = gdf[list(columns_rename_mapping) + ['geometry']].rename(columns=columns_rename_mapping)

In [6]:
# Dataset-level flag: True as soon as at least one geometry fails the validity check,
# False otherwise (including when the frame has no rows).
invalid_geom = bool((~gdf.geometry.is_valid).any())

In [7]:
# Attributes for which this cell has no value; they are added as all-None columns,
# in this order, after the constant metadata columns.
placeholder_columns = [
    'project_description_reported',
    'site_sqkm',
    'species_count_reported',
    'species_planted_reported',
    'survival_rate_reported',
    'trees_planted_reported',
    'planting_date_reported',
    'country',
]

gdf = gdf.assign(
    # Constant provenance columns, identical for every row.
    host_name='reforestaction',
    url='https://www.reforestaction.com/en/projects',
    # The same dataset-level validity flag is written to every row.
    project_geometries_invalid=invalid_geom,
    **dict.fromkeys(placeholder_columns),
)
gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 1100 entries, 0 to 1099
Data columns (total 14 columns):
 #   Column                        Non-Null Count  Dtype   
---  ------                        --------------  -----   
 0   project_id_reported           1100 non-null   int64   
 1   site_id_reported              1100 non-null   int64   
 2   geometry                      1100 non-null   geometry
 3   host_name                     1100 non-null   object  
 4   url                           1100 non-null   object  
 5   project_geometries_invalid    1100 non-null   bool    
 6   project_description_reported  0 non-null      object  
 7   site_sqkm                     0 non-null      object  
 8   species_count_reported        0 non-null      object  
 9   species_planted_reported      0 non-null      object  
 10  survival_rate_reported        0 non-null      object  
 11  trees_planted_reported        0 non-null      object  
 12  planting_date_reported        0 non-null

### Save it

In [8]:
# Write the harmonised frame to the intermediate-save folder as a GeoPackage.
# The driver is named explicitly so the output format does not depend on GeoPandas
# inferring it from the '.gpkg' extension.
# NOTE(review): the '../midsave' directory is assumed to exist already - confirm.
gdf.to_file('../midsave/reforestaction.gpkg', driver='GPKG')