# Reforestum
https://reforestum.com

In [1]:
import json
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

In [2]:
with open('../input/Reforestum/forests.json', 'r') as file:
    data = json.load(file)
type(data)

list

In [3]:
if isinstance(data, list) and isinstance(data[0], dict):
    df = pd.DataFrame(data)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3691 entries, 0 to 3690
Data columns (total 17 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   status                           3691 non-null   int64  
 1   data.forests.id                  3691 non-null   object 
 2   data.forests.name                3691 non-null   object 
 3   data.forests.short_desc          2082 non-null   object 
 4   data.forests.location_desc       3688 non-null   object 
 5   data.forests.total_surface       3691 non-null   int64  
 6   data.forests.reforested_surface  3691 non-null   int64  
 7   data.forests.occupied_surface    3691 non-null   int64  
 8   data.forests.main_image          3691 non-null   object 
 9   data.forests.available           3691 non-null   bool   
 10  data.forests.offset_type         3691 non-null   object 
 11  data.forests.timeline_formula    16 non-null     object 
 12  data.forests.co2_yea

In [4]:
df['country'] = df['data.forests.location_desc'].astype(str).apply(lambda x: x.split()[-1] if isinstance(x, str) else '')
df.head()

Unnamed: 0,status,data.forests.id,data.forests.name,data.forests.short_desc,data.forests.location_desc,data.forests.total_surface,data.forests.reforested_surface,data.forests.occupied_surface,data.forests.main_image,data.forests.available,data.forests.offset_type,data.forests.timeline_formula,data.forests.co2_years,data.forests.co2_sqm,data.forests.forest_slug,data.forests.captured_co2,data.forests.coordinates,country
0,200,1,Génesis,"Génesis, our first forest. Crowdfunded in 2017.","Picos de Europa, Spain",40000,40000,40000,https://s3.eu-central-1.amazonaws.com/reforest...,False,SQM-SHARES,"if(y<20){r = 0.0000672*(Math.pow(Math.E,(0.372...",25,0.1777755102,genesis,39883.9013,"[-4.3529, 42.7489]",Spain
1,200,5,Calahorra de Boedo,"Calahorra de Boedo, Reforestum's first project...","Boedo-Ojeda county, north of Palencia, Spain",220000,220000,220000,https://statics.reforestum.com/forests/galleri...,False,SQM-SHARES,return (22.88908544779504 / (1 + Math.exp(-0.1...,40,0.01998192499,calahorra,0.0,"[-4.3974, 42.5673]",Spain
2,200,6,ACR Forest,This forest covers an area of 2.12 hectares an...,"Fresno del Río, Palencia, Spain",20000,20000,20000,https://statics.reforestum.com/forests/galleri...,False,SQM-SHARES,return (21.90292305405437 / (1 + Math.exp(-0.1...,40,0.02146226415,acr,0.0,"[-4.8201, 42.6972]",Spain
3,200,7,Bosque Kiehl's,The Kiehl's forest is an afforestation project...,"Santa Cruz de Boedo, Palencia, Spain",101400,101400,101400,https://statics.reforestum.com/forests/galleri...,False,SQM-SHARES,return (55.435503028539 / (1 + Math.exp(-0.177...,40,0.04720641026,santa-cruz-de-boedo,0.0,"[-4.3818, 42.5442]",Spain
4,200,9,Galeries Lafayette Forest,"This forest, located in Loire Atlantique, Fran...","Loire Atlantique, France",33200,33200,33200,https://statics.reforestum.com/forests/galleri...,False,SQM-SHARES,return (27.351921932309804 / (1 + Math.exp(-0....,30,0.02680722892,galeries-lafayette,0.0,"[-1.3546, 47.5379]",France


In [5]:
columns_rename_mapping = {
    'data.forests.id': 'project_id_reported',
    'data.forests.coordinates': 'geometry',
    'data.forests.short_desc': 'project_description_reported',
    'country': 'country',
    'data.forests.reforested_surface':'site_sqkm'
}
df = df[list(columns_rename_mapping.keys())]
df.rename(columns=columns_rename_mapping, inplace=True)

### Fix geometries

In [6]:
df['geometry'] = df['geometry'].apply(lambda x: Point(x))

In [7]:
gdf = gpd.GeoDataFrame(df, geometry='geometry', crs = 'EPSG:4326')
gdf['geometry'] = gdf['geometry'].make_valid()

In [8]:
invalid_geom = False

if len(gdf[gdf.geometry.is_valid == False]) > 0:
    invalid_geom = True

### Harmonize nomenclature

In [9]:
gdf['site_sqkm'] = gdf['site_sqkm'] / 1e6
gdf['url'] = 'https://reforestum.com'
gdf['host_name'] = 'reforestum'
gdf['project_geometries_invalid'] = invalid_geom

In [10]:
gdf = gdf.assign(site_sqkm=None,species_count_reported=None, species_planted_reported=None, survival_rate_reported=None,trees_planted_reported=None,planting_date_reported=None,country=None,site_id_reported=None)
gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 3691 entries, 0 to 3690
Data columns (total 14 columns):
 #   Column                        Non-Null Count  Dtype   
---  ------                        --------------  -----   
 0   project_id_reported           3691 non-null   object  
 1   geometry                      3691 non-null   geometry
 2   project_description_reported  2082 non-null   object  
 3   country                       0 non-null      object  
 4   site_sqkm                     0 non-null      object  
 5   url                           3691 non-null   object  
 6   host_name                     3691 non-null   object  
 7   project_geometries_invalid    3691 non-null   bool    
 8   species_count_reported        0 non-null      object  
 9   species_planted_reported      0 non-null      object  
 10  survival_rate_reported        0 non-null      object  
 11  trees_planted_reported        0 non-null      object  
 12  planting_date_reported        0 non-null

In [11]:
gdf.to_file('../midsave/reforestum.gpkg')