The JSON file contains metadata from the website https://facethefuture.com/#projects, and this script shows the preprocessing and the generation of the output geodata file (written as a GeoPackage).

In [None]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import shape, Polygon, MultiPolygon, LineString, MultiLineString, Point, MultiPoint

In [None]:
# Load the raw JSON export into a DataFrame.
df_raw = pd.read_json(path_or_buf="../input/FaceTheFuture/FaceTheFuture.json")
# Preview a single record to inspect the column layout.
df_raw.head(n=1)

In [None]:
# Expand the per-row "properties" mappings into top-level columns.
# Building one DataFrame from the list of dicts avoids creating a separate
# pd.Series for every row, which is what .apply(pd.Series) does.
# Rows whose "properties" entry is not a dict (e.g. null) contribute no
# values instead of producing a stray column.
properties_df = pd.DataFrame(
    [props if isinstance(props, dict) else {} for props in df_raw["properties"]],
    index=df_raw.index,  # keep row alignment with df_raw for the concat below
)
df = pd.concat([df_raw.drop(columns=["properties"]), properties_df], axis=1)
df.info()

### Fix geometries

In [None]:
# Geometry "type" values that are converted; every other input becomes None.
_SUPPORTED_GEOMETRY_TYPES = ('Polygon', 'MultiPolygon', 'LineString', 'MultiLineString', 'Point', 'MultiPoint')


def _to_shapely(geom):
    """Return a shapely geometry for a supported geometry dict, else None."""
    if not isinstance(geom, dict):
        return None
    if geom.get('type') not in _SUPPORTED_GEOMETRY_TYPES:
        return None
    return shape(geom)


df['geometry'] = df['geometry'].apply(_to_shapely)

In [None]:
# Wrap the table as a GeoDataFrame with "geometry" as the active geometry
# column and Web Mercator (EPSG:3857) as the declared CRS.
# NOTE(review): the CRS is asserted here, not read from the input file —
# confirm it matches the coordinates in the source data.
gdf = gpd.GeoDataFrame(data=df, geometry="geometry", crs="EPSG:3857")

In [None]:
# Split multi-part geometries into one row per part and relabel the rows
# 0..n-1 so the index carries no trace of the original feature grouping.
gdf = gdf.explode(index_parts=False, ignore_index=True)

In [None]:
# Repair invalid geometries in the active geometry column.
# NOTE(review): a repaired geometry may come back as a multi-part geometry
# or a collection; confirm that is acceptable at this point in the pipeline.
gdf["geometry"] = gdf.geometry.make_valid()

### Harmonizing nomenclature

In [None]:
# Source columns to keep; everything else is dropped.
filtered_columns = [
    "GlobalID",
    "id",
    "TreeNumber",
    "ContractArea",
    "PlantingYear_no",
    "Comments",
    "Creator",
    "geometry",
]
# Take an explicit copy so later column assignments do not touch the wider frame.
gdf = gdf[filtered_columns].copy()

In [None]:
# Mapping from source column names to the harmonized column names.
new_column_names = {
    # identifiers
    "GlobalID": "project_id_reported",
    "id": "site_id_reported",
    # reported planting attributes
    "TreeNumber": "trees_planted_reported",
    "ContractArea": "site_sqkm",
    "PlantingYear_no": "planting_date_reported",
    "Comments": "project_description_reported",
    "Creator": "host_name",
    # the geometry column keeps its name
    "geometry": "geometry",
}
gdf = gdf.rename(new_column_names, axis="columns")

In [None]:
# Site area in square kilometres, overwriting any existing 'site_sqkm' values.
# The frame's CRS (EPSG:3857, Web Mercator) is not an equal-area projection,
# so planar areas measured in it are increasingly inflated away from the
# equator. Measure in EPSG:6933 (a global equal-area CRS in metres) instead,
# then convert m² -> km².
gdf['site_sqkm'] = gdf['geometry'].to_crs('EPSG:6933').area / 1e6
# Constant source URL for every row.
gdf['url'] = 'https://facethefuture.com/'
# Columns of the harmonized schema that are left empty (None) here.
gdf = gdf.assign(
    species_count_reported=None,
    country=None,
    species_planted_reported=None,
    survival_rate_reported=None,
)
gdf.info()

In [None]:
# Summary statistics of the computed site areas, as a quick sanity check.
gdf.loc[:, "site_sqkm"].describe()

In [None]:
# Reproject to geographic WGS 84 (EPSG:4326), then write the result to disk.
gdf_wgs84 = gdf.to_crs("EPSG:4326")
gdf_wgs84.to_file("../midsave/face_the_future.gpkg")