In [None]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import shape, Polygon, MultiPolygon

In [None]:
# Location of the raw export, relative to this notebook.
RESTOR_JSON_PATH = "../input/Restor_Eco/restor_eco.json"

# Read the JSON export into a plain DataFrame and preview the first three rows.
df = pd.read_json(RESTOR_JSON_PATH)
df.head(3)

In [None]:
# Summarise the loaded table: column names, dtypes and non-null counts.
df.info()

### Fix geometries

In [None]:
def _geojson_to_geometry(geojson):
    """Convert one value of the ``polygon`` column into a shapely geometry.

    Parameters
    ----------
    geojson : Any
        A GeoJSON-like dict with ``type`` and ``coordinates`` keys. Any other
        value (missing, wrong type, missing keys) is treated as "no geometry".

    Returns
    -------
    shapely geometry
        * an empty ``Polygon`` when the input is not a usable dict;
        * a ``Polygon`` when ``type`` is ``'Polygon'``;
        * otherwise a ``MultiPolygon`` built from each entry of
          ``coordinates`` (i.e. the input is read as MultiPolygon coordinates).
    """
    is_usable = isinstance(geojson, dict) and 'type' in geojson and 'coordinates' in geojson
    if not is_usable:
        return Polygon()

    if geojson['type'] == 'Polygon':
        # For a Polygon, `coordinates` is a list of rings rather than a list of
        # polygons, so it must not be split entry-by-entry like a MultiPolygon.
        return shape(geojson)

    return MultiPolygon([
        shape({'type': 'Polygon', 'coordinates': rings})
        for rings in geojson['coordinates']
    ])


df['geometry'] = df['polygon'].apply(_geojson_to_geometry)

In [None]:
# Wrap the table as a GeoDataFrame, declaring the coordinates as EPSG:4326.
gdf = gpd.GeoDataFrame(df, geometry='geometry', crs='EPSG:4326')

# Replace every geometry with its repaired (valid) counterpart.
gdf['geometry'] = gdf.geometry.make_valid()

In [None]:
# Split multi-part geometries into one row per part; index_parts=False keeps
# the original row label on every part instead of adding a second index level.
gdf = gdf.explode(index_parts = False)

In [None]:
# Keep only polygonal rows, split any remaining multi-part geometries into
# single parts, and renumber the rows 0..n-1.
# NOTE(review): the empty `Polygon()` placeholders created for records without
# a usable polygon presumably report geom_type 'Polygon' and therefore survive
# this filter - confirm whether they should be dropped.
gdf = (
    gdf[gdf.geometry.geom_type.isin(['Polygon', 'MultiPolygon'])]
    .explode(index_parts=False)
    .reset_index(drop=True)
)

In [None]:
# Visual sanity check of the cleaned polygons. The frame is in EPSG:4326, so
# the axes are longitude / latitude in degrees.
ax = gdf.plot()
# Label the figure so it stands alone; the trailing `;` suppresses the repr echo.
ax.set(title='Restor sites after geometry clean-up', xlabel='Longitude (°)', ylabel='Latitude (°)');

### Harmonize nomenclature

In [None]:
# Site area in square kilometres.
# Areas have to be measured in an equal-area projection: Web Mercator
# (EPSG:3857) inflates areas more and more with distance from the equator,
# so sites at different latitudes would not be comparable. EPSG:6933 is a
# global cylindrical equal-area CRS with metre units; m² / 1e6 gives km².
gdf['site_sqkm'] = gdf['geometry'].to_crs(6933).area / 1e6
gdf['site_sqkm'].describe()

In [None]:
def _first_country_code(codes):
    """Return the first entry of a non-empty list, or '' for anything else."""
    if not isinstance(codes, list) or len(codes) == 0:
        return ''
    return codes[0]


# Reduce each `countryCode` value to a single code stored in `iso2`.
gdf['iso2'] = gdf['countryCode'].apply(_first_country_code)

In [None]:
# Map the source field names onto the harmonized column names (the naming
# format used in the paper's columns section).
columns_rename_mapping = dict(
    id='project_id_reported',
    description='description_reported',
    interventionStartYear='planting_date_reported',
    website='url',
    iso2='country',
)
# Rebind to the renamed frame rather than mutating it in place.
gdf = gdf.rename(columns=columns_rename_mapping)

In [None]:
def _first_item_or_blank(cell):
    """Return the first element of a non-empty list, otherwise an empty string."""
    if isinstance(cell, list) and len(cell) > 0:
        return cell[0]
    return ''


# Collapse each of these columns to its first list element; values that are
# not a non-empty list become ''.
for list_column in ('description_reported', 'planting_date_reported', 'url'):
    gdf[list_column] = gdf[list_column].apply(_first_item_or_blank)

In [None]:
# Columns that make up the harmonized output; everything else is dropped.
columns_to_keep = [
    'project_id_reported',
    'description_reported',
    'planting_date_reported',
    'geometry',
    'url',
    'site_sqkm',
    'country',
]
# Take an explicit copy: new columns are assigned to this subset in a later
# cell, and writing into a column selection of another frame can raise
# pandas' SettingWithCopyWarning.
gdf = gdf[columns_to_keep].copy()

In [None]:
# Sequential site identifiers: restor_site_1 ... restor_site_<number of rows>.
gdf['site_id_reported'] = [f'restor_site_{position}' for position in range(1, len(gdf) + 1)]

# Columns added with no values (all None).
placeholder_columns = [
    'species_count_reported',
    'species_planted_reported',
    'survival_rate_reported',
    'trees_planted_reported',
]
gdf = gdf.assign(**dict.fromkeys(placeholder_columns))
gdf.info()

### Save it

In [None]:
# Write the harmonized sites to a GeoPackage next to the raw input.
gdf.to_file('../input/Restor_Eco/restor_eco.gpkg', driver='GPKG')