# ExplorerLand data
https://explorer.land

In [None]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import shape, Polygon, MultiPolygon, LineString, MultiLineString, Point, MultiPoint
import json
import requests
import time
from random import uniform

In [None]:
# Download the public project listing page by page and stack the normalized
# JSON records into one DataFrame (nested keys are flattened with '_').
project_pages = []
for page in range(0, 10):
    url = f"https://api.explorer.land/v1/public/projects?listed=1&status_ids[]=e6b4q7jev3n8xd50&status_ids[]=exm7vzjapj0r5846&order_by_status=1&include=activity_types,ecosystem_types,partners,tabs,countries,modules&limit=1000&page={page}"
    # A timeout keeps a stalled connection from hanging the notebook, and
    # raise_for_status() surfaces HTTP errors instead of parsing an error body.
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    project_pages.append(pd.json_normalize(response.json()['data'], sep='_'))

# Concatenate once at the end rather than re-copying the growing frame per page.
projects = pd.concat(project_pages, ignore_index=True)

In [None]:
# Rename the API's 'slug' and 'long_description' fields to the
# '*_reported' column names used in the rest of this notebook.
reported_names = {
    'slug': 'project_id_reported',
    'long_description': 'project_description_reported',
}
projects = projects.rename(columns=reported_names)

In [None]:
# Display the downloaded project table for a visual check.
projects

In [None]:
# Fetch the polygon features of every project and stack them into one table,
# tagging each feature with the slug of the project it was requested for.
site_frames = []
for project in projects.project_id_reported.unique().tolist():
    # Passing the slug via `params` lets requests URL-encode it.
    response = requests.get(
        "https://api.explorer.land/v1/public/spots/polygon",
        params={'project_slug': project},
        timeout=60,
    )
    response.raise_for_status()
    # .get() tolerates a payload without a 'features' key (treated as "no sites").
    features = response.json().get('features')
    if features:
        temp = pd.json_normalize(features, sep='_')
        temp['project_id_reported'] = project
        site_frames.append(temp)
    # Throttle after every request, including those that returned no features,
    # so consecutive empty projects do not hit the API back-to-back.
    time.sleep(uniform(0, 5.0))

# pd.concat raises on an empty list, so fall back to an empty frame.
sites = pd.concat(site_frames, ignore_index=True) if site_frames else pd.DataFrame()

In [None]:
# Work on an independent copy so the downloaded `sites` table stays untouched.
raw_df = sites.copy(deep=True)

In [None]:
# Build shapely geometries from the flattened GeoJSON fields.
# shape() keeps interior rings (holes) and honours each feature's declared
# geometry type; building Polygon(p[0]) by hand kept only the exterior ring,
# which silently filled holes and inflated the site area.
# NOTE(review): assumes a 'geometry_type' column is produced by json_normalize;
# when it is absent the previous assumption (MultiPolygon) is used.
raw_df["geometry"] = raw_df.apply(
    lambda row: shape({
        "type": row.get("geometry_type", "MultiPolygon"),
        "coordinates": row["geometry_coordinates"],
    }),
    axis=1,
)

In [None]:
# Wrap the table in a GeoDataFrame, using the 'geometry' column as the active
# geometry and declaring its coordinates as EPSG:4326.
gdf = gpd.GeoDataFrame(data=raw_df, geometry='geometry', crs='EPSG:4326')

### Fix geometries

Check if input geometries are valid

In [None]:
# True when at least one input geometry fails the validity check.
invalid_geom = bool((~gdf.geometry.is_valid).any())

Convert linestrings to polygons, if applicable

In [None]:
def _linestring_to_polygon(geom):
    """Return *geom* as a Polygon when it is a LineString that can form a ring.

    Open lines are closed by repeating their first vertex. Anything that is
    not a LineString, and any line too short to enclose an area, is returned
    unchanged.
    """
    if not isinstance(geom, LineString):
        return geom
    ring = list(geom.coords)
    if ring and ring[0] != ring[-1]:
        ring.append(ring[0])
    # A closed ring needs at least four coordinates (three vertices plus the
    # closing one); shorter lines are left as they are.
    if len(ring) < 4:
        return geom
    return Polygon(ring)


gdf['geometry'] = gdf['geometry'].apply(_linestring_to_polygon)

Explode multipolygons to individual polygons

In [None]:
# Split multi-part geometries into one row per part and run make_valid() on the
# result. The check is repeated because make_valid() rewrites the geometry
# column on every pass, so MultiPolygons may be present again afterwards.
while gdf.geometry.geom_type.eq('MultiPolygon').any():
    single_parts = gdf.explode(index_parts=False)
    single_parts['geometry'] = single_parts['geometry'].make_valid()
    gdf = single_parts

Keep only polygons and points

In [None]:
# Drop every row whose geometry type is neither 'Polygon' nor 'Point'.
is_polygon_or_point = gdf.geometry.geom_type.isin(['Polygon', 'Point'])
gdf = gdf.loc[is_polygon_or_point].copy()

Keep only reforestation sites

In [None]:
# Keep only rows whose properties_category_label is exactly 'Reforestation'.
is_reforestation = gdf['properties_category_label'] == 'Reforestation'
gdf = gdf.loc[is_reforestation]

Create site-level id

In [None]:
# Renumber the rows 0..n-1, then move that running number into a
# 'site_id_created' column.
gdf = gdf.reset_index(drop=True)
gdf = gdf.reset_index(names=['site_id_created'])

Select and rename the site columns, then add the project-level description

In [None]:
# Columns carried over from the site table, and their harmonized names.
site_columns = ['site_id_created', 'properties_slug', 'project_id_reported', 'properties_description', 'geometry']
harmonized_names = {
    'properties_slug': 'site_id_reported',
    'properties_description': 'site_description_reported',
}
project_descriptions = projects[['project_id_reported', 'project_description_reported']]

# Left-join the project description onto each site, then remove exact
# duplicate rows and renumber the index.
gdf = gdf.loc[:, site_columns].rename(columns=harmonized_names)
gdf = gdf.merge(project_descriptions, on='project_id_reported', how='left')
gdf = gdf.drop_duplicates().reset_index(drop=True)

### Harmonize nomenclature

In [None]:
# Constant label for the hosting platform.
gdf['host_name'] = 'Explorer Land'
# Public page of each site, built from the project and site slugs.
gdf['url'] = 'https://explorer.land/x/project/' + gdf.project_id_reported + '/site/' + gdf.site_id_reported + '/about'
# Site area in square kilometres. Areas are measured in an equal-area
# projection (EPSG:6933, metres): Web Mercator (EPSG:3857) is not equal-area
# and inflates areas increasingly with distance from the equator.
gdf['site_sqkm'] = gdf['geometry'].to_crs('EPSG:6933').area / 1e6
# Flag computed before the geometry fixes: True when any input geometry was invalid.
gdf['project_geometries_invalid'] = invalid_geom

In [None]:
# Quick visual check of the resulting site geometries.
gdf.plot()

In [None]:
# Summary statistics of the site areas, to spot implausible values.
gdf['site_sqkm'].describe()

### Save it

In [None]:
# Write the harmonized sites to the intermediate output file.
output_path = "../midsave/explorer_land.gpkg"
gdf.to_file(output_path)