In [1]:
import pandas as pd
import geopandas as gpd
import requests

from shapely.geometry import Polygon

from helper_functions import remove_trailing_zeros

In [2]:


url = "https://atlas.openforestprotocol.org/_next/data/QYgQ6w94CfP8KqSblp6WH/index.json"

response = requests.get(url)


if response.status_code == 200:
    data = response.json()


    hits = data.get("pageProps", {}).get("projects", {}).get("hits", {}).get("hits", [])

 
    df = pd.json_normalize(hits, sep='_')

    # Cleaning up column names
    df.columns = df.columns.str.replace(r'^_', '', regex=True)  # Remove leading underscores
    df.columns = df.columns.str.replace(r'source_', '', regex=True)  # Remove '_source_' prefi


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 285 entries, 0 to 284
Data columns (total 27 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   index                         285 non-null    object 
 1   id                            285 non-null    object 
 2   seq_no                        0 non-null      object 
 3   primary_term                  0 non-null      object 
 4   sort                          285 non-null    object 
 5   project_id                    285 non-null    object 
 6   type                          285 non-null    object 
 7   regionShortCode               284 non-null    object 
 8   developer                     285 non-null    object 
 9   name                          285 non-null    object 
 10  region                        285 non-null    object 
 11  polygonCoordinate             285 non-null    object 
 12  square                        285 non-null    object 
 13  cidSc

In [4]:
# df = pd.read_json('../input/Atlas/Atlas_data_frame.json')
# df.info()

### Clean up geometry column

Remove excessive zeros

In [5]:
df['polygonCoordinate']

0      [[[-97.9555704, 30.136472, 0], [-97.9555678, 3...
1      [[[79.4147657, 12.90351, 0], [79.4147415, 12.9...
2      [[[34.50747555742856, 0.2299028441952135, 1370...
3      [[[-89.53625081492414, 16.365542322241154], [-...
4      [[[-89.528162, 16.922236, 0], [-89.5273918, 16...
                             ...                        
280    [[[-76.4483506369876, -6.384513412496722, 0], ...
281    [[[-76.12956027421468, -6.602165490968678], [-...
282    [[[-76.1045134, -6.5638766, 0], [-76.1045175, ...
283    [[[-76.1580794, -6.6076822, 0], [-76.1580911, ...
284    [[[-76.0242417, -6.57401, 0], [-76.0242005, -6...
Name: polygonCoordinate, Length: 285, dtype: object

In [6]:
def remove_third_value(coord):

    return coord[:2] if len(coord) > 2 else coord

def remove_third_value_from_list(coord_list):
    return [[remove_third_value(coord) for coord in sublist] for sublist in coord_list]

df['geometry'] = df['polygonCoordinate'].apply(remove_third_value_from_list)

Adjust format of coordinates

In [7]:

df['geometry'] = df['geometry'].apply(lambda x: Polygon(x[0]))

# Create GeoDataFrame
gdf = gpd.GeoDataFrame(df, geometry='geometry', crs='EPSG:4326')

print(gdf)


        index             id seq_no primary_term                   sort  \
0    projects  1658928733175   None         None  [1659029210203715858]   
1    projects  1661949769192   None         None  [1661949830372869074]   
2    projects  1662054172732   None         None  [1662483722835113383]   
3    projects  1662665088823   None         None  [1662665620078210122]   
4    projects  1659544487696   None         None  [1665176192519129488]   
..        ...            ...    ...          ...                    ...   
280  projects  1744146266171   None         None  [1744147021315507477]   
281  projects  1744192265189   None         None  [1744192471787709343]   
282  projects  1744135855654   None         None  [1744193102468158291]   
283  projects  1744193016071   None         None  [1744193295644616355]   
284  projects  1744299155214   None         None  [1744299278470368105]   

        project_id           type regionShortCode  \
0    1658928733175   conservation             

In [8]:
gdf = gpd.GeoDataFrame(df, geometry = 'geometry', crs = 'EPSG:4326')


In [9]:
gdf = gdf.explode(index_parts=False)

Dismiss polygons with faulty coordinates

In [10]:
gdf['geometry'] = gdf['geometry'].apply(
    lambda geom: Polygon() if not geom.is_empty and any(abs(coord) > 180 for x, y in geom.exterior.coords for coord in (x, y)) else geom
)

  result = super().apply(func, convert_dtype=convert_dtype, args=args, **kwargs)


In [11]:
gdf['geometry'] = gdf.make_valid()

### Harmonize nomenclature

In [12]:
gdf['site_sqkm'] = gdf['geometry'].to_crs(3857).area / 1e6

In [13]:
columns_rename_mapping = {
    'project_id': 'project_id_reported',
    'id': 'site_id_reported',
    'regionShortCode': 'country',
    'aboutProject': 'project_description_reported'
}
gdf.rename(columns=columns_rename_mapping, inplace=True)

In [14]:
columns_to_keep = ['project_id_reported','site_id_reported','project_description_reported','geometry','site_sqkm','country']

gdf = gdf[columns_to_keep]

In [15]:
gdf['host_name'] = 'Open Forest Protocol'
gdf['url'] = 'https://atlas.openforestprotocol.org/'
gdf = gdf.assign(species_count_reported=None, species_planted_reported=None,planting_date_reported=None,survival_rate_reported=None,trees_planted_reported=None)

gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 285 entries, 0 to 284
Data columns (total 13 columns):
 #   Column                        Non-Null Count  Dtype   
---  ------                        --------------  -----   
 0   project_id_reported           285 non-null    object  
 1   site_id_reported              285 non-null    object  
 2   project_description_reported  285 non-null    object  
 3   geometry                      285 non-null    geometry
 4   site_sqkm                     285 non-null    float64 
 5   country                       284 non-null    object  
 6   host_name                     285 non-null    object  
 7   url                           285 non-null    object  
 8   species_count_reported        0 non-null      object  
 9   species_planted_reported      0 non-null      object  
 10  planting_date_reported        0 non-null      object  
 11  survival_rate_reported        0 non-null      object  
 12  trees_planted_reported        0 non-null      o

In [16]:
invalid_geom = False

if len(gdf[gdf.geometry.is_valid == False]) > 0:
    invalid_geom = True
gdf['project_geometries_invalid'] = invalid_geom

In [17]:
gdf.head()

Unnamed: 0,project_id_reported,site_id_reported,project_description_reported,geometry,site_sqkm,country,host_name,url,species_count_reported,species_planted_reported,planting_date_reported,survival_rate_reported,trees_planted_reported,project_geometries_invalid
0,1658928733175,1658928733175,Juniper Forest is a mixed forest consisting pr...,"POLYGON ((-97.95557 30.13647, -97.95557 30.136...",0.00562,us,Open Forest Protocol,https://atlas.openforestprotocol.org/,,,,,,False
1,1661949769192,1661949769192,Hybrid Miyawaki forest,"POLYGON ((79.41477 12.90351, 79.41474 12.90284...",0.002786,in,Open Forest Protocol,https://atlas.openforestprotocol.org/,,,,,,False
2,1662054172732,1662054172732,This is an area where trees have been cut and ...,"POLYGON ((34.50748 0.22990, 34.50679 0.22942, ...",0.015105,ke,Open Forest Protocol,https://atlas.openforestprotocol.org/,,,,,,False
3,1662665088823,1662665088823,We have developed and implemented a decentrali...,"POLYGON ((-89.53625 16.36554, -89.53501 16.360...",0.333961,gt,Open Forest Protocol,https://atlas.openforestprotocol.org/,,,,,,False
4,1659544487696,1659544487696,"Once a tropical forest landscape, this zone ha...","POLYGON ((-89.52816 16.92224, -89.52739 16.930...",0.321885,gt,Open Forest Protocol,https://atlas.openforestprotocol.org/,,,,,,False


In [18]:
gdf.to_file('../midsave/atlas.gpkg')