In [None]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon

from helper_functions import remove_trailing_zeros

In [None]:
# Read the raw Atlas export into a DataFrame and print a summary of its
# columns, dtypes and non-null counts.
atlas_json_path = '../input/Atlas/Atlas_data_frame.json'
df = pd.read_json(atlas_json_path)
df.info()

### Clean up geometry column

Remove excess trailing zeros from the values in the `polygonCoordinate` column

In [None]:
# Run every value of the column through the project helper, then write the
# cleaned values back into the same column.
cleaned_coordinates = df['polygonCoordinate'].apply(remove_trailing_zeros)
df['polygonCoordinate'] = cleaned_coordinates

Convert each comma-separated coordinate string into a list of coordinate pairs and build a `Polygon` from it

In [None]:
def _parse_polygon(coord_string):
    """Build a shapely Polygon from one comma-separated coordinate string.

    The string is parsed into floats and cut in the middle: the first half
    is treated as latitudes and the second half as longitudes, which are
    then paired up position by position.

    NOTE(review): the vertices are handed to Polygon as (lat, long), while
    shapely/GeoPandas interpret each pair as (x, y) -- confirm that this
    axis order matches the source data.

    Raises:
        ValueError: if a token cannot be parsed as a float.
    """
    values = [float(token) for token in coord_string.split(',')]
    half = len(values) // 2
    lats, longs = values[:half], values[half:]
    # zip() stops at the shorter sequence, so with an odd number of values
    # the last one is silently ignored.
    return Polygon(list(zip(lats, longs)))


# Apply the parser per value instead of looping over rows with iterrows()
# and writing the column twice.
df['geometry'] = df['polygonCoordinate'].apply(_parse_polygon)

In [None]:
# Promote the plain DataFrame to a GeoDataFrame: 'geometry' becomes the
# active geometry column and the coordinates are declared as EPSG:4326.
gdf = gpd.GeoDataFrame(
    df,
    geometry='geometry',
    crs='EPSG:4326',
)


In [None]:
# Split multi-part geometries into one row per part. With index_parts=False
# the original index labels are kept rather than adding a part-number level.
gdf = gdf.explode(index_parts=False)

Discard polygons with out-of-range coordinates (any value beyond ±180) by replacing them with empty polygons

In [None]:
def _blank_if_out_of_range(geom):
    """Return an empty Polygon if any exterior ordinate lies beyond +/-180.

    Empty geometries and geometries whose exterior stays within range are
    returned unchanged.
    """
    if geom.is_empty:
        return geom
    for x, y in geom.exterior.coords:
        # NOTE(review): the same +/-180 bound is applied to both ordinates;
        # latitude is normally limited to +/-90 -- confirm whether a tighter
        # check is wanted.
        if abs(x) > 180 or abs(y) > 180:
            return Polygon()
    return geom


gdf['geometry'] = gdf['geometry'].apply(_blank_if_out_of_range)

In [None]:
# Repair invalid geometries in the active geometry column and store the
# repaired shapes back in place.
gdf['geometry'] = gdf['geometry'].make_valid()

### Harmonize nomenclature

In [None]:
# Compute each site's area in square kilometres.
# Areas are measured in EPSG:6933, an equal-area projection with metre units.
# Web Mercator (EPSG:3857) is not equal-area and inflates areas increasingly
# with distance from the equator. Dividing by 1e6 converts m^2 to km^2.
gdf['site_sqkm'] = gdf['geometry'].to_crs(epsg=6933).area / 1e6

In [None]:
# Source column name -> harmonized column name.
columns_rename_mapping = {
    'project_id': 'project_id_reported',
    'id': 'site_id_reported',
    'regionShortCode': 'country',
    'aboutProject': 'description_reported',
}

# Rebind gdf to the renamed frame rather than mutating it in place.
gdf = gdf.rename(columns=columns_rename_mapping)

In [None]:
# Columns retained in the output, in their final order; everything else
# from the raw export is dropped.
columns_to_keep = [
    'project_id_reported',
    'site_id_reported',
    'description_reported',
    'geometry',
    'site_sqkm',
    'country',
]

gdf = gdf[columns_to_keep]

In [None]:
# Add the constant host/url columns and the placeholder columns (all None)
# in a single assign() call. assign() returns a new frame, so no column is
# set directly on the column subset taken earlier -- the pattern that
# triggers pandas' SettingWithCopyWarning. Keyword order fixes the column
# order, which matches the previous layout.
gdf = gdf.assign(
    host_name='Open Forest Protocol',
    url='https://atlas.openforestprotocol.org/',
    species_count_reported=None,
    species_planted_reported=None,
    planting_date_reported=None,
    survival_rate_reported=None,
    trees_planted_reported=None,
)

gdf.info()

In [None]:
# Write the harmonized sites to the .gpkg output file.
output_path = '../input/Atlas/atlas_data.gpkg'
gdf.to_file(output_path)