# Veritree
https://www.veritree.com

In [2]:
import json

import geopandas as gpd
import pandas as pd
from shapely.geometry import MultiPolygon, Point, Polygon

In [3]:
# The export is split across three JSON files. Each file is expected to hold a
# list of records so that the parts can be concatenated into one list.
input_paths = (
    '../input/Verritree/verritree1.json',
    '../input/Verritree/verritree2.json',
    '../input/Verritree/verritree3.json',
)

data = []
for input_path in input_paths:
    # Explicit UTF-8: JSON text is UTF-8, and the platform default encoding
    # is not guaranteed to be.
    with open(input_path, 'r', encoding='utf-8') as file:
        records = json.load(file)
    if not isinstance(records, list):
        raise TypeError(
            f"{input_path}: expected a JSON array of records, got {type(records).__name__}"
        )
    data.extend(records)

In [4]:
# Build the raw frame from the loaded records. Fail loudly on unexpected input:
# silently skipping the construction would leave `df` undefined (or stale from
# an earlier run) for every cell below.
if not isinstance(data, list) or not data:
    raise ValueError("Expected `data` to be a non-empty list of records")
if not isinstance(data[0], dict):
    raise TypeError(
        f"Expected records to be dicts, got {type(data[0]).__name__}"
    )

df = pd.DataFrame(data)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 122 entries, 0 to 121
Data columns (total 82 columns):
 #   Column                              Non-Null Count  Dtype  
---  ------                              --------------  -----  
 0   id                                  122 non-null    int64  
 1   public_id                           122 non-null    object 
 2   planting_site_id                    122 non-null    int64  
 3   country_id                          122 non-null    int64  
 4   organization_id                     122 non-null    int64  
 5   subsite_type_id                     122 non-null    int64  
 6   plus_code                           122 non-null    object 
 7   name                                122 non-null    object 
 8   description                         122 non-null    object 
 9   legacy                              122 non-null    int64  
 10  created_at                          122 non-null    object 
 11  updated_at                          122 non-n

In [5]:
# Source column -> harmonized column name. The key order also determines the
# column order of the trimmed frame.
columns_rename_mapping = {
    'planting_site_id': 'site_id_reported',
    'id': 'project_id_reported',
    'geofence.coordinates': 'geometry',
    'description': 'project_description_reported',
    'planting_site.country.name': 'country',
    'forest_type.average_survival_rate': 'survival_rate_reported',
    'species': 'species_planted_reported',
}

# Keep only the mapped source columns, then give them their harmonized names.
source_columns = [*columns_rename_mapping]
df = df[source_columns].rename(columns=columns_rename_mapping)

In [6]:

# Sanity check: number of distinct reported project ids (missing values are
# not counted by nunique()).
project_ids = df['project_id_reported']
unique_project_ids_count = project_ids.nunique()
print(f"Number of unique project IDs: {unique_project_ids_count}")

Number of unique project IDs: 122


In [7]:
# Preview the first record to eyeball the renamed columns.
df.iloc[:1]

Unnamed: 0,site_id_reported,project_id_reported,geometry,project_description_reported,country,survival_rate_reported,species_planted_reported
0,2,22,"[[[[46.3948, -15.9352], [46.3971, -15.9379], [...",The Betsiboka estuary is a highly dynamic fore...,Madagascar,70,"[Avicennia marina, Bruguiera gymnorrhiza, Ceri..."


### Fix geometries

In [8]:
def multipolygon_coords_to_geometry(coords):
    """Convert GeoJSON MultiPolygon-style coordinates into a shapely geometry.

    Parameters
    ----------
    coords : list
        Nested as polygons -> rings -> [x, y] points. Within each polygon the
        first ring is the exterior shell and any further rings are holes.

    Returns
    -------
    shapely.geometry.Polygon or shapely.geometry.MultiPolygon
        A ``Polygon`` when there is exactly one part, otherwise a
        ``MultiPolygon`` (empty when ``coords`` has no parts).
    """
    # Each part keeps its own shell and holes. Chaining every ring into one
    # exterior ring would connect separate parts and holes into a single
    # outline that does not describe the site.
    polygons = [Polygon(rings[0], rings[1:]) for rings in coords if rings]
    if len(polygons) == 1:
        return polygons[0]
    return MultiPolygon(polygons)


df['geometry'] = df['geometry'].apply(multipolygon_coords_to_geometry)

In [9]:
gdf = gpd.GeoDataFrame(df, geometry='geometry', crs='EPSG:4326')

# Record validity BEFORE repairing. make_valid() is documented to return valid
# geometries, so a check made after the repair would always leave this False.
invalid_geom = bool((~gdf.geometry.is_valid).any())

gdf['geometry'] = gdf['geometry'].make_valid()

### Harmonize nomenclature

In [11]:
# Constant provenance columns for this host.
gdf["host_name"] = 'Veritree'
gdf['project_geometries_invalid'] = invalid_geom
gdf["url"] = 'https://www.veritree.com'

# Site area in km². Areas are measured in the units of the projected CRS, so
# the projection must be equal-area: EPSG:6933 (cylindrical equal-area, metres).
# Web Mercator (EPSG:3857) inflates areas increasingly with latitude.
equal_area_crs = 'EPSG:6933'
gdf["site_sqkm"] = gdf["geometry"].to_crs(equal_area_crs).area / 1e6

# Species arrive as lists; store them as one comma-separated string and leave
# any non-list value untouched.
gdf['species_planted_reported'] = gdf['species_planted_reported'].apply(
    lambda names: ', '.join(names) if isinstance(names, list) else names
)

In [12]:
# Harmonized columns that are filled with None here, added in this order.
placeholder_columns = (
    'species_count_reported',
    'trees_planted_reported',
    'planting_date_reported',
)
gdf = gdf.assign(**{column: None for column in placeholder_columns})
gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 122 entries, 0 to 121
Data columns (total 14 columns):
 #   Column                        Non-Null Count  Dtype   
---  ------                        --------------  -----   
 0   site_id_reported              122 non-null    int64   
 1   project_id_reported           122 non-null    int64   
 2   geometry                      122 non-null    geometry
 3   project_description_reported  122 non-null    object  
 4   country                       122 non-null    object  
 5   survival_rate_reported        122 non-null    int64   
 6   species_planted_reported      122 non-null    object  
 7   host_name                     122 non-null    object  
 8   project_geometries_invalid    122 non-null    bool    
 9   url                           122 non-null    object  
 10  site_sqkm                     122 non-null    float64 
 11  species_count_reported        0 non-null      object  
 12  trees_planted_reported        0 non-null  

### Save it

In [13]:
# Write the harmonized GeoDataFrame to the intermediate output file.
output_path = '../midsave/veritree.gpkg'
gdf.to_file(output_path)