In [10]:

import geopandas as gpd
import requests
import pandas as pd
import json
from shapely.geometry import Polygon , shape
import ast
from shapely.ops import transform
from shapely import wkt
from pyproj import CRS, Transformer

from helper_functions import convert_to_list

In [9]:


base_url = "https://tree-nation.com/api/projects/"


response = requests.get(base_url)


if response.status_code == 200:
 
    projects_data = response.json()
    
   
    df = pd.json_normalize(projects_data)
    
 
    results_df = pd.DataFrame()

    # Looping through the "id" column in the DataFrame
    for project_id in df['id']:
       
        full_url = f"{base_url}{project_id}/planting-sites"
        
        # Making the API request
        response = requests.get(full_url)
        
        # Checking if the response code is 200
        if response.status_code == 200:
           
            project_data = response.json()
            
      
            project_df = pd.json_normalize(project_data)
            
            # Add the project data to the results DataFrame
            results_df = pd.concat([results_df, project_df], ignore_index=True)
        else:
            print(f"Error for project_id {project_id}: Status Code {response.status_code}")

### Process geometries

Convert nested lists and swap geometries

In [11]:
results_df['polygon_data'] = results_df['polygon_data'].apply(convert_to_list)

Failed to convert string to list: 
Failed to convert string to list: 
Failed to convert string to list: [[47.8474433568637,-93.9927748606445],[47.8474475967424,-93.99436354110921],[47.8474771863262,-93.99984143271421],[47.8474918961176,-94.0025858935182],[47.8475023559689,-94.00454202409131],[47.8475065059098,-94.005319324319],[47.8475355754934,-94.01079720592368],[47.8475540452268,-94.0143039069508],[47.84755612514059,-94.01543223728061],[47.8475576650761,-94.01627532752708],[47.84759847467,-94.02163745909979],[47.8476390242638,-94.0269996006722],[47.84767932385751,-94.03236172224459],[47.84771937345119,-94.0377238938168],[47.84762650302431,-94.0431273853785],[47.8475333725973,-94.04853084694001],[47.8474714123138,-94.0521175779764],[47.8474400021702,-94.0539342985013],[47.8474018419961,-94.05613733913782],[47.8473463617431,-94.0593377000625],[47.8472745913197,-94.0647293416239],[47.8472025808962,-94.07012098318502],[47.8471303004726,-94.0755125747461],[47.84705845005299,-94.080852756

Turn into proper geometries

In [12]:
results_df['geometry'] = results_df['polygon_data'].apply(lambda x: Polygon(x) if x is not None and len(x) >= 4 else Polygon())

In [13]:
gdf = gpd.GeoDataFrame(results_df, geometry = 'geometry', crs = 'EPSG:4326')
gdf['geometry'] = gdf['geometry'].make_valid()

Explode multipolygons

In [14]:
gdf = gdf.explode(index_parts=False)

Get area size

In [15]:
gdf['site_sqkm'] = gdf['geometry'].to_crs(3857).area / 1e6

In [16]:
gdf['site_sqkm'].describe()

count    1.268000e+03
mean     1.109628e+02
std      2.149613e+03
min      0.000000e+00
25%      9.182858e-08
50%      5.805825e-04
75%      2.267890e-01
max      7.449515e+04
Name: site_sqkm, dtype: float64

### Preparing columns

In [17]:
columns_rename_mapping = {
    'project_id': 'project_id_reported',
    'TreeCount': 'trees_planted_reported',
    'description': 'project_description_reported',
    'created_at': 'planting_date_reported',
    'id': 'site_id_reported',
    'image': 'url',
}

gdf.rename(columns=columns_rename_mapping, inplace=True)

In [18]:
gdf.drop(columns=["name", "slug", "address", "polygon_data"], inplace=True)

Fix planting date

In [19]:
gdf['planting_date_reported'] = pd.to_datetime(gdf['planting_date_reported']).dt.year

In [20]:
gdf['host_name'] = 'Tree Nation'
gdf['url'] = 'https://tree-nation.com/projects'
gdf = gdf.assign(species_count_reported=None, species_planted_reported=None,country=None,survival_rate_reported=None)

gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 1268 entries, 0 to 675
Data columns (total 12 columns):
 #   Column                        Non-Null Count  Dtype   
---  ------                        --------------  -----   
 0   planting_date_reported        1268 non-null   int32   
 1   project_description_reported  1268 non-null   object  
 2   site_id_reported              1268 non-null   int64   
 3   url                           1268 non-null   object  
 4   project_id_reported           1268 non-null   int64   
 5   geometry                      1268 non-null   geometry
 6   site_sqkm                     1268 non-null   float64 
 7   host_name                     1268 non-null   object  
 8   species_count_reported        0 non-null      object  
 9   species_planted_reported      0 non-null      object  
 10  country                       0 non-null      object  
 11  survival_rate_reported        0 non-null      object  
dtypes: float64(1), geometry(1), int32(1), int64(2)

In [22]:
invalid_geom = False

if len(gdf[gdf.geometry.is_valid == False]) > 0:
    invalid_geom = True   

gdf['project_geometries_invalid'] = invalid_geom

### Save it

In [24]:
gdf.to_file('../midsave/tree_nation.gpkg')