# Trees.org
https://trees.org

In [1]:
import json
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

In [2]:
# Load the trees.org project records from the input JSON file.
# JSON interchange is UTF-8, so the encoding is pinned explicitly instead of
# relying on the platform's locale default (which is not UTF-8 everywhere).
with open('../input/TreesOrg/data.json', 'r', encoding='utf-8') as file:
    data = json.load(file)
type(data)

list

In [3]:
# Build the working table from the parsed JSON (one row per dict in the list).
# The payload shape is validated explicitly: previously an unexpected shape
# silently skipped the assignment, so `df.info()` failed with a NameError (or
# ran against a stale `df` left over in the kernel), and an empty list crashed
# with an IndexError on `data[0]`.
if not isinstance(data, list) or not data or not isinstance(data[0], dict):
    raise ValueError('Expected data.json to contain a non-empty list of objects')
df = pd.DataFrame(data)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 86 entries, 0 to 85
Data columns (total 19 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   ID              86 non-null     int64 
 1   type            86 non-null     object
 2   slug            86 non-null     object
 3   title           86 non-null     object
 4   content         86 non-null     object
 5   excerpt         86 non-null     object
 6   long_excerpt    86 non-null     object
 7   short_excerpt   86 non-null     object
 8   link            86 non-null     object
 9   featured_image  86 non-null     object
 10  author          86 non-null     object
 11  publish_date    86 non-null     object
 12  date            86 non-null     object
 13  button          86 non-null     object
 14  map             86 non-null     object
 15  address         86 non-null     object
 16  coordinates     86 non-null     object
 17  country_image   86 non-null     object
 18  info        

In [4]:
# Derive the country as the last whitespace-separated token of the address
# (e.g. 'Kabila, Tanzania' -> 'Tanzania').
# The vectorised `.str` accessor yields NaN for an empty / whitespace-only
# address instead of raising IndexError the way `x.split()[-1]` did; those rows
# fall back to '' (the fallback the original dead `isinstance` branch intended).
# NOTE(review): a multi-word country name would be cut down to its last word —
# confirm none occur in this dataset.
df['country'] = df['address'].astype(str).str.split().str[-1].fillna('')
df.head(1)

Unnamed: 0,ID,type,slug,title,content,excerpt,long_excerpt,short_excerpt,link,featured_image,author,publish_date,date,button,map,address,coordinates,country_image,info,country
0,7396,project,mwanza-2,Mwanza 2,,,,,https://trees.org/project/mwanza-2/,,,15 Feb 2024,2024-02-15,"https://trees.org/project/mwanza-2/, Read more...","7JGG+96 Kabila, Tanzania, -2.72407, 33.62559, ...","Kabila, Tanzania","-2.72407, 33.62559","5755, 5755, tanzia, tanzia.svg, 5868, https://...","Project Name: , Mwanza 2, Project Type: , Expa...",Tanzania


In [5]:
# Source column -> harmonized output name. Only the columns listed here are
# carried forward; every other column of the raw table is dropped.
columns_rename_mapping = {
    'ID': 'project_id_reported',
    'coordinates': 'geometry',
    'info': 'project_description_reported',
    'country': 'country',
    'date': 'planting_date_reported',
    'link': 'url'
}
# Select the mapped columns (in mapping order) and rename them in one chain.
kept_columns = [*columns_rename_mapping]
df = df[kept_columns].rename(columns=columns_rename_mapping)

### Fix geometries

In [6]:
# The source coordinate string is "<latitude>, <longitude>" (e.g.
# "-2.72407, 33.62559" for a site in Tanzania), whereas shapely's Point takes
# (x, y) = (longitude, latitude). The previous code labelled the two parts the
# other way round, which put every project at a swapped-axis location.
lat_lon = df['geometry'].str.split(',', expand=True).astype(float)
# Raises if a row did not split into exactly two parts, as the original
# two-column assignment did.
lat_lon.columns = ['lat', 'lon']
df['geometry'] = [Point(lon, lat) for lat, lon in zip(lat_lon['lat'], lat_lon['lon'])]

In [7]:
# Promote the table to a GeoDataFrame in EPSG:4326, then run make_valid()
# over the active geometry column.
gdf = gpd.GeoDataFrame(
    data=df,
    geometry='geometry',
    crs='EPSG:4326',
)
gdf['geometry'] = gdf.geometry.make_valid()

In [8]:
# True when at least one geometry is still reported as invalid after make_valid().
invalid_geom = bool((~gdf.geometry.is_valid).any())

### Harmonize nomenclature

In [9]:
# Reduce the reported date to its calendar year.
# NOTE(review): this overwrites the column it reads, so re-running the cell
# would parse the already-converted integer years again — run once per kernel.
reported_dates = pd.to_datetime(gdf['planting_date_reported'])
gdf['planting_date_reported'] = reported_dates.dt.year

In [10]:
# Columns that carry no value for this source; each is filled with None.
placeholder_columns = [
    'site_sqkm',
    'species_count_reported',
    'species_planted_reported',
    'survival_rate_reported',
    'trees_planted_reported',
    'site_id_reported',
]
# Add the constant host name, the dataset-wide validity flag and the
# placeholder columns in a single pass (column order matches the listing above).
gdf = gdf.assign(
    host_name='trees.org',
    project_geometries_invalid=invalid_geom,
    **dict.fromkeys(placeholder_columns),
)
gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 86 entries, 0 to 85
Data columns (total 14 columns):
 #   Column                        Non-Null Count  Dtype   
---  ------                        --------------  -----   
 0   project_id_reported           86 non-null     int64   
 1   geometry                      86 non-null     geometry
 2   project_description_reported  86 non-null     object  
 3   country                       86 non-null     object  
 4   planting_date_reported        86 non-null     int32   
 5   url                           86 non-null     object  
 6   host_name                     86 non-null     object  
 7   project_geometries_invalid    86 non-null     bool    
 8   site_sqkm                     0 non-null      object  
 9   species_count_reported        0 non-null      object  
 10  species_planted_reported      0 non-null      object  
 11  survival_rate_reported        0 non-null      object  
 12  trees_planted_reported        0 non-null    

In [11]:
# Write the harmonized GeoDataFrame to the midsave directory as a GeoPackage.
gdf.to_file(filename='../midsave/trees_org.gpkg', driver='GPKG')