# Zero CO2

https://zeroco2.eco/en/projects/

In [1]:
import json
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

In [2]:
with open('../input/ZeroCo2/ZeroCo2.json', 'r') as file:
    data = json.load(file)
print(data[0])

{'countryId': 46, 'countryName': 'Guatemala', 'user_created': '8e9fdc64-63e2-442e-b1e8-fef6d1bcfe06', 'date_created': '2023-09-05T16:18:05.252Z', 'user_updated': '8e9fdc64-63e2-442e-b1e8-fef6d1bcfe06', 'date_updated': '2024-01-31T22:59:15.552Z', 'project_id': 'PJ01', 'country': 46, 'sort': 1, 'id': 'ae3f0a01-ecd4-43bb-a814-1ba12ac2c8f5', 'status': 'published', 'project_name': 'Riforestare per resistere', 'description': 'Creiamo foreste in Guatemala per supportare comunità contadine a resistere agli effetti della crisi climatica e alle oppressioni dei più forti.', 'description_translations': [{'language': 'EN', 'project_description': 'We create forests in Guatemala to sustain peasant communities as they resist the effects of the climate crisis and the oppression of the powerful.'}], 'project_name_translations': [{'language': 'IT', 'project_name': 'Riforestare per resistere'}, {'language': 'EN', 'project_name': 'Resistance through reforestation'}], 'is_posidonia': False, 'gps_position.co

In [3]:
if isinstance(data, list) and isinstance(data[0], dict):
    df = pd.DataFrame(data)
    df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7 entries, 0 to 6
Data columns (total 19 columns):
 #   Column                     Non-Null Count  Dtype 
---  ------                     --------------  ----- 
 0   countryId                  7 non-null      int64 
 1   countryName                7 non-null      object
 2   user_created               7 non-null      object
 3   date_created               7 non-null      object
 4   user_updated               7 non-null      object
 5   date_updated               7 non-null      object
 6   project_id                 7 non-null      object
 7   country                    7 non-null      int64 
 8   sort                       7 non-null      int64 
 9   id                         7 non-null      object
 10  status                     7 non-null      object
 11  project_name               7 non-null      object
 12  description                7 non-null      object
 13  description_translations   7 non-null      object
 14  project_name_t

In [4]:
df.head(1)

Unnamed: 0,countryId,countryName,user_created,date_created,user_updated,date_updated,project_id,country,sort,id,status,project_name,description,description_translations,project_name_translations,is_posidonia,gps_position.coordinates,gps_position.type,local_authority
0,46,Guatemala,8e9fdc64-63e2-442e-b1e8-fef6d1bcfe06,2023-09-05T16:18:05.252Z,8e9fdc64-63e2-442e-b1e8-fef6d1bcfe06,2024-01-31T22:59:15.552Z,PJ01,46,1,ae3f0a01-ecd4-43bb-a814-1ba12ac2c8f5,published,Riforestare per resistere,Creiamo foreste in Guatemala per supportare co...,"[{'language': 'EN', 'project_description': 'We...","[{'language': 'IT', 'project_name': 'Riforesta...",False,"[-89.7276, 16.6798]",Point,


### Fix geometries

In [5]:
df['geometry'] = df['gps_position.coordinates'].apply(lambda x: Point(x))

In [6]:
gdf = gpd.GeoDataFrame(df, geometry='geometry', crs = 'EPSG:4326')
gdf['geometry'] = gdf['geometry'].make_valid()

### Harmonize nomenclature

In [7]:
gdf = gdf[['project_id', 'id', 'description', 'countryName', 'date_created', 'geometry']]

In [8]:
gdf['url'] = 'https://zeroco2.eco/en/projects/'
gdf['host_name'] = 'Zero CO2'

In [9]:
# Renaming the columns to follow our naming format in the paper columns section
columns_rename_mapping = {
    'project_id': 'project_id_reported',
     'id': 'site_id_reported',
    'description': 'project_description_reported',
    'countryName': 'country',
    'date_created' : 'planting_date_reported'
}
gdf.rename(columns=columns_rename_mapping, inplace=True)

In [10]:
gdf['planting_date_reported'] = pd.to_datetime(gdf['planting_date_reported']).dt.year

In [11]:
gdf = gdf.assign(site_sqkm=None,species_count_reported=None, species_planted_reported=None, survival_rate_reported=None,trees_planted_reported=None,country=None)
gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 7 entries, 0 to 6
Data columns (total 13 columns):
 #   Column                        Non-Null Count  Dtype   
---  ------                        --------------  -----   
 0   project_id_reported           7 non-null      object  
 1   site_id_reported              7 non-null      object  
 2   project_description_reported  7 non-null      object  
 3   country                       0 non-null      object  
 4   planting_date_reported        7 non-null      int32   
 5   geometry                      7 non-null      geometry
 6   url                           7 non-null      object  
 7   host_name                     7 non-null      object  
 8   site_sqkm                     0 non-null      object  
 9   species_count_reported        0 non-null      object  
 10  species_planted_reported      0 non-null      object  
 11  survival_rate_reported        0 non-null      object  
 12  trees_planted_reported        0 non-null      

### Save it

In [12]:
gdf.to_file('../midsave/zero_co2.gpkg')