In [None]:
import ast
import json
import time

import geopandas as gpd
import numpy as np
import pandas as pd
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
from shapely.geometry import shape, Polygon, MultiPolygon

from helper_functions import *

In [None]:


# The center-points endpoint is queried one bounding box at a time, so the globe
# is divided into a grid of boxes and one request URL is built per box.

# Western edge of each 45-degree-wide longitude band (-180 up to 135)
longitude_lefts = [-180 + 45 * band for band in range(8)]

# Latitude breakpoints, north to south, following the Web Mercator projection
# used by the Restor website
latitude_steps = [
    85.0511,  # The Maximum latitude in Web Mercator
    79.17133464081945,
    66.51326044311186,
    40.97989806962013,
    0,
    -40.97989806962013,
    -66.51326044311186,
    -79.17133464081945,
    -85.0511  # Minimum latitude in Web Mercator
]

# One URL per (longitude band, latitude band) pair: consecutive latitude
# breakpoints give the top and bottom of a box, and each box spans 45 degrees
# of longitude starting at its western edge.
urls = [
    "https://restor2-prod-1-api.restor.eco/sites/3/center-points/"
    f"?bottom={south}&left={west}&right={west + 45}&top={north}&visibility=PUBLIC"
    for west in longitude_lefts
    for north, south in zip(latitude_steps[:-1], latitude_steps[1:])
    if north > south
]

# Collecting all ids from all urls in the Restor website.
#
# A single session is reused for every tile. It retries transient failures
# (rate limiting and 5xx responses) with exponential backoff, and every request
# carries a timeout so that a stalled connection cannot hang the cell forever.
session = requests.Session()
session.headers.update({'User-Agent': 'Mozilla/5.0'})
retry_policy = Retry(
    total=3,
    backoff_factor=1,
    status_forcelist=[429, 500, 502, 503, 504],
)
session.mount('https://', HTTPAdapter(max_retries=retry_policy))

all_data = []
failed_urls = []  # tiles that could not be collected, reported after the loop

for url in urls:
    try:
        response = session.get(url, timeout=30)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # Network/HTTP problems and undecodable bodies are logged and skipped
        # (best effort); anything else is a programming error and should surface.
        print(f"Error fetching {url}: {e}")
        failed_urls.append(url)
    else:
        if isinstance(data, list):
            all_data.extend(data)
            print(f"Fetched {len(data)} items from {url}")
        else:
            print(f"Unexpected data format from {url}")
            failed_urls.append(url)
    time.sleep(1)  # pause between tiles to avoid hammering the API

if failed_urls:
    print(f"{len(failed_urls)} of {len(urls)} tiles could not be collected; the id list is incomplete.")

# Persist every collected record as indented JSON so that the detail-fetching
# step can be re-run without repeating the tile requests.
ids_json_path = '../input/Restor_Eco/all_restor_data.json'
with open(ids_json_path, 'w') as out_file:
    out_file.write(json.dumps(all_data, indent=2))

print(f"Data collection complete. Saved {len(all_data)} items to all_restor_data.json.")

In [None]:

# Reload the records saved by the tile-collection step
with open('../input/Restor_Eco/all_restor_data.json', 'r') as f:
    data = json.load(f)

df = pd.DataFrame(data)

# Neighbouring tiles share their edges, so the same site could be listed by two
# tiles; fetch every id only once to avoid duplicate rows and wasted requests.
site_ids = df['id'].drop_duplicates()

result_list = []

# `site_id` / `site_record` are used instead of `id` / `data` so that the builtin
# `id` is not shadowed and the loaded JSON in `data` is not overwritten.
for site_id in site_ids:
    # fetch_data is not defined in this notebook (presumably provided by the
    # helper_functions star import); a None result is treated as "skip this site".
    site_record = fetch_data(site_id)
    if site_record is not None:
        result_list.append(site_record)

    time.sleep(1)  # pause between requests

# Converting the collected records to a DataFrame and saving them
if result_list:
    final_df = pd.DataFrame(result_list).reset_index(drop=True)
    print(final_df)

    final_df.to_csv('../input/Restor_Eco/final_restor_data.csv', index=False)
else:
    # NOTE: final_df stays undefined in this case, so the following cells cannot run.
    print("No data was retrieved")

In [None]:
# Keep only restoration sites. The explicit copy makes `df` an independent frame,
# so the column assignments in later cells do not operate on a slice of
# `final_df` (which triggers pandas' SettingWithCopyWarning).
df = final_df[final_df["siteType"] == "RESTORATION"].copy()
df.info()


### Fix geometries

In [None]:
# Converting string to dictionary.
# Only string values are parsed: when the frame comes straight from memory the
# polygons may already be dictionaries, and missing values are not strings either.
# Passing those to ast.literal_eval would raise, so they are left unchanged and
# handled by the dict check below.
df['polygon'] = df['polygon'].apply(
    lambda value: ast.literal_eval(value) if isinstance(value, str) else value
)

# Build a shapely geometry from each GeoJSON-like dictionary; anything that is
# not a dictionary becomes an empty Polygon.
df['geometry'] = df['polygon'].apply(lambda x: shape(x) if isinstance(x, dict) else Polygon())


In [None]:
# Wrap the table as a GeoDataFrame with EPSG:4326 coordinates, then replace
# every geometry with its repaired (valid) version.
gdf = gpd.GeoDataFrame(df, geometry='geometry', crs='EPSG:4326')
repaired_geometry = gdf.geometry.make_valid()
gdf['geometry'] = repaired_geometry

In [None]:
# Split multi-part geometries into one row per part; with index_parts=False the
# original index labels are repeated instead of gaining an extra part level.
gdf = gdf.explode(index_parts = False)

In [None]:
# Keep polygonal geometries only, split any remaining multi-part ones, and
# renumber the rows from 0.
is_polygonal = gdf.geometry.geom_type.isin(['Polygon', 'MultiPolygon'])
gdf = gdf[is_polygonal].explode(index_parts=False).reset_index(drop=True)

### Harmonize nomenclature

In [None]:
# Site area in square kilometres.
# Areas have to be measured in an equal-area projection. Web Mercator (EPSG:3857)
# stretches areas away from the equator (by roughly 1/cos^2(latitude)), which
# overstates the size of mid- and high-latitude sites. EPSG:6933
# (WGS 84 / NSIDC EASE-Grid 2.0 Global) is a global equal-area CRS in metres.
gdf['site_sqkm'] = gdf['geometry'].to_crs(6933).area / 1e6
gdf['site_sqkm'].describe()

In [None]:
def _country_or_blank(code):
    """Return the country code when it is a string, otherwise an empty string."""
    return code if isinstance(code, str) else ''


# Non-string country codes (e.g. missing values) are replaced by ''
gdf['country'] = gdf['countryCode'].map(_country_or_blank)

In [None]:
# Inspect columns, dtypes and non-null counts before renaming
gdf.info()

In [None]:
# Map the Restor field names onto the column names used in the paper's
# columns section.
columns_rename_mapping = dict(
    id='project_id_reported',
    description='project_description_reported',
    interventionStartYear='planting_date_reported',
    website='url',
)
gdf = gdf.rename(columns=columns_rename_mapping)

In [None]:
# Display the renamed planting-date column to inspect its values
gdf["planting_date_reported"]

In [None]:
def _first_if_list(value):
    """Return the first element of a non-empty list; any other value is returned unchanged."""
    if isinstance(value, list) and len(value) > 0:
        return value[0]
    return value


# Cells holding a list are collapsed to their first element in each of these columns
for column in ('project_description_reported', 'planting_date_reported', 'url'):
    gdf[column] = gdf[column].apply(_first_if_list)

In [None]:
# Restrict the frame to the harmonised output columns, in this order
columns_to_keep = [
    'project_id_reported',
    'project_description_reported',
    'planting_date_reported',
    'geometry',
    'url',
    'site_sqkm',
    'country',
]
gdf = gdf[columns_to_keep]

In [None]:
# Empty strings become NaN, the values are coerced to float, and the column is
# finally stored with object dtype.
gdf['planting_date_reported'] = (
    gdf['planting_date_reported']
    .replace('', np.nan)
    .astype(float)
    .astype(object)
)

In [None]:
# Sequential site identifiers (restor_site_1 ... restor_site_N) in current row order
gdf['site_id_reported'] = [f'restor_site_{n}' for n in range(1, len(gdf) + 1)]
gdf['host_name'] = 'Restor Eco'
# Site page address built from the project id; this replaces whatever the
# 'url' column held before.
gdf['url'] = 'https://restor.eco/sites/' + gdf['project_id_reported'].astype(str)

# Columns that this notebook does not populate are added filled with None
empty_columns = (
    'species_count_reported',
    'species_planted_reported',
    'survival_rate_reported',
    'trees_planted_reported',
)
gdf = gdf.assign(**{column: None for column in empty_columns})
gdf.info()

In [None]:
# Dataset-wide flag: True when at least one geometry is invalid. The same value
# is written to every row.
invalid_geom = bool((~gdf.geometry.is_valid).any())
gdf['project_geometries_invalid'] = invalid_geom
gdf["planting_date_type"] = "Intervention Start Year"

### Save it

In [None]:
# Write the harmonised sites to the midsave directory for the downstream steps
gdf.to_file('../midsave/restor_eco.gpkg')