**Introduction**
Generating the Extra Columns wiith earth Engine API

In [15]:
#pip install geemap

In [1]:
import geopandas as gpd
import pandas as pd
import time
import os
import numpy as np
import geemap
import ee
import json
from tqdm import tqdm
import concurrent.futures



# from google.colab import auth , drive
from helper_functions import calculate_area, calculate_built_area, calculate_road_length, calculate_forest_loss,process_month,extract_polygons
from helper_functions import calculate_elevation_and_slope, get_savi_for_month, get_ndvi_for_month,get_ndre_for_month



In [17]:


df = gpd.read_parquet("../midsave/reforestation_2016_200_projects.parquet")
df=df.head(110000)

In [18]:
gdf = df.loc[:,['site_id_created', 'geometry']]

In [20]:
from shapely.geometry import Polygon


gdf = df.loc[:, ['site_id_created', 'geometry']]

gdf = gdf[gdf['geometry'].apply(lambda geom: isinstance(geom, Polygon))]

In [21]:
gdf.dropna(subset=['geometry'], inplace = True)

In [22]:

geometry_types = gdf.geometry.apply(lambda geom: geom.geom_type)
geometry_counts = geometry_types.value_counts()

print(geometry_counts)

geometry
Polygon    104562
Name: count, dtype: int64


In [23]:
gdf = df.loc[:,['site_id_created', 'geometry']]

Create a subsample for testing code ! Delete when done !

In [24]:



# gdf['geometry'] = gdf['geometry'].apply(extract_polygons)


# gdf['geometry_type'] = gdf['geometry'].apply(lambda geom: geom.geom_type if geom else None)


# geometry_collection_gdf = gdf[gdf['geometry_type'] == 'GeometryCollection']
# print(geometry_collection_gdf[['geometry', 'geometry_type']].head())

### Authenticate with Google Earth Engine
Need to log in to EarthEngine (ee.Authenticate()), create a project and then initialize this project via ee.Initialize()

In [24]:
ee.Authenticate()
ee.Initialize(project='bufferndvi')

In [25]:
ee.Initialize(project='bufferndvi')

### Calculating Tree Cover
- tree_cover_area_2000
- tree_cover_area_2005
- tree_cover_area_2010
- tree_cover_area_2015
- tree_cover_area_2020

Chunking

In [27]:
chunk_size = 500
chunks = [gdf[i:i + chunk_size] for i in range(0, gdf.shape[0], chunk_size)]

GLAD Landcover (https://glad.umd.edu/dataset/GLCLUC2020)

In [28]:
landmask = ee.Image("projects/glad/OceanMask").lte(1)
landCover = ee.Image('projects/glad/GLCLU2020/v2/LCLUC_2015').updateMask(landmask)

Masking land cover image to only include class codes of interest for tree cover

In [29]:
classCodes = [25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
              125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148]
maskedLandCover = landCover.remap(classCodes, classCodes, 255)

Calculating area for each class list in the codes

In [30]:
output_csv_path = '../midsave/add_tree2015_cover.csv'
tree_cover = pd.DataFrame()

In [31]:
for i, chunk in enumerate(chunks):
    print(f"Processing chunk {i + 1}/{len(chunks)}")

    # Converting GeoDataFrame chunk to GeoJSON
    gdf_json_chunk = chunk.__geo_interface__
    
    valid_features = []
    for feature in gdf_json_chunk['features']:
        try:
            # Attempt to convert each feature individually
            ee_feature = geemap.geojson_to_ee(feature)
            valid_features.append(ee_feature)
        except Exception as geom_error:
            print(f"Problematic geometry in chunk {i + 1}: {feature['geometry']} - {geom_error}")

    if not valid_features:
        print(f"No valid features in chunk {i + 1}, skipping...")
        continue

    try:
        # Creating a FeatureCollection from valid features
        fc_chunk = ee.FeatureCollection(valid_features)
        
        # Mapping the area calculation function over the FeatureCollection
        area_results_chunk = fc_chunk.map(lambda feature: calculate_area(feature, classCodes, maskedLandCover))
        temp_chunk_df = pd.DataFrame([feature['properties'] for feature in area_results_chunk.getInfo()['features']])
        temp_chunk_df.rename(columns={'cover_area_2015': 'tree_cover_area_2015'}, inplace=True)
        
        # Ensure 'site_id_created' is included in the DataFrame
        if 'site_id_created' not in temp_chunk_df.columns:
            temp_chunk_df['site_id_created'] = [feature['properties'].get('site_id_created', None) for feature in area_results_chunk.getInfo()['features']]
    except Exception as e:
        print(f"Error processing chunk {i + 1}: {e}")
        continue

    # Appending to combined DataFrame
    tree_cover = pd.concat([tree_cover, temp_chunk_df], ignore_index=True)

    # Saving combined results to the output CSV
    tree_cover.to_csv(output_csv_path, index=False)

print("All chunks processed, combined results saved to:", output_csv_path)

Processing chunk 1/119
Processing chunk 2/119
Processing chunk 3/119
Processing chunk 4/119
Processing chunk 5/119
Processing chunk 6/119
Processing chunk 7/119
Processing chunk 8/119
Processing chunk 9/119
Processing chunk 10/119
Processing chunk 11/119
Processing chunk 12/119
Processing chunk 13/119
Processing chunk 14/119
Processing chunk 15/119
Processing chunk 16/119
Processing chunk 17/119
Processing chunk 18/119
Processing chunk 19/119
Processing chunk 20/119
Processing chunk 21/119
Processing chunk 22/119
Processing chunk 23/119
Processing chunk 24/119
Processing chunk 25/119
Processing chunk 26/119
Processing chunk 27/119
Processing chunk 28/119
Processing chunk 29/119
Processing chunk 30/119
Processing chunk 31/119
Processing chunk 32/119
Processing chunk 33/119
Processing chunk 34/119
Processing chunk 35/119
Processing chunk 36/119
Processing chunk 37/119
Processing chunk 38/119
Processing chunk 39/119
Processing chunk 40/119
Processing chunk 41/119
Processing chunk 42/119
P

### Calculating other land cover
- permanent_water
- short_vegetation_after_tree_loss
- cropland_loss_to_tree
- cropland_gain_from_trees

In [32]:
classCodes = [208, 240, 248, 245]
classesOfInterest = ["permanent_water", "short_vegetation_after_tree_loss", "cropland_loss_to_tree", "cropland_gain_from_trees"]
maskedLandCover = landCover.remap(classCodes, classCodes, 255)

In [33]:
output_csv_path = '../midsave/add_other_land_cover.csv'
other_land_cover = pd.DataFrame()

In [34]:
for i, chunk in enumerate(chunks):
    print(f"Processing chunk {i + 1}/{len(chunks)}")

    # Converting GeoDataFrame chunk to GeoJSON and then Earth Engine FeatureCollection
    gdf_json_chunk = chunk.__geo_interface__
    
    try:
        # Converting to Earth Engine FeatureCollection
        fc_chunk = geemap.geojson_to_ee(gdf_json_chunk)
    except Exception as e:
        print(f"Error converting chunk {i + 1} to Earth Engine FeatureCollection: {e}")
        continue

    try:
        # Mapping the area calculation function over the FeatureCollection
        area_results_chunk = fc_chunk.map(lambda feature: calculate_area(feature, classCodes, maskedLandCover))
        if area_results_chunk:
            temp_chunk_df = pd.DataFrame([feature['properties'] for feature in area_results_chunk.getInfo()['features']])
            temp_chunk_df.rename(columns={'cover_area_2020': 'other_land_cover_area_2020'}, inplace=True)
        else:
            temp_chunk_df = pd.DataFrame()
    except Exception as e:
        print(f"Error processing chunk {i + 1}: {e}")
        continue

    other_land_cover = pd.concat([other_land_cover, temp_chunk_df], ignore_index=True)

    other_land_cover.to_csv(output_csv_path, index=False)

print("All chunks processed, combined results saved to:", output_csv_path)

Processing chunk 1/119
Processing chunk 2/119
Processing chunk 3/119
Processing chunk 4/119
Processing chunk 5/119
Processing chunk 6/119
Processing chunk 7/119
Processing chunk 8/119
Processing chunk 9/119
Processing chunk 10/119
Processing chunk 11/119
Processing chunk 12/119
Processing chunk 13/119
Processing chunk 14/119
Processing chunk 15/119
Processing chunk 16/119
Processing chunk 17/119
Processing chunk 18/119
Processing chunk 19/119
Processing chunk 20/119
Processing chunk 21/119
Processing chunk 22/119
Processing chunk 23/119
Processing chunk 24/119
Processing chunk 25/119
Processing chunk 26/119
Processing chunk 27/119
Processing chunk 28/119
Processing chunk 29/119
Processing chunk 30/119
Processing chunk 31/119
Processing chunk 32/119
Processing chunk 33/119
Processing chunk 34/119
Processing chunk 35/119
Processing chunk 36/119
Processing chunk 37/119
Processing chunk 38/119
Processing chunk 39/119
Processing chunk 40/119
Processing chunk 41/119
Processing chunk 42/119
P

### Calculate built-area shares

Loading the Earth Engine built area image for 2018

In [35]:
builtImage = ee.Image("JRC/GHSL/P2023A/GHS_BUILT_C/2018").select('built_characteristics')

In [36]:
output_csv_path = '../midsave/add_built_area_cover.csv'
built_area_cover = pd.DataFrame()

In [37]:
for i, chunk in enumerate(chunks):
    print(f"Processing built area for chunk {i + 1}/{len(chunks)}")

    # Converting GeoDataFrame chunk to GeoJSON and then Earth Engine FeatureCollection
    gdf_json_chunk = chunk.__geo_interface__

    try:
        # Converting the GeoJSON chunk to Earth Engine FeatureCollection
        fc_chunk = geemap.geojson_to_ee(gdf_json_chunk)
    except Exception as e:
        print(f"Error converting chunk {i + 1} to Earth Engine FeatureCollection: {e}")
        continue

    try:
        # Mapping the built area calculation function over the FeatureCollection
        built_area_results_chunk = fc_chunk.map(lambda feature: calculate_built_area(feature, builtImage))
        if built_area_results_chunk:
            temp_chunk_df = pd.DataFrame([feature['properties'] for feature in built_area_results_chunk.getInfo()['features']])
        else:
            temp_chunk_df = pd.DataFrame()
    except Exception as e:
        print(f"Error processing chunk {i + 1}: {e}")
        continue

    built_area_cover = pd.concat([built_area_cover, temp_chunk_df], ignore_index=True)

    built_area_cover.to_csv(output_csv_path, index=False)

print("All chunks processed, built area results saved to:", output_csv_path)

Processing built area for chunk 1/119
Processing built area for chunk 2/119
Processing built area for chunk 3/119
Processing built area for chunk 4/119
Processing built area for chunk 5/119
Processing built area for chunk 6/119
Processing built area for chunk 7/119
Processing built area for chunk 8/119
Processing built area for chunk 9/119
Processing built area for chunk 10/119
Processing built area for chunk 11/119
Processing built area for chunk 12/119
Processing built area for chunk 13/119
Processing built area for chunk 14/119
Processing built area for chunk 15/119
Processing built area for chunk 16/119
Processing built area for chunk 17/119
Processing built area for chunk 18/119
Processing built area for chunk 19/119
Processing built area for chunk 20/119
Processing built area for chunk 21/119
Processing built area for chunk 22/119
Processing built area for chunk 23/119
Processing built area for chunk 24/119
Processing built area for chunk 25/119
Processing built area for chunk 26

### Calculating road network cover
- total_road_length_km

Merging GEE road datasets

In [38]:
roadsAfrica = ee.FeatureCollection('projects/ee-forest-monitoring/assets/gROADS-v1-africa')
roadsAmericas = ee.FeatureCollection('projects/ee-forest-monitoring/assets/gROADS-v1-americas')
roadsAsia = ee.FeatureCollection('projects/ee-forest-monitoring/assets/gROADS-v1-asia')
roadsEurope = ee.FeatureCollection('projects/ee-forest-monitoring/assets/gROADS-v1-europe')
roadsOceaniaEast = ee.FeatureCollection('projects/ee-forest-monitoring/assets/gROADS-v1-oceania-east')
roadsOceaniaWest = ee.FeatureCollection('projects/ee-forest-monitoring/assets/gROADS-v1-oceania-west')

roads = roadsAfrica.merge(roadsAmericas).merge(roadsAsia).merge(roadsEurope).merge(roadsOceaniaEast).merge(roadsOceaniaWest)

In [39]:
output_csv_path = '../midsave/add_road_length.csv'
road_length = pd.DataFrame()

In [40]:
for i, chunk in enumerate(chunks):
    print(f"Processing road length for chunk {i + 1}/{len(chunks)}")

    gdf_json_chunk = chunk.__geo_interface__

    try:
        # Converting the GeoJSON chunk to Earth Engine FeatureCollection
        fc_chunk = geemap.geojson_to_ee(gdf_json_chunk)
    except Exception as e:
        print(f"Error converting chunk {i + 1} to Earth Engine FeatureCollection: {e}")
        continue

    try:
        # Mapping the road length calculation function over the FeatureCollection
        road_length_results_chunk = fc_chunk.map(lambda feature: calculate_road_length(feature, roads))
        if road_length_results_chunk:
            temp_chunk_df = pd.DataFrame([feature['properties'] for feature in road_length_results_chunk.getInfo()['features']])
        else:
            temp_chunk_df = pd.DataFrame()
    except Exception as e:
        print(f"Error processing chunk {i + 1}: {e}")
        continue

    road_length = pd.concat([road_length, temp_chunk_df], ignore_index=True)

    road_length.to_csv(output_csv_path, index=False)

print("All chunks processed, road length results saved to:", output_csv_path)

Processing road length for chunk 1/119
Processing road length for chunk 2/119
Error processing chunk 2: User memory limit exceeded.
Processing road length for chunk 3/119
Processing road length for chunk 4/119
Processing road length for chunk 5/119
Processing road length for chunk 6/119
Error processing chunk 6: User memory limit exceeded.
Processing road length for chunk 7/119
Error processing chunk 7: User memory limit exceeded.
Processing road length for chunk 8/119
Processing road length for chunk 9/119
Error processing chunk 9: User memory limit exceeded.
Processing road length for chunk 10/119
Processing road length for chunk 11/119
Error processing chunk 11: User memory limit exceeded.
Processing road length for chunk 12/119
Error processing chunk 12: User memory limit exceeded.
Processing road length for chunk 13/119
Processing road length for chunk 14/119
Error processing chunk 14: User memory limit exceeded.
Processing road length for chunk 15/119
Error processing chunk 15: U

### Calculating forest loss
- loss_pre_5
- loss_post_3
- loss_post_5

Loading the Global Forest Change 2023 dataset

In [41]:
gfc2017 = ee.Image('UMD/hansen/global_forest_change_2023_v1_11')

In [42]:
output_csv_path = '../midsave/add_forest_loss.csv'
forest_loss = pd.DataFrame()

In [43]:
for i, chunk in enumerate(chunks):
    print(f"Processing forest loss for chunk {i + 1}/{len(chunks)}")

    # Converting the current GeoDataFrame chunk to GeoJSON
    gdf_json_chunk = chunk.__geo_interface__

    try:
        # Converting GeoJSON chunk to Earth Engine FeatureCollection
        fc_chunk = geemap.geojson_to_ee(gdf_json_chunk)
    except Exception as e:
        print(f"Error converting chunk {i + 1} to Earth Engine FeatureCollection: {e}")
        continue

    try:
        # Mapping the forest loss calculation function over the FeatureCollection
        loss_results_chunk = fc_chunk.map(lambda feature: calculate_forest_loss(feature, gfc2017))
        if loss_results_chunk:
            temp_chunk_df = pd.DataFrame([feature['properties'] for feature in loss_results_chunk.getInfo()['features']])
        else:
            temp_chunk_df = pd.DataFrame()
    except Exception as e:
        print(f"Error processing chunk {i + 1}: {e}")
        continue

    forest_loss = pd.concat([forest_loss, temp_chunk_df], ignore_index=True)

print("All chunks processed, forest loss results saved to:", output_csv_path)

Processing forest loss for chunk 1/119
Processing forest loss for chunk 2/119
Processing forest loss for chunk 3/119
Processing forest loss for chunk 4/119
Processing forest loss for chunk 5/119
Processing forest loss for chunk 6/119
Processing forest loss for chunk 7/119
Processing forest loss for chunk 8/119
Processing forest loss for chunk 9/119
Processing forest loss for chunk 10/119
Processing forest loss for chunk 11/119
Processing forest loss for chunk 12/119
Processing forest loss for chunk 13/119
Processing forest loss for chunk 14/119
Processing forest loss for chunk 15/119
Processing forest loss for chunk 16/119
Processing forest loss for chunk 17/119
Processing forest loss for chunk 18/119
Processing forest loss for chunk 19/119
Processing forest loss for chunk 20/119
Processing forest loss for chunk 21/119
Processing forest loss for chunk 22/119
Processing forest loss for chunk 23/119
Processing forest loss for chunk 24/119
Processing forest loss for chunk 25/119
Processin

#### Generating the loss columns

In [44]:
temp = (forest_loss['groups']
               .explode()
               .dropna()
               .reset_index()
               .rename(columns = {'index':'site_id_created'})
               .reset_index())

In [45]:
temp = (pd.json_normalize(temp['groups']).reset_index()
               .merge(temp[['index','site_id_created']], on = 'index', how = 'left').drop(columns = ['index']))

In [46]:
temp['year'] = 2000 + temp['group']

In [47]:
forest_loss = temp.pivot(index=['site_id_created'], columns='year', values='sum')
forest_loss.columns = [col for col in forest_loss.columns]
#forest_loss.columns = [f'forest_loss_{col}' for col in forest_loss.columns]
forest_loss.reset_index(inplace = True)

Add planting date information

In [48]:
forest_loss = forest_loss.merge(df[['site_id_created','planting_date_reported']], on = 'site_id_created', how = 'left')

In [49]:
def calculate_loss_pre_5(row):
    try:
        years = [int(row['planting_date_reported'] - i) for i in range(1, 6)]
        losses = [row[year] for year in years if year in forest_loss.columns]
        return np.nanmean(losses) if losses else np.nan
    except Exception as e:
        return np.nan

def calculate_loss_post_3(row):
    try:
        years = [int(row['planting_date_reported'] + i) for i in range(1, 4)]
        losses = [row[year] for year in years if year in forest_loss.columns]
        return np.nanmean(losses) if losses else np.nan
    except Exception as e:
        return np.nan

def calculate_loss_post_5(row):
    try:
        years = [int(row['planting_date_reported'] + i) for i in range(1, 6)]
        losses = [row[year] for year in years if year in forest_loss.columns]
        return np.nanmean(losses) if losses else np.nan
    except Exception as e:
        return np.nan


forest_loss['loss_pre_5'] = forest_loss.apply(calculate_loss_pre_5, axis=1)
forest_loss['loss_post_3'] = forest_loss.apply(calculate_loss_post_3, axis=1)
forest_loss['loss_post_5'] = forest_loss.apply(calculate_loss_post_5, axis=1)

In [50]:
forest_loss.head(100)

Unnamed: 0,site_id_created,2001,2002,2003,2004,2005,2006,2007,2008,2009,...,2018,2019,2020,2021,2022,2023,planting_date_reported,loss_pre_5,loss_post_3,loss_post_5
0,0,4.425087e+03,,,,,,,,,...,,8.850089e+02,,,,,NaT,,,
1,1,4.728627e+04,2.212458e+04,,,,,,,,...,,,,,,,NaT,,,
2,2,4.197245e+04,1.769991e+03,,,,,,,,...,,,,,,,NaT,,,
3,4,7.986165e+06,1.612558e+06,2.474999e+06,3.273039e+06,3.350165e+06,3.343893e+06,2.615941e+06,3.521027e+06,4.095601e+06,...,1.259225e+07,5.915103e+06,5.454402e+06,8.345306e+06,1.061115e+07,1.456457e+07,NaT,,,
4,29,8.409965e+02,,,,,,,,,...,,,,,,,NaT,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,290,,,,,,,,,,...,3.381550e+03,,,,,,NaT,,,
96,291,,,,,,,,,,...,4.548959e+03,,,,,,NaT,,,
97,292,,,,,,1.279698e+03,,,,...,7.405937e+03,,,,,,NaT,,,
98,293,2.681925e+03,1.787950e+03,,,3.575900e+03,,5.363850e+03,,3.575900e+03,...,,,,,,,NaT,,,


In [51]:
forest_loss.drop(columns = [year for year in range(2000, 2024)] + ['planting_date_reported'], errors = 'ignore', inplace = True)
forest_loss.head(1)

Unnamed: 0,site_id_created,loss_pre_5,loss_post_3,loss_post_5
0,0,,,


In [52]:
forest_loss.to_csv(output_csv_path, index=False)

### Calculating elevation and slope

Loading the Digital Elevations Model (DEM) dataset

In [53]:
dataset = ee.Image('USGS/SRTMGL1_003')
elevation = dataset.select('elevation')
slope = ee.Terrain.slope(elevation)

In [54]:
output_csv_path = '../midsave/add_elevation_slope.csv'
elevation_slope = pd.DataFrame()

In [55]:
for i, chunk in enumerate(chunks):
    print(f"Processing chunk {i + 1}/{len(chunks)}")

    # Converting GeoDataFrame chunk to GeoJSON and then to Earth Engine FeatureCollection
    gdf_json_chunk = chunk.__geo_interface__

    try:
        fc_chunk = geemap.geojson_to_ee(gdf_json_chunk)
    except Exception as e:
        print(f"Error converting chunk {i + 1} to Earth Engine FeatureCollection: {e}")
        continue

    try:
        results_chunk = fc_chunk.map(lambda feature: calculate_elevation_and_slope(feature, elevation, slope))
        if results_chunk:
            temp_chunk_df = pd.DataFrame([feature['properties'] for feature in results_chunk.getInfo()['features']])
        else:
            temp_chunk_df = pd.DataFrame()
    except Exception as e:
        print(f"Error processing chunk {i + 1}: {e}")
        continue

    # Appending to combined DataFrame
    elevation_slope = pd.concat([elevation_slope, temp_chunk_df], ignore_index=True)

    elevation_slope.to_csv(output_csv_path, index=False)

print("All chunks processed, combined results saved to:", output_csv_path)

Processing chunk 1/119
Processing chunk 2/119
Processing chunk 3/119
Processing chunk 4/119
Processing chunk 5/119
Processing chunk 6/119
Processing chunk 7/119
Processing chunk 8/119
Processing chunk 9/119
Processing chunk 10/119
Processing chunk 11/119
Processing chunk 12/119
Processing chunk 13/119
Processing chunk 14/119
Processing chunk 15/119
Processing chunk 16/119
Processing chunk 17/119
Processing chunk 18/119
Processing chunk 19/119
Processing chunk 20/119
Processing chunk 21/119
Processing chunk 22/119
Processing chunk 23/119
Processing chunk 24/119
Processing chunk 25/119
Processing chunk 26/119
Processing chunk 27/119
Processing chunk 28/119
Processing chunk 29/119
Processing chunk 30/119
Processing chunk 31/119
Processing chunk 32/119
Processing chunk 33/119
Processing chunk 34/119
Processing chunk 35/119
Processing chunk 36/119
Processing chunk 37/119
Processing chunk 38/119
Processing chunk 39/119
Processing chunk 40/119
Processing chunk 41/119
Processing chunk 42/119
P

### Calculate NDVI per month

Loading Sentinel-2 dataset

In [26]:

chunk_size = 50
chunks = [gdf[i:i + chunk_size] for i in range(0, gdf.shape[0], chunk_size)]
S2 = ee.ImageCollection('COPERNICUS/S2_HARMONIZED') \
    .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 30))
output_csv_path = '../midsave/refor2016_ndvi_top3.csv'
ndvi_monthly = pd.DataFrame()
months = list(range(1, 13))

while months:
    try:
        with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:  # Adjust max_workers based on your system
            futures = {executor.submit(process_month, month, chunks, S2): month for month in months}
            
            for future in tqdm(concurrent.futures.as_completed(futures), total=len(months)):
                month_result = future.result()
                ndvi_monthly = pd.concat([ndvi_monthly, month_result], ignore_index=True)
        
        # If all months are processed successfully, break the loop
        break
    except Exception as e:
        print(f"Error: {e}")
        if 'month' in ndvi_monthly.columns:
            months = list(set(months) - set(ndvi_monthly.month.unique().tolist()))
        else:
            months = list(set(months))
        continue

# Saving the results to a CSV file
ndvi_monthly.to_csv(output_csv_path, index=False)

Processing chunk 1/2200 for month 1...Processing chunk 1/2200 for month 2...

Processing chunk 1/2200 for month 3...
Processing chunk 1/2200 for month 4...


  0%|          | 0/12 [00:00<?, ?it/s]

Processing chunk 2/2200 for month 1...
Processing chunk 2/2200 for month 2...
Processing chunk 2/2200 for month 4...
Processing chunk 2/2200 for month 3...
Processing chunk 3/2200 for month 1...
Processing chunk 3/2200 for month 2...
Processing chunk 4/2200 for month 1...
Processing chunk 3/2200 for month 3...
Processing chunk 3/2200 for month 4...
Processing chunk 4/2200 for month 2...
Processing chunk 5/2200 for month 1...
Processing chunk 4/2200 for month 3...
Processing chunk 4/2200 for month 4...
Processing chunk 6/2200 for month 1...
Processing chunk 5/2200 for month 2...
Processing chunk 5/2200 for month 3...
Processing chunk 7/2200 for month 1...
Processing chunk 5/2200 for month 4...
Processing chunk 6/2200 for month 2...
Processing chunk 8/2200 for month 1...
Processing chunk 6/2200 for month 3...
Processing chunk 7/2200 for month 2...
Processing chunk 9/2200 for month 1...
Processing chunk 6/2200 for month 4...
Processing chunk 7/2200 for month 3...
Processing chunk 8/2200 f

  8%|▊         | 1/12 [80:05:37<881:01:55, 288337.76s/it]

Processing chunk 1/2200 for month 5...
Processing chunk 1508/2200 for month 3...
Processing chunk 1724/2200 for month 2...
Processing chunk 1509/2200 for month 3...
Processing chunk 1725/2200 for month 2...
Processing chunk 1292/2200 for month 4...
Processing chunk 2/2200 for month 5...
Processing chunk 1510/2200 for month 3...
Processing chunk 1726/2200 for month 2...
Processing chunk 1727/2200 for month 2...
Processing chunk 1293/2200 for month 4...
Processing chunk 1511/2200 for month 3...
Error processing chunk 2: Computation timed out.
Processing chunk 1294/2200 for month 4...
Processing chunk 1728/2200 for month 2...
Processing chunk 1512/2200 for month 3...
Processing chunk 1729/2200 for month 2...
Processing chunk 3/2200 for month 5...
Processing chunk 1295/2200 for month 4...
Processing chunk 1513/2200 for month 3...
Processing chunk 1730/2200 for month 2...
Processing chunk 1296/2200 for month 4...
Processing chunk 1731/2200 for month 2...
Processing chunk 4/2200 for month 5.

  8%|▊         | 1/12 [89:11:51<981:10:28, 321111.68s/it]


In [None]:


# while months:
#     try:
#         with concurrent.futures.ThreadPoolExecutor() as executor:
#             futures = {executor.submit(process_month, month, chunks, S2): month for month in months}
            
#             for future in tqdm(concurrent.futures.as_completed(futures), total=len(months)):
#                 month_result = future.result()
#                 ndvi_monthly = pd.concat([ndvi_monthly, month_result], ignore_index=True)
        
#         # If all months are processed successfully, break the loop
#         break
#     except Exception as e:
#         print(f"Error: {e}")
#         if 'month' in ndvi_monthly.columns:
#             months = list(set(months) - set(ndvi_monthly.month.unique().tolist()))
#         else:
#             months = list(set(months))
#         continue

# # Save the results to a CSV file
# ndvi_monthly.to_csv(output_csv_path, index=False)

Processing chunk 1/176 for month 1...
Processing chunk 1/176 for month 2...
Processing chunk 1/176 for month 3...
Processing chunk 1/176 for month 4...
Processing chunk 1/176 for month 5...
Processing chunk 1/176 for month 6...
Processing chunk 1/176 for month 7...
Processing chunk 1/176 for month 8...
Processing chunk 1/176 for month 9...
Processing chunk 1/176 for month 10...
Processing chunk 1/176 for month 11...
Processing chunk 1/176 for month 12...


  0%|          | 0/12 [00:00<?, ?it/s]

Processing chunk 2/176 for month 7...
Processing chunk 2/176 for month 8...
Processing chunk 3/176 for month 7...
Processing chunk 3/176 for month 8...
Processing chunk 2/176 for month 6...
Processing chunk 4/176 for month 7...
Processing chunk 2/176 for month 9...
Processing chunk 5/176 for month 7...
Processing chunk 4/176 for month 8...
Processing chunk 6/176 for month 7...
Processing chunk 5/176 for month 8...
Processing chunk 7/176 for month 7...
Processing chunk 2/176 for month 10...
Processing chunk 6/176 for month 8...
Processing chunk 8/176 for month 7...
Processing chunk 9/176 for month 7...
Processing chunk 7/176 for month 8...
Processing chunk 3/176 for month 9...
Processing chunk 8/176 for month 8...
Processing chunk 2/176 for month 11...
Processing chunk 3/176 for month 6...
Processing chunk 2/176 for month 12...
Processing chunk 9/176 for month 8...
Processing chunk 2/176 for month 1...
Processing chunk 4/176 for month 6...
Processing chunk 5/176 for month 6...
Processin

  0%|          | 0/12 [42:08<?, ?it/s]


Processing chunk 18/176 for month 10...


KeyboardInterrupt: 

Select three months with the highest NDVI values

In [None]:
ndvi_top3 = (ndvi_monthly
             .groupby('site_id_created', group_keys=False)[['site_id_created', 'month', 'mean']]
             .apply(lambda x: x.nlargest(3, 'mean'))
             .rename(columns = {'mean':'ndvi_monthly_mean'})
             .reset_index(drop = True))

Processing chunk 11/176 for month 2...
Processing chunk 24/176 for month 9...
Processing chunk 49/176 for month 8...
Processing chunk 53/176 for month 7...
Processing chunk 17/176 for month 12...
Processing chunk 50/176 for month 6...
Processing chunk 10/176 for month 4...
Processing chunk 54/176 for month 7...
Processing chunk 14/176 for month 1...
Processing chunk 22/176 for month 11...
Processing chunk 51/176 for month 6...
Processing chunk 25/176 for month 9...
Error processing chunk 3: Computation timed out.
Processing chunk 19/176 for month 10...
Processing chunk 11/176 for month 3...
Processing chunk 50/176 for month 8...
Processing chunk 55/176 for month 7...
Processing chunk 52/176 for month 6...
Processing chunk 26/176 for month 9...
Processing chunk 20/176 for month 10...
Processing chunk 23/176 for month 11...
Processing chunk 51/176 for month 8...
Processing chunk 53/176 for month 6...
Processing chunk 18/176 for month 12...
Processing chunk 56/176 for month 7...
Processin

In [None]:
ndvi_top3 

Unnamed: 0,site_id_created,month,ndvi_monthly_mean
0,16908,9,0.521867
1,16908,2,0.432676
2,16908,8,0.372618
3,16909,2,0.517626
4,16909,9,0.479731
...,...,...,...
239995,1223161,9,0.467023
239996,1223161,10,0.417482
239997,1223162,8,0.582072
239998,1223162,9,0.509445


In [None]:
output_csv_path = '../midsave/add_ndvi_top3.csv'

In [None]:
ndvi_top3.to_csv(output_csv_path, index=False)

### SAVI(Soil Adjust Vegetation Index)

Before SAVI ensure Top_Three_NDVI_Months and planting dates are added to df data (After creating the column Top_Three_Ndvi_months rerun cell 5 where chunks are processed to update and include the column in the chunk before running the Shadow index cell)

In [None]:
gdf_si = (gdf
       .merge(df[['site_id_created', 'planting_date_reported']], on = 'site_id_created', how = 'left')
       .merge(ndvi_top3[['site_id_created', 'month', 'ndvi_monthly_mean']], on = 'site_id_created', how = 'left')
       .dropna()
       .reset_index(drop = True))
gdf_si['planting_date_reported'] = gdf_si['planting_date_reported'].astype(int)
gdf_si['month'] = gdf_si['month'].astype(int)

In [None]:
gdf_si

In [None]:
chunk_size = 10
chunks = [gdf_si[i:i + chunk_size] for i in range(0, gdf_si.shape[0], chunk_size)]

In [None]:
output_csv_path = '../midsave/add_savi_index.csv'
savi_index = pd.DataFrame()

In [None]:
for i, chunk in enumerate(chunks):
    print(f"Processing chunk {i + 1}/{len(chunks)}")

    gdf_json_chunk = chunk.__geo_interface__

    try:
        fc_chunk = geemap.geojson_to_ee(gdf_json_chunk)
    except Exception as e:
        print(f"Error converting chunk {i + 1} to Earth Engine FeatureCollection: {e}")
        continue

    try:
        savi_index_chunk = fc_chunk.map(lambda feature: get_savi_for_month(feature, S2))
        if savi_index_chunk:
            temp_chunk_df = pd.DataFrame([feature['properties'] for feature in savi_index_chunk.getInfo()['features']])
        else:
            temp_chunk_df = pd.DataFrame()
    except Exception as e:
        print(f"Error processing chunk {i + 1}: {e}")
        continue

    # Appending to combined DataFrame
    savi_index = pd.concat([savi_index, temp_chunk_df], ignore_index=True)

savi_index_grouped = savi_index.groupby(['site_id_created'])['savi_index'].mean().reset_index()
savi_index_grouped.to_csv(output_csv_path, index=False)

print("All chunks processed, combined results saved to:", output_csv_path)

In [None]:
savi_index_grouped

### NDVI
- atplanting
- 1 year after planting
- 2 years after planting
- 5 years after planting

In [None]:
output_csv_path = '../midsave/ndvi.csv'
ndvi = pd.DataFrame()

In [None]:
for i, chunk in enumerate(chunks):
    print(f"Processing chunk {i + 1}/{len(chunks)}")

    gdf_json_chunk = chunk.__geo_interface__

    try:
        fc_chunk = geemap.geojson_to_ee(gdf_json_chunk)
    except Exception as e:
        print(f"Error converting chunk {i + 1} to Earth Engine FeatureCollection: {e}")
        continue

    try:
        ndvi_chunk = fc_chunk.map(lambda feature: get_ndvi_for_month(feature, S2))
        if ndvi_chunk:
            temp_chunk_df = pd.DataFrame([feature['properties'] for feature in ndvi_chunk.getInfo()['features']])
        else:
            temp_chunk_df = pd.DataFrame()
    except Exception as e:
        print(f"Error processing chunk {i + 1}: {e}")
        continue

    # Appending to combined DataFrame
    ndvi = pd.concat([ndvi, temp_chunk_df], ignore_index=True)

ndvi_grouped = ndvi.groupby(['site_id_created'])['ndvi'].mean().reset_index()
ndvi_grouped.to_csv(output_csv_path, index=False)

print("All chunks processed, combined results saved to:", output_csv_path)

In [None]:
ndvi_monthly = pd.read_csv('../midsave/ndvi.csv')

ndvi_monthly.tail()

### NDRE

In [None]:
output_csv_path = '../midsave/ndre.csv'
ndre = pd.DataFrame()

In [None]:
for i, chunk in enumerate(chunks):
    print(f"Processing chunk {i + 1}/{len(chunks)}")

    gdf_json_chunk = chunk.__geo_interface__

    try:
        fc_chunk = geemap.geojson_to_ee(gdf_json_chunk)
    except Exception as e:
        print(f"Error converting chunk {i + 1} to Earth Engine FeatureCollection: {e}")
        continue

    try:
        ndre_chunk = fc_chunk.map(lambda feature: get_ndre_for_month(feature, S2))
        if ndre_chunk:
            temp_chunk_df = pd.DataFrame([feature['properties'] for feature in ndre_chunk.getInfo()['features']])
        else:
            temp_chunk_df = pd.DataFrame()
    except Exception as e:
        print(f"Error processing chunk {i + 1}: {e}")
        continue

    # Appending to combined DataFrame
    ndre = pd.concat([ndre, temp_chunk_df], ignore_index=True)

ndre_grouped = ndre.groupby(['site_id_created'])['ndre'].mean().reset_index()
ndre_grouped.to_csv(output_csv_path, index=False)

print("All chunks processed, combined results saved to:", output_csv_path)

In [None]:
ndvi_monthly = pd.read_csv('../midsave/ndre.csv')

ndvi_monthly

### Combine them

In [None]:
df_combined = (df
               .merge(tree_cover, on = 'site_id_created', how = 'left')
               .merge(other_land_cover, on = 'site_id_created', how = 'left')
               .merge(built_area_cover, on = 'site_id_created', how = 'left')
               .merge(road_length, on = 'site_id_created', how = 'left')
               .merge(forest_loss, on = 'site_id_created', how = 'left')
               .merge(elevation_slope, on = 'site_id_created', how = 'left')
               .merge(ndvi_top3, on = 'site_id_created', how = 'left')
               .merge(savi_index_grouped, on = 'site_id_created', how = 'left')
               .merge(ndvi_grouped, on = 'site_id_created', how = 'left')
               .merge(ndre_grouped, on = 'site_id_created', how = 'left'))

In [None]:
df_combined.info()