**Introduction**
Generating the extra columns with the Earth Engine API

In [1]:
import geopandas as gpd
import pandas as pd
import time
import os
import numpy as np
import geemap
import ee
import json
from tqdm import tqdm
import concurrent.futures

# from google.colab import auth , drive
from helper_functions import calculate_area, calculate_built_area, calculate_road_length, calculate_forest_loss,process_month,extract_polygons
from helper_functions import calculate_elevation_and_slope, get_savi_for_month, get_ndvi_for_month,get_ndre_for_month



In [2]:


df = gpd.read_parquet("../midsave/consolidated_reforestation_projects.parquet")
df.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 1229175 entries, 0 to 1229174
Data columns (total 17 columns):
 #   Column                        Non-Null Count    Dtype   
---  ------                        --------------    -----   
 0   site_id_created               1229175 non-null  int64   
 1   project_id_reported           1229175 non-null  object  
 2   site_id_reported              1229175 non-null  object  
 3   site_description_reported     1696 non-null     object  
 4   site_sqkm                     1229175 non-null  float64 
 5   trees_planted_reported        4349 non-null     float64 
 6   country                       5030 non-null     object  
 7   project_description_reported  1228611 non-null  object  
 8   planting_date_reported        4821 non-null     float64 
 9   survival_rate_reported        2514 non-null     float64 
 10  host_name                     1229175 non-null  object  
 11  url                           1229175 non-null  object  
 12  specie

In [3]:
gdf = df.loc[:,['site_id_created', 'geometry']]

In [4]:
gdf.dropna(subset=['geometry'], inplace = True)

In [5]:

gdf = gdf.set_crs("EPSG:4326")
print(gdf.crs)

EPSG:4326


In [6]:
gdf[:430]

Unnamed: 0,site_id_created,geometry
0,0,"POLYGON ((-49.95883 -9.35107, -49.95976 -9.351..."
1,1,"POLYGON ((-43.4725 -22.48945, -43.47236 -22.48..."
2,2,"POLYGON ((-43.462 -22.4779, -43.46583 -22.4875..."
3,3,"POLYGON ((-43.46833 -22.4919, -43.46834 -22.49..."
4,4,"POLYGON ((-2.01902 8.21743, -2.02027 8.2264, -..."
...,...,...
426,426,"POLYGON ((-64.25714 46.00024, -64.25727 46.000..."
427,427,"POLYGON ((-64.25637 46.00161, -64.2567 46.0008..."
428,428,"POLYGON ((-64.27435 46.02906, -64.27412 46.029..."
429,429,"POLYGON ((-64.27349 46.02907, -64.27274 46.029..."


Create a subsample for testing code ! Delete when done !

In [7]:



# Pass every geometry through the imported project helper `extract_polygons`.
gdf['geometry'] = gdf['geometry'].apply(extract_polygons)


def _geom_type_or_none(geom):
    """Return the geometry's `geom_type`, or None when the geometry is falsy."""
    return geom.geom_type if geom else None


gdf['geometry_type'] = gdf['geometry'].apply(_geom_type_or_none)

# Sanity check: show any rows whose geometry is still a GeometryCollection.
is_collection = gdf['geometry_type'] == 'GeometryCollection'
geometry_collection_gdf = gdf[is_collection]
print(geometry_collection_gdf[['geometry', 'geometry_type']].head())

Empty GeoDataFrame
Columns: [geometry, geometry_type]
Index: []


### Authenticate with Google Earth Engine
Log in to Earth Engine with `ee.Authenticate()`, create a project, and then initialize that project with `ee.Initialize(project=...)`.

In [8]:
ee.Authenticate()

True

In [9]:
ee.Initialize(project='spring-idiom-398208')

### Calculating Tree Cover
- tree_cover_area_2000
- tree_cover_area_2005
- tree_cover_area_2010
- tree_cover_area_2015
- tree_cover_area_2020

Chunking

In [None]:
chunk_size = 50
chunks = [gdf[i:i + chunk_size] for i in range(0, gdf.shape[0], chunk_size)]

GLAD Landcover (https://glad.umd.edu/dataset/GLCLUC2020)

In [None]:
landmask = ee.Image("projects/glad/OceanMask").lte(1)
landCover = ee.Image('projects/glad/GLCLU2020/v2/LCLUC_2020').updateMask(landmask)

Masking land cover image to only include class codes of interest for tree cover

In [None]:
# Class codes of interest for tree cover: 25-48 and 125-148, both inclusive.
classCodes = list(range(25, 49)) + list(range(125, 149))
# Listed codes are mapped onto themselves; 255 is passed as remap's default value.
maskedLandCover = landCover.remap(classCodes, classCodes, 255)

Calculating area for each class list in the codes

In [None]:
output_csv_path = '../midsave/tree_cover.csv'
tree_cover = pd.DataFrame()

In [None]:
# Per-chunk results are collected in a list and concatenated only at checkpoints.
# Growing `tree_cover` with pd.concat and rewriting the whole CSV after every chunk
# makes the run quadratic in the number of chunks.
checkpoint_every = 25  # successful chunks between two CSV rewrites
chunk_frames = []
n_chunks = len(chunks)

for i, chunk in enumerate(chunks):
    print(f"Processing chunk {i + 1}/{n_chunks}")

    # Converting GeoDataFrame chunk to GeoJSON
    gdf_json_chunk = chunk.__geo_interface__

    # Convert features one by one so a single bad geometry does not discard the chunk.
    # NOTE(review): confirm that geemap.geojson_to_ee keeps the feature properties
    # (site_id_created) when given a single Feature rather than a FeatureCollection.
    valid_features = []
    for feature in gdf_json_chunk['features']:
        try:
            valid_features.append(geemap.geojson_to_ee(feature))
        except Exception as geom_error:
            print(f"Problematic geometry in chunk {i + 1}: {feature['geometry']} - {geom_error}")

    if not valid_features:
        print(f"No valid features in chunk {i + 1}, skipping...")
        continue

    try:
        # Creating a FeatureCollection from valid features
        fc_chunk = ee.FeatureCollection(valid_features)

        # Mapping the area calculation function over the FeatureCollection
        area_results_chunk = fc_chunk.map(lambda feature: calculate_area(feature, classCodes, maskedLandCover))
        # getInfo() is the call that brings the computed features back to the client.
        temp_chunk_df = pd.DataFrame([feature['properties'] for feature in area_results_chunk.getInfo()['features']])
        temp_chunk_df.rename(columns={'cover_area_2020': 'tree_cover_area_2020'}, inplace=True)
    except Exception as e:
        # Best effort: report the failing chunk and carry on with the next one.
        print(f"Error processing chunk {i + 1}: {e}")
        continue

    chunk_frames.append(temp_chunk_df)

    # Periodic checkpoint: an interrupted run loses at most `checkpoint_every` chunks.
    if len(chunk_frames) % checkpoint_every == 0:
        tree_cover = pd.concat(chunk_frames, ignore_index=True)
        tree_cover.to_csv(output_csv_path, index=False)

# Final write covers the chunks processed since the last checkpoint.
if chunk_frames:
    tree_cover = pd.concat(chunk_frames, ignore_index=True)
    tree_cover.to_csv(output_csv_path, index=False)

print("All chunks processed, combined results saved to:", output_csv_path)

### Calculating other land cover
- permanent_water
- short_vegetation_after_tree_loss
- cropland_loss_to_tree
- cropland_gain_from_trees

In [None]:
classCodes = [208, 240, 248, 245]
classesOfInterest = ["permanent_water", "short_vegetation_after_tree_loss", "cropland_loss_to_tree", "cropland_gain_from_trees"]
maskedLandCover = landCover.remap(classCodes, classCodes, 255)

In [None]:
output_csv_path = '../midsave/other_land_cover.csv'
other_land_cover = pd.DataFrame()

In [None]:
# Per-chunk results are collected in a list and concatenated only at checkpoints.
# Growing `other_land_cover` with pd.concat and rewriting the whole CSV after every
# chunk makes the run quadratic in the number of chunks.
checkpoint_every = 25  # successful chunks between two CSV rewrites
chunk_frames = []
n_chunks = len(chunks)

for i, chunk in enumerate(chunks):
    print(f"Processing chunk {i + 1}/{n_chunks}")

    # Converting GeoDataFrame chunk to GeoJSON and then Earth Engine FeatureCollection
    gdf_json_chunk = chunk.__geo_interface__

    try:
        # Converting to Earth Engine FeatureCollection
        fc_chunk = geemap.geojson_to_ee(gdf_json_chunk)
    except Exception as e:
        print(f"Error converting chunk {i + 1} to Earth Engine FeatureCollection: {e}")
        continue

    try:
        # Mapping the area calculation function over the FeatureCollection
        area_results_chunk = fc_chunk.map(lambda feature: calculate_area(feature, classCodes, maskedLandCover))
        # NOTE(review): an Earth Engine collection object is presumably always truthy,
        # so the else branch looks unreachable - confirm before removing it.
        if area_results_chunk:
            temp_chunk_df = pd.DataFrame([feature['properties'] for feature in area_results_chunk.getInfo()['features']])
            temp_chunk_df.rename(columns={'cover_area_2020': 'other_land_cover_area_2020'}, inplace=True)
        else:
            temp_chunk_df = pd.DataFrame()
    except Exception as e:
        # Best effort: report the failing chunk and carry on with the next one.
        print(f"Error processing chunk {i + 1}: {e}")
        continue

    chunk_frames.append(temp_chunk_df)

    # Periodic checkpoint: an interrupted run loses at most `checkpoint_every` chunks.
    if len(chunk_frames) % checkpoint_every == 0:
        other_land_cover = pd.concat(chunk_frames, ignore_index=True)
        other_land_cover.to_csv(output_csv_path, index=False)

# Final write covers the chunks processed since the last checkpoint.
if chunk_frames:
    other_land_cover = pd.concat(chunk_frames, ignore_index=True)
    other_land_cover.to_csv(output_csv_path, index=False)

print("All chunks processed, combined results saved to:", output_csv_path)

### Calculate built-area shares

Loading the Earth Engine built area image for 2018

In [None]:
builtImage = ee.Image("JRC/GHSL/P2023A/GHS_BUILT_C/2018").select('built_characteristics')

In [None]:
output_csv_path = '../midsave/built_area_cover.csv'
built_area_cover = pd.DataFrame()

In [None]:
# Per-chunk results are collected in a list and concatenated only at checkpoints.
# Growing `built_area_cover` with pd.concat and rewriting the whole CSV after every
# chunk makes the run quadratic in the number of chunks.
checkpoint_every = 25  # successful chunks between two CSV rewrites
chunk_frames = []
n_chunks = len(chunks)

for i, chunk in enumerate(chunks):
    print(f"Processing built area for chunk {i + 1}/{n_chunks}")

    # Converting GeoDataFrame chunk to GeoJSON and then Earth Engine FeatureCollection
    gdf_json_chunk = chunk.__geo_interface__

    try:
        # Converting the GeoJSON chunk to Earth Engine FeatureCollection
        fc_chunk = geemap.geojson_to_ee(gdf_json_chunk)
    except Exception as e:
        print(f"Error converting chunk {i + 1} to Earth Engine FeatureCollection: {e}")
        continue

    try:
        # Mapping the built area calculation function over the FeatureCollection
        built_area_results_chunk = fc_chunk.map(lambda feature: calculate_built_area(feature, builtImage))
        # NOTE(review): an Earth Engine collection object is presumably always truthy,
        # so the else branch looks unreachable - confirm before removing it.
        if built_area_results_chunk:
            temp_chunk_df = pd.DataFrame([feature['properties'] for feature in built_area_results_chunk.getInfo()['features']])
        else:
            temp_chunk_df = pd.DataFrame()
    except Exception as e:
        # Best effort: report the failing chunk and carry on with the next one.
        print(f"Error processing chunk {i + 1}: {e}")
        continue

    chunk_frames.append(temp_chunk_df)

    # Periodic checkpoint: an interrupted run loses at most `checkpoint_every` chunks.
    if len(chunk_frames) % checkpoint_every == 0:
        built_area_cover = pd.concat(chunk_frames, ignore_index=True)
        built_area_cover.to_csv(output_csv_path, index=False)

# Final write covers the chunks processed since the last checkpoint.
if chunk_frames:
    built_area_cover = pd.concat(chunk_frames, ignore_index=True)
    built_area_cover.to_csv(output_csv_path, index=False)

print("All chunks processed, built area results saved to:", output_csv_path)

### Calculating road network cover
- total_road_length_km

Merging GEE road datasets

In [None]:
# Regional gROADS assets, merged below in exactly this order.
road_asset_ids = [
    'projects/ee-forest-monitoring/assets/gROADS-v1-africa',
    'projects/ee-forest-monitoring/assets/gROADS-v1-americas',
    'projects/ee-forest-monitoring/assets/gROADS-v1-asia',
    'projects/ee-forest-monitoring/assets/gROADS-v1-europe',
    'projects/ee-forest-monitoring/assets/gROADS-v1-oceania-east',
    'projects/ee-forest-monitoring/assets/gROADS-v1-oceania-west',
]
regional_roads = [ee.FeatureCollection(asset_id) for asset_id in road_asset_ids]

# Chain merge() left to right to build the single road collection.
roads = regional_roads[0]
for region_fc in regional_roads[1:]:
    roads = roads.merge(region_fc)

In [None]:
output_csv_path = '../midsave/road_length.csv'
road_length = pd.DataFrame()

In [None]:
# Per-chunk results are collected in a list and concatenated only at checkpoints.
# Growing `road_length` with pd.concat and rewriting the whole CSV after every
# chunk makes the run quadratic in the number of chunks.
checkpoint_every = 25  # successful chunks between two CSV rewrites
chunk_frames = []
n_chunks = len(chunks)

for i, chunk in enumerate(chunks):
    print(f"Processing road length for chunk {i + 1}/{n_chunks}")

    gdf_json_chunk = chunk.__geo_interface__

    try:
        # Converting the GeoJSON chunk to Earth Engine FeatureCollection
        fc_chunk = geemap.geojson_to_ee(gdf_json_chunk)
    except Exception as e:
        print(f"Error converting chunk {i + 1} to Earth Engine FeatureCollection: {e}")
        continue

    try:
        # Mapping the road length calculation function over the FeatureCollection
        road_length_results_chunk = fc_chunk.map(lambda feature: calculate_road_length(feature, roads))
        # NOTE(review): an Earth Engine collection object is presumably always truthy,
        # so the else branch looks unreachable - confirm before removing it.
        if road_length_results_chunk:
            temp_chunk_df = pd.DataFrame([feature['properties'] for feature in road_length_results_chunk.getInfo()['features']])
        else:
            temp_chunk_df = pd.DataFrame()
    except Exception as e:
        # Best effort: report the failing chunk and carry on with the next one.
        print(f"Error processing chunk {i + 1}: {e}")
        continue

    chunk_frames.append(temp_chunk_df)

    # Periodic checkpoint: an interrupted run loses at most `checkpoint_every` chunks.
    if len(chunk_frames) % checkpoint_every == 0:
        road_length = pd.concat(chunk_frames, ignore_index=True)
        road_length.to_csv(output_csv_path, index=False)

# Final write covers the chunks processed since the last checkpoint.
if chunk_frames:
    road_length = pd.concat(chunk_frames, ignore_index=True)
    road_length.to_csv(output_csv_path, index=False)

print("All chunks processed, road length results saved to:", output_csv_path)

### Calculating forest loss
- loss_pre_5
- loss_post_3
- loss_post_5

Loading the Global Forest Change 2023 dataset

In [None]:
# NOTE: despite the `gfc2017` name, the asset loaded here is the 2023 (v1_11)
# Global Forest Change image; the name is kept because later cells reference it.
gfc2017 = ee.Image('UMD/hansen/global_forest_change_2023_v1_11')

In [None]:
output_csv_path = '../midsave/forest_loss.csv'
forest_loss = pd.DataFrame()

In [None]:
# Per-chunk frames are collected in a list and concatenated once at the end;
# growing `forest_loss` with pd.concat inside the loop is quadratic in the number of chunks.
chunk_frames = []
n_chunks = len(chunks)

for i, chunk in enumerate(chunks):
    print(f"Processing forest loss for chunk {i + 1}/{n_chunks}")

    # Converting the current GeoDataFrame chunk to GeoJSON
    gdf_json_chunk = chunk.__geo_interface__

    try:
        # Converting GeoJSON chunk to Earth Engine FeatureCollection
        fc_chunk = geemap.geojson_to_ee(gdf_json_chunk)
    except Exception as e:
        print(f"Error converting chunk {i + 1} to Earth Engine FeatureCollection: {e}")
        continue

    try:
        # Mapping the forest loss calculation function over the FeatureCollection
        loss_results_chunk = fc_chunk.map(lambda feature: calculate_forest_loss(feature, gfc2017))
        # NOTE(review): an Earth Engine collection object is presumably always truthy,
        # so the else branch looks unreachable - confirm before removing it.
        if loss_results_chunk:
            temp_chunk_df = pd.DataFrame([feature['properties'] for feature in loss_results_chunk.getInfo()['features']])
        else:
            temp_chunk_df = pd.DataFrame()
    except Exception as e:
        # Best effort: report the failing chunk and carry on with the next one.
        print(f"Error processing chunk {i + 1}: {e}")
        continue

    chunk_frames.append(temp_chunk_df)

if chunk_frames:
    forest_loss = pd.concat(chunk_frames, ignore_index=True)

# Nothing is written to disk in this cell: the raw per-site results still need the
# post-processing below, and the CSV is only written once that is done.
print("All chunks processed, forest loss results collected; they are saved after post-processing to:", output_csv_path)

#### Generating the loss columns

In [None]:
# Key the exploded groups by the real site id. `forest_loss` was built with
# ignore_index=True and may be missing rows (dropped geometries, failed chunks),
# so its positional row index is not a reliable `site_id_created`.
# Falls back to the row index only when the column is absent.
if 'site_id_created' in forest_loss.columns:
    loss_groups = forest_loss.set_index('site_id_created')['groups']
else:
    loss_groups = forest_loss['groups']

# One row per (site, group entry); the second reset_index adds a running
# 'index' column that the next cell uses as a join key.
temp = (loss_groups
               .explode()
               .dropna()
               .rename_axis('site_id_created')
               .reset_index()
               .reset_index())

In [None]:
# Expand each group dict into columns, then attach the site id through the
# running 'index' key shared by both frames.
group_fields = pd.json_normalize(temp['groups']).reset_index()
site_lookup = temp[['index', 'site_id_created']]
temp = group_fields.merge(site_lookup, on='index', how='left').drop(columns=['index'])

In [None]:
temp['year'] = 2000 + temp['group']

In [None]:
# Wide layout: one row per site, one column per year, holding the 'sum' value.
forest_loss = temp.pivot(index=['site_id_created'], columns='year', values='sum')
# Replace the named column index with a plain list of year labels.
forest_loss.columns = list(forest_loss.columns)
forest_loss = forest_loss.reset_index()

Add planting date information

In [None]:
forest_loss = forest_loss.merge(df[['site_id_created','planting_date_reported']], on = 'site_id_created', how = 'left')

In [None]:
def _mean_loss_for_offsets(row, offsets):
    """Average the yearly values found at planting year + each offset.

    Parameters
    ----------
    row : pandas.Series
        One row of the wide loss table: year labels map to that year's value and
        'planting_date_reported' holds the planting year.
    offsets : iterable of int
        Year offsets relative to the planting year (negative = before planting).

    Returns
    -------
    float
        Mean of the non-missing values for the years present in the row, or NaN
        when the planting year is missing/unusable or no such value exists.
    """
    planting_year = row.get('planting_date_reported')
    if planting_year is None or pd.isna(planting_year):
        return np.nan

    try:
        planting_year = int(planting_year)
        # Look years up on the row itself rather than on a notebook-level frame,
        # so the helper depends only on its argument.
        losses = [row.loc[planting_year + offset]
                  for offset in offsets
                  if (planting_year + offset) in row.index]
        # Drop missing values explicitly; this avoids the RuntimeWarning that
        # np.nanmean emits for an all-NaN input.
        valid = [value for value in losses if not pd.isna(value)]
        return float(np.mean(valid)) if valid else np.nan
    except (TypeError, ValueError):
        # Best effort, as before: an unusable planting date or value yields NaN.
        return np.nan


def calculate_loss_pre_5(row):
    """Mean yearly value over the 5 years before the reported planting year."""
    return _mean_loss_for_offsets(row, range(-5, 0))


def calculate_loss_post_3(row):
    """Mean yearly value over the 3 years after the reported planting year."""
    return _mean_loss_for_offsets(row, range(1, 4))


def calculate_loss_post_5(row):
    """Mean yearly value over the 5 years after the reported planting year."""
    return _mean_loss_for_offsets(row, range(1, 6))


# Derive each summary column by applying its row-wise helper, in this order.
loss_column_builders = (
    ('loss_pre_5', calculate_loss_pre_5),
    ('loss_post_3', calculate_loss_post_3),
    ('loss_post_5', calculate_loss_post_5),
)
for loss_column, loss_builder in loss_column_builders:
    forest_loss[loss_column] = forest_loss.apply(loss_builder, axis=1)

In [None]:
forest_loss.head(100)

In [None]:
# Remove the per-year columns (2000-2023) and the planting date; labels that are
# not present are ignored.
columns_to_remove = list(range(2000, 2024)) + ['planting_date_reported']
forest_loss = forest_loss.drop(columns=columns_to_remove, errors='ignore')
forest_loss.head(1)

In [None]:
forest_loss.to_csv(output_csv_path, index=False)

### Calculating elevation and slope

Loading the Digital Elevations Model (DEM) dataset

In [None]:
dataset = ee.Image('USGS/SRTMGL1_003')
elevation = dataset.select('elevation')
slope = ee.Terrain.slope(elevation)

In [None]:
output_csv_path = '../midsave/elevation_slope.csv'
elevation_slope = pd.DataFrame()

In [None]:
# Per-chunk results are collected in a list and concatenated only at checkpoints.
# Growing `elevation_slope` with pd.concat and rewriting the whole CSV after every
# chunk makes the run quadratic in the number of chunks.
checkpoint_every = 25  # successful chunks between two CSV rewrites
chunk_frames = []
n_chunks = len(chunks)

for i, chunk in enumerate(chunks):
    print(f"Processing chunk {i + 1}/{n_chunks}")

    # Converting GeoDataFrame chunk to GeoJSON and then to Earth Engine FeatureCollection
    gdf_json_chunk = chunk.__geo_interface__

    try:
        fc_chunk = geemap.geojson_to_ee(gdf_json_chunk)
    except Exception as e:
        print(f"Error converting chunk {i + 1} to Earth Engine FeatureCollection: {e}")
        continue

    try:
        results_chunk = fc_chunk.map(lambda feature: calculate_elevation_and_slope(feature, elevation, slope))
        # NOTE(review): an Earth Engine collection object is presumably always truthy,
        # so the else branch looks unreachable - confirm before removing it.
        if results_chunk:
            temp_chunk_df = pd.DataFrame([feature['properties'] for feature in results_chunk.getInfo()['features']])
        else:
            temp_chunk_df = pd.DataFrame()
    except Exception as e:
        # Best effort: report the failing chunk and carry on with the next one.
        print(f"Error processing chunk {i + 1}: {e}")
        continue

    chunk_frames.append(temp_chunk_df)

    # Periodic checkpoint: an interrupted run loses at most `checkpoint_every` chunks.
    if len(chunk_frames) % checkpoint_every == 0:
        elevation_slope = pd.concat(chunk_frames, ignore_index=True)
        elevation_slope.to_csv(output_csv_path, index=False)

# Final write covers the chunks processed since the last checkpoint.
if chunk_frames:
    elevation_slope = pd.concat(chunk_frames, ignore_index=True)
    elevation_slope.to_csv(output_csv_path, index=False)

print("All chunks processed, combined results saved to:", output_csv_path)

### Calculate NDVI per month

Loading Sentinel-2 dataset

In [10]:
chunk_size = 500
chunks = [gdf[i:i + chunk_size] for i in range(0, gdf.shape[0], chunk_size)]
S2 = ee.ImageCollection('COPERNICUS/S2_HARMONIZED') \
    .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 30))
# NOTE(review): the monthly results are written to the same path that a later cell
# uses for the top-3 table, so that later write replaces this file - confirm intended.
output_csv_path = '../midsave/ndvi_top3.csv'
ndvi_monthly = pd.DataFrame()
months = list(range(1, 13))

# Bounded retry: a persistent failure must not loop forever.
max_attempts = 5

for attempt in range(1, max_attempts + 1):
    if not months:
        break

    failed_months = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:  # Adjust max_workers based on your system
        futures = {executor.submit(process_month, month, chunks, S2): month for month in months}

        for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures)):
            month = futures[future]
            # Handle each month on its own: one failing month must not throw away
            # the results of the months that finished successfully.
            try:
                month_result = future.result()
            except Exception as e:
                print(f"Error processing month {month}: {e}")
                failed_months.append(month)
                continue
            ndvi_monthly = pd.concat([ndvi_monthly, month_result], ignore_index=True)

    # Only the months that failed in this round are retried.
    months = sorted(failed_months)
    if months and attempt < max_attempts:
        time.sleep(10 * attempt)  # simple linear back-off before the next round

if months:
    print(f"Giving up on months {months} after {max_attempts} attempts")

# Saving the results to a CSV file
ndvi_monthly.to_csv(output_csv_path, index=False)

Processing chunk 1/2459 for month 1...
Processing chunk 1/2459 for month 2...
Processing chunk 1/2459 for month 3...
Processing chunk 1/2459 for month 4...


  0%|          | 0/12 [00:00<?, ?it/s]

In [None]:


# Resume cell: process only the months that are not yet present in `ndvi_monthly`.
# Without this filter a re-run would recompute every month still listed in `months`
# and append duplicate rows.
if 'month' in ndvi_monthly.columns:
    done_months = set(ndvi_monthly['month'].unique().tolist())
    months = [month for month in months if month not in done_months]

# Bounded retry: a persistent failure must not loop forever.
max_attempts = 5

for attempt in range(1, max_attempts + 1):
    if not months:
        break

    failed_months = []
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = {executor.submit(process_month, month, chunks, S2): month for month in months}

        for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures)):
            month = futures[future]
            # Handle each month on its own: one failing month must not throw away
            # the results of the months that finished successfully.
            try:
                month_result = future.result()
            except Exception as e:
                print(f"Error processing month {month}: {e}")
                failed_months.append(month)
                continue
            ndvi_monthly = pd.concat([ndvi_monthly, month_result], ignore_index=True)

    # Only the months that failed in this round are retried.
    months = sorted(failed_months)
    if months and attempt < max_attempts:
        time.sleep(10 * attempt)  # simple linear back-off before the next round

if months:
    print(f"Giving up on months {months} after {max_attempts} attempts")

# Save the results to a CSV file
ndvi_monthly.to_csv(output_csv_path, index=False)

Select three months with the highest NDVI values

In [None]:
def _three_highest(site_rows):
    """Return the (up to) three rows of one site with the largest 'mean'."""
    return site_rows.nlargest(3, 'mean')


# Group the monthly rows per site, keep each site's top three months by 'mean',
# then give the value column its final name and a fresh 0..n-1 index.
per_site = ndvi_monthly.groupby('site_id_created', group_keys=False)[['site_id_created', 'month', 'mean']]
top_rows = per_site.apply(_three_highest)
ndvi_top3 = top_rows.rename(columns={'mean': 'ndvi_monthly_mean'}).reset_index(drop=True)

In [None]:
ndvi_top3 

In [None]:
ndvi_top3.to_csv(output_csv_path, index=False)

### SAVI (Soil-Adjusted Vegetation Index)

Before SAVI ensure Top_Three_NDVI_Months and planting dates are added to df data (After creating the column Top_Three_Ndvi_months rerun cell 5 where chunks are processed to update and include the column in the chunk before running the Shadow index cell)

In [None]:
# Attach the planting year and the top-NDVI months to every site geometry.
planting_dates = df[['site_id_created', 'planting_date_reported']]
top_months = ndvi_top3[['site_id_created', 'month', 'ndvi_monthly_mean']]

gdf_si = gdf.merge(planting_dates, on='site_id_created', how='left')
gdf_si = gdf_si.merge(top_months, on='site_id_created', how='left')
# Keep only complete rows, then cast the two numeric keys to integers.
gdf_si = gdf_si.dropna().reset_index(drop=True)
for int_column in ('planting_date_reported', 'month'):
    gdf_si[int_column] = gdf_si[int_column].astype(int)

In [None]:
gdf_si

In [None]:
chunk_size = 10
chunks = [gdf_si[i:i + chunk_size] for i in range(0, gdf_si.shape[0], chunk_size)]

In [None]:
output_csv_path = '../midsave/savi_index.csv'
savi_index = pd.DataFrame()

In [None]:
# Per-chunk frames are collected in a list and concatenated once at the end;
# growing `savi_index` with pd.concat inside the loop is quadratic in the number of chunks.
chunk_frames = []
n_chunks = len(chunks)

for i, chunk in enumerate(chunks):
    print(f"Processing chunk {i + 1}/{n_chunks}")

    gdf_json_chunk = chunk.__geo_interface__

    try:
        fc_chunk = geemap.geojson_to_ee(gdf_json_chunk)
    except Exception as e:
        print(f"Error converting chunk {i + 1} to Earth Engine FeatureCollection: {e}")
        continue

    try:
        savi_index_chunk = fc_chunk.map(lambda feature: get_savi_for_month(feature, S2))
        # NOTE(review): an Earth Engine collection object is presumably always truthy,
        # so the else branch looks unreachable - confirm before removing it.
        if savi_index_chunk:
            temp_chunk_df = pd.DataFrame([feature['properties'] for feature in savi_index_chunk.getInfo()['features']])
        else:
            temp_chunk_df = pd.DataFrame()
    except Exception as e:
        # Best effort: report the failing chunk and carry on with the next one.
        print(f"Error processing chunk {i + 1}: {e}")
        continue

    chunk_frames.append(temp_chunk_df)

# Fail with a clear message instead of an opaque KeyError from groupby below.
if not chunk_frames:
    raise RuntimeError("No SAVI chunk was processed successfully; nothing to aggregate")

savi_index = pd.concat(chunk_frames, ignore_index=True)

# One row per site: mean of the per-row 'savi_index' values.
savi_index_grouped = savi_index.groupby(['site_id_created'])['savi_index'].mean().reset_index()
savi_index_grouped.to_csv(output_csv_path, index=False)

print("All chunks processed, combined results saved to:", output_csv_path)

In [None]:
savi_index_grouped

### NDVI
- at planting
- 1 year after planting
- 2 years after planting
- 5 years after planting

In [None]:
output_csv_path = '../midsave/ndvi.csv'
ndvi = pd.DataFrame()

In [None]:
# Per-chunk frames are collected in a list and concatenated once at the end;
# growing `ndvi` with pd.concat inside the loop is quadratic in the number of chunks.
chunk_frames = []
n_chunks = len(chunks)

for i, chunk in enumerate(chunks):
    print(f"Processing chunk {i + 1}/{n_chunks}")

    gdf_json_chunk = chunk.__geo_interface__

    try:
        fc_chunk = geemap.geojson_to_ee(gdf_json_chunk)
    except Exception as e:
        print(f"Error converting chunk {i + 1} to Earth Engine FeatureCollection: {e}")
        continue

    try:
        ndvi_chunk = fc_chunk.map(lambda feature: get_ndvi_for_month(feature, S2))
        # NOTE(review): an Earth Engine collection object is presumably always truthy,
        # so the else branch looks unreachable - confirm before removing it.
        if ndvi_chunk:
            temp_chunk_df = pd.DataFrame([feature['properties'] for feature in ndvi_chunk.getInfo()['features']])
        else:
            temp_chunk_df = pd.DataFrame()
    except Exception as e:
        # Best effort: report the failing chunk and carry on with the next one.
        print(f"Error processing chunk {i + 1}: {e}")
        continue

    chunk_frames.append(temp_chunk_df)

# Fail with a clear message instead of an opaque KeyError from groupby below.
if not chunk_frames:
    raise RuntimeError("No NDVI chunk was processed successfully; nothing to aggregate")

ndvi = pd.concat(chunk_frames, ignore_index=True)

# One row per site: mean of the per-row 'ndvi' values.
ndvi_grouped = ndvi.groupby(['site_id_created'])['ndvi'].mean().reset_index()
ndvi_grouped.to_csv(output_csv_path, index=False)

print("All chunks processed, combined results saved to:", output_csv_path)

In [None]:
# Read the saved per-site NDVI file back for a quick visual check. It gets its own
# name so the monthly NDVI table in `ndvi_monthly` is not overwritten.
ndvi_saved = pd.read_csv('../midsave/ndvi.csv')

ndvi_saved.tail()

### NDRE

In [None]:
output_csv_path = '../midsave/ndre.csv'
ndre = pd.DataFrame()

In [None]:
# Per-chunk frames are collected in a list and concatenated once at the end;
# growing `ndre` with pd.concat inside the loop is quadratic in the number of chunks.
chunk_frames = []
n_chunks = len(chunks)

for i, chunk in enumerate(chunks):
    print(f"Processing chunk {i + 1}/{n_chunks}")

    gdf_json_chunk = chunk.__geo_interface__

    try:
        fc_chunk = geemap.geojson_to_ee(gdf_json_chunk)
    except Exception as e:
        print(f"Error converting chunk {i + 1} to Earth Engine FeatureCollection: {e}")
        continue

    try:
        ndre_chunk = fc_chunk.map(lambda feature: get_ndre_for_month(feature, S2))
        # NOTE(review): an Earth Engine collection object is presumably always truthy,
        # so the else branch looks unreachable - confirm before removing it.
        if ndre_chunk:
            temp_chunk_df = pd.DataFrame([feature['properties'] for feature in ndre_chunk.getInfo()['features']])
        else:
            temp_chunk_df = pd.DataFrame()
    except Exception as e:
        # Best effort: report the failing chunk and carry on with the next one.
        print(f"Error processing chunk {i + 1}: {e}")
        continue

    chunk_frames.append(temp_chunk_df)

# Fail with a clear message instead of an opaque KeyError from groupby below.
if not chunk_frames:
    raise RuntimeError("No NDRE chunk was processed successfully; nothing to aggregate")

ndre = pd.concat(chunk_frames, ignore_index=True)

# One row per site: mean of the per-row 'ndre' values.
ndre_grouped = ndre.groupby(['site_id_created'])['ndre'].mean().reset_index()
ndre_grouped.to_csv(output_csv_path, index=False)

print("All chunks processed, combined results saved to:", output_csv_path)

In [None]:
# Read the saved per-site NDRE file back for a quick visual check. It gets its own
# name: this is NDRE data and must not replace the NDVI table in `ndvi_monthly`.
ndre_saved = pd.read_csv('../midsave/ndre.csv')

ndre_saved

### Combine them

In [None]:
# Feature tables to attach to the project table, in the order their columns are appended.
extra_tables = [
    tree_cover,
    other_land_cover,
    built_area_cover,
    road_length,
    forest_loss,
    elevation_slope,
    ndvi_top3,
    savi_index_grouped,
    ndvi_grouped,
    ndre_grouped,
]

df_combined = df
for extra in extra_tables:
    # Bring in the join key plus only the columns that are not present yet.
    # Several of these tables presumably repeat the chunk's input properties
    # (e.g. geometry_type - TODO confirm); merging those repeatedly yields
    # _x/_y duplicates, and recent pandas raises MergeError when the suffixing
    # itself would create duplicate names.
    new_columns = [column for column in extra.columns
                   if column == 'site_id_created' or column not in df_combined.columns]
    # NOTE(review): `ndvi_top3` holds up to three rows per site, so this left merge
    # repeats each site once per top month - confirm that is intended.
    df_combined = df_combined.merge(extra[new_columns], on='site_id_created', how='left')

In [None]:
df_combined.info()