In [3]:
import numpy as np
import pandas as pd
import geopandas as gpd
from sklearn.cluster import DBSCAN
from tqdm import tqdm
import matplotlib.pyplot as plt

## Reading Reforestation Projects file after nesting filtering,Adding description and url from VERRA

In [None]:
projects = gpd.read_file('/updated_with_description_reforestation_projects_with_ndvi.gpkg')

In [None]:
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd

df_reforestation=filtered_projects

country_counts = df_reforestation.groupby('country').size().reset_index(name='counts')

world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))


world_with_counts = world.merge(country_counts, how="left", left_on="name", right_on="country")


world_with_counts['counts'] = world_with_counts['counts'].fillna(0)


fig, ax = plt.subplots(1, 1, figsize=(15, 10))
world_with_counts.plot(column='counts', ax=ax, legend=True,
                       legend_kwds={'label': "Count of Reforestation Sites by Country",
                                    'orientation': "horizontal"},
                       missing_kwds={"color": "lightgrey", "label": "No data"})
plt.show()

In [None]:
import geopandas as gpd
import pandas as pd
from shapely.geometry import shape
import plotly.express as px
import json

def convert_to_geometry(geojson_input):
    if geojson_input == '{}' or geojson_input is None:
        return None
    try:
        if isinstance(geojson_input, dict): 
            return shape(geojson_input)
        else:
            geojson_obj = json.loads(geojson_input) 
            return shape(geojson_obj)
    except (json.JSONDecodeError, TypeError):
        return None

df_reforestation['geometry'] = df_reforestation['geometry_reported'].apply(convert_to_geometry)
gdf_sites = gpd.GeoDataFrame(df_reforestation, geometry='geometry')




world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))


gdf_sites_with_country = gpd.sjoin(gdf_sites, world, how="left", op='intersects')


country_counts_from_sites = gdf_sites_with_country.groupby('name').size().reset_index(name='counts')


country_counts = country_counts.merge(country_counts_from_sites, how="outer", left_on="country", right_on="name")
country_counts['counts'] = country_counts[['counts_x', 'counts_y']].sum(axis=1)
country_counts.drop(['counts_x', 'counts_y', 'name'], axis=1, inplace=True)

world_with_counts = world.merge(country_counts, how="left", left_on="name", right_on="country")


world_with_counts['counts'] = world_with_counts['counts'].fillna(0)


fig = px.choropleth(world_with_counts,
                    locations="iso_a3",
                    color="counts",
                    hover_name="name",
                    color_continuous_scale=px.colors.sequential.Plasma,
                    title="Global Reforestation Sites Count")

fig.show()

In [None]:
# Sort country_counts by 'counts' in descending order and print the top 10
top_10_countries = country_counts.sort_values(by='counts', ascending=False).head(10)
print(top_10_countries)

In [None]:
import geopandas as gpd
import pandas as pd
from shapely.geometry import shape
import plotly.express as px
import json

def convert_to_geometry(geojson_input):
    if geojson_input == '{}' or geojson_input is None:
        return None
    try:
        if isinstance(geojson_input, dict):  
            return shape(geojson_input)
        else:
            geojson_obj = json.loads(geojson_input)  
            return shape(geojson_obj)
    except (json.JSONDecodeError, TypeError):
        return None


df_reforestation['geometry'] = df_reforestation['geometry_reported'].apply(convert_to_geometry)
gdf_sites = gpd.GeoDataFrame(df_reforestation, geometry='geometry')


gdf_sites.crs = "EPSG:4326"

world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))


gdf_sites_with_country = gpd.sjoin(gdf_sites, world, how="left", predicate='intersects')


gdf_sites_with_country['area_km2'] = gdf_sites_with_country.to_crs(epsg=3395).area / 10**6


country_area_covered = gdf_sites_with_country.groupby('name')['area_km2'].sum().reset_index(name='total_area_km2')

world_with_area_covered = world.merge(country_area_covered, how="left", left_on="name", right_on="name")


world_with_area_covered['total_area_km2'] = world_with_area_covered['total_area_km2'].fillna(0)


fig = px.choropleth(world_with_area_covered,
                    locations="iso_a3",
                    color="total_area_km2",
                    hover_name="name",
                    color_continuous_scale=px.colors.sequential.Viridis,
                    title="Global Reforestation Area Covered (km²)")

fig.show()

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go


fig = make_subplots(rows=1, cols=2, column_widths=[0.7, 0.3], 
                    specs=[[{"type": "choropleth"}, {"type": "table"}]])

fig.add_trace(
    px.choropleth(world_with_area_covered,
                  locations="iso_a3",
                  color="total_area_km2",
                  hover_name="name",
                  color_continuous_scale=px.colors.sequential.Viridis).data[0],
    row=1, col=1
)


table_data = world_with_area_covered[['name', 'total_area_km2']].sort_values(by='total_area_km2', ascending=False)

fig.add_trace(
    go.Table(
        header=dict(values=["Country", "Total Area Covered (km²)"],
                    fill_color='paleturquoise',
                    align='left'),
        cells=dict(values=[table_data.name, table_data.total_area_km2],
                   fill_color='lavender',
                   align='left')),
    row=1, col=2
)


fig.update_layout(
    title_text="Global Reforestation Area Covered (km²) with Data Table",
    showlegend=False
)

fig.show()

### Clustering the polygons

In [1]:
import os
import json
from sklearn.cluster import DBSCAN
from shapely.geometry import shape
import numpy as np

def load_and_group_features(geojson_file):
    groups = {}
    with open(geojson_file, 'r') as f:
        data = json.load(f)
    features = data['features'] if data['type'] == 'FeatureCollection' else [data]
    for feature in features:
        properties = feature['properties']
        planting_date = properties.get('planting_date_reported')
        ndvi_months = json.dumps(properties.get('Top_Three_NDVI_Months', '[7]'))  
        key = (planting_date, ndvi_months)
        if key not in groups:
            groups[key] = []
        groups[key].append(feature)
    return groups

def calculate_centroids(features):
    centroids = []
    for feature in features:
        polygon = shape(feature['geometry'])
        centroids.append(list(polygon.centroid.coords)[0])
    return centroids

def cluster_features(features, eps=0.1, min_samples=2):
    centroids = calculate_centroids(features)
    db = DBSCAN(eps=eps, min_samples=min_samples, metric='haversine').fit(np.radians(centroids))
    num_clusters = len(set(db.labels_)) - (1 if -1 in db.labels_ else 0)
    clusters = {}
    for label, feature in zip(db.labels_, features):
        if label not in clusters:
            clusters[label] = []
        clusters[label].append(feature)
    return clusters, num_clusters

def group_and_cluster_features(geojson_file):
    grouped_features = load_and_group_features(geojson_file)
    clustered_groups = {}
    total_clusters = 0
    for key, features in grouped_features.items():
        clustered, num_clusters = cluster_features(features)
        clustered_groups[key] = clustered
        total_clusters += num_clusters
    return clustered_groups, total_clusters


geojson_file = 'reforestation_projects_with_ndvi.geojson'

clustered_features, total_clusters = group_and_cluster_features(geojson_file)
print(f"Total clusters created: {total_clusters}")

Total clusters created: 11065


In [7]:
import json
import numpy as np
from shapely.geometry import mapping, box, shape

class NumpyEncoder(json.JSONEncoder):
    """ Custom encoder for numpy data types """
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        elif isinstance(obj, np.floating):
            return float(obj)
        elif isinstance(obj, np.ndarray):
            return obj.tolist()
        else:
            return super(NumpyEncoder, self).default(obj)

def add_bounding_box_to_clusters(clustered_groups):
    geojson = {
        "type": "FeatureCollection",
        "features": []
    }
    for key, clusters in clustered_groups.items():
        for cluster_id, features in clusters.items():
            if cluster_id == -1:  # Skip noise if using DBSCAN
                continue
            minx, miny, maxx, maxy = float('inf'), float('inf'), float('-inf'), float('-inf')
            for feature in features:
                polygon = shape(feature['geometry'])
                minx_, miny_, maxx_, maxy_ = polygon.bounds
                minx, miny = min(minx, minx_), min(miny, miny_)
                maxx, maxy = max(maxx, maxx_), max(maxy, maxy_)
            bounding_box = box(minx, miny, maxx, maxy)
            geojson['features'].append({
                "type": "Feature",
                "properties": {"cluster_id": int(cluster_id)}, 
                "geometry": mapping(bounding_box)
            })
    return geojson


clustered_geojson = add_bounding_box_to_clusters(clustered_features)

output_file = '/home/idisc02/Forest_Monitoring/src/clustered_geojson.geojson'
with open(output_file, 'w') as f:
    json.dump(clustered_geojson, f, cls=NumpyEncoder)  

print(f"Clustered GeoJSON file created: {output_file}")

Clustered GeoJSON file created: /home/idisc02/Forest_Monitoring/src/clustered_geojson.geojson


### Intersecting

In [11]:
projects.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 772494 entries, 0 to 772493
Data columns (total 34 columns):
 #   Column                            Non-Null Count   Dtype   
---  ------                            --------------   -----   
 0   country                           767262 non-null  object  
 1   description_reported              2292 non-null    object  
 2   host_name                         772494 non-null  object  
 3   planting_date_reported            738775 non-null  object  
 4   project_id_reported               771302 non-null  object  
 5   site_id_reported                  772069 non-null  object  
 6   site_sqkm                         772494 non-null  float64 
 7   survival_rate_reported            504 non-null     float64 
 8   trees_planted_reported            9891 non-null    float64 
 9   url                               772494 non-null  object  
 10  geometry_reported                 772494 non-null  object  
 11  Top_Three_NDVI_Months          

### Descriptions_extraction

# clipping Images After download

In [None]:
import os
import shutil

src_dir = "src/clustered_geojson/at_planting"


for root, dirs, files in os.walk(src_dir):
    for file in files:
       
        if file.endswith(".tif"):
         
            file_path = os.path.join(root, file)
            
            dest_path = os.path.join(src_dir, file)
            # Move the file to src_dir
            shutil.move(file_path, dest_path)
            print(f"Moved: {file_path} -> {dest_path}")

print("All .tif images have been moved to the source directory.")

In [None]:
import geopandas as gpd
import rasterio
from rasterio.mask import mask
from shapely.geometry import box


polygons = gpd.read_file(geojson_file)

polygons = polygons.to_crs("EPSG:32647")


image_path = "src/clustered_geojson/0e2f528d-de7e-48de-b932-86ab910fe61e/PSScene/20220704_034934_65_2474_3B_Visual_clip.tif"


with rasterio.open(image_path) as src:
    raster_bounds = box(*src.bounds)
    

    for index, row in polygons.iterrows():
    
        if not row.geometry.intersects(raster_bounds):
            print(f"Polygon {row['site_id_reported']} does not intersect with the raster. Skipping.")
            continue
    
        out_image, out_transform = mask(src, [row.geometry], crop=True)
        
      
        out_meta = src.meta.copy()
        out_meta.update({
            "driver": "GTiff",
            "height": out_image.shape[1],
            "width": out_image.shape[2],
            "transform": out_transform
        })
        
       
        output_filename = f"input/clipped_images/clipped_image_{row['site_id_reported']}.tif"
        
       
        with rasterio.open(output_filename, "w", **out_meta) as dest:
            dest.write(out_image)

In [None]:
import geopandas as gpd
import rasterio
from rasterio.mask import mask
from shapely.geometry import box, Polygon
import os
import glob


polygons = gpd.read_file(geojson_file)

# Validate polygons and filter out invalid ones
polygons = polygons[polygons.geometry.is_valid]

raster_folder = "src/clustered_geojson/image_atplanting"
raster_files = glob.glob(os.path.join(raster_folder, "*.tif"))

for image_path in raster_files:
    with rasterio.open(image_path) as src:
        raster_crs = src.crs
        polygons_reprojected = polygons.to_crs(raster_crs)
        
        raster_bounds = box(*src.bounds)
        for index, row in polygons_reprojected.iterrows():
            if not row.geometry.intersects(raster_bounds):
                print(f"Polygon {row['site_id_reported']} does not intersect with the raster. Skipping.")
                continue

            if isinstance(row.geometry, Polygon):
                try:
                    out_image, out_transform = mask(src, [row.geometry], crop=True)
                except ValueError as e:
                    print(f"Error processing polygon {row['site_id_reported']}: {e}")
                    continue

                out_meta = src.meta.copy()
                out_meta.update({
                    "driver": "GTiff",
                    "height": out_image.shape[1],
                    "width": out_image.shape[2],
                    "transform": out_transform
                })

                raster_filename = os.path.basename(image_path).split('.')[0]
                output_filename = f"src/clustered_geojson/image_atplanting/clipped_image/clipped_image_{row['site_id_reported']}_{raster_filename}.tif"

                with rasterio.open(output_filename, "w", **out_meta) as dest:
                    dest.write(out_image)
            else:
                print(f"Skipping non-polygon geometry for {row['site_id_reported']}.")

### Creating Buffer 

In [None]:
import geopandas as gpd
import matplotlib.pyplot as plt

geojson_path = '/home/idisc02/Forest_Monitoring/src/df_reforestation.geojson'
gdf = gpd.read_file(geojson_path)


print(f"Original CRS: {gdf.crs}")


gdf = gdf.to_crs(epsg=3395)  

buffer_radius = 500  # Buffer with  radius in meters
gdf_buffered = gdf.copy()
gdf_buffered['geometry'] = gdf['geometry'].buffer(buffer_radius)


gdf_buffered = gdf_buffered.to_crs(gdf.crs)


output_path = '/home/idisc02/Forest_Monitoring/buffered_file.geojson'
gdf_buffered.to_file(output_path, driver='GeoJSON')

# Visualize the buffered polygons
fig, ax = plt.subplots()
gdf_buffered.plot(ax=ax, color='blue', edgecolor='black')
plt.show()

In [None]:
import geopandas as gpd
import rasterio
from rasterio.mask import mask
from shapely.geometry import box, Polygon
import os
import glob


polygons = gpd.read_file(output_path)

# Validate polygons and filter out invalid ones
polygons = polygons[polygons.geometry.is_valid]

raster_folder = "src/clustered_geojson/image_atplanting"
raster_files = glob.glob(os.path.join(raster_folder, "*.tif"))

for image_path in raster_files:
    with rasterio.open(image_path) as src:
        raster_crs = src.crs
        polygons_reprojected = polygons.to_crs(raster_crs)
        
        raster_bounds = box(*src.bounds)
        for index, row in polygons_reprojected.iterrows():
            if not row.geometry.intersects(raster_bounds):
                print(f"Polygon {row['site_id_reported']} does not intersect with the raster. Skipping.")
                continue

            if isinstance(row.geometry, Polygon):
                try:
                    out_image, out_transform = mask(src, [row.geometry], crop=True)
                except ValueError as e:
                    print(f"Error processing polygon {row['site_id_reported']}: {e}")
                    continue

                out_meta = src.meta.copy()
                out_meta.update({
                    "driver": "GTiff",
                    "height": out_image.shape[1],
                    "width": out_image.shape[2],
                    "transform": out_transform
                })

                raster_filename = os.path.basename(image_path).split('.')[0]
                output_filename = f"src/clustered_geojson/image_atplanting/clipped_image/buffer_clipped_image_{row['site_id_reported']}_{raster_filename}.tif"

                with rasterio.open(output_filename, "w", **out_meta) as dest:
                    dest.write(out_image)
            else:
                print(f"Skipping non-polygon geometry for {row['site_id_reported']}.")

In [None]:
import os
import json
from sklearn.cluster import DBSCAN
from shapely.geometry import shape
import numpy as np

def load_and_group_features(geojson_file):
    groups = {}
    with open(geojson_file, 'r') as f:
        data = json.load(f)
    features = data['features'] if data['type'] == 'FeatureCollection' else [data]
    for feature in features:
        properties = feature['properties']
        planting_date = properties.get('planting_date_reported')
        ndvi_months = json.dumps(properties.get('Top_Three_NDVI_Months', '[7]'))  
        key = (planting_date, ndvi_months)
        if key not in groups:
            groups[key] = []
        groups[key].append(feature)
    return groups

def calculate_centroids(features):
    centroids = []
    for feature in features:
        polygon = shape(feature['geometry'])
        centroids.append(list(polygon.centroid.coords)[0])
    return centroids

def cluster_features(features, eps=0.1, min_samples=2):
    centroids = calculate_centroids(features)
    db = DBSCAN(eps=eps, min_samples=min_samples, metric='haversine').fit(np.radians(centroids))
    num_clusters = len(set(db.labels_)) - (1 if -1 in db.labels_ else 0)
    clusters = {}
    for label, feature in zip(db.labels_, features):
        if label not in clusters:
            clusters[label] = []
        clusters[label].append(feature)
    return clusters, num_clusters

def group_and_cluster_features(geojson_file):
    grouped_features = load_and_group_features(geojson_file)
    clustered_groups = {}
    total_clusters = 0
    for key, features in grouped_features.items():
        clustered, num_clusters = cluster_features(features)
        clustered_groups[key] = clustered
        total_clusters += num_clusters
    return clustered_groups, total_clusters


geojson_file = '/home/idisc02/Forest_Monitoring/src/df_reforestation.geojson'

clustered_features, total_clusters = group_and_cluster_features(geojson_file)
print(f"Total clusters created: {total_clusters}")