In [65]:
import numpy as np
import pandas as pd
import geopandas as gpd
from sklearn.cluster import DBSCAN
from tqdm import tqdm
import matplotlib.pyplot as plt

Reading Reforestation Projects file

In [66]:
projects = gpd.read_file('/home/idisc02/Forest_Monitoring/src/df_reforestation.geojson')

Transform to projected CRS

In [69]:
projects= projects.to_crs('EPSG:3857')
gdf=projects
gdf["geometry"]

0        POLYGON ((-8823668.750 -480873.564, -8823652.3...
1        POLYGON ((-8825786.322 -480927.326, -8825779.3...
2        POLYGON ((-64340.412 4971882.613, -64335.445 4...
3        POLYGON ((-64449.123 4968025.475, -64418.843 4...
4        POLYGON ((-9707836.906 1737846.257, -9704014.7...
                               ...                        
39532    POLYGON ((4452114.020 34114.710, 4452149.265 3...
39533    POLYGON ((4451668.266 33510.102, 4451661.317 3...
39534    POLYGON ((4452297.685 34004.013, 4452353.274 3...
39535    POLYGON ((4452264.917 34492.957, 4452311.580 3...
39536    MULTIPOLYGON (((4451896.604 34624.010, 4451814...
Name: geometry, Length: 39537, dtype: geometry

In [79]:
import os
import json
from sklearn.cluster import DBSCAN
from shapely.geometry import shape
import numpy as np

def load_and_group_features(geojson_file):
    groups = {}
    with open(geojson_file, 'r') as f:
        data = json.load(f)
    features = data['features'] if data['type'] == 'FeatureCollection' else [data]
    for feature in features:
        properties = feature['properties']
        planting_date = properties.get('planting_date_reported')
        ndvi_months = json.dumps(properties.get('Top_Three_NDVI_Months', '[7]'))  
        key = (planting_date, ndvi_months)
        if key not in groups:
            groups[key] = []
        groups[key].append(feature)
    return groups

def calculate_centroids(features):
    centroids = []
    for feature in features:
        polygon = shape(feature['geometry'])
        centroids.append(list(polygon.centroid.coords)[0])
    return centroids

def cluster_features(features, eps=0.1, min_samples=2):
    centroids = calculate_centroids(features)
    db = DBSCAN(eps=eps, min_samples=min_samples, metric='haversine').fit(np.radians(centroids))
    num_clusters = len(set(db.labels_)) - (1 if -1 in db.labels_ else 0)
    clusters = {}
    for label, feature in zip(db.labels_, features):
        if label not in clusters:
            clusters[label] = []
        clusters[label].append(feature)
    return clusters, num_clusters

def group_and_cluster_features(geojson_file):
    grouped_features = load_and_group_features(geojson_file)
    clustered_groups = {}
    total_clusters = 0
    for key, features in grouped_features.items():
        clustered, num_clusters = cluster_features(features)
        clustered_groups[key] = clustered
        total_clusters += num_clusters
    return clustered_groups, total_clusters


geojson_file = '/home/idisc02/Forest_Monitoring/src/df_reforestation.geojson'

clustered_features, total_clusters = group_and_cluster_features(geojson_file)
print(f"Total clusters created: {total_clusters}")

Total clusters created: 2319


In [80]:
import json
import numpy as np
from shapely.geometry import mapping, box, shape

class NumpyEncoder(json.JSONEncoder):
    """ Custom encoder for numpy data types """
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        elif isinstance(obj, np.floating):
            return float(obj)
        elif isinstance(obj, np.ndarray):
            return obj.tolist()
        else:
            return super(NumpyEncoder, self).default(obj)

def add_bounding_box_to_clusters(clustered_groups):
    geojson = {
        "type": "FeatureCollection",
        "features": []
    }
    for key, clusters in clustered_groups.items():
        for cluster_id, features in clusters.items():
            if cluster_id == -1:  # Skip noise if using DBSCAN
                continue
            minx, miny, maxx, maxy = float('inf'), float('inf'), float('-inf'), float('-inf')
            for feature in features:
                polygon = shape(feature['geometry'])
                minx_, miny_, maxx_, maxy_ = polygon.bounds
                minx, miny = min(minx, minx_), min(miny, miny_)
                maxx, maxy = max(maxx, maxx_), max(maxy, maxy_)
            bounding_box = box(minx, miny, maxx, maxy)
            geojson['features'].append({
                "type": "Feature",
                "properties": {"cluster_id": int(cluster_id)}, 
                "geometry": mapping(bounding_box)
            })
    return geojson


clustered_geojson = add_bounding_box_to_clusters(clustered_features)

output_file = '/home/idisc02/Forest_Monitoring/src/clustered_geojson.gpkg'
with open(output_file, 'w') as f:
    json.dump(clustered_geojson, f, cls=NumpyEncoder)  

print(f"Clustered GeoJSON file created: {output_file}")

Clustered GeoJSON file created: /home/idisc02/Forest_Monitoring/src/clustered_geojson.gpkg
