In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from geemap import ml
from sklearn.ensemble import RandomForestClassifier

import geemap, ee
import geopandas as gpd

from geopandas.geoseries import *
from shapely.geometry import *


import glob
import os
import warnings
warnings.filterwarnings("ignore")
from matplotlib.pyplot import Line2D
import gc

from datetime import datetime



In [4]:
# Try to initialize Earth Engine with whatever credentials are already
# available; if that fails for any reason, run the authentication flow once
# and initialize again.
try:
    ee.Initialize()
except Exception:
    ee.Authenticate()
    ee.Initialize()

In [20]:

def mappy(x):
    """Replace every '#' in a string with a newline character.

    Args
    - x: value wrapped in ee.String
    Returns
    - ee.String with all '#' occurrences replaced (the 'g' flag makes the
      replacement global rather than first-match only)
    """
    as_string = ee.String(x)
    return as_string.replace('#', '\n', 'g')


def decode_qamask(img: ee.Image) -> ee.Image:
    '''
    Derive clear-pixel masks from the 'QA60' band of an image.

    Args
    - img: ee.Image, Sentinel 2 image containing the 'QA60' band
    Returns
    - masks: ee.Image with two self-masked bands
        'pxqa_cloud'  : 1 where QA60 bit 10 is unset, masked elsewhere
        'pxqa_cirrus' : 1 where QA60 bit 11 is unset, masked elsewhere

    QA60 bit flags read here
    Bit  Attribute
    10   Cloud
    11   Cirrus
    '''
    CLOUD_BIT = 1 << 10    # 1024
    CIRRUS_BIT = 1 << 11   # 2048

    qa60 = img.select('QA60')

    def _clear_band(bit_value, band_name):
        # eq(0) yields 1 where the flag bit is NOT set and 0 where it is set;
        # updateMask with itself then masks out the flagged (0) pixels.
        not_flagged = qa60.bitwiseAnd(bit_value).eq(0)
        return not_flagged.updateMask(not_flagged).rename([band_name])

    return ee.Image.cat([
        _clear_band(CLOUD_BIT, 'pxqa_cloud'),
        _clear_band(CIRRUS_BIT, 'pxqa_cirrus'),
    ])


def mask_qaclear(img: ee.Image) -> ee.Image:
    '''
    Mask out cloud and cirrus pixels and divide the image by 10000.

    Args
    - img: ee.Image with the band required by decode_qamask
    Returns
    - img: ee.Image, input image with cloud and cirrus pixels masked out
        and every band divided by 10000
    '''
    clear_masks = decode_qamask(img)
    without_cloud = img.updateMask(clear_masks.select('pxqa_cloud'))
    without_cirrus = without_cloud.updateMask(clear_masks.select('pxqa_cirrus'))
    # The division is applied to the whole image, i.e. to all of its bands.
    return without_cirrus.divide(10000)

def getS1_collection(START_DATE, END_DATE):
    """
    Return the Sentinel-1 GRD collection restricted to IW-mode scenes that
    list 'VV' among their polarisations, within the given date range.

    Args
    - START_DATE, END_DATE: dates passed straight to filterDate
    Returns
    - ee.ImageCollection (no spatial filter is applied here)
    """
    # NOTE(review): only 'VV' is required here although s1s2_image later
    # selects a second band as well - confirm every scene carries it.
    lists_vv = ee.Filter.listContains('transmitterReceiverPolarisation', 'VV')
    is_iw = ee.Filter.eq('instrumentMode', 'IW')

    scenes = ee.ImageCollection('COPERNICUS/S1_GRD')
    scenes = scenes.filter(lists_vv)
    scenes = scenes.filter(is_iw)
    return scenes.filterDate(START_DATE, END_DATE)

def getS2_collection(START_DATE, END_DATE, cloud_pct = 10):
    """
    Return the harmonized Sentinel-2 surface-reflectance collection for a date
    range, keeping only scenes below a cloudiness threshold and masking each
    scene with mask_qaclear.

    Args
    - START_DATE, END_DATE: dates passed straight to filterDate
    - cloud_pct: scenes are kept when CLOUDY_PIXEL_PERCENTAGE < cloud_pct
    Returns
    - ee.ImageCollection of masked scenes (no spatial filter is applied here)
    """
    below_cloud_limit = ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', cloud_pct)

    scenes = ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
    scenes = scenes.filterDate(START_DATE, END_DATE)
    scenes = scenes.filter(below_cloud_limit)
    return scenes.map(mask_qaclear)

def addBands(expr, band_map, name):
    """
    Evaluate a band-math expression and return it as a single named band.

    Despite its name, this helper does not append anything to an existing
    image: it evaluates `expr` against an empty ee.Image and returns the
    renamed result.

    Args
    - expr: str, expression understood by ee.Image.expression
    - band_map: dict mapping the variable names used in `expr` to ee.Image
        operands
    - name: band name given to the result
    Returns
    - ee.Image holding the evaluated expression, renamed to `name`
    """
    # The Python client takes the expression string and the variable map as
    # two positional arguments. The previous code passed one JS-style
    # {'expression': ..., 'map': ...} dict, which the client treats as the
    # expression itself, so the band map was never supplied.
    return ee.Image().expression(expr, band_map).rename(name)

def s1s2_image(START_DATE, END_DATE, dw_geometry=None):
    """
    Build the predictor stack for one time window: Sentinel-2 median bands,
    Sentinel-1 median bands, three spectral indices and the Dynamic World
    bands.

    Args
    - START_DATE, END_DATE: dates handed to the collection date filters
    - dw_geometry: optional geometry used to spatially filter the Dynamic
        World collection. When omitted, the point (84.37454, 25.05473) that
        used to be hard-coded is used, so existing callers behave as before.
    Returns
    - ee.Image with bands B2, B3, B4, B8, B8A, B11, B12, VV, VH, mTGSI, BSI,
        NDWI followed by every Dynamic World band except 'label'

    Relies on the module-level names `s1_band` and `s1_band2` for the
    Sentinel-1 bands to stack.
    """
    if dw_geometry is None:
        dw_geometry = ee.Geometry.Point(84.37454, 25.05473)

    # Median composites over the time window.
    s1_image = ee.Image(getS1_collection(START_DATE, END_DATE).median())
    s2 = getS2_collection(START_DATE, END_DATE).median()

    blue = s2.select('B2')
    green = s2.select('B3')
    red = s2.select('B4')
    nir = s2.select('B8')
    swir1 = s2.select('B11')
    swir2 = s2.select('B12')

    #### Spectral indices derived from the Sentinel-2 composite
    mTGSI = s2.expression(
        '(R - B + SWIR2 - NIR) / (R + G + B + SWIR2 + NIR)',
        {'R': red, 'G': green, 'B': blue, 'NIR': nir, 'SWIR2': swir2},
    ).rename('mTGSI')

    BSI = s2.expression(
        '((RED + SWIR1) - (NIR + BLUE)) / ((RED + SWIR1) + (NIR + BLUE))',
        {'NIR': nir, 'RED': red, 'SWIR1': swir1, 'BLUE': blue},
    ).rename('BSI')

    NDWI = s2.expression(
        '(GREEN - NIR) / (GREEN + NIR)',
        {'NIR': nir, 'GREEN': green},
    ).rename('NDWI')

    #### Dynamic World: median of all images intersecting dw_geometry in the
    #### time window, without the 'label' band.
    dw_filter = ee.Filter.And(
        ee.Filter.bounds(dw_geometry),
        ee.Filter.date(START_DATE, END_DATE),
    )
    DW = ee.ImageCollection('GOOGLE/DYNAMICWORLD/V1').filter(dw_filter).median()
    # ['water', 'trees', 'grass', 'flooded_vegetation', 'crops', 'shrub_and_scrub', 'built', 'bare', 'snow_and_ice']
    DW = DW.select(DW.bandNames().remove('label'))

    #### All bands together
    sentinelComp = (
        s2.addBands(s1_image.select([s1_band, s1_band2]))
          .addBands(mTGSI)
          .addBands(BSI)
          .addBands(NDWI)
    )

    outBands = ["B2","B3","B4","B8","B8A","B11","B12","VV","VH","mTGSI","BSI","NDWI"]

    return sentinelComp.select(outBands).addBands(DW)


def get_sgr_feat(ee_img, distid):
    """
    Sum the pixels equal to 1 ('sand') and to 2 ('gravel') of a classified
    image over the river polygons of one district.

    Args
    - ee_img: ee.Image whose pixel values are compared against 1 and 2
    - distid: district identifier; selects the GeoJSON file that is read and
        is stored on each output feature
    Returns
    - ee.FeatureCollection with two geometry-less features (sand first, then
        gravel), each carrying the reduceRegion result plus 'distid' and
        'class' properties
    """
    district_rivers = gpd.read_file(f"/data/sand_mining/rivers/districts/india-rivers_multipolygons_{distid}.geojson")
    region = geemap.geopandas_to_ee(district_rivers)

    features = []
    for class_value, class_name in ((1, 'sand'), (2, 'gravel')):
        # Summing a 0/1 equality mask gives the reducer-weighted pixel count
        # of that class inside the region.
        pixel_sum = ee_img.eq(class_value).reduceRegion(
            reducer=ee.Reducer.sum(),
            geometry=region,
            scale=10,
            tileScale=3,
        )
        feature = ee.Feature(None, pixel_sum)
        feature = feature.set('distid', distid)
        feature = feature.set('class', class_name)
        features.append(feature)

    return ee.FeatureCollection(features)


def get_final_classification(image, model):
    """
    Segment an image with SNIC and classify the resulting bands.

    Args
    - image: ee.Image, multi-band predictor stack
    - model: classifier passed to classify()
    Returns
    - ee.Image returned by classify(model) on the renamed SNIC bands
    """
    SEED_SPACING = 10  # value handed to seedGrid

    #### SNIC segmentation
    seed_grid = ee.Algorithms.Image.Segmentation.seedGrid(SEED_SPACING)
    segmented = ee.Algorithms.Image.Segmentation.SNIC(
        image=image,            # multi-band image, same bands as pixel-based
        compactness=0.8,        # allow flexibility in object shape
        connectivity=8,         # use all 8 neighbouring pixels
        neighborhoodSize=256,
        seeds=seed_grid,
    )

    #### Classification
    # Every SNIC output band except "clusters" is renamed, in order, to
    # '<name>_median'. Presumably these are the feature names the model was
    # trained with - confirm against the training notebook.
    source_bands = segmented.bandNames().remove("clusters")
    predictor_stems = [
        'B2', 'B3', 'B4', 'B8', 'B8A', 'B11', 'B12',
        'VV', 'VH', 'mTGSI', 'BSI', 'NDWI',
        'water', 'trees', 'grass', 'flooded_vegetation', 'crops',
        'shrub_and_scrub', 'built', 'bare', 'snow_and_ice',
    ]
    target_bands = [f'{stem}_median' for stem in predictor_stems]

    return segmented.select(source_bands, target_bands).classify(model)


def calculateMean(image):
    """
    Attach the regional mean of the 'classification' band to an image.

    The region is read from the module-level name CURRENT_GEOMETRY, which
    must be assigned before this function is called or mapped.

    Args
    - image: ee.Image with a 'classification' band
    Returns
    - the same image with an extra 'mean_value' property
    """
    region_stats = image.reduceRegion(
        reducer=ee.Reducer.mean(),
        geometry=CURRENT_GEOMETRY,
        scale=10,          # Adjust the scale according to your needs
        maxPixels=1e13,
        tileScale=3,
    )
    band_mean = region_stats.get('classification')  # Replace with the actual band name
    return image.set('mean_value', band_mean)


def get_class_coll(START_YEAR, END_YEAR, clip_geom, class_val = 1):
    """
    Build one binary class mask per calendar month and return them as an
    image collection.

    For every month from January of START_YEAR to December of END_YEAR the
    predictor stack from s1s2_image is built, clipped to `clip_geom`,
    classified with the module-level classifier `RF`, and compared against
    `class_val`. Months whose stack does not have exactly 21 bands are
    skipped (21 is the number of predictor names listed in
    get_final_classification).

    Args
    - START_YEAR, END_YEAR: first and last year (inclusive)
    - clip_geom: geometry the monthly stack is clipped to
    - class_val: class value to turn into a 0/1 mask (default 1)
    Returns
    - ee.ImageCollection of masks, each tagged with a 'ym' property
        formatted as 'YYYY-MM'

    Note: one blocking getInfo() round trip is made per month to read the
    band count.
    """
    EXPECTED_BANDS = 21

    print("Started Coll", datetime.now())

    monthly_masks = []
    month_starts = pd.date_range(f'{START_YEAR}-01-01', f'{END_YEAR}-12-31', freq='MS')
    for start in month_starts:
        # Earth Engine date filters exclude the end date, so the window must
        # end on the first day of the NEXT month. Ending on the month's last
        # day (as before) silently dropped that day's images.
        end = start + pd.offsets.MonthBegin(1)

        img = s1s2_image(str(start.date()), str(end.date()))
        band_count = img.bandNames().length().getInfo()
        if band_count != EXPECTED_BANDS:
            continue

        img_class = get_final_classification(img.clip(clip_geom), RF)
        mask = img_class.eq(class_val).set('ym', start.strftime("%Y-%m"))
        monthly_masks.append(ee.Image(mask))

    img_coll = ee.ImageCollection.fromImages(monthly_masks)
    print("Completed", datetime.now())

    return img_coll


def convert_img_coll_to_df(imageCollection):
    """
    Pull the 'mean_value' property of every element of a collection into a
    one-column pandas DataFrame.

    Args
    - imageCollection: object exposing aggregate_array(name).getInfo()
    Returns
    - pd.DataFrame with a single 'mean_value' column, one row per value
        returned by getInfo()
    """
    column = 'mean_value'
    # getInfo() fetches the aggregated values to the client as a list.
    values = imageCollection.aggregate_array(column).getInfo()
    return pd.DataFrame({column: values})


def get_buffer(poly, buffer_m = 1000, tolerance_m = 250):
    """
    Simplify and buffer geometries in an estimated UTM projection, clip the
    result to a bounding rectangle and return it in the input CRS.

    Args
    - poly: geopandas object with a CRS set
    - buffer_m: buffer distance, in units of the estimated UTM CRS
    - tolerance_m: simplification tolerance, in the same units
    Returns
    - the buffered geometries reprojected to poly.crs, with the index reset
        and the column named 0 renamed to 'geometry'
    """
    projected = poly.to_crs(poly.estimate_utm_crs())

    # NOTE(review): only the bounds of the FIRST row are used for the clip
    # rectangle - confirm `poly` always holds a single row.
    west, south, east, north = projected.bounds.values[0]
    corners = [(west, north), (east, north), (east, south), (west, south)]
    clip_rect = gpd.GeoDataFrame(geometry = [Polygon(corners)])

    simplified = projected.simplify(tolerance_m)
    buffered = simplified.buffer(buffer_m)
    # The buffer is cut back to the rectangle computed before buffering.
    clipped = buffered.clip(clip_rect)

    result = clipped.to_crs(poly.crs).reset_index()
    return result.rename(columns = {0:'geometry'})

In [6]:
#### Load Models

# Table asset holding the classifier's decision trees, one per feature in the
# 'tree' property (presumably exported from scikit-learn via geemap.ml -
# confirm against the training notebook).
fc = ee.FeatureCollection("projects/gee-sand/assets/RF_sklearn_seg10_n1214_d50_msl1_mss2_mf-None_bTrue")
# mappy turns every '#' in each stored tree string back into a newline.
tree_strings = fc.aggregate_array('tree').map(mappy)
# Classifier built from the tree strings; read as a global by get_class_coll.
RF = ee.Classifier.decisionTreeEnsemble(tree_strings)

In [8]:
##### Define Globals

# Sentinel-1 band names. s1_band and s1_band2 are read as globals by
# s1s2_image when it stacks the radar bands onto the Sentinel-2 composite.
s1_band = 'VV'
s1_band2 = 'VH'
# NOTE(review): s1_band3 is not referenced in any visible cell - confirm it
# is still needed.
s1_band3 = 'angle'


#### Note on river and district shapefiles
# India district boundaries: /data/sand_mining/shapefiles
# River polygons clipped to district boundaries: /data/sand_mining/rivers/districts

In [9]:
# River segments; each row gets a 'pair_id' of the form '<osm_id>-<seg_id>'
# that later cells use to select one segment at a time.
segments = gpd.read_file("/data/sand_mining/rivers/segments_merged.geojson")
# Row-wise apply: formats the two id columns of each row into one string.
segments['pair_id'] = segments.apply(lambda x: f"{x['osm_id']}-{x['seg_id']}", axis = 1)

In [16]:
# Distinct pair ids, in order of first appearance; iterated by the export loop.
pair_ids = segments['pair_id'].unique()

In [None]:

# For each river segment: buffer it, build monthly sand and gravel masks for
# 2018-2022, attach the regional mean of each mask, and start two Drive
# exports (one CSV per class) with the 'mean_value' and 'ym' columns.
# NOTE(review): the slice [1:] skips the first pair id - the reason is not
# visible in this notebook; confirm it was processed elsewhere.
for pair in pair_ids[1:]:
    print(pair)
    seg = segments[segments['pair_id'] == pair]
    buffer_seg = get_buffer(seg)
    
    # Convert the buffered segment to an Earth Engine object.
    ee_fc = geemap.geopandas_to_ee(buffer_seg)
    # calculateMean reads this module-level name, so it must be reassigned
    # before the .map(calculateMean) calls below.
    CURRENT_GEOMETRY = ee_fc

    # Monthly masks: class 1 (default class_val) for sand, class 2 for gravel.
    img_coll_sand = get_class_coll(2018, 2022, CURRENT_GEOMETRY)
    img_coll_gravel = get_class_coll(2018, 2022, CURRENT_GEOMETRY, class_val = 2)

    # Attach the mean over CURRENT_GEOMETRY to every monthly mask.
    img_coll_fc_sand = img_coll_sand.map(calculateMean)
    img_coll_fc_gravel = img_coll_gravel.map(calculateMean)


    # Export the per-month properties as CSV to the 'sgr_sand' Drive folder.
    task1 = ee.batch.Export.table.toDrive(
              collection= ee.FeatureCollection(img_coll_fc_sand),
              description= f'{pair}_sand',
              fileFormat='CSV', 
              folder = 'sgr_sand',
            selectors = ['mean_value', 'ym'])

    # Same export for gravel, to the 'sgr_gravel' Drive folder.
    task2 = ee.batch.Export.table.toDrive(
              collection= ee.FeatureCollection(img_coll_fc_gravel),
              description= f'{pair}_gravel',
              fileFormat='CSV', 
              folder = 'sgr_gravel',
            selectors = ['mean_value', 'ym']
            )

    # Both tasks are only started after both have been created.
    task1.start()
    task2.start()

1993017-7920
Started Coll 2023-12-16 20:30:40.419851
Completed 2023-12-16 20:31:06.232527
Started Coll 2023-12-16 20:31:06.232748
Completed 2023-12-16 20:31:30.837514
3031475-7920
Started Coll 2023-12-16 20:31:33.845574
Completed 2023-12-16 20:31:58.558055
Started Coll 2023-12-16 20:31:58.558145
Completed 2023-12-16 20:32:23.206345
9050989-7920
Started Coll 2023-12-16 20:32:26.323277
Completed 2023-12-16 20:32:50.485382
Started Coll 2023-12-16 20:32:50.485495


Started Sand Coll 2023-12-16 20:10:22.479364
Completed 2023-12-16 20:10:48.578896
Started Sand Coll 2023-12-16 20:10:48.579236
Completed 2023-12-16 20:11:14.906866


In [14]:
# NOTE(review): img_coll_fc_sand is whatever the export loop last assigned,
# so this frame describes only the most recently processed segment.
df = convert_img_coll_to_df(img_coll_fc_sand)

In [64]:
# NOTE(review): no visible cell assigns `img_coll` at top level (it is only a
# local inside get_class_coll), so this line depends on stale kernel state and
# would fail on a fresh Restart & Run All - confirm or remove.
tmp1 = img_coll.first()

In [2]:
# NOTE(review): leftover scratch cell. Its execution count (In [2]) precedes
# the import cell (In [3]), which is why the recorded output is a NameError.
ee

NameError: name 'ee' is not defined