In [None]:
import pandas as pd
import numpy as np
import ee
import time
import geemap
import os
from google.colab import drive

# --- 1. Mount Drive ---


# --- 2. CONFIGURATION ---
# CSV of crop field centroids. prepare_gee_points() reads the columns
# 'lon', 'lat', 'location_id' and 'DISTRICT' (a lowercase 'district'
# column is renamed to 'DISTRICT' on load).
INPUT_ACCURATE_DISTRICTS = '/content/drive/MyDrive/projk/fields_with_districts_cleaned.csv'

# Google Drive folder passed as `folder=` to the table export. One CSV is
# written per monthly step (12 for the date range configured below).
OUTPUT_DRIVE_FOLDER = 'crop' # GEE Folder name

# Prefix of every export task description and output file name; the month
# number is appended as '_month_MM'.
TASK_NAME_PREFIX = 'crop_field_time_series_v2'

# Overall extraction window. execute_gee_extraction() splits it into
# consecutive month-start pairs, so END_DATE closes the last monthly step.
START_DATE = '2020-06-01'
END_DATE = '2021-06-01'

# --- 3. GEE HELPER FUNCTIONS ---

def prepare_gee_points(file_path):
    """Build a GEE FeatureCollection of field centroids from a local CSV.

    The CSV at ``file_path`` is read with pandas, a lowercase ``district``
    column (if present) is renamed to ``DISTRICT``, and rows missing ``lon``,
    ``lat`` or ``location_id`` are discarded. Each remaining row becomes a
    point feature carrying ``location_id`` (cast to int), ``DISTRICT``,
    ``lon`` and ``lat`` as properties.

    Args:
        file_path: Path of the CSV file to load.

    Returns:
        An ``ee.FeatureCollection`` with one point feature per kept row.
    """
    print("Loading local CROP field centroids and preparing for GEE...")

    table = pd.read_csv(file_path).rename(columns={'district': 'DISTRICT'})
    table = table.dropna(subset=['lon', 'lat', 'location_id'])

    def to_feature(record):
        # The point is built from [lon, lat], in that order.
        location = ee.Geometry.Point([record['lon'], record['lat']])
        attributes = {
            'location_id': int(record['location_id']),
            'DISTRICT': record['DISTRICT'],
            'lon': record['lon'],
            'lat': record['lat'],
        }
        return ee.Feature(location, attributes)

    return ee.FeatureCollection([to_feature(record) for _, record in table.iterrows()])

# ***
# *** UPDATED FUNCTION (1 of 2) ***
# ***
def calculate_indices(image):
    """Append NDVI, NDWI and BSI bands to a Sentinel-2 image.

    Band arithmetic used (B2 blue, B3 green, B4 red, B8 NIR, B11 SWIR):

    * NDVI = (B8 - B4) / (B8 + B4)
    * NDWI = (B3 - B11) / (B3 + B11)
    * BSI  = ((B11 + B4) - (B8 + B2)) / ((B11 + B4) + (B8 + B2))

    Args:
        image: Image exposing the bands B2, B3, B4, B8 and B11.

    Returns:
        The input image with the bands 'NDVI', 'NDWI' and 'BSI' added.
    """
    # The vegetation and water indices are plain normalized differences.
    vegetation = image.normalizedDifference(['B8', 'B4']).rename('NDVI')
    water = image.normalizedDifference(['B3', 'B11']).rename('NDWI')

    # The bare-soil index shares its two sums between numerator and denominator.
    swir_plus_red = image.select('B11').add(image.select('B4'))
    nir_plus_blue = image.select('B8').add(image.select('B2'))
    bare_soil = (
        swir_plus_red.subtract(nir_plus_blue)
        .divide(swir_plus_red.add(nir_plus_blue))
        .rename('BSI')
        .toFloat()  # final cast of the ratio band to float
    )

    return image.addBands(vegetation).addBands(water).addBands(bare_soil)

# ***
# *** UPDATED FUNCTION (2 of 2) ***
# ***
def _masked_placeholder(band_names):
    """Return a fully masked float image with one band per entry of ``band_names``."""
    # ee.Image.constant(0) has a single band, so renaming it to several names
    # is invalid. Build one constant band per requested name, then mask it all.
    zeros = ee.Image.constant([0] * len(band_names)).toFloat()
    return zeros.rename(band_names).updateMask(ee.Image.constant(0))


def process_time_step(date_start, date_end, points):
    """Composite Sentinel-2 and Sentinel-1 for one period and sample it at the points.

    For the window ``date_start``..``date_end`` the function builds a median
    Sentinel-2 composite (raw bands plus NDVI/NDWI/BSI from
    ``calculate_indices``) and a median Sentinel-1 IW composite (VV and VH),
    stacks them, and reduces the stack over every feature in ``points``.
    When a collection is empty for the window, a fully masked placeholder
    with the same band names is used instead.

    Args:
        date_start: Start of the window, e.g. '2020-06-01'. Its month number
            becomes the two-digit suffix of every output column.
        date_end: End of the window, passed to ``filterDate`` together with
            ``date_start``.
        points: ``ee.FeatureCollection`` of sample points; used both as the
            spatial filter and as the regions to reduce over.

    Returns:
        An ``ee.FeatureCollection`` without geometry whose features carry the
        renamed band means (``S2_<band>_MM`` / ``S1_<band>_MM``) plus the
        properties 'location_id', 'DISTRICT', 'lon' and 'lat' copied from
        the input points.
    """
    month_index = pd.to_datetime(date_start).month

    # B2 and B3 are included because the NDWI/BSI indices are derived from them.
    S2_BANDS = ['B2', 'B3', 'B4', 'B8', 'B11', 'NDVI', 'NDWI', 'BSI']
    S1_BANDS = ['VV', 'VH']

    # --- S2 ---
    s2_collection = (
        ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
        .filterDate(date_start, date_end)
        .filterBounds(points)
        .map(calculate_indices)
        .select(S2_BANDS)
    )
    s2_median = ee.Image(ee.Algorithms.If(
        s2_collection.size().gt(0),
        s2_collection.median(),
        _masked_placeholder(S2_BANDS),
    ))

    # --- S1 ---
    # Keep only IW scenes that carry both polarisations.
    s1_collection = (
        ee.ImageCollection('COPERNICUS/S1_GRD')
        .filterDate(date_start, date_end)
        .filter(ee.Filter.listContains('transmitterReceiverPolarisation', 'VV'))
        .filter(ee.Filter.listContains('transmitterReceiverPolarisation', 'VH'))
        .filter(ee.Filter.eq('instrumentMode', 'IW'))
        .filterBounds(points)
        .select(S1_BANDS)
    )
    s1_median = ee.Image(ee.Algorithms.If(
        s1_collection.size().gt(0),
        s1_collection.median(),
        _masked_placeholder(S1_BANDS),
    ))

    # --- Combine S1 + S2 ---
    composite = s2_median.addBands(s1_median)

    extracted_features = composite.reduceRegions(
        collection=points,
        reducer=ee.Reducer.mean(),
        scale=10
    )

    # Monthly column names, e.g. S2_NDVI_06 or S1_VV_06.
    band_names = S2_BANDS + S1_BANDS
    new_names = [
        f"{'S1' if band in S1_BANDS else 'S2'}_{band}_{month_index:02d}"
        for band in band_names
    ]

    def rename_features(feature):
        # select() keeps only the band means (renamed) and drops the geometry,
        # so the identifying properties are copied back from the source feature.
        renamed_feature = feature.select(band_names, new_names, retainGeometry=False)
        return renamed_feature.copyProperties(feature, ['location_id', 'DISTRICT', 'lon', 'lat', 'system:index'])

    return extracted_features.map(rename_features)

# --- 4. EXECUTION ---

def execute_gee_extraction():
    """Initialise GEE and submit one Drive table export per monthly step.

    The window START_DATE..END_DATE is split into consecutive month-start
    pairs. For each pair the features are computed by ``process_time_step``
    and exported as CSV to OUTPUT_DRIVE_FOLDER under the name
    ``<TASK_NAME_PREFIX>_month_MM``. A step that fails to schedule is logged
    and skipped; the closing banner reports whether every step was submitted.

    Returns:
        None. Progress and errors are reported on stdout only.
    """
    try:
        ee.Initialize(project="moonlit-state-475112-b5")
    except Exception:
        # Any initialisation failure is treated as "credentials missing":
        # authenticate interactively, then retry once.
        print("Authenticating GEE...")
        ee.Authenticate()
        ee.Initialize(project="moonlit-state-475112-b5")

    print("GEE Initialized.")

    points_collection = prepare_gee_points(INPUT_ACCURATE_DISTRICTS)

    month_starts = pd.date_range(start=START_DATE, end=END_DATE, freq='MS', inclusive='both')
    time_steps = list(zip(month_starts[:-1], month_starts[1:]))

    print("\n--- 1/3: Starting GEE Time-Series (v2) Feature Calculation for CROP ---")

    if not time_steps:
        # Fewer than two month starts fall inside the window: nothing to export.
        print(f"  WARNING: no monthly steps between {START_DATE} and {END_DATE}; no tasks submitted.")
        return

    failed_steps = []
    for start, end in time_steps:
        date_start_str = start.strftime('%Y-%m-%d')
        date_end_str = end.strftime('%Y-%m-%d')
        print(f"  Calculating features for: {date_start_str} to {date_end_str}")

        try:
            monthly_collection = process_time_step(date_start_str, date_end_str, points_collection)

            # `start` is already a Timestamp, so its month can be read directly.
            export_name = f'{TASK_NAME_PREFIX}_month_{start.month:02d}'

            task = ee.batch.Export.table.toDrive(
                collection=monthly_collection,
                description=export_name,
                folder=OUTPUT_DRIVE_FOLDER,
                fileNamePrefix=export_name,
                fileFormat='CSV'
            )
            task.start()
            print(f"  Task {export_name} scheduled successfully.")

        except Exception as e:
            # Best effort: one failing month must not block the remaining ones,
            # but it is remembered so the final banner does not claim success.
            failed_steps.append(date_start_str)
            print(f"  CRITICAL ERROR SCHEDULING TASK for {date_start_str}: {e}. Skipping this month.")

    print("\n---------------------------------------------------------")
    if failed_steps:
        submitted = len(time_steps) - len(failed_steps)
        print(
            f"WARNING: only {submitted} of {len(time_steps)} 'CROP v2' tasks submitted. "
            f"Failed steps: {', '.join(failed_steps)}"
        )
    else:
        print("SUCCESS: All 'CROP v2' tasks submitted.")
    print("---------------------------------------------------------")

# --- ACTION 1 RUN ---
# Runs as soon as the cell executes: initialises GEE and submits the monthly
# export tasks defined above.
execute_gee_extraction()

Authenticating GEE...
GEE Initialized.
Loading local CROP field centroids and preparing for GEE...

--- 1/3: Starting GEE Time-Series (v2) Feature Calculation for CROP ---
  Calculating features for: 2020-06-01 to 2020-07-01
  Task crop_field_time_series_v2_month_06 scheduled successfully.
  Calculating features for: 2020-07-01 to 2020-08-01
  Task crop_field_time_series_v2_month_07 scheduled successfully.
  Calculating features for: 2020-08-01 to 2020-09-01
  Task crop_field_time_series_v2_month_08 scheduled successfully.
  Calculating features for: 2020-09-01 to 2020-10-01
  Task crop_field_time_series_v2_month_09 scheduled successfully.
  Calculating features for: 2020-10-01 to 2020-11-01
  Task crop_field_time_series_v2_month_10 scheduled successfully.
  Calculating features for: 2020-11-01 to 2020-12-01
  Task crop_field_time_series_v2_month_11 scheduled successfully.
  Calculating features for: 2020-12-01 to 2021-01-01
  Task crop_field_time_series_v2_month_12 scheduled successfu

In [None]:
import pandas as pd
import numpy as np
import ee
import time
import geemap
import os
from google.colab import drive


# --- 2. CONFIGURATION ---
# CSV of crop field centroids. prepare_gee_points() reads the columns
# 'lon', 'lat', 'location_id' and 'DISTRICT' (a lowercase 'district'
# column is renamed to 'DISTRICT' on load).
INPUT_ACCURATE_DISTRICTS = '/content/drive/MyDrive/projk/fields_with_districts_cleaned.csv'


# Google Drive folder passed as `folder=` to the table export. This cell
# submits a single export (one month), so one CSV is written.
OUTPUT_DRIVE_FOLDER = 'crop' # GEE Folder name

# Prefix of the export task description and output file name; the month
# number is appended as '_month_MM'.
TASK_NAME_PREFIX = 'crop_field_time_series_v2'
# Window for the single month processed by this cell (August 2020).
# Start of August 2020
START_DATE = '2020-08-01'
# Start of September 2020 (end of August)
END_DATE = '2020-09-01'
# Month number used in the export name. Keep it in sync with START_DATE:
# process_time_step() derives the column suffix from the start date instead.
MONTH_INDEX = 8 # August

# --- 3. GEE HELPER FUNCTIONS ---

def prepare_gee_points(file_path):
    """Load crop field centroids from a CSV as a GEE FeatureCollection.

    Rows without ``lon``, ``lat`` or ``location_id`` are dropped after a
    lowercase ``district`` column (if any) has been renamed to ``DISTRICT``.
    Every surviving row yields one point feature with the properties
    ``location_id`` (as int), ``DISTRICT``, ``lon`` and ``lat``.

    Args:
        file_path: Path of the CSV file to load.

    Returns:
        An ``ee.FeatureCollection`` holding one point feature per kept row.
    """
    print("Loading local CROP field centroids and preparing for GEE...")

    frame = pd.read_csv(file_path)
    frame = frame.rename(columns={'district': 'DISTRICT'})
    frame = frame.dropna(subset=['lon', 'lat', 'location_id'])

    def build_feature(entry):
        # The point is built from [lon, lat], in that order.
        geometry = ee.Geometry.Point([entry['lon'], entry['lat']])
        props = {
            'location_id': int(entry['location_id']),
            'DISTRICT': entry['DISTRICT'],
            'lon': entry['lon'],
            'lat': entry['lat'],
        }
        return ee.Feature(geometry, props)

    point_features = [build_feature(entry) for _, entry in frame.iterrows()]
    return ee.FeatureCollection(point_features)

def calculate_indices(image):
    """Add NDVI, NDWI and BSI bands to a Sentinel-2 image.

    * NDVI = (B8 - B4) / (B8 + B4)
    * NDWI = (B3 - B11) / (B3 + B11)
    * BSI  = ((B11 + B4) - (B8 + B2)) / ((B11 + B4) + (B8 + B2))

    Args:
        image: Image exposing the bands B2, B3, B4, B8 and B11.

    Returns:
        The input image with the bands 'NDVI', 'NDWI' and 'BSI' added.
    """
    index_ndvi = image.normalizedDifference(['B8', 'B4']).rename('NDVI')
    index_ndwi = image.normalizedDifference(['B3', 'B11']).rename('NDWI')

    # Both halves of the BSI ratio are built from the same two band sums.
    soil_sum = image.select('B11').add(image.select('B4'))
    cover_sum = image.select('B8').add(image.select('B2'))
    ratio = soil_sum.subtract(cover_sum).divide(soil_sum.add(cover_sum))
    index_bsi = ratio.rename('BSI').toFloat()

    with_indices = image.addBands(index_ndvi)
    with_indices = with_indices.addBands(index_ndwi)
    return with_indices.addBands(index_bsi)

def _masked_placeholder(band_names):
    """Return a fully masked float image with one band per entry of ``band_names``."""
    # ee.Image.constant(0) has a single band, so renaming it to several names
    # is invalid. Build one constant band per requested name, then mask it all.
    zeros = ee.Image.constant([0] * len(band_names)).toFloat()
    return zeros.rename(band_names).updateMask(ee.Image.constant(0))


def process_time_step(date_start, date_end, points):
    """Composite Sentinel-2 and Sentinel-1 for one period and sample it at the points.

    Builds a median Sentinel-2 composite (raw bands plus the NDVI/NDWI/BSI
    bands added by ``calculate_indices``) and a median Sentinel-1 IW composite
    (VV and VH) for ``date_start``..``date_end``, stacks both, and reduces
    the stack over each feature in ``points``. An empty collection is
    replaced by a fully masked placeholder carrying the same band names.

    Args:
        date_start: Start of the window, e.g. '2020-08-01'. Its month number
            becomes the two-digit suffix of every output column.
        date_end: End of the window, passed to ``filterDate`` together with
            ``date_start``.
        points: ``ee.FeatureCollection`` of sample points; used both as the
            spatial filter and as the regions to reduce over.

    Returns:
        An ``ee.FeatureCollection`` without geometry whose features carry the
        renamed band means (``S2_<band>_MM`` / ``S1_<band>_MM``) plus the
        properties 'location_id', 'DISTRICT', 'lon' and 'lat' copied from
        the input points.
    """
    month_index = pd.to_datetime(date_start).month

    S2_BANDS = ['B2', 'B3', 'B4', 'B8', 'B11', 'NDVI', 'NDWI', 'BSI']
    S1_BANDS = ['VV', 'VH']

    s2_collection = (
        ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
        .filterDate(date_start, date_end)
        .filterBounds(points)
        .map(calculate_indices)
        .select(S2_BANDS)
    )
    s2_median = ee.Image(ee.Algorithms.If(
        s2_collection.size().gt(0),
        s2_collection.median(),
        _masked_placeholder(S2_BANDS),
    ))

    # Keep only IW scenes that carry both polarisations.
    s1_collection = (
        ee.ImageCollection('COPERNICUS/S1_GRD')
        .filterDate(date_start, date_end)
        .filter(ee.Filter.listContains('transmitterReceiverPolarisation', 'VV'))
        .filter(ee.Filter.listContains('transmitterReceiverPolarisation', 'VH'))
        .filter(ee.Filter.eq('instrumentMode', 'IW'))
        .filterBounds(points)
        .select(S1_BANDS)
    )
    s1_median = ee.Image(ee.Algorithms.If(
        s1_collection.size().gt(0),
        s1_collection.median(),
        _masked_placeholder(S1_BANDS),
    ))

    composite = s2_median.addBands(s1_median)

    extracted_features = composite.reduceRegions(
        collection=points,
        reducer=ee.Reducer.mean(),
        scale=10
    )

    # Monthly column names, e.g. S2_NDVI_08 or S1_VV_08.
    band_names = S2_BANDS + S1_BANDS
    new_names = [
        f"{'S1' if band in S1_BANDS else 'S2'}_{band}_{month_index:02d}"
        for band in band_names
    ]

    def rename_features(feature):
        # select() keeps only the band means (renamed) and drops the geometry,
        # so the identifying properties are copied back from the source feature.
        renamed_feature = feature.select(band_names, new_names, retainGeometry=False)
        return renamed_feature.copyProperties(feature, ['location_id', 'DISTRICT', 'lon', 'lat', 'system:index'])

    return extracted_features.map(rename_features)

# --- 4. EXECUTION ---

def execute_gee_extraction():
    """Initialise GEE and submit the Drive export for the configured month.

    Features for START_DATE..END_DATE are computed by ``process_time_step``
    and exported as CSV to OUTPUT_DRIVE_FOLDER under the name
    ``<TASK_NAME_PREFIX>_month_MM`` (MM taken from MONTH_INDEX). A scheduling
    failure is logged rather than raised, and the closing banner states
    whether the task was actually submitted.

    Returns:
        None. Progress and errors are reported on stdout only.
    """
    try:
        ee.Initialize(project="moonlit-state-475112-b5")
    except Exception:
        # Any initialisation failure is treated as "credentials missing":
        # authenticate interactively, then retry once.
        print("Authenticating GEE...")
        ee.Authenticate()
        ee.Initialize(project="moonlit-state-475112-b5")

    print("GEE Initialized.")

    points_collection = prepare_gee_points(INPUT_ACCURATE_DISTRICTS)

    print(f"\n--- Starting GEE Task for CROP Month {MONTH_INDEX} ({START_DATE}) ---")

    # The export name uses MONTH_INDEX while process_time_step() derives the
    # column suffix from START_DATE; flag the case where the two disagree.
    start_month = pd.to_datetime(START_DATE).month
    if start_month != MONTH_INDEX:
        print(
            f"  WARNING: MONTH_INDEX ({MONTH_INDEX}) does not match the month of "
            f"START_DATE ({start_month}); file name and column suffixes will differ."
        )

    try:
        monthly_collection = process_time_step(START_DATE, END_DATE, points_collection)

        export_name = f'{TASK_NAME_PREFIX}_month_{MONTH_INDEX:02d}'

        task = ee.batch.Export.table.toDrive(
            collection=monthly_collection,
            description=export_name,
            folder=OUTPUT_DRIVE_FOLDER,
            fileNamePrefix=export_name,
            fileFormat='CSV'
        )
        task.start()
        print(f"  Task {export_name} scheduled successfully.")

    except Exception as e:
        # Best effort: report the failure and let the banner reflect it.
        submitted = False
        print(f"  CRITICAL ERROR SCHEDULING TASK for {START_DATE}: {e}.")
    else:
        submitted = True

    print("\n---------------------------------------------------------")
    if submitted:
        print(f"SUCCESS: 'CROP v2 Month {MONTH_INDEX:02d}' task submitted.")
    else:
        print(f"FAILED: 'CROP v2 Month {MONTH_INDEX:02d}' task was NOT submitted.")
    print("---------------------------------------------------------")

# --- ACTION 1 RUN ---
# Runs as soon as the cell executes: initialises GEE and submits the export
# task for the single month configured above.
execute_gee_extraction()

GEE Initialized.
Loading local CROP field centroids and preparing for GEE...

--- Starting GEE Task for CROP Month 8 (2020-08-01) ---
  Task crop_field_time_series_v2_month_08 scheduled successfully.

---------------------------------------------------------
SUCCESS: 'CROP v2 Month 08' task submitted.
---------------------------------------------------------
