## DATA COLLECTION
## STEP 1: Install dependencies


In [None]:
!pip install geemap earthengine-api

Collecting jedi>=0.16 (from ipython>=4.0.0->ipywidgets->ipyfilechooser>=0.6.0->geemap)
  Downloading jedi-0.19.2-py2.py3-none-any.whl.metadata (22 kB)
Downloading jedi-0.19.2-py2.py3-none-any.whl (1.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m61.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: jedi
Successfully installed jedi-0.19.2


## STEP 2: Import and authenticate

In [None]:
import ee
import pandas as pd
import time

# Sign in to Earth Engine, then initialise the client against the Cloud
# project that the requests in this notebook are issued under.
EE_PROJECT = 'ee-rahulds24'

ee.Authenticate()
ee.Initialize(project=EE_PROJECT)

## STEP 3: Define state bounding boxes (approximate lat/lon)

In [None]:
# Rough, manually chosen bounding box for each state, stored as
# (west, south, east, north) in degrees of longitude/latitude.
_state_bounds = {
    'Jammu': (73.8, 32.3, 76.5, 34.5),
    'Himachal Pradesh': (76.5, 30.4, 79.0, 33.2),
    'Punjab': (73.8, 29.5, 76.9, 32.5),
    'Haryana': (74.5, 27.4, 77.5, 30.5),
    'West Bengal': (85.8, 21.5, 89.9, 27.3),
    'Bihar': (83.0, 24.2, 88.0, 27.5),
    'Jharkhand': (83.5, 22.0, 87.5, 25.5),
    'Gujarat': (68.0, 20.0, 74.5, 24.7),
    'Madhya Pradesh': (74.0, 21.0, 82.0, 26.5),
    'Rajasthan': (69.3, 23.3, 78.2, 30.0),
    'Kerala': (74.0, 8.0, 77.5, 12.9),
    'Tamil Nadu': (76.0, 8.0, 80.5, 13.5),
    'Uttar Pradesh': (77.0, 23.9, 84.5, 30.4),
    'Assam': (89.7, 24.0, 96.0, 27.8),
    'Odisha': (81.5, 18.0, 87.4, 22.9),
    'Andhra Pradesh': (77.0, 12.5, 84.0, 19.5),
}


def _bbox_polygon(west, south, east, north):
    """Return the closed four-corner ee.Geometry.Polygon for one bounding box.

    The ring starts at the south-west corner and visits north-west,
    north-east and south-east before closing back on the start point.
    """
    ring = [
        [west, south],
        [west, north],
        [east, north],
        [east, south],
        [west, south],
    ]
    return ee.Geometry.Polygon([ring])


# State name -> polygon geometry used as the sampling region.
states_bbox = {
    state_name: _bbox_polygon(*bounds)
    for state_name, bounds in _state_bounds.items()
}

### Define seasons

In [None]:
# Season name -> (first day, last day) as ISO date strings.
# Winter starts in December 2022 and runs into February 2023.
seasons = dict(
    Spring=('2022-03-01', '2022-05-31'),
    Summer=('2022-06-01', '2022-08-31'),
    Autumn=('2022-09-01', '2022-11-30'),
    Winter=('2022-12-01', '2023-02-28'),
)

## Define the number of points per state and the sampling scale. The scale is 250 because MODIS products are provided at 250 m or 500 m resolution.
## If you set scale=30 (too fine for MODIS), sampling may return empty data.



In [None]:
# Sampling resolution handed to the sampler as its `scale` argument.
scale = 250

# How many random points are requested inside each state's bounding box.
points_per_state = 100

### ***1. Collect point-wise data for each state and each season, and convert the NDVI, temperature, and pH (converted to the 0–14 scale) values to readily interpretable units. Rainfall is the seasonal total (the sum over one season such as autumn or summer, not the average of daily rainfall).***

## ***2. Finally, combine all the data into a single CSV file***

In [None]:
import uuid  # Sample_ID below needs it; the notebook's import cell does not provide it

# Sample NDVI, rainfall, land-surface temperature and soil layers at random
# points for every (state, season) pair, then write one combined CSV.
#
# Earth Engine's filterDate() excludes its end date, whereas the `seasons`
# windows list the LAST day of each season.  Every window is therefore
# extended by one day before filtering; otherwise the final day of each season
# (and 31 December for the annual total) is silently left out.

all_data = []

# Season-independent layers: these are lazy server-side expressions, so they
# are defined once here instead of being rebuilt on every iteration.
chirps_daily = ee.ImageCollection('UCSB-CHG/CHIRPS/DAILY')

soil_carbon = ee.Image('projects/soilgrids-isric/ocd_mean') \
    .select('ocd_0-5cm_mean')

soil_ph = ee.Image('projects/soilgrids-isric/phh2o_mean') \
    .select('phh2o_0-5cm_mean') \
    .multiply(0.1)  # stored as pH * 10 -> rescale to the 0-14 pH scale

soil_texture = ee.Image('OpenLandMap/SOL/SOL_TEXTURE-CLASS_USDA-TT_M/v02') \
    .select('b0')

for state, region in states_bbox.items():
    # Fixed seed: the same locations are sampled in every season, so the
    # seasonal rows of one state are directly comparable.
    points = ee.FeatureCollection.randomPoints(region=region, points=points_per_state, seed=42)

    for season, (start, end) in seasons.items():
        print(f"Collecting data for {state} ({season})...")

        # Turn the inclusive last day into the exclusive end filterDate() expects.
        end_exclusive = (pd.to_datetime(end) + pd.Timedelta(days=1)).strftime('%Y-%m-%d')
        year = pd.to_datetime(start).year

        # Seasonal NDVI
        ndvi = ee.ImageCollection('MODIS/061/MOD13Q1') \
            .filterDate(start, end_exclusive) \
            .select('NDVI') \
            .mean() \
            .divide(10000)  # 0–1 range

        # Seasonal Rainfall: total over the season, not a daily average
        seasonal_rainfall = chirps_daily \
            .filterDate(start, end_exclusive) \
            .sum()

        # Seasonal Temperature (Celsius)
        temperature = ee.ImageCollection('MODIS/061/MOD11A1') \
            .filterDate(start, end_exclusive) \
            .select('LST_Day_1km') \
            .mean() \
            .multiply(0.02).subtract(273.15)

        # Annual Rainfall for the calendar year in which the season starts
        annual_rainfall = chirps_daily \
            .filterDate(f'{year}-01-01', f'{year + 1}-01-01') \
            .filterBounds(region) \
            .sum() \
            .rename('Annual_Rainfall')

        # Combine all bands
        combined = ndvi.rename('NDVI') \
            .addBands(seasonal_rainfall.rename('Seasonal_Rainfall')) \
            .addBands(temperature.rename('Temperature')) \
            .addBands(soil_ph.rename('pH_0_5')) \
            .addBands(soil_carbon.rename('Carbon')) \
            .addBands(soil_texture.rename('Texture')) \
            .addBands(annual_rainfall)

        # Sample at random points (geometries kept so lat/lon can be read back)
        sampled = combined.sampleRegions(
            collection=points,
            scale=scale,
            geometries=True
        )

        # Only the server round-trip is best-effort.  A single getInfo() both
        # fetches the rows and gives the sample size, and local programming
        # errors are no longer swallowed by the handler.
        try:
            features = sampled.getInfo()['features']
        except Exception as e:
            print(f"❌ Error for {state} {season}: {e}")
            continue

        sample_size = len(features)
        print(f"Sample size: {sample_size}")

        if sample_size == 0:
            print(f"⚠️ No data returned for {state} ({season})")
            continue

        # The raw GeoJSON geometry is not stored: pd.json_normalize used to
        # flatten it into 'geometry.*' columns that the column selection below
        # discarded anyway.  Latitude/Longitude carry the location.
        records = [
            {
                **f['properties'],
                'Latitude': f['geometry']['coordinates'][1],
                'Longitude': f['geometry']['coordinates'][0],
                'State': state,
                'Season': season,
                'Year': year,
                'Sample_ID': str(uuid.uuid4())
            }
            for f in features
        ]
        all_data.append(pd.DataFrame(records))

# Combine all results
if all_data:
    final_df = pd.concat(all_data, ignore_index=True)
    print("✅ Final DataFrame shape:", final_df.shape)

    # Optional column ordering (columns that are absent are skipped)
    columns_order = ['Sample_ID', 'State', 'Season', 'Year', 'Latitude', 'Longitude',
                     'NDVI', 'Seasonal_Rainfall', 'Annual_Rainfall', 'Temperature',
                     'pH_0_5', 'Carbon', 'Texture']
    final_df = final_df[[col for col in columns_order if col in final_df.columns]]

    # Save
    final_df.to_csv('smart_crop_light_data_2022.csv', index=False)
else:
    print("❌ No data collected.")


Collecting data for Jammu (Spring)...
Sample size: 98
Collecting data for Jammu (Summer)...
Sample size: 98
Collecting data for Jammu (Autumn)...
Sample size: 98
Collecting data for Jammu (Winter)...
Sample size: 98
Collecting data for Himachal Pradesh (Spring)...
Sample size: 92
Collecting data for Himachal Pradesh (Summer)...
Sample size: 92
Collecting data for Himachal Pradesh (Autumn)...
Sample size: 92
Collecting data for Himachal Pradesh (Winter)...
Sample size: 92
Collecting data for Punjab (Spring)...
Sample size: 96
Collecting data for Punjab (Summer)...
Sample size: 96
Collecting data for Punjab (Autumn)...
Sample size: 96
Collecting data for Punjab (Winter)...
Sample size: 96
Collecting data for Haryana (Spring)...
Sample size: 98
Collecting data for Haryana (Summer)...
Sample size: 98
Collecting data for Haryana (Autumn)...
Sample size: 98
Collecting data for Haryana (Winter)...
Sample size: 98
Collecting data for West Bengal (Spring)...
Sample size: 89
Collecting data for 

## Combine all csv files from 2019 to 2023

In [None]:
from google.colab import drive
from google.colab import files
import pandas as pd
import glob
import os

# Mount Google Drive, merge every CSV in the project folder into one table,
# write it to the working directory and offer it as a browser download.
drive.mount('/content/drive')

folder_path = '/content/drive/MyDrive/final_project_AGP'

# Glob once and sort: glob order is filesystem-dependent, so sorting makes the
# row order of the merged file reproducible between runs.
csv_files = sorted(glob.glob(os.path.join(folder_path, '*.csv')))
print("Files found:", len(csv_files))
print("Files:", folder_path)

# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# fail early with a message that names the folder instead.
if not csv_files:
    raise FileNotFoundError(f"No CSV files found in {folder_path}")

df_all = pd.concat([pd.read_csv(f) for f in csv_files], ignore_index=True)

output_filename = 'smart_crop_lat_long.csv'
df_all.to_csv(output_filename, index=False)

print(f"\nSuccessfully combined {len(csv_files)} CSV files into {output_filename}")

# Provide a download link for the generated CSV
files.download(output_filename)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Files found: 3
Files: /content/drive/MyDrive/final_project_AGP

Successfully combined 3 CSV files into smart_crop_lat_long.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Load the final data (2019–2023) and create the crop columns as targets for prediction.

In [None]:
import pandas as pd

# Read the merged table from Google Drive into `df`; the crop label columns
# are added to this frame afterwards.
merged_csv_path = '/content/drive/MyDrive/Advance-geospatial-programming_capstone_2025/crop_suitability_multi_label_many_crops.csv'
df = pd.read_csv(merged_csv_path)

In [None]:
# Define thresholds (relaxed and approximate ranges)
def is_suitable(row, ph_range, temp_range, rain_range, carbon_min=0):
    """Tell whether one sample row meets every threshold of a crop.

    Args:
        row: Mapping-like record with the keys 'pH_0_5', 'Temperature',
            'Annual_Rainfall' and 'Carbon'.
        ph_range: (low, high) bounds for 'pH_0_5', both inclusive.
        temp_range: (low, high) bounds for 'Temperature', both inclusive.
        rain_range: (low, high) bounds for 'Annual_Rainfall', both inclusive.
        carbon_min: Smallest accepted 'Carbon' value (inclusive).

    Returns:
        A truthy value only when all four checks pass.  The checks run in
        the order pH, temperature, rainfall, carbon and stop at the first
        failure.
    """
    def within(bounds, column):
        # Inclusive range check on a single column of the row.
        return bounds[0] <= row[column] <= bounds[1]

    return (
        within(ph_range, 'pH_0_5')
        and within(temp_range, 'Temperature')
        and within(rain_range, 'Annual_Rainfall')
        and row['Carbon'] >= carbon_min
    )

# Threshold table: crop column name ->
#   (pH range, temperature range, annual-rainfall range, minimum carbon).
# Each entry is passed to is_suitable() in exactly this order.
crop_thresholds = {
    # --- FIELD CROPS ---
    'Paddy': ((5.5, 7.0), (20, 35), (1200, 3000), 1),
    # Loamy-soil pH value 6.0-7.5 (data source: Google).
    'Wheat': ((6.0, 7.5), (15, 28), (300, 1200), 1),
    # --- FRUITS ---
    'Apple': ((5.5, 6.5), (5, 21), (1000, 1600), 300),
    # --- MEDICINAL / AROMATIC CROPS ---
    'Tea': ((4.5, 6.3), (14, 26), (1200, 2500), 1),
    # --- PLANTATION & FLOWER CROPS ---
    'Coconut': ((5.2, 7.2), (20, 38), (1200, 2500), 1),
}

# One boolean column per crop, evaluated row by row.
for crop_name, thresholds in crop_thresholds.items():
    df[crop_name] = df.apply(is_suitable, axis=1, args=thresholds)

# Save to CSV
labelled_csv_path = "/content/drive/MyDrive/Advance-geospatial-programming_capstone_2025/C.S_multi_lable_5_class.csv"
df.to_csv(labelled_csv_path, index=False)
