This code extracts the values of the 14 VIs at identified field polygons (one crop at a time) from the drone data (VI tiff files). It then averages the VI values for that polygon identified by the ID and creates two csv files - the first with the average values of the VIs and the second with the maximum, minimum and mean values of the VIs within each crop polygon. The field point polygons (shapefiles) are different for the three crops and need to be read in accordingly along with the other input files. All the calculations are done locally. No temporary folders are created during execution of the code. The output csv files are saved inside the input folder as crop_Polygon_Drone_Extract.csv which contains the mean VI values (eg. Rice_Polygon_Drone_Extract.csv) and as crop_Polygon_Drone_Extract_maxminmean.csv (eg. Rice_Drone_Landsat_Extract_maxminmean.csv).

In [1]:
import os
import sys
import subprocess

# This gives the name of the environment directory
print("Environment name:", os.path.basename(sys.prefix))

Environment name: A4I064-ML


In [2]:
# Install necessary packages, if needed

required_packages = ["zipfile", "glob", "rasterio", "geopandas", "datetime", "re", "shutil"]

for package in required_packages:
    try:
        __import__(package if package != "scikit-learn" else "sklearn")
        print(f"{package} is already installed.")
    except ImportError:
        print(f"{package} not found. Installing...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])

print("All packages have been installed!")

zipfile is already installed.
glob is already installed.
rasterio is already installed.
geopandas is already installed.
datetime is already installed.
re is already installed.
shutil is already installed.
All packages have been installed!


In [3]:
# Import necessary packages
import tempfile
import os
import rasterio
import geopandas as gpd
import pandas as pd
import numpy as np
from rasterio.mask import mask
from tqdm import tqdm

In [9]:
# Folder setup
source_folder = r"C:\Users\U8019357\UniSQ\A4I Geospatial Tech - UniSQ Internal - UniSQ Internal\1 - Image Processing\Processed_Drone_VI_TIFFs"

# Enter crop name
crop_name = "Dragonfruit"

# Path to the folder containing the farm polygons
shapefile_folder = r"C:\Users\U8019357\UniSQ\A4I Geospatial Tech - UniSQ Internal - UniSQ Internal\1 - Image Processing\Raw Data\GIS Maps and Shapefiles\Field Polygons Final"

# Output folder to save extracted VIs
output_folder = r"C:\Users\U8019357\UniSQ\A4I Geospatial Tech - UniSQ Internal - UniSQ Internal\2 - ML\Raw Data\Drone_VIs"

# Path to cycle dates CSV
cycle_dates_csv = os.path.join(source_folder, "Drone_cycle_dates.csv")

In [10]:
# Load polygon shapefile
shapefile_path = os.path.join(shapefile_folder, f"{crop_name}.shp")

gdf = gpd.read_file(shapefile_path)
gdf = gdf[~gdf.geometry.is_empty].copy()
gdf = gdf[gdf.geometry.type.isin(['Polygon', 'MultiPolygon'])].copy()

# Reproject shapefile to EPSG:4326 for centroid coordinates
gdf_centroid = gdf.to_crs(epsg=4326)

# Load cycle date info
cycle_dates_df = pd.read_csv(cycle_dates_csv)
cycle_dates_df.columns = cycle_dates_df.columns.str.strip()
cycle_dates_df["Province"] = cycle_dates_df["Province"].str.strip()
cycle_dates_df["Crop"] = cycle_dates_df["Crop"].str.strip()
cycle_dates_df["Cycle"] = cycle_dates_df["Cycle"].str.strip()

FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\U8019357\\UniSQ\\A4I Geospatial Tech - UniSQ Internal - UniSQ Internal\\1 - Image Processing\\Processed_Drone_VI_TIFFs\\Drone_cycle_dates.csv'

In [None]:
# Identify all folders for the crop
all_folders = [f for f in os.listdir(source_folder) if os.path.isdir(os.path.join(source_folder, f))]
crop_folders = [f for f in all_folders if crop_name.lower() in f.lower()]

print(f"Found {len(crop_folders)} folders for crop '{crop_name}'.")
print(crop_folders)

In [None]:
# Vegetation Indices
vi_list = ["ARI", "CIG", "DVI", "EVI", "GNDVI", "MSAVI", "NDVI", "NDWI", "PRI", "RVI", "SAVI", "TVI", "VARI", "WDRVI"]

In [None]:
# Main data extraction loop

# Prepare lists to collect results
all_mean_records = []
all_minmaxmean_records = []

# Loop through crop folders
for folder_name in tqdm(crop_folders, desc="Processing folders"):
    folder_path = os.path.join(source_folder, folder_name)
    gr2_path = os.path.join(folder_path, "Gr2Indices")

    # Parse Province, Crop, Cycle from folder name
    parts = folder_name.split("_")
    if len(parts) < 3:
        print(f"Skipping folder with unexpected name format: {folder_name}")
        continue
    province, crop = parts[0], parts[1]
    if len(parts) == 4 and parts[2].lower() == "cycle":
        cycle_number = parts[3]
    else:
        cycle_number = ''.join(filter(str.isdigit, parts[2]))
    if not cycle_number:
        print(f"Skipping folder with invalid cycle info: {folder_name}")
        continue
    cycle = f"C{cycle_number}"

    # Get date from cycle_dates_df
    date_row = cycle_dates_df[
        (cycle_dates_df["Province"].str.upper() == province.upper()) &
        (cycle_dates_df["Crop"].str.lower() == crop.lower()) &
        (cycle_dates_df["Cycle"].str.upper() == cycle.upper())
    ]
    
    if date_row.empty:
        print(f"Date not found for {folder_name}.")
        continue
    date_value = date_row.iloc[0]["Date"]

    # Check Gr2Indices folder
    if not os.path.exists(gr2_path):
        print(f"Gr2Indices folder missing in {folder_name}, skipping.")
        continue

    # Read first VI raster for CRS reference
    sample_vi_file = os.path.join(gr2_path, f"DJIP4M_{folder_name}_ARI.tif")
    if not os.path.exists(sample_vi_file):
        print(f"Sample VI TIFF missing for {folder_name}, skipping.")
        continue
    with rasterio.open(sample_vi_file) as src:
        raster_crs = src.crs
        raster_bounds = src.bounds

    # Reproject polygon shapefile to match raster CRS
    if gdf.crs != raster_crs:
        gdf_raster = gdf.to_crs(raster_crs)
    else:
        gdf_raster = gdf.copy()

    # Loop through each polygon
    for idx, row in gdf_raster.iterrows():
        poly_geom = [row.geometry]
        centroid_row = gdf_centroid.iloc[idx]

        record_mean = {
            "ID": row.get("Name", row.get("CodeGPS", f"Polygon_{idx}")),
            "latitude": centroid_row.geometry.centroid.y,
            "longitude": centroid_row.geometry.centroid.x,
            "Date": date_value
        }
        record_minmaxmean = record_mean.copy()
        polygon_has_data = False

        # Extract VI values
        for vi in vi_list:
            vi_file = os.path.join(gr2_path, f"DJIP4M_{folder_name}_{vi}.tif")
            if not os.path.exists(vi_file):
                record_mean[vi] = np.nan
                record_minmaxmean[f"Mean{vi}"] = np.nan
                record_minmaxmean[f"Min{vi}"] = np.nan
                record_minmaxmean[f"Max{vi}"] = np.nan
                continue

            with rasterio.open(vi_file) as src:
                try:
                    out_image, out_transform = mask(src, poly_geom, crop=True)
                    out_image = out_image.astype(float)
                    out_image[out_image == src.nodata] = np.nan
                    if not np.isnan(out_image).all():
                        polygon_has_data = True
                except ValueError:
                    out_image = np.full((1,1,1), np.nan)

            mean_val = np.nanmean(out_image)
            min_val = np.nanmin(out_image)
            max_val = np.nanmax(out_image)

            record_mean[vi] = mean_val
            record_minmaxmean[f"Mean{vi}"] = mean_val
            record_minmaxmean[f"Min{vi}"] = min_val
            record_minmaxmean[f"Max{vi}"] = max_val

        if polygon_has_data:
            all_mean_records.append(record_mean)
            all_minmaxmean_records.append(record_minmaxmean)

In [None]:
# Create DataFrames
df_mean = pd.DataFrame(all_mean_records)
df_minmaxmean = pd.DataFrame(all_minmaxmean_records)

# Add Crop and Source columns
df_mean['Crop'] = crop_name
df_mean['Source'] = 'Drone'
df_minmaxmean['Crop'] = crop_name
df_minmaxmean['Source'] = 'Drone'

In [None]:
# Save CSVs
output_mean_csv = os.path.join(output_folder, f"{crop_name}_Polygon_Drone_Extract.csv")
output_minmaxmean_csv = os.path.join(output_folder, f"{crop_name}_Polygon_Drone_Extract_MaxMinMean.csv")

df_mean.to_csv(output_mean_csv, index=False)
df_minmaxmean.to_csv(output_minmaxmean_csv, index=False)

print("\nExtraction completed successfully.")
print(f"Mean values CSV: {os.path.abspath(output_mean_csv)}")
print(f"Min-Max-Mean values CSV: {os.path.abspath(output_minmaxmean_csv)}")