This code extracts the values of the 14 VIs within the identified field polygons (one crop at a time) from the Landsat data, which was created by mosaicking and clipping the downloaded data locally. For each polygon, identified by its ID, it summarises the VI values and creates two CSV files: the first contains the mean value of each VI, and the second contains the maximum, minimum and mean values of each VI within each crop polygon. The field polygons (shapefiles) are different for the three crops and need to be read in accordingly, along with the other input files. All the calculations are done locally. No temporary folders are created during execution of the code. The output CSV files are saved in the output folder (`output_folder`) as crop_Polygon_Landsat_Extract.csv, which contains the mean VI values (e.g. Rice_Polygon_Landsat_Extract.csv), and as crop_Polygon_Landsat_Extract_MaxMinMean.csv (e.g. Rice_Polygon_Landsat_Extract_MaxMinMean.csv).

In [None]:
import os
import sys
import subprocess

# Report the active Python environment: the last path component of sys.prefix
environment_dir = sys.prefix
print("Environment name:", os.path.basename(environment_dir))

In [None]:
# Install necessary packages, if needed
#
# Only the third-party packages imported by this notebook are listed.
# Standard-library modules (zipfile, glob, datetime, re, shutil) ship with
# Python and must never be handed to pip, where the same names may belong to
# unrelated projects.
import importlib.util

# For every package below, the pip name and the import name are identical.
required_packages = ["rasterio", "geopandas", "pandas", "numpy", "tqdm"]

for package in required_packages:
    # find_spec only locates the package; it avoids importing heavy
    # libraries just to learn whether they are present.
    if importlib.util.find_spec(package) is not None:
        print(f"{package} is already installed.")
    else:
        print(f"{package} not found. Installing...")
        # Install into the interpreter running this notebook, not whatever
        # "pip" happens to be first on PATH.
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])

print("All packages have been installed!")

In [None]:
# Import necessary packages
import tempfile
import os
import glob
import rasterio
import geopandas as gpd
import pandas as pd
import numpy as np
from rasterio.mask import mask
from tqdm import tqdm

In [2]:
# Folder setup
# Root folder of the processed Landsat VI rasters. The extraction code below
# expects one sub-folder per acquisition date (named YYYYMMDD), each holding a
# "Gr2Indices" folder with files named Landsat_<date>_<VI>.TIF.
source_folder = r"C:\Users\U8019357\UniSQ\A4I Geospatial Tech - UniSQ Internal - UniSQ Internal\1 - Image Processing\Processed_Landsat_VI_TIFFs"

# Path to the folder containing the farm polygons
# (one shapefile per crop, read as <crop_name>.shp)
shapefile_folder = r"C:\Users\U8019357\UniSQ\A4I Geospatial Tech - UniSQ Internal - UniSQ Internal\2 - ML\Maps and Shapefiles\GIS Maps and Shapefiles\Field Polygons Final"

# Output folder to save extracted VIs (the two result CSV files are written here)
output_folder = r"C:\Users\U8019357\UniSQ\A4I Geospatial Tech - UniSQ Internal - UniSQ Internal\2 - ML\Raw Data\Landsat_VIs"

# Enter crop name
# Selects the shapefile to load, prefixes the output file names and fills the
# "Crop" column of the results.
crop_name = "Rice"

In [None]:
# Read the field polygons of the selected crop
shapefile_path = os.path.join(shapefile_folder, f"{crop_name}.shp")
gdf = gpd.read_file(shapefile_path)

# Discard empty geometries first, then keep only areal features
non_empty = gdf[~gdf.geometry.is_empty].copy()
is_areal = non_empty.geometry.type.isin(['Polygon', 'MultiPolygon'])
gdf = non_empty[is_areal].copy()

In [None]:
# Vegetation indices to extract; each name is part of the raster file name
# (Landsat_<date>_<VI>.TIF) and becomes a column in the output tables.
vi_list = [
    "ARI",
    "CIG",
    "DVI",
    "EVI",
    "GNDVI",
    "MSAVI",
    "NDVI",
    "NDWI",
    "PRI",
    "RVI",
    "SAVI",
    "TVI",
    "VARI",
    "WDRVI",
]

In [None]:
# ---------------- Read first raster for CRS reference ----------------
# Use the first date folder that actually contains a VI raster. Stray files
# and incomplete date folders inside source_folder are skipped (the extraction
# loop tolerates them too) instead of crashing on a path that does not exist.
example_raster_path = None
for example_date_folder in sorted(os.listdir(source_folder)):
    indices_dir = os.path.join(source_folder, example_date_folder, "Gr2Indices")
    if not os.path.isdir(indices_dir):
        continue
    candidate_paths = (
        os.path.join(indices_dir, f"Landsat_{example_date_folder}_{vi}.TIF")
        for vi in vi_list
    )
    example_raster_path = next(
        (path for path in candidate_paths if os.path.exists(path)), None
    )
    if example_raster_path is not None:
        break

if example_raster_path is None:
    raise FileNotFoundError(
        f"No VI raster (Landsat_<date>_<VI>.TIF inside a Gr2Indices folder) found under {source_folder}"
    )

with rasterio.open(example_raster_path) as src:
    raster_crs = src.crs

# A shapefile without CRS information (e.g. a missing .prj file) cannot be
# reprojected; fail with a message that names the actual problem.
if gdf.crs is None:
    raise ValueError(
        f"The {crop_name} shapefile has no CRS defined; cannot reproject it to the raster CRS."
    )

# Reproject shapefile to match raster CRS
if gdf.crs != raster_crs:
    gdf = gdf.to_crs(raster_crs)

# Reproject for centroid calculation in decimal degrees (EPSG:4326)
gdf_centroid = gdf.to_crs(epsg=4326)

In [None]:

# Extract per-polygon VI statistics for every date folder.
#
# Result: one record per (date, polygon) in all_mean_records (mean of each VI)
# and in all_minmaxmean_records (mean, min and max of each VI). A polygon is
# recorded for a date only if at least one VI raster has valid pixels inside it.


def _polygon_stats(src, geometries):
    """Return (mean, min, max) of the valid pixels of `src` inside `geometries`.

    `src` is an open rasterio dataset and `geometries` a list of shapes in the
    raster CRS. Returns None when the shapes do not overlap the raster or
    cover no valid pixel.
    """
    try:
        # filled=False returns a masked array in which both the raster's own
        # nodata pixels and the pixels outside the polygon are masked, so the
        # statistics stay correct even when the file declares no nodata value.
        clipped, _ = mask(src, geometries, crop=True, filled=False)
    except ValueError:  # Polygon outside raster
        return None

    values = clipped.astype(float).filled(np.nan)
    if np.isnan(values).all():
        return None
    return np.nanmean(values), np.nanmin(values), np.nanmax(values)


# Prepare lists to collect results
all_mean_records = []
all_minmaxmean_records = []

# Polygon geometry, ID and centroid do not change between dates: compute once.
# gdf and gdf_centroid hold the same rows in the same order, so they are paired
# by position. Using the index label as a position would break as soon as the
# geometry filtering has dropped a row from the shapefile.
polygon_info = []
for (idx, row), geom_wgs84 in zip(gdf.iterrows(), gdf_centroid.geometry):
    centroid = geom_wgs84.centroid
    polygon_info.append((
        [row.geometry],  # Mask expects a list of geometries
        {
            "ID": row.get("Name", row.get("CodeGPS", f"Polygon_{idx}")),
            "latitude": centroid.y,
            "longitude": centroid.x,
        },
    ))

# Loop through date folders
for date_folder in tqdm(sorted(os.listdir(source_folder))):
    date_path = os.path.join(source_folder, date_folder, "Gr2Indices")
    if not os.path.isdir(date_path):
        continue

    date_str = date_folder
    acquisition_date = pd.to_datetime(date_str, format='%Y%m%d')

    # One record pair per polygon; VI columns are filled in below
    date_records_mean = [
        {**info, "Date": acquisition_date} for _, info in polygon_info
    ]
    date_records_minmaxmean = [record.copy() for record in date_records_mean]
    polygon_has_data = [False] * len(polygon_info)

    # Open each VI raster once per date and clip every polygon from it
    for vi in vi_list:
        vi_file = os.path.join(date_path, f"Landsat_{date_str}_{vi}.TIF")
        if os.path.exists(vi_file):
            with rasterio.open(vi_file) as src:
                vi_stats = [_polygon_stats(src, geoms) for geoms, _ in polygon_info]
        else:
            vi_stats = [None] * len(polygon_info)

        for position, stats in enumerate(vi_stats):
            if stats is None:
                # Always write the VI keys so every record has the same
                # columns in the same order, whatever the reason for no data.
                mean_val = min_val = max_val = np.nan
            else:
                mean_val, min_val, max_val = stats
                polygon_has_data[position] = True

            date_records_mean[position][vi] = mean_val
            date_records_minmaxmean[position][f"Mean{vi}"] = mean_val
            date_records_minmaxmean[position][f"Min{vi}"] = min_val
            date_records_minmaxmean[position][f"Max{vi}"] = max_val

    # Only keep polygons that intersect valid raster data on this date
    for has_data, record_mean, record_minmaxmean in zip(
        polygon_has_data, date_records_mean, date_records_minmaxmean
    ):
        if has_data:
            all_mean_records.append(record_mean)
            all_minmaxmean_records.append(record_minmaxmean)

In [None]:
# Turn the collected records into tables
df_mean = pd.DataFrame(all_mean_records)
df_minmaxmean = pd.DataFrame(all_minmaxmean_records)

# Tag every row of both tables with the crop and the data source
for frame in (df_mean, df_minmaxmean):
    frame['Crop'] = crop_name
    frame['Source'] = 'Landsat'

In [None]:
# Save CSVs inside the output folder
# Create the folder first so a missing directory does not discard the results
# of the (long) extraction step when to_csv is called.
os.makedirs(output_folder, exist_ok=True)

output_mean_csv = os.path.join(output_folder, f"{crop_name}_Polygon_Landsat_Extract.csv")
output_minmaxmean_csv = os.path.join(output_folder, f"{crop_name}_Polygon_Landsat_Extract_MaxMinMean.csv")

# index=False: the row index carries no information, so it is not written
df_mean.to_csv(output_mean_csv, index=False)
df_minmaxmean.to_csv(output_minmaxmean_csv, index=False)

print("\nExtraction completed successfully.")
print(f"Mean values CSV: {os.path.abspath(output_mean_csv)}")
print(f"Min-Max-Mean values CSV: {os.path.abspath(output_minmaxmean_csv)}")