In [25]:
import ee
import geemap
import pandas as pd
import numpy as np
import plotly.express as px
import re

# Start the Earth Engine client session; the ee.* calls in the cells below rely on it.
# NOTE(review): presumably uses credentials cached by an earlier ee.Authenticate() — confirm.
ee.Initialize()
# Create a geemap interactive map object; no cell visible in this notebook uses it afterwards.
Map = geemap.Map()

In [26]:
# Load in location coordinates
# NOTE(review): this is an absolute, user-specific path; point LOCATIONS_CSV at
# your own copy of the file before running on another machine.
LOCATIONS_CSV = r"C:\Users\AdamMorgan\OneDrive\UCL\Research Project\Data\location_coordinates.csv"
plot_df = pd.read_csv(LOCATIONS_CSV)

# Fail fast if the coordinate columns used to build the point geometries are
# absent, instead of hitting the same KeyError later inside the feature loop.
missing_coord_cols = [col for col in ("X", "Y") if col not in plot_df.columns]
if missing_coord_cols:
    raise KeyError(f"{LOCATIONS_CSV} is missing required column(s): {missing_coord_cols}")

In [27]:
# SoilGrids properties to sample. Each one maps to an asset named "<property>_mean".
# "soc" is listed once here; the hand-written dict had the "soc_mean" key twice,
# with the second entry silently overwriting the first.
SOIL_PROPERTIES = ["nitrogen", "soc", "bdod", "cec", "cfvo",
                   "clay", "ocd", "phh2o", "sand", "silt"]

# Depth intervals; every property above has one band per interval.
DEPTH_INTERVALS = ["0-5cm", "5-15cm", "15-30cm", "30-60cm", "60-100cm", "100-200cm"]

# Asset name -> list of band names "<property>_<depth>_mean", in depth order.
datasets = {
    f"{soil_property}_mean": [
        f"{soil_property}_{depth}_mean" for depth in DEPTH_INTERVALS
    ]
    for soil_property in SOIL_PROPERTIES
}

### Note: `ocs_mean` is excluded

#### The `ocs_mean` asset only contains the single band 'ocs_0-30cm_mean' (it has no per-depth bands), so it is not included in the `datasets` dictionary.

In [28]:
# Turn the location table into an Earth Engine FeatureCollection (one point per row)
def _row_to_feature(row):
    """Build an ee.Feature for one ``plot_df`` row.

    The point geometry comes from the row's 'X' and 'Y' values, and every
    column of the row is attached as a feature property.
    """
    point = ee.Geometry.Point([row['X'], row['Y']])
    return ee.Feature(point, dict(row))


features = [_row_to_feature(row) for _, row in plot_df.iterrows()]

# Assemble the per-row features into the collection used for sampling
ee_fc = ee.FeatureCollection(features)

# Combine all images into one image.
# Each SoilGrids asset is loaded and restricted to its listed bands, then the
# collection is built in a single call. The previous approach merged
# one-image collections inside a loop, which nests one merge() per dataset
# and yields long, merge-derived prefixes on the band names.
soil_images = [
    ee.Image(f"projects/soilgrids-isric/{dataset}").select(bands)
    for dataset, bands in datasets.items()
]
image_collection = ee.ImageCollection(soil_images)

# toBands() flattens the collection into one multi-band image. Band names are
# prefixed with the image index; the column clean-up regex later in this
# notebook strips any leading non-letter prefix, so the final names are unchanged.
all_bands_image = image_collection.toBands()

# Function to extract raster values
def rasterExtraction(image, collection=None, scale=10):
    """Sample ``image`` at the features of a feature collection.

    Args:
        image: Object exposing ``sampleRegions`` (an ee.Image in this notebook).
        collection: Features to sample at. When omitted, the notebook-level
            ``ee_fc`` is used, so existing one-argument calls behave as before.
        scale: Value passed to ``sampleRegions`` as ``scale``. Defaults to 10,
            the value that was previously hard-coded.
            NOTE(review): SoilGrids is distributed at ~250 m resolution —
            confirm that 10 is the intended sampling scale.

    Returns:
        The result of ``image.sampleRegions(...)``.
    """
    if collection is None:
        # Looked up at call time so the function can be defined before ee_fc exists.
        collection = ee_fc
    return image.sampleRegions(collection=collection, scale=scale)

# Extract raster values using the rasterExtraction helper defined earlier in
# this cell, rather than repeating its sampleRegions call inline.
results = rasterExtraction(all_bands_image)

# Get the information from the feature collection (evaluates the request and
# brings the result back as a plain Python dict)
sample_result = results.getInfo()

# Surface any mismatch between input locations and returned samples.
# NOTE(review): sampleRegions appears to omit points whose pixels are masked;
# without this check such locations would disappear from the table unnoticed.
n_sampled = len(sample_result['features'])
if n_sampled != len(plot_df):
    print(f"Warning: {n_sampled} of {len(plot_df)} locations returned sampled values")

# Create DataFrame (feature properties become 'properties.<name>' columns)
df = pd.json_normalize(sample_result['features'])

In [29]:
# Remove 'properties.' prefix from column names.
# The pattern is anchored and the dot is escaped so only a literal leading
# 'properties.' is removed; the result no longer depends on the default value
# of the `regex` argument of pandas' str.replace.
df.columns = df.columns.map(lambda name: re.sub(r'^properties\.', '', name))

# Update column names: strip any leading non-letter characters (the prefix
# that toBands() puts in front of each band name)
df.columns = df.columns.map(lambda name: re.sub(r'^[^a-zA-Z]*', '', name))

# Input coordinates are not needed in the exported table
coord_cols = ['X', 'Y']

# Specify the columns to be removed (feature bookkeeping fields)
cols_to_remove = ['type', 'geometry', 'id']

# Remove the specified columns. errors='ignore' keeps this cell re-runnable:
# on a second execution the columns are already gone and nothing is raised.
df = df.drop(columns=coord_cols + cols_to_remove, errors='ignore')

# Specify the columns to be moved to the front
cols_to_move = ['Location']

# Get the remaining column names and combine with the list above
new_order = cols_to_move + [c for c in df.columns if c not in cols_to_move]

# Reorder the columns
df = df.reindex(columns=new_order)

df.head(5)

Unnamed: 0,Location,nitrogen_0-5cm_mean,nitrogen_100-200cm_mean,nitrogen_15-30cm_mean,nitrogen_30-60cm_mean,nitrogen_5-15cm_mean,nitrogen_60-100cm_mean,soc_0-5cm_mean,soc_100-200cm_mean,soc_15-30cm_mean,...,sand_15-30cm_mean,sand_30-60cm_mean,sand_5-15cm_mean,sand_60-100cm_mean,silt_0-5cm_mean,silt_100-200cm_mean,silt_15-30cm_mean,silt_30-60cm_mean,silt_5-15cm_mean,silt_60-100cm_mean
0,AGUGLIANO,2654,849,1640,1642,1230,973,459,71,199,...,103,115,89,121,470,502,477,491,478,487
1,ARGELATO,4837,1095,1565,1368,1600,1133,396,56,148,...,69,85,61,92,499,517,527,533,484,524
2,CAMMARATA,2502,912,1449,1172,1728,871,447,51,164,...,294,312,234,314,450,396,398,403,427,404
3,FIORENZUOLA,4320,1063,1423,1045,1734,1191,575,72,129,...,178,185,152,185,482,449,456,471,501,479
4,FOGGIA,2978,417,1335,957,1373,660,307,37,121,...,185,174,207,169,439,404,406,382,438,383


In [30]:
# Write the cleaned table to 'soil_data.csv' (relative to the current working
# directory); index=False leaves the DataFrame's row index out of the file.
df.to_csv('soil_data.csv', index=False)