<a href="https://colab.research.google.com/github/HEM2058/sentinelhub_remote_sensing/blob/main/SOC_estimation_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install sentinelhub

Collecting sentinelhub
  Downloading sentinelhub-3.10.1-py3-none-any.whl (245 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m245.4/245.4 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting aenum>=2.1.4 (from sentinelhub)
  Downloading aenum-3.1.15-py3-none-any.whl (137 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m137.6/137.6 kB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
Collecting dataclasses-json (from sentinelhub)
  Downloading dataclasses_json-0.6.4-py3-none-any.whl (28 kB)
Collecting tomli-w (from sentinelhub)
  Downloading tomli_w-1.0.0-py3-none-any.whl (6.0 kB)
Collecting utm (from sentinelhub)
  Downloading utm-0.7.0.tar.gz (8.7 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json->sentinelhub)
  Downloading marshmallow-3.21.1-py3-none-any.whl (49 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.4/49.4 kB[0m [31m4.0 MB/s[0m eta [36m0:

In [9]:
import os

import pandas as pd
from sentinelhub import SentinelHubRequest, DataCollection, MimeType, CRS, SHConfig, BBox

# Credentials
# SECURITY: OAuth client credentials must never be committed in a notebook.
# They are read from the environment instead; anything previously published
# in this notebook's history should be revoked and re-issued.
# In Colab, set them in a private (uncommitted) cell, e.g.
#   os.environ['SH_CLIENT_ID'] = getpass.getpass('client id: ')
config = SHConfig()
config.sh_client_id = os.environ.get('SH_CLIENT_ID', config.sh_client_id)
config.sh_client_secret = os.environ.get('SH_CLIENT_SECRET', config.sh_client_secret)
if not (config.sh_client_id and config.sh_client_secret):
    raise RuntimeError(
        "Sentinel Hub credentials are missing: set the SH_CLIENT_ID and "
        "SH_CLIENT_SECRET environment variables before running this cell."
    )

# Half-width (in degrees) of the square window requested around each sample point.
BBOX_HALF_WIDTH_DEG = 0.0001

# Acquisition window (start, end) passed to the Sentinel-2 L2A request.
TIME_INTERVAL = ('2023-11-24', '2024-01-24')

# Evalscript
# Runs server-side on Sentinel Hub: reads the red (B04) and near-infrared (B08)
# bands and returns a single FLOAT32 NDVI band.
evalscript = """
//VERSION=3

function setup() {
  return {
    input: ["B04", "B08"], // Red and NIR bands
    output: { bands: 1,
    sampleType: "FLOAT32"  } // NDVI will be output as a single band
  };
}

function evaluatePixel(sample) {
  // Calculate NDVI
  var ndvi = (sample.B08 - sample.B04) / (sample.B08 + sample.B04);

  // Return NDVI value
  return [ndvi];
}
"""

# List of coordinates with SOC stock values
coordinates = [
    {"Latitude": 28.27187, "Longitude": 83.88414, "soc_stock": 31.96053695678711},
    {"Latitude": 28.29307, "Longitude": 83.88256, "soc_stock": 11.860776901245117},
    {"Latitude": 28.30454, "Longitude": 83.89242, "soc_stock": 91.91413879394531},
    {"Latitude": 28.30732, "Longitude": 83.9224, "soc_stock": 31.96053695678711},
    {"Latitude": 28.30662, "Longitude": 83.97526, "soc_stock": 11.860776901245117},
    {"Latitude": 28.26562, "Longitude": 83.94449, "soc_stock": 32.85845184326172},
    {"Latitude": 28.27222, "Longitude": 83.94765, "soc_stock": 29.808000564575195}
]

# Create an empty list to store the results
results = []

# Loop over each coordinate: one 1x1-pixel NDVI request per sample point
for coord in coordinates:
    # Define bounding box around the point of interest (lon/lat order, WGS84)
    bbox = BBox(
        bbox=[coord["Longitude"] - BBOX_HALF_WIDTH_DEG, coord["Latitude"] - BBOX_HALF_WIDTH_DEG,
              coord["Longitude"] + BBOX_HALF_WIDTH_DEG, coord["Latitude"] + BBOX_HALF_WIDTH_DEG],
        crs=CRS.WGS84
    )

    # Create SentinelHub request with TIFF format and FLOAT32 sample type
    request = SentinelHubRequest(
        evalscript=evalscript,
        input_data=[
            SentinelHubRequest.input_data(
                data_collection=DataCollection.SENTINEL2_L2A,
                time_interval=TIME_INTERVAL,
            ),
        ],
        responses=[
            SentinelHubRequest.output_response('default', MimeType.TIFF),  # Use TIFF format
        ],
        bbox=bbox,
        size=[1, 1],  # Set size to 1x1 pixel to get only one pixel value
        config=config,
    )

    # Get data from the request
    response = request.get_data()

    # Append the results to the list.
    # An empty response is recorded as None so the row is kept but flagged as
    # missing. NOTE(review): the [0][0][0] indexing assumes the first item of
    # the response is a 2-D (height, width) array for this single-band request
    # — confirm against the sentinelhub version in use.
    results.append({
        'latitude': coord['Latitude'],
        'longitude': coord['Longitude'],
        'soc_stock': coord['soc_stock'],
        'ndvi': response[0][0][0] if response else None  # Extract NDVI value from response
    })

# Convert the results list to a DataFrame
df = pd.DataFrame(results)

# Save the DataFrame to an Excel file (read back by the model-training cell)
df.to_excel('output.xlsx', index=False)


In [10]:
import joblib
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Step 1: Load the data (NDVI / SOC samples written by the data-collection cell)
df = pd.read_excel('output.xlsx')

# Step 2: Prepare the data
# Rows whose NDVI could not be retrieved are stored as empty cells and come
# back as NaN; LinearRegression rejects NaN input, so drop incomplete rows
# before building the feature matrix.
samples = df.dropna(subset=['ndvi', 'soc_stock'])
X = samples[['ndvi']]  # Input features (NDVI)
y = samples['soc_stock']  # Target variable (SOC stock)

# Step 3: Split the data into training and testing sets
# (fixed random_state keeps the split reproducible across reruns)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Define the model
model = LinearRegression()

# Step 5: Train the model
model.fit(X_train, y_train)

# Step 6: Evaluate the model (mean squared error on both splits)
train_mse = mean_squared_error(y_train, model.predict(X_train))
test_mse = mean_squared_error(y_test, model.predict(X_test))
print(f'Training MSE: {train_mse}')
print(f'Testing MSE: {test_mse}')

# Step 7: Optionally, fine-tune the model parameters and retrain if necessary

# Step 8: Save the trained model (the returned list of written paths is the cell output)
joblib.dump(model, 'soc_prediction_model.pkl')


Training MSE: 723.6782060894732
Testing MSE: 448.9356300353429


['soc_prediction_model.pkl']

In [11]:
import joblib
import pandas as pd

# Load the trained model
# NOTE: joblib.load unpickles the file, which can execute arbitrary code —
# only load model files produced by this notebook or another trusted source.
model = joblib.load('soc_prediction_model.pkl')

# NDVI value for prediction
ndvi_value = 0.2

# Predict SOC stock value
# The model was fitted on a DataFrame with an 'ndvi' column, so the query is
# passed with the same column name; a bare nested list makes scikit-learn warn
# that the input lacks the feature names seen during fit.
features = pd.DataFrame({'ndvi': [ndvi_value]})
soc_predicted = model.predict(features)

print(f"Predicted SOC stock value for NDVI {ndvi_value}: {soc_predicted[0]}")


Predicted SOC stock value for NDVI 0.2: 49.61433875626267


