In [None]:
#VV
# Train a decision-tree regressor on elevation, VV backscatter and the LULC
# category, then score it on a 20% hold-out split.
import pandas as pd
from scipy.stats import pearsonr
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.tree import DecisionTreeRegressor

VV_COLUMN = 'Sigma0_VV_db (31.01.2024)'
FEATURE_COLUMNS = ['Elevation (in m above MSL)', VV_COLUMN, 'LULC']

# Load your preprocessed SAR image data
data = pd.read_csv('/content/Soilmoisture gt data1.csv')

# Select relevant columns excluding Sigma0_VH_db (31.01.2024).
# .copy() detaches X from `data` so later column assignments do not trigger
# SettingWithCopyWarning.
X = data[FEATURE_COLUMNS].copy()
y = data['Moisture Reading']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train = X_train.copy()
X_test = X_test.copy()

# Handle missing values: the mean is learned on the training rows only and
# then re-used for the test rows.
imputer = SimpleImputer(strategy='mean')
X_train[VV_COLUMN] = imputer.fit_transform(X_train[[VV_COLUMN]]).ravel()
X_test[VV_COLUMN] = imputer.transform(X_test[[VV_COLUMN]]).ravel()

# Convert categorical columns to one-hot encoding.
# handle_unknown='ignore' keeps transform() from raising when the test split
# contains an LULC category that never appeared in the training split.
encoder = ColumnTransformer(
    transformers=[
        ('onehot', OneHotEncoder(handle_unknown='ignore'), ['LULC'])
    ],
    remainder='passthrough'
)
X_train_encoded = encoder.fit_transform(X_train)
X_test_encoded = encoder.transform(X_test)

# Create a Decision Tree regression model
dt_model = DecisionTreeRegressor(random_state=42)

# Drop rows with NaN values in y_train.
# A plain NumPy boolean mask is used because the encoded matrix is positional
# (NumPy array or SciPy sparse matrix), not index-aligned like a Series.
train_has_target = y_train.notna().to_numpy()
X_train_encoded = X_train_encoded[train_has_target]
y_train = y_train[train_has_target]

# Train the model
dt_model.fit(X_train_encoded, y_train)

# Drop rows with NaN values in y_test
test_has_target = y_test.notna().to_numpy()
X_test_encoded = X_test_encoded[test_has_target]
y_test = y_test[test_has_target]

# Make predictions
y_pred = dt_model.predict(X_test_encoded)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Squared Error:", mse)
print("R-squared:", r2)

# Calculate the coefficient of correlation
correlation_coefficient, _ = pearsonr(y_pred, y_test)

print("Coefficient of correlation:", correlation_coefficient)


Mean Squared Error: 9.723427450980395
R-squared: 0.8910621753837689
Coefficient of correlation: 0.9621517467298256


In [None]:
import pickle

# Define the code as a string.
# The stored script calls export_text(), so that name is imported alongside
# DecisionTreeRegressor; without it the saved script fails with NameError.
code = '''
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor, export_text
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

# Load your preprocessed SAR image data
data = pd.read_csv('/content/Soilmoisture gt data1.csv')

# Select relevant columns excluding Sigma0_VH_db (31.01.2024)
X = data[['Elevation (in m above MSL)', 'Sigma0_VV_db (31.01.2024)', 'LULC']]
y = data['Moisture Reading']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Handle missing values
imputer = SimpleImputer(strategy='mean')
X_train[['Sigma0_VV_db (31.01.2024)']] = imputer.fit_transform(X_train[['Sigma0_VV_db (31.01.2024)']])
X_test[['Sigma0_VV_db (31.01.2024)']] = imputer.transform(X_test[['Sigma0_VV_db (31.01.2024)']])

# Convert categorical columns to one-hot encoding
encoder = ColumnTransformer(
    transformers=[
        ('onehot', OneHotEncoder(), ['LULC'])
    ],
    remainder='passthrough'
)
X_train_encoded = encoder.fit_transform(X_train)
X_test_encoded = encoder.transform(X_test)

# Create a Decision Tree regression model
dt_model = DecisionTreeRegressor(random_state=42)

# Drop rows with NaN values in y_train
nan_indices = y_train.index[y_train.isna()]
X_train_encoded = X_train_encoded[~y_train.isna()]
y_train = y_train.dropna()

# Train the model
dt_model.fit(X_train_encoded, y_train)

# Drop rows with NaN values in y_test
nan_indices = y_test.index[y_test.isna()]
X_test_encoded = X_test_encoded[~y_test.isna()]
y_test = y_test.dropna()

# Make predictions
y_pred = dt_model.predict(X_test_encoded)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Squared Error:", mse)
print("R-squared:", r2)

from scipy.stats import pearsonr

# Calculate the coefficient of correlation
correlation_coefficient, _ = pearsonr(y_pred, y_test)

print("Coefficient of correlation:", correlation_coefficient)

# Export the decision tree as text
feature_names_out = encoder.get_feature_names_out()
feature_names_list = list(feature_names_out)
tree_rules = export_text(dt_model, feature_names=feature_names_list)
print(tree_rules)
'''

# Save the code to a pickle file (this stores the source text only, not the
# fitted model).
with open('projectcode.pkl', 'wb') as f:
    pickle.dump(code, f)


In [None]:
pip install rasterio

Collecting rasterio
  Downloading rasterio-1.3.10-cp310-cp310-manylinux2014_x86_64.whl (21.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.5/21.5 MB[0m [31m49.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting affine (from rasterio)
  Downloading affine-2.4.0-py3-none-any.whl (15 kB)
Collecting snuggs>=1.4.1 (from rasterio)
  Downloading snuggs-1.4.7-py3-none-any.whl (5.4 kB)
Installing collected packages: snuggs, affine, rasterio
Successfully installed affine-2.4.0 rasterio-1.3.10 snuggs-1.4.7


In [None]:
# Fit the decision tree on every labelled ground-truth row and predict the
# moisture reading for the rows of a second CSV.
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

VV_COLUMN = 'Sigma0_VV_db (31.01.2024)'
FEATURE_COLUMNS = ['Elevation (in m above MSL)', VV_COLUMN, 'LULC']

# Load your preprocessed SAR image data
data = pd.read_csv('/content/Soilmoisture gt data1.csv')

# Select relevant columns excluding Sigma0_VH_db (31.01.2024).
# .copy() detaches X from `data`, which avoids the SettingWithCopyWarning
# raised when a column of a slice is assigned.
X = data[FEATURE_COLUMNS].copy()
y = data['Moisture Reading']

# Handle missing values in features
imputer = SimpleImputer(strategy='mean')
X[VV_COLUMN] = imputer.fit_transform(X[[VV_COLUMN]]).ravel()

# Handle missing values in target variable y: rows without a measured
# moisture reading are dropped instead of being filled with the mean, so the
# tree never learns (and later predicts) a fabricated label.
has_target = y.notna()
X = X.loc[has_target]
y = y.loc[has_target]

# Convert categorical columns to one-hot encoding.
# handle_unknown='ignore' lets the new CSV contain LULC categories that are
# absent from the training data without transform() raising.
encoder = ColumnTransformer(
    transformers=[
        ('onehot', OneHotEncoder(handle_unknown='ignore'), ['LULC'])
    ],
    remainder='passthrough'
)
X_encoded = encoder.fit_transform(X)

# Create a Decision Tree regression model
dt_model = DecisionTreeRegressor(random_state=42)

# Train the model
dt_model.fit(X_encoded, y)

# Load new data from CSV
new_data = pd.read_csv('/content/12moisturereading.csv')

# Handle missing values in new data (re-uses the mean learned above)
new_data[VV_COLUMN] = imputer.transform(new_data[[VV_COLUMN]]).ravel()

# Only the columns the encoder was fitted on are passed through, in the same
# order, so any extra columns in the CSV cannot leak into the feature matrix.
new_data_encoded = encoder.transform(new_data[FEATURE_COLUMNS])

# Make prediction
y_pred_new = dt_model.predict(new_data_encoded)

# Print or use the prediction
print("Predicted moisture readings for new data:")
print(y_pred_new)

Predicted moisture readings for new data:
[11.925      16.66666667  9.25       15.93333333 11.925      37.23333333
 19.26666667 18.93333333 16.06666667 20.03333333 16.66666667 16.66666667
 19.26666667 34.075       4.73333333  6.4         6.4        11.925
  6.4         6.4         6.4         6.4         6.4         6.4
 19.26666667 34.075      39.1        17.81481928  9.25       14.15
 37.23333333 39.1        15.93333333 13.14       13.14       15.93333333
 16.6        16.6        16.6         9.975       9.975      16.6
  9.975      17.81481928 19.2         9.25        4.73333333  4.73333333
 11.925      11.925      19.2        19.2        37.23333333 18.93333333
 20.03333333 16.06666667 19.26666667 19.26666667 19.26666667 18.93333333
 16.06666667 18.93333333 18.93333333 18.93333333 20.03333333 17.81481928
 34.075      34.075      39.1        19.1        19.1        22.4
 22.4        16.66666667 16.66666667 16.66666667 19.1        11.925
 17.81481928 19.1         9.25       22.4     

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[['Sigma0_VV_db (31.01.2024)']] = imputer.fit_transform(X[['Sigma0_VV_db (31.01.2024)']])


In [None]:
# Wrap the predicted values in a single-column frame so they can sit next to
# the input rows.
predictions_df = pd.Series(y_pred_new, name='Predicted Moisture Reading').to_frame()

# Place the prediction column to the right of the new_data columns.
predicted_data = pd.concat((new_data, predictions_df), axis='columns')

# Write the combined table to disk without the index column.
output_csv_path = '/content/predicted_moisture_readings.csv'
predicted_data.to_csv(output_csv_path, index=False)


In [None]:
# Re-load the new data, predict once, and store a flag column next to it.
# Relies on `pd`, `imputer`, `encoder` and `dt_model` from the training cell.
vv_column = 'Sigma0_VV_db (31.01.2024)'
feature_columns = ['Elevation (in m above MSL)', vv_column, 'LULC']

# Load new data from CSV
new_data = pd.read_csv('/content/12moisturereading.csv')

# Handle missing values in new data
new_data[vv_column] = imputer.transform(new_data[[vv_column]]).ravel()

# Pass only the columns the encoder was fitted on, in the fitted order.
new_data_encoded = encoder.transform(new_data[feature_columns])

# Make prediction (computed once and re-used for the flag below)
y_pred_new = dt_model.predict(new_data_encoded)

# NOTE: every row receives a prediction; this flag only records whether the
# predicted value is positive, it does not identify a subset of rows.
is_used = y_pred_new > 0

# Add a new column to new_data holding that flag
new_data['Used for Prediction'] = is_used

# Print or use the prediction
print("Predicted moisture readings for new data:")
print(y_pred_new)

# Save new_data with the added column
new_data.to_csv('/content/new_data_with_prediction_indicator.csv', index=False)


Predicted moisture readings for new data:
[11.925      16.66666667  9.25       15.93333333 11.925      37.23333333
 19.26666667 18.93333333 16.06666667 20.03333333 16.66666667 16.66666667
 19.26666667 34.075       4.73333333  6.4         6.4        11.925
  6.4         6.4         6.4         6.4         6.4         6.4
 19.26666667 34.075      39.1        17.81481928  9.25       14.15
 37.23333333 39.1        15.93333333 13.14       13.14       15.93333333
 16.6        16.6        16.6         9.975       9.975      16.6
  9.975      17.81481928 19.2         9.25        4.73333333  4.73333333
 11.925      11.925      19.2        19.2        37.23333333 18.93333333
 20.03333333 16.06666667 19.26666667 19.26666667 19.26666667 18.93333333
 16.06666667 18.93333333 18.93333333 18.93333333 20.03333333 17.81481928
 34.075      34.075      39.1        19.1        19.1        22.4
 22.4        16.66666667 16.66666667 16.66666667 19.1        11.925
 17.81481928 19.1         9.25       22.4     

In [None]:
# Train/evaluate the decision tree on a hold-out split and export exactly the
# test rows that were scored.
import pandas as pd
from scipy.stats import pearsonr
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

VV_COLUMN = 'Sigma0_VV_db (31.01.2024)'
FEATURE_COLUMNS = ['Elevation (in m above MSL)', VV_COLUMN, 'LULC']

# Load your preprocessed SAR image data
data = pd.read_csv('/content/Soilmoisture gt data1.csv')

# Select relevant columns excluding Sigma0_VH_db (31.01.2024).
# .copy() detaches X from `data` so column assignment below is warning-free.
X = data[FEATURE_COLUMNS].copy()
y = data['Moisture Reading']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train = X_train.copy()
X_test = X_test.copy()

# Handle missing values (mean learned on the training rows only)
imputer = SimpleImputer(strategy='mean')
X_train[VV_COLUMN] = imputer.fit_transform(X_train[[VV_COLUMN]]).ravel()
X_test[VV_COLUMN] = imputer.transform(X_test[[VV_COLUMN]]).ravel()

# Convert categorical columns to one-hot encoding.
# handle_unknown='ignore' tolerates LULC categories that only occur in the
# test split.
encoder = ColumnTransformer(
    transformers=[
        ('onehot', OneHotEncoder(handle_unknown='ignore'), ['LULC'])
    ],
    remainder='passthrough'
)
X_train_encoded = encoder.fit_transform(X_train)
X_test_encoded = encoder.transform(X_test)

# Create a Decision Tree regression model
dt_model = DecisionTreeRegressor(random_state=42)

# Drop rows with NaN values in y_train (positional NumPy mask, because the
# encoded matrix carries no pandas index)
train_has_target = y_train.notna().to_numpy()
X_train_encoded = X_train_encoded[train_has_target]
y_train = y_train[train_has_target]

# Train the model
dt_model.fit(X_train_encoded, y_train)

# Drop rows with NaN values in y_test, keeping the raw feature rows in step
# so the export below matches what was actually scored
test_has_target = y_test.notna().to_numpy()
X_test_encoded = X_test_encoded[test_has_target]
X_test_used = X_test[test_has_target]
y_test = y_test[test_has_target]

# Make predictions
y_pred = dt_model.predict(X_test_encoded)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Squared Error:", mse)
print("R-squared:", r2)

# Calculate the coefficient of correlation
correlation_coefficient, _ = pearsonr(y_pred, y_test)

print("Coefficient of correlation:", correlation_coefficient)

# Print the rows used for predicting soil moisture (test rows whose target
# was missing are excluded, since they were never scored)
print("Rows used for predicting soil moisture:")
print(X_test_used)

# Concatenate the scored feature rows and their targets along the columns axis
test_data = pd.concat([X_test_used, y_test], axis=1)

# Save the concatenated dataframe to a CSV file
test_data.to_csv('/content/test_data_used_for_prediction.csv', index=False)




Mean Squared Error: 9.723427450980395
R-squared: 0.8910621753837689
Coefficient of correlation: 0.9621517467298256
Rows used for predicting soil moisture:
    Elevation (in m above MSL)  Sigma0_VV_db (31.01.2024)             LULC
76                      542.65                      -9.84      Wheat field
0                       425.90                     -14.99    Mango Orchard
26                      451.89                      -7.03   Ploughed field
22                      444.36                      -8.05    Mango Orchard
12                      389.23                     -10.40      Wheat field
67                      451.89                     -10.97      Wheat field
10                      450.90                     -11.34  Sugarcane field
18                      444.36                      -8.05    Mango Orchard
4                       428.91                      -7.44    Mango Orchard
68                      451.89                      -7.03   Ploughed field
84                  

In [None]:
# Compare two single-band rasters pixel by pixel (RMSE and R-squared),
# ignoring fill / nodata pixels.
from osgeo import gdal
import numpy as np
from skimage.transform import resize

# Pixels at or below this value are treated as fill.
# NOTE(review): assumes no genuine pixel value is <= -9999 - confirm.
NODATA_FLOOR = -9999.0

# Load the actual and predicted TIFF files
actual_ds = gdal.Open('/content/final31124map.tif')
predicted_ds = gdal.Open('/content/12124mapfinal.tif')
if actual_ds is None or predicted_ds is None:
    # gdal.Open returns None instead of raising when a file cannot be opened
    raise FileNotFoundError("Could not open one of the input rasters")

actual_band = actual_ds.GetRasterBand(1)
predicted_band = predicted_ds.GetRasterBand(1)

# Read raster data as float64 arrays so squaring large values cannot overflow
# the way it does in float32
actual_array = actual_band.ReadAsArray().astype(np.float64)
predicted_array = predicted_band.ReadAsArray().astype(np.float64)

# Resize actual_array to match the shape of predicted_array.
# Nearest-neighbour (order=0, no anti-aliasing) copies pixel values unchanged,
# so fill values are not blended into neighbouring valid pixels, and
# preserve_range=True keeps the original value scale.
# NOTE(review): this only matches array shapes and does not use the rasters'
# georeferencing - confirm both files cover the same extent.
actual_array_resized = resize(
    actual_array,
    predicted_array.shape,
    order=0,
    anti_aliasing=False,
    preserve_range=True,
)

# Build the mask of pixels that hold real data in BOTH rasters
valid = np.isfinite(actual_array_resized) & np.isfinite(predicted_array)
valid &= (actual_array_resized > NODATA_FLOOR) & (predicted_array > NODATA_FLOOR)
for band, values in ((actual_band, actual_array_resized), (predicted_band, predicted_array)):
    declared_nodata = band.GetNoDataValue()
    if declared_nodata is not None:
        valid &= values != declared_nodata
if not valid.any():
    raise ValueError("No overlapping valid pixels to compare")

actual_valid = actual_array_resized[valid]
predicted_valid = predicted_array[valid]

# Calculate RMSE
rmse = np.sqrt(np.mean((predicted_valid - actual_valid) ** 2))

# Calculate R-squared (undefined when the actual values have zero variance)
mean_actual = np.mean(actual_valid)
ss_total = np.sum((actual_valid - mean_actual) ** 2)
ss_res = np.sum((actual_valid - predicted_valid) ** 2)
r_squared = 1 - (ss_res / ss_total) if ss_total > 0 else float('nan')

print("Root Mean Squared Error:", rmse)
print("R-squared:", r_squared)

# Close the datasets
actual_band = None
predicted_band = None
actual_ds = None
predicted_ds = None



Root Mean Squared Error: 1.959792
R-squared: 1.0


In [None]:
# Compare two single-band rasters pixel by pixel (RMSE and R-squared),
# ignoring fill / nodata pixels so the metrics stay finite.
from osgeo import gdal
import numpy as np
from skimage.transform import resize

# Pixels at or below this value are treated as fill.
# NOTE(review): assumes no genuine pixel value is <= -9999 - confirm.
NODATA_FLOOR = -9999.0

# Load the actual and predicted TIFF files
actual_ds = gdal.Open('/content/E04_SAR_MRS_27JAN2024_027004957871_10765_STGCSMHTD_13631_08_D_R_078_031_00_500m.tif')
predicted_ds = gdal.Open('/content/12124mapfinal.tif')
if actual_ds is None or predicted_ds is None:
    # gdal.Open returns None instead of raising when a file cannot be opened
    raise FileNotFoundError("Could not open one of the input rasters")

actual_band = actual_ds.GetRasterBand(1)
predicted_band = predicted_ds.GetRasterBand(1)

# Read raster data as float64 arrays so squaring large values cannot overflow
# the way it does in float32
actual_array = actual_band.ReadAsArray().astype(np.float64)
predicted_array = predicted_band.ReadAsArray().astype(np.float64)

# Resize actual_array to match the shape of predicted_array.
# Nearest-neighbour (order=0, no anti-aliasing) copies pixel values unchanged,
# so fill values are not blended into neighbouring valid pixels, and
# preserve_range=True keeps the original value scale.
# NOTE(review): this only matches array shapes and does not use the rasters'
# georeferencing - confirm both files cover the same extent.
actual_array_resized = resize(
    actual_array,
    predicted_array.shape,
    order=0,
    anti_aliasing=False,
    preserve_range=True,
)

# Build the mask of pixels that hold real data in BOTH rasters
valid = np.isfinite(actual_array_resized) & np.isfinite(predicted_array)
valid &= (actual_array_resized > NODATA_FLOOR) & (predicted_array > NODATA_FLOOR)
for band, values in ((actual_band, actual_array_resized), (predicted_band, predicted_array)):
    declared_nodata = band.GetNoDataValue()
    if declared_nodata is not None:
        valid &= values != declared_nodata
if not valid.any():
    raise ValueError("No overlapping valid pixels to compare")

actual_valid = actual_array_resized[valid]
predicted_valid = predicted_array[valid]

# Calculate RMSE
rmse = np.sqrt(np.mean((predicted_valid - actual_valid) ** 2))

# Calculate R-squared (undefined when the actual values have zero variance)
mean_actual = np.mean(actual_valid)
ss_total = np.sum((actual_valid - mean_actual) ** 2)
ss_res = np.sum((actual_valid - predicted_valid) ** 2)
r_squared = 1 - (ss_res / ss_total) if ss_total > 0 else float('nan')

print("Root Mean Squared Error:", rmse)
print("R-squared:", r_squared)

# Close the datasets
actual_band = None
predicted_band = None
actual_ds = None
predicted_ds = None



Root Mean Squared Error: inf
R-squared: -inf


  rmse = np.sqrt(np.mean((predicted_array - actual_array_resized) ** 2))
  ss_res = np.sum((actual_array_resized - predicted_array) ** 2)


In [None]:
# Compare two single-band rasters pixel by pixel (RMSE and R-squared),
# ignoring fill / nodata pixels, and print each band's statistics.
from osgeo import gdal
import numpy as np
from skimage.transform import resize

# Pixels at or below this value are treated as fill.
# NOTE(review): assumes no genuine pixel value is <= -9999 - confirm.
NODATA_FLOOR = -9999.0

# Load the actual and predicted TIFF files
actual_ds = gdal.Open('/content/E04_SAR_MRS_27JAN2024_027004957871_10765_STGCSMHTD_13631_08_D_R_078_031_00_500m.tif')
predicted_ds = gdal.Open('/content/12124mapfinal.tif')
if actual_ds is None or predicted_ds is None:
    # gdal.Open returns None instead of raising when a file cannot be opened
    raise FileNotFoundError("Could not open one of the input rasters")

actual_band = actual_ds.GetRasterBand(1)
predicted_band = predicted_ds.GetRasterBand(1)

# Read raster data as float64 arrays so squaring large values cannot overflow
# the way it does in float32
actual_array = actual_band.ReadAsArray().astype(np.float64)
predicted_array = predicted_band.ReadAsArray().astype(np.float64)

# Resize actual_array to match the shape of predicted_array.
# Nearest-neighbour (order=0, no anti-aliasing) copies pixel values unchanged,
# so fill values are not blended into neighbouring valid pixels, and
# preserve_range=True keeps the original value scale.
# NOTE(review): this only matches array shapes and does not use the rasters'
# georeferencing - confirm both files cover the same extent.
actual_array_resized = resize(
    actual_array,
    predicted_array.shape,
    order=0,
    anti_aliasing=False,
    preserve_range=True,
)

# Build the mask of pixels that hold real data in BOTH rasters
valid = np.isfinite(actual_array_resized) & np.isfinite(predicted_array)
valid &= (actual_array_resized > NODATA_FLOOR) & (predicted_array > NODATA_FLOOR)
for band, values in ((actual_band, actual_array_resized), (predicted_band, predicted_array)):
    declared_nodata = band.GetNoDataValue()
    if declared_nodata is not None:
        valid &= values != declared_nodata
if not valid.any():
    raise ValueError("No overlapping valid pixels to compare")

actual_valid = actual_array_resized[valid]
predicted_valid = predicted_array[valid]

# Calculate RMSE
rmse = np.sqrt(np.mean((predicted_valid - actual_valid) ** 2))

# Calculate R-squared (undefined when the actual values have zero variance)
mean_actual = np.mean(actual_valid)
ss_total = np.sum((actual_valid - mean_actual) ** 2)
ss_res = np.sum((actual_valid - predicted_valid) ** 2)
r_squared = 1 - (ss_res / ss_total) if ss_total > 0 else float('nan')

print("Root Mean Squared Error:", rmse)
print("R-squared:", r_squared)

# Get per-band statistics as reported by GDAL
actual_stats = actual_band.GetStatistics(0, 1)
predicted_stats = predicted_band.GetStatistics(0, 1)

print("Actual Statistics:", actual_stats)
print("Predicted Statistics:", predicted_stats)

# Close the datasets
actual_band = None
predicted_band = None
actual_ds = None
predicted_ds = None


Root Mean Squared Error: inf
R-squared: -inf
Actual Statistics: [-9999.0, 0.6913005709648132, -8561.824137266301, 3507.8465414448237]
Predicted Statistics: [4.881261825561523, 39.08750534057617, 17.442583694050835, 3.829281519086117]


  rmse = np.sqrt(np.mean((predicted_array - actual_array_resized) ** 2))
  ss_res = np.sum((actual_array_resized - predicted_array) ** 2)


In [None]:
from osgeo import gdal

# Path of the raster whose first band is inspected
tif_path = '/content/E04_SAR_MRS_27JAN2024_027004957871_10765_STGCSMHTD_13631_08_D_R_078_031_00_500m.tif'

# Open the file and pull band 1 into an array
ds = gdal.Open(tif_path)
array = ds.GetRasterBand(1).ReadAsArray()

# Show the pixel values
print(array)

# Drop the dataset reference to close it
ds = None


[[-9999. -9999. -9999. ... -9999. -9999. -9999.]
 [-9999. -9999. -9999. ... -9999. -9999. -9999.]
 [-9999. -9999. -9999. ... -9999. -9999. -9999.]
 ...
 [-9999. -9999. -9999. ... -9999. -9999. -9999.]
 [-9999. -9999. -9999. ... -9999. -9999. -9999.]
 [-9999. -9999. -9999. ... -9999. -9999. -9999.]]


In [None]:
from osgeo import gdal

# Path of the raster whose first band is inspected
tif_path = '/content/12124mapfinal.tif'

# Open the file and pull band 1 into an array
ds = gdal.Open(tif_path)
array = ds.GetRasterBand(1).ReadAsArray()

# Show the pixel values
print(array)

# Drop the dataset reference to close it
ds = None


[[ 1.8141230e+01  1.8156471e+01  1.8172104e+01 ...  2.4695681e+01
   2.4700022e+01  2.4704197e+01]
 [ 1.8138565e+01  1.8153961e+01  1.8169760e+01 ...  2.4709770e+01
   2.4713963e+01  2.4717989e+01]
 [ 1.8135509e+01  1.8151047e+01  1.8166998e+01 ...  2.4723969e+01
   2.4728008e+01  2.4731884e+01]
 ...
 [ 1.4121479e+01  1.4127474e+01  1.4134059e+01 ...  1.2564716e+01
   1.2569582e+01  1.2574390e+01]
 [ 1.4151823e+01  1.4157906e+01  1.4164560e+01 ...  1.2562207e+01
   1.2567040e+01  1.2571819e+01]
 [-3.4028235e+38 -3.4028235e+38 -3.4028235e+38 ... -3.4028235e+38
  -3.4028235e+38 -3.4028235e+38]]


In [None]:
from osgeo import gdal

# Path of the raster whose metadata is listed
tif_path = '/content/E04_SAR_MRS_27JAN2024_027004957871_10765_STGCSMHTD_13631_08_D_R_078_031_00_500m.tif'

# Open the file and fetch its metadata mapping
ds = gdal.Open(tif_path)
metadata = ds.GetMetadata()

# Emit one "key: value" line per metadata entry
for key in metadata:
    value = metadata[key]
    print(f"{key}: {value}")

# Drop the dataset reference to close it
ds = None


AREA_OR_POINT: Area
TIFFTAG_SOFTWARE: MATLAB 9.12, Mapping Toolbox 5.3


In [None]:
# Compare two single-band rasters pixel by pixel (RMSE and R-squared),
# ignoring fill / nodata pixels so the metrics stay finite.
from osgeo import gdal
import numpy as np
from skimage.transform import resize

# Pixels at or below this value are treated as fill.
# NOTE(review): assumes no genuine pixel value is <= -9999 - confirm.
NODATA_FLOOR = -9999.0

# Load the actual and predicted TIFF files
actual_ds = gdal.Open('/content/E04_SAR_MRS_19JAN2024_019004443558_10644_STGCSMHTD_13462_09_D_R_079_030_00_500m.tif')
predicted_ds = gdal.Open('/content/12124mapfinal.tif')
if actual_ds is None or predicted_ds is None:
    # gdal.Open returns None instead of raising when a file cannot be opened
    raise FileNotFoundError("Could not open one of the input rasters")

actual_band = actual_ds.GetRasterBand(1)
predicted_band = predicted_ds.GetRasterBand(1)

# Read raster data as float64 arrays so squaring large values cannot overflow
# the way it does in float32
actual_array = actual_band.ReadAsArray().astype(np.float64)
predicted_array = predicted_band.ReadAsArray().astype(np.float64)

# Resize actual_array to match the shape of predicted_array.
# Nearest-neighbour (order=0, no anti-aliasing) copies pixel values unchanged,
# so fill values are not blended into neighbouring valid pixels, and
# preserve_range=True keeps the original value scale.
# NOTE(review): this only matches array shapes and does not use the rasters'
# georeferencing - confirm both files cover the same extent.
actual_array_resized = resize(
    actual_array,
    predicted_array.shape,
    order=0,
    anti_aliasing=False,
    preserve_range=True,
)

# Build the mask of pixels that hold real data in BOTH rasters
valid = np.isfinite(actual_array_resized) & np.isfinite(predicted_array)
valid &= (actual_array_resized > NODATA_FLOOR) & (predicted_array > NODATA_FLOOR)
for band, values in ((actual_band, actual_array_resized), (predicted_band, predicted_array)):
    declared_nodata = band.GetNoDataValue()
    if declared_nodata is not None:
        valid &= values != declared_nodata
if not valid.any():
    raise ValueError("No overlapping valid pixels to compare")

actual_valid = actual_array_resized[valid]
predicted_valid = predicted_array[valid]

# Calculate RMSE
rmse = np.sqrt(np.mean((predicted_valid - actual_valid) ** 2))

# Calculate R-squared (undefined when the actual values have zero variance)
mean_actual = np.mean(actual_valid)
ss_total = np.sum((actual_valid - mean_actual) ** 2)
ss_res = np.sum((actual_valid - predicted_valid) ** 2)
r_squared = 1 - (ss_res / ss_total) if ss_total > 0 else float('nan')

print("Root Mean Squared Error:", rmse)
print("R-squared:", r_squared)

# Close the datasets
actual_band = None
predicted_band = None
actual_ds = None
predicted_ds = None


Root Mean Squared Error: inf
R-squared: -inf


  rmse = np.sqrt(np.mean((predicted_array - actual_array_resized) ** 2))
  ss_res = np.sum((actual_array_resized - predicted_array) ** 2)
