In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split, cross_val_score
import xgboost as xgb
from xgboost import XGBRegressor

In [None]:
# Load the dataset that is split into train/test partitions in the next cell.
# The path is relative to the notebook's working directory.
base_df_resampled = pd.read_csv(filepath_or_buffer="filtered_df.csv")

In [None]:
# Train-test split: 80 % of the rows for fitting, 20 % held out, fixed seed so the
# partition is reproducible. Both partitions are re-indexed from 0 by rebinding
# (no in-place mutation), which makes this cell safe to re-run.
# NOTE(review): train_test_split shuffles rows at random. The data carries a
# `timestamp_device` column, so if rows are consecutive sensor readings, near-identical
# neighbours may end up on both sides of the split — confirm this is intended.
train_base, test_base = (
    partition.reset_index(drop=True)
    for partition in train_test_split(base_df_resampled, test_size=0.2, random_state=123)
)

#Predicting moisture at 10 cm depth

##From 20cm

In [None]:
# ==== 1. Separate features and targets ====
# NOTE: some of these names (e.g. X_train_full, features_to_exclude, X_train_20cm_only)
# are assigned again in other sections, so run this cell right before its model cell.

# Single-predictor design: moisture_20cm is the only input, moisture_10cm the target.
X_train_20cm_only, X_test_20cm_only = (part[['moisture_20cm']] for part in (train_base, test_base))
y_train_20cm_only, y_test_20cm_only = (part['moisture_10cm'] for part in (train_base, test_base))

# Full design: drop the target, every other depth sensor and the timestamp, leaving
# moisture_20cm plus the remaining columns (called meteorological features in this notebook).
features_to_exclude = [
    'moisture_10cm', 'moisture_30cm', 'moisture_40cm',
    'moisture_50cm', 'moisture_60cm', 'timestamp_device',
]
X_train_full, X_test_full = (part.drop(columns=features_to_exclude) for part in (train_base, test_base))
y_train_full, y_test_full = (part['moisture_10cm'] for part in (train_base, test_base))

# ==== 3. moisture_20cm + meteorological design matrices (second set of names) ====
# Same exclusion list as the full design above, so these frames hold the same columns
# as X_train_full / X_test_full. The earlier header called this an Extra Trees
# baseline, but no Extra Trees model is fitted in the visible cells.
features_to_exclude_partial = [
    'moisture_10cm', 'moisture_30cm', 'moisture_40cm',
    'moisture_50cm', 'moisture_60cm', 'timestamp_device',
]
X_train_20cm_meteo, X_test_20cm_meteo = (
    part.drop(columns=features_to_exclude_partial) for part in (train_base, test_base)
)

In [None]:
# Single-predictor model: XGBoost fitted on moisture_20cm alone
# (features and targets come from the setup cell above).
xgb_20cm = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_20cm.fit(X_train_20cm_only, y_train_20cm_only)
y_train_pred_20cm, y_test_pred_20cm = (
    xgb_20cm.predict(design) for design in (X_train_20cm_only, X_test_20cm_only)
)

# R2 and RMSE (square root of the MSE) on the training rows and the held-out rows.
print("XGBoost - moisture_20cm only")
print("Train R2:", r2_score(y_true=y_train_20cm_only, y_pred=y_train_pred_20cm))
print("Test R2:", r2_score(y_true=y_test_20cm_only, y_pred=y_test_pred_20cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_20cm_only, y_pred=y_train_pred_20cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_20cm_only, y_pred=y_test_pred_20cm)))

# Same estimator settings, now with moisture_20cm plus the meteorological columns.
xgb_full = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_full.fit(X_train_full, y_train_full)
y_train_pred_full, y_test_pred_full = (
    xgb_full.predict(design) for design in (X_train_full, X_test_full)
)

print("\nXGBoost - moisture_20cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full, y_pred=y_train_pred_full))
print("Test R2:", r2_score(y_true=y_test_full, y_pred=y_test_pred_full))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full, y_pred=y_train_pred_full)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full, y_pred=y_test_pred_full)))


XGBoost - moisture_20cm only
Train R2: 0.744941736599625
Test R2: 0.7454439808901321
Train RMSE: 0.0435825908160058
Test RMSE: 0.043490752819757825

XGBoost - moisture_20cm + meteorological features
Train R2: 0.8893766326352112
Test R2: 0.8768489417015657
Train RMSE: 0.028702305437761595
Test RMSE: 0.0302499258313184


##From 30cm

In [None]:
# ==== 1. Separate features and targets ====
# NOTE: some of these names (e.g. X_train_full, features_to_exclude, X_train_30cm_only)
# are assigned again in other sections, so run this cell right before its model cell.

# Single-predictor design: moisture_30cm is the only input, moisture_10cm the target.
X_train_30cm_only, X_test_30cm_only = (part[['moisture_30cm']] for part in (train_base, test_base))
y_train_30cm_only, y_test_30cm_only = (part['moisture_10cm'] for part in (train_base, test_base))

# Full design: drop the target, every other depth sensor and the timestamp, leaving
# moisture_30cm plus the remaining columns (called meteorological features in this notebook).
features_to_exclude = [
    'moisture_10cm', 'moisture_20cm', 'moisture_40cm',
    'moisture_50cm', 'moisture_60cm', 'timestamp_device',
]
X_train_full, X_test_full = (part.drop(columns=features_to_exclude) for part in (train_base, test_base))
y_train_full, y_test_full = (part['moisture_10cm'] for part in (train_base, test_base))


In [None]:
# Single-predictor model: XGBoost fitted on moisture_30cm alone
# (features and targets come from the setup cell above).
xgb_30cm = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_30cm.fit(X_train_30cm_only, y_train_30cm_only)
y_train_pred_30cm, y_test_pred_30cm = (
    xgb_30cm.predict(design) for design in (X_train_30cm_only, X_test_30cm_only)
)

# R2 and RMSE (square root of the MSE) on the training rows and the held-out rows.
print("XGBoost - moisture_30cm only")
print("Train R2:", r2_score(y_true=y_train_30cm_only, y_pred=y_train_pred_30cm))
print("Test R2:", r2_score(y_true=y_test_30cm_only, y_pred=y_test_pred_30cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_30cm_only, y_pred=y_train_pred_30cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_30cm_only, y_pred=y_test_pred_30cm)))

# Same estimator settings, now with moisture_30cm plus the meteorological columns.
xgb_full = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_full.fit(X_train_full, y_train_full)
y_train_pred_full, y_test_pred_full = (
    xgb_full.predict(design) for design in (X_train_full, X_test_full)
)

print("\nXGBoost - moisture_30cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full, y_pred=y_train_pred_full))
print("Test R2:", r2_score(y_true=y_test_full, y_pred=y_test_pred_full))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full, y_pred=y_train_pred_full)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full, y_pred=y_test_pred_full)))

XGBoost - moisture_30cm only
Train R2: 0.5023451826471594
Test R2: 0.5055129907286264
Train RMSE: 0.06087758991713718
Test RMSE: 0.06061535923646301

XGBoost - moisture_30cm + meteorological features
Train R2: 0.8035440190682781
Test R2: 0.7812607133558145
Train RMSE: 0.03824951847152517
Test RMSE: 0.04031518302154115


##From 40cm

In [None]:
# ==== 1. Separate features and targets ====
# NOTE: some of these names (e.g. X_train_full, features_to_exclude, X_train_40cm_only)
# are assigned again in other sections, so run this cell right before its model cell.

# Single-predictor design: moisture_40cm is the only input, moisture_10cm the target.
X_train_40cm_only, X_test_40cm_only = (part[['moisture_40cm']] for part in (train_base, test_base))
y_train_40cm_only, y_test_40cm_only = (part['moisture_10cm'] for part in (train_base, test_base))

# Full design: drop the target, every other depth sensor and the timestamp, leaving
# moisture_40cm plus the remaining columns (called meteorological features in this notebook).
features_to_exclude = [
    'moisture_10cm', 'moisture_20cm', 'moisture_30cm',
    'moisture_50cm', 'moisture_60cm', 'timestamp_device',
]
X_train_full, X_test_full = (part.drop(columns=features_to_exclude) for part in (train_base, test_base))
y_train_full, y_test_full = (part['moisture_10cm'] for part in (train_base, test_base))

In [None]:
# Single-predictor model: XGBoost fitted on moisture_40cm alone
# (features and targets come from the setup cell above).
xgb_40cm = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_40cm.fit(X_train_40cm_only, y_train_40cm_only)
y_train_pred_40cm, y_test_pred_40cm = (
    xgb_40cm.predict(design) for design in (X_train_40cm_only, X_test_40cm_only)
)

# R2 and RMSE (square root of the MSE) on the training rows and the held-out rows.
print("XGBoost - moisture_40cm only")
print("Train R2:", r2_score(y_true=y_train_40cm_only, y_pred=y_train_pred_40cm))
print("Test R2:", r2_score(y_true=y_test_40cm_only, y_pred=y_test_pred_40cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_40cm_only, y_pred=y_train_pred_40cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_40cm_only, y_pred=y_test_pred_40cm)))

# Same estimator settings, now with moisture_40cm plus the meteorological columns.
xgb_full = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_full.fit(X_train_full, y_train_full)
y_train_pred_full, y_test_pred_full = (
    xgb_full.predict(design) for design in (X_train_full, X_test_full)
)

print("\nXGBoost - moisture_40cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full, y_pred=y_train_pred_full))
print("Test R2:", r2_score(y_true=y_test_full, y_pred=y_test_pred_full))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full, y_pred=y_train_pred_full)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full, y_pred=y_test_pred_full)))

XGBoost - moisture_40cm only
Train R2: 0.43490336409992114
Test R2: 0.43544551924125896
Train RMSE: 0.06487161433891546
Test RMSE: 0.06476765443592836

XGBoost - moisture_40cm + meteorological features
Train R2: 0.7891356184147221
Test R2: 0.7620117853622307
Train RMSE: 0.03962734335231106
Test RMSE: 0.042051642681905546


##From 50cm

In [None]:
# ==== 1. Separate features and targets ====
# NOTE: the *_50cm_only names are assigned again in other sections with a different
# target, so run this cell right before its model cell.

# Single-predictor design: moisture_50cm is the only input, moisture_10cm the target.
X_train_50cm_only, X_test_50cm_only = (part[['moisture_50cm']] for part in (train_base, test_base))
y_train_50cm_only, y_test_50cm_only = (part['moisture_10cm'] for part in (train_base, test_base))

# Full design: drop the target, every other depth sensor and the timestamp, leaving
# moisture_50cm plus the remaining columns (called meteorological features in this notebook).
features_to_exclude_50cm = [
    'moisture_10cm', 'moisture_20cm', 'moisture_30cm',
    'moisture_40cm', 'moisture_60cm', 'timestamp_device',
]
X_train_full_50cm, X_test_full_50cm = (
    part.drop(columns=features_to_exclude_50cm) for part in (train_base, test_base)
)
y_train_full_50cm, y_test_full_50cm = (part['moisture_10cm'] for part in (train_base, test_base))

In [None]:
# Single-predictor model: XGBoost fitted on moisture_50cm alone
# (features and targets come from the setup cell above).
xgb_50cm = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_50cm.fit(X_train_50cm_only, y_train_50cm_only)
y_train_pred_50cm, y_test_pred_50cm = (
    xgb_50cm.predict(design) for design in (X_train_50cm_only, X_test_50cm_only)
)

# R2 and RMSE (square root of the MSE) on the training rows and the held-out rows.
print("XGBoost - moisture_50cm only")
print("Train R2:", r2_score(y_true=y_train_50cm_only, y_pred=y_train_pred_50cm))
print("Test R2:", r2_score(y_true=y_test_50cm_only, y_pred=y_test_pred_50cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_50cm_only, y_pred=y_train_pred_50cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_50cm_only, y_pred=y_test_pred_50cm)))

# Same estimator settings, now with moisture_50cm plus the meteorological columns.
xgb_full_50cm = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_full_50cm.fit(X_train_full_50cm, y_train_full_50cm)
y_train_pred_full_50cm, y_test_pred_full_50cm = (
    xgb_full_50cm.predict(design) for design in (X_train_full_50cm, X_test_full_50cm)
)

print("\nXGBoost - moisture_50cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_50cm, y_pred=y_train_pred_full_50cm))
print("Test R2:", r2_score(y_true=y_test_full_50cm, y_pred=y_test_pred_full_50cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_50cm, y_pred=y_train_pred_full_50cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_50cm, y_pred=y_test_pred_full_50cm)))

XGBoost - moisture_50cm only
Train R2: 0.3623550606439573
Test R2: 0.36873100443529017
Train RMSE: 0.06891008779598716
Test RMSE: 0.0684876823661101

XGBoost - moisture_50cm + meteorological features
Train R2: 0.7447335999949114
Test R2: 0.7141599946996269
Train RMSE: 0.043600369660963124
Test RMSE: 0.046085759129853916


##From 60cm

In [None]:
# ==== 1. Separate features and targets ====
# NOTE: some of these names (e.g. features_to_exclude, X_train_full_60, X_train_60cm_only)
# are assigned again in other sections, so run this cell right before its model cell.

# Single-predictor design: moisture_60cm is the only input, moisture_10cm the target.
X_train_60cm_only, X_test_60cm_only = (part[['moisture_60cm']] for part in (train_base, test_base))
y_train_60cm_only, y_test_60cm_only = (part['moisture_10cm'] for part in (train_base, test_base))

# Full design: drop the target, every other depth sensor and the timestamp, leaving
# moisture_60cm plus the remaining columns (called meteorological features in this notebook).
features_to_exclude = [
    'moisture_10cm', 'moisture_20cm', 'moisture_30cm',
    'moisture_40cm', 'moisture_50cm', 'timestamp_device',
]
X_train_full_60, X_test_full_60 = (
    part.drop(columns=features_to_exclude) for part in (train_base, test_base)
)
y_train_full_60, y_test_full_60 = (part['moisture_10cm'] for part in (train_base, test_base))

In [None]:
# Single-predictor model: XGBoost fitted on moisture_60cm alone
# (features and targets come from the setup cell above).
xgb_60cm = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_60cm.fit(X_train_60cm_only, y_train_60cm_only)
y_train_pred_60cm, y_test_pred_60cm = (
    xgb_60cm.predict(design) for design in (X_train_60cm_only, X_test_60cm_only)
)

# R2 and RMSE (square root of the MSE) on the training rows and the held-out rows.
print("XGBoost - moisture_60cm only")
print("Train R2:", r2_score(y_true=y_train_60cm_only, y_pred=y_train_pred_60cm))
print("Test R2:", r2_score(y_true=y_test_60cm_only, y_pred=y_test_pred_60cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_60cm_only, y_pred=y_train_pred_60cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_60cm_only, y_pred=y_test_pred_60cm)))

# Same estimator settings, now with moisture_60cm plus the meteorological columns.
xgb_full_60cm = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_full_60cm.fit(X_train_full_60, y_train_full_60)
y_train_pred_full_60cm, y_test_pred_full_60cm = (
    xgb_full_60cm.predict(design) for design in (X_train_full_60, X_test_full_60)
)

print("\nXGBoost - moisture_60cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_60, y_pred=y_train_pred_full_60cm))
print("Test R2:", r2_score(y_true=y_test_full_60, y_pred=y_test_pred_full_60cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_60, y_pred=y_train_pred_full_60cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_60, y_pred=y_test_pred_full_60cm)))

XGBoost - moisture_60cm only
Train R2: 0.39559564027424055
Test R2: 0.3992083617222575
Train RMSE: 0.06708989910888154
Test RMSE: 0.06681395493375686

XGBoost - moisture_60cm + meteorological features
Train R2: 0.7705685118716368
Test R2: 0.7429909351395063
Train RMSE: 0.041335182136408966
Test RMSE: 0.043699801144302615


#Predicting moisture at 20 cm depth

##From 10cm depth

In [None]:
# ==== 1. Separate features and targets ====
# NOTE: some of these names (e.g. X_train_10cm_only, X_train_full_10) are assigned
# again in another section with a different target, so run this cell right before
# its model cell.

# Single-predictor design: moisture_10cm is the only input, moisture_20cm the target.
X_train_10cm_only, X_test_10cm_only = (part[['moisture_10cm']] for part in (train_base, test_base))
y_train_10cm_only, y_test_10cm_only = (part['moisture_20cm'] for part in (train_base, test_base))

# Full design: drop the target, every other depth sensor and the timestamp, leaving
# moisture_10cm plus the remaining columns (called meteorological features in this notebook).
features_to_exclude_20 = [
    'moisture_20cm', 'moisture_30cm', 'moisture_40cm',
    'moisture_50cm', 'moisture_60cm', 'timestamp_device',
]
X_train_full_10, X_test_full_10 = (
    part.drop(columns=features_to_exclude_20) for part in (train_base, test_base)
)
y_train_full_10, y_test_full_10 = (part['moisture_20cm'] for part in (train_base, test_base))


In [None]:
# Single-predictor model: XGBoost fitted on moisture_10cm alone
# (features and targets come from the setup cell above).
xgb_10cm = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_10cm.fit(X_train_10cm_only, y_train_10cm_only)
y_train_pred_10cm, y_test_pred_10cm = (
    xgb_10cm.predict(design) for design in (X_train_10cm_only, X_test_10cm_only)
)

# R2 and RMSE (square root of the MSE) on the training rows and the held-out rows.
print("XGBoost - moisture_10cm only")
print("Train R2:", r2_score(y_true=y_train_10cm_only, y_pred=y_train_pred_10cm))
print("Test R2:", r2_score(y_true=y_test_10cm_only, y_pred=y_test_pred_10cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_10cm_only, y_pred=y_train_pred_10cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_10cm_only, y_pred=y_test_pred_10cm)))

# Same estimator settings, now with moisture_10cm plus the meteorological columns.
xgb_full_10cm = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_full_10cm.fit(X_train_full_10, y_train_full_10)
y_train_pred_full_10cm, y_test_pred_full_10cm = (
    xgb_full_10cm.predict(design) for design in (X_train_full_10, X_test_full_10)
)

print("\nXGBoost - moisture_10cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_10, y_pred=y_train_pred_full_10cm))
print("Test R2:", r2_score(y_true=y_test_full_10, y_pred=y_test_pred_full_10cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_10, y_pred=y_train_pred_full_10cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_10, y_pred=y_test_pred_full_10cm)))

XGBoost - moisture_10cm only
Train R2: 0.7967628879847752
Test R2: 0.7954979999629099
Train RMSE: 0.028416860655735267
Test RMSE: 0.02862811297554261

XGBoost - moisture_10cm + meteorological features
Train R2: 0.9163831412562671
Test R2: 0.9049712728410145
Train RMSE: 0.018227265278821007
Test RMSE: 0.01951512775147976


##From 30cm depth

In [None]:
# ==== 1. Separate features and targets ====
# NOTE: the *_30cm_only names are assigned again in another section with a different
# target, so run this cell right before its model cell.

# Single-predictor design: moisture_30cm is the only input, moisture_20cm the target.
X_train_30cm_only, X_test_30cm_only = (part[['moisture_30cm']] for part in (train_base, test_base))
y_train_30cm_only, y_test_30cm_only = (part['moisture_20cm'] for part in (train_base, test_base))

# Full design: drop the target, every other depth sensor and the timestamp, leaving
# moisture_30cm plus the remaining columns (called meteorological features in this notebook).
features_to_exclude_20_from_30 = [
    'moisture_10cm', 'moisture_20cm', 'moisture_40cm',
    'moisture_50cm', 'moisture_60cm', 'timestamp_device',
]
X_train_full_30, X_test_full_30 = (
    part.drop(columns=features_to_exclude_20_from_30) for part in (train_base, test_base)
)
y_train_full_30, y_test_full_30 = (part['moisture_20cm'] for part in (train_base, test_base))


In [None]:
# Single-predictor model: XGBoost fitted on moisture_30cm alone
# (features and targets come from the setup cell above).
xgb_30cm = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_30cm.fit(X_train_30cm_only, y_train_30cm_only)
y_train_pred_30cm, y_test_pred_30cm = (
    xgb_30cm.predict(design) for design in (X_train_30cm_only, X_test_30cm_only)
)

# R2 and RMSE (square root of the MSE) on the training rows and the held-out rows.
print("XGBoost - moisture_30cm only")
print("Train R2:", r2_score(y_true=y_train_30cm_only, y_pred=y_train_pred_30cm))
print("Test R2:", r2_score(y_true=y_test_30cm_only, y_pred=y_test_pred_30cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_30cm_only, y_pred=y_train_pred_30cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_30cm_only, y_pred=y_test_pred_30cm)))

# Same estimator settings, now with moisture_30cm plus the meteorological columns.
xgb_full_30cm = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_full_30cm.fit(X_train_full_30, y_train_full_30)
y_train_pred_full_30cm, y_test_pred_full_30cm = (
    xgb_full_30cm.predict(design) for design in (X_train_full_30, X_test_full_30)
)

print("\nXGBoost - moisture_30cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_30, y_pred=y_train_pred_full_30cm))
print("Test R2:", r2_score(y_true=y_test_full_30, y_pred=y_test_pred_full_30cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_30, y_pred=y_train_pred_full_30cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_30, y_pred=y_test_pred_full_30cm)))

XGBoost - moisture_30cm only
Train R2: 0.7526369074878596
Test R2: 0.7560269137725564
Train RMSE: 0.0313503245442349
Test RMSE: 0.0312690655217951

XGBoost - moisture_30cm + meteorological features
Train R2: 0.9303044476547079
Test R2: 0.9191336909076707
Train RMSE: 0.016640911936742813
Test RMSE: 0.018002289989494904


##from 40cm depth

In [None]:
# ==== 1. Separate features and targets ====
# NOTE: some of these names (e.g. X_train_40cm_only, X_train_full_40) are assigned
# again in other sections with a different target, so run this cell right before
# its model cell.

# Single-predictor design: moisture_40cm is the only input, moisture_20cm the target.
X_train_40cm_only, X_test_40cm_only = (part[['moisture_40cm']] for part in (train_base, test_base))
y_train_40cm_only, y_test_40cm_only = (part['moisture_20cm'] for part in (train_base, test_base))

# Full design: drop the target, every other depth sensor and the timestamp, leaving
# moisture_40cm plus the remaining columns (called meteorological features in this notebook).
features_to_exclude_20_from_40 = [
    'moisture_10cm', 'moisture_20cm', 'moisture_30cm',
    'moisture_50cm', 'moisture_60cm', 'timestamp_device',
]
X_train_full_40, X_test_full_40 = (
    part.drop(columns=features_to_exclude_20_from_40) for part in (train_base, test_base)
)
y_train_full_40, y_test_full_40 = (part['moisture_20cm'] for part in (train_base, test_base))


In [None]:
# Single-predictor model: XGBoost fitted on moisture_40cm alone
# (features and targets come from the setup cell above).
xgb_40cm = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_40cm.fit(X_train_40cm_only, y_train_40cm_only)
y_train_pred_40cm, y_test_pred_40cm = (
    xgb_40cm.predict(design) for design in (X_train_40cm_only, X_test_40cm_only)
)

# R2 and RMSE (square root of the MSE) on the training rows and the held-out rows.
print("XGBoost - moisture_40cm only")
print("Train R2:", r2_score(y_true=y_train_40cm_only, y_pred=y_train_pred_40cm))
print("Test R2:", r2_score(y_true=y_test_40cm_only, y_pred=y_test_pred_40cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_40cm_only, y_pred=y_train_pred_40cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_40cm_only, y_pred=y_test_pred_40cm)))

# Same estimator settings, now with moisture_40cm plus the meteorological columns.
xgb_full_40cm = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_full_40cm.fit(X_train_full_40, y_train_full_40)
y_train_pred_full_40cm, y_test_pred_full_40cm = (
    xgb_full_40cm.predict(design) for design in (X_train_full_40, X_test_full_40)
)

print("\nXGBoost - moisture_40cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_40, y_pred=y_train_pred_full_40cm))
print("Test R2:", r2_score(y_true=y_test_full_40, y_pred=y_test_pred_full_40cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_40, y_pred=y_train_pred_full_40cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_40, y_pred=y_test_pred_full_40cm)))

XGBoost - moisture_40cm only
Train R2: 0.6946337149218375
Test R2: 0.7003973072626095
Train RMSE: 0.0348325390815507
Test RMSE: 0.03465108187540542

XGBoost - moisture_40cm + meteorological features
Train R2: 0.8978197242326996
Test R2: 0.8856698060196123
Train RMSE: 0.020149214741467504
Test RMSE: 0.021405452483575872


##from 50cm depth

In [None]:
# ==== 1. Separate features and targets ====
# NOTE: some of these names (e.g. X_train_50cm_only, X_train_full_50) are assigned
# again in other sections with a different target, so run this cell right before
# its model cell.

# Single-predictor design: moisture_50cm is the only input, moisture_20cm the target.
X_train_50cm_only, X_test_50cm_only = (part[['moisture_50cm']] for part in (train_base, test_base))
y_train_50cm_only, y_test_50cm_only = (part['moisture_20cm'] for part in (train_base, test_base))

# Full design: drop the target, every other depth sensor and the timestamp, leaving
# moisture_50cm plus the remaining columns (called meteorological features in this notebook).
features_to_exclude_20_from_50 = [
    'moisture_10cm', 'moisture_20cm', 'moisture_30cm',
    'moisture_40cm', 'moisture_60cm', 'timestamp_device',
]
X_train_full_50, X_test_full_50 = (
    part.drop(columns=features_to_exclude_20_from_50) for part in (train_base, test_base)
)
y_train_full_50, y_test_full_50 = (part['moisture_20cm'] for part in (train_base, test_base))

In [None]:
# Single-predictor model: XGBoost fitted on moisture_50cm alone
# (features and targets come from the setup cell above).
xgb_50cm = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_50cm.fit(X_train_50cm_only, y_train_50cm_only)
y_train_pred_50cm, y_test_pred_50cm = (
    xgb_50cm.predict(design) for design in (X_train_50cm_only, X_test_50cm_only)
)

# R2 and RMSE (square root of the MSE) on the training rows and the held-out rows.
print("XGBoost - moisture_50cm only")
print("Train R2:", r2_score(y_true=y_train_50cm_only, y_pred=y_train_pred_50cm))
print("Test R2:", r2_score(y_true=y_test_50cm_only, y_pred=y_test_pred_50cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_50cm_only, y_pred=y_train_pred_50cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_50cm_only, y_pred=y_test_pred_50cm)))

# Same estimator settings, now with moisture_50cm plus the meteorological columns.
xgb_full_50cm = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_full_50cm.fit(X_train_full_50, y_train_full_50)
y_train_pred_full_50cm, y_test_pred_full_50cm = (
    xgb_full_50cm.predict(design) for design in (X_train_full_50, X_test_full_50)
)

print("\nXGBoost - moisture_50cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_50, y_pred=y_train_pred_full_50cm))
print("Test R2:", r2_score(y_true=y_test_full_50, y_pred=y_test_pred_full_50cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_50, y_pred=y_train_pred_full_50cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_50, y_pred=y_test_pred_full_50cm)))

XGBoost - moisture_50cm only
Train R2: 0.6361343155122592
Test R2: 0.6408328956881998
Train RMSE: 0.038022891492833236
Test RMSE: 0.037939553821147554

XGBoost - moisture_50cm + meteorological features
Train R2: 0.8854739726754814
Test R2: 0.8697957958535426
Train RMSE: 0.021331759971548563
Test RMSE: 0.022843173911812845


##from 60cm depth

In [None]:
# ==== 1. Separate features and targets ====
# NOTE: some of these names (e.g. X_train_60cm_only, X_train_full_60) are assigned
# again in another section with a different target, so run this cell right before
# its model cell.

# Single-predictor design: moisture_60cm is the only input, moisture_20cm the target.
X_train_60cm_only, X_test_60cm_only = (part[['moisture_60cm']] for part in (train_base, test_base))
y_train_60cm_only, y_test_60cm_only = (part['moisture_20cm'] for part in (train_base, test_base))

# Full design: drop the target, every other depth sensor and the timestamp, leaving
# moisture_60cm plus the remaining columns (called meteorological features in this notebook).
features_to_exclude_20_from_60 = [
    'moisture_10cm', 'moisture_20cm', 'moisture_30cm',
    'moisture_40cm', 'moisture_50cm', 'timestamp_device',
]
X_train_full_60, X_test_full_60 = (
    part.drop(columns=features_to_exclude_20_from_60) for part in (train_base, test_base)
)
y_train_full_60, y_test_full_60 = (part['moisture_20cm'] for part in (train_base, test_base))

In [None]:
# Single-predictor model: XGBoost fitted on moisture_60cm alone
# (features and targets come from the setup cell above).
xgb_60cm = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_60cm.fit(X_train_60cm_only, y_train_60cm_only)
y_train_pred_60cm, y_test_pred_60cm = (
    xgb_60cm.predict(design) for design in (X_train_60cm_only, X_test_60cm_only)
)

# R2 and RMSE (square root of the MSE) on the training rows and the held-out rows.
print("XGBoost - moisture_60cm only")
print("Train R2:", r2_score(y_true=y_train_60cm_only, y_pred=y_train_pred_60cm))
print("Test R2:", r2_score(y_true=y_test_60cm_only, y_pred=y_test_pred_60cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_60cm_only, y_pred=y_train_pred_60cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_60cm_only, y_pred=y_test_pred_60cm)))

# Same estimator settings, now with moisture_60cm plus the meteorological columns.
xgb_full_60cm = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_full_60cm.fit(X_train_full_60, y_train_full_60)
y_train_pred_full_60cm, y_test_pred_full_60cm = (
    xgb_full_60cm.predict(design) for design in (X_train_full_60, X_test_full_60)
)

print("\nXGBoost - moisture_60cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_60, y_pred=y_train_pred_full_60cm))
print("Test R2:", r2_score(y_true=y_test_full_60, y_pred=y_test_pred_full_60cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_60, y_pred=y_train_pred_full_60cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_60, y_pred=y_test_pred_full_60cm)))

XGBoost - moisture_60cm only
Train R2: 0.6227320100500078
Test R2: 0.623706281544377
Train RMSE: 0.03871681018434299
Test RMSE: 0.03883357978401483

XGBoost - moisture_60cm + meteorological features
Train R2: 0.876670219397548
Test R2: 0.8590399237722797
Train RMSE: 0.02213648032205266
Test RMSE: 0.023767965340015948


#Predicting moisture at 30 cm depth

##from 10cm

In [None]:
# ==== 1. Separate features and targets ====
# NOTE: some of these names (e.g. X_train_10cm_only, X_train_full_10) are assigned
# again in another section with a different target, so run this cell right before
# its model cell.

# Single-predictor design: moisture_10cm is the only input, moisture_30cm the target.
X_train_10cm_only, X_test_10cm_only = (part[['moisture_10cm']] for part in (train_base, test_base))
y_train_10cm_only, y_test_10cm_only = (part['moisture_30cm'] for part in (train_base, test_base))

# Full design: drop the target, every other depth sensor and the timestamp, leaving
# moisture_10cm plus the remaining columns (called meteorological features in this notebook).
features_to_exclude_30_from_10 = [
    'moisture_20cm', 'moisture_30cm', 'moisture_40cm',
    'moisture_50cm', 'moisture_60cm', 'timestamp_device',
]
X_train_full_10, X_test_full_10 = (
    part.drop(columns=features_to_exclude_30_from_10) for part in (train_base, test_base)
)
y_train_full_10, y_test_full_10 = (part['moisture_30cm'] for part in (train_base, test_base))


In [None]:
# Single-predictor model: XGBoost fitted on moisture_10cm alone
# (features and targets come from the setup cell above).
xgb_10cm = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_10cm.fit(X_train_10cm_only, y_train_10cm_only)
y_train_pred_10cm, y_test_pred_10cm = (
    xgb_10cm.predict(design) for design in (X_train_10cm_only, X_test_10cm_only)
)

# R2 and RMSE (square root of the MSE) on the training rows and the held-out rows.
print("XGBoost - moisture_10cm only")
print("Train R2:", r2_score(y_true=y_train_10cm_only, y_pred=y_train_pred_10cm))
print("Test R2:", r2_score(y_true=y_test_10cm_only, y_pred=y_test_pred_10cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_10cm_only, y_pred=y_train_pred_10cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_10cm_only, y_pred=y_test_pred_10cm)))

# Same estimator settings, now with moisture_10cm plus the meteorological columns.
xgb_full_10cm = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_full_10cm.fit(X_train_full_10, y_train_full_10)
y_train_pred_full_10cm, y_test_pred_full_10cm = (
    xgb_full_10cm.predict(design) for design in (X_train_full_10, X_test_full_10)
)

print("\nXGBoost - moisture_10cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_10, y_pred=y_train_pred_full_10cm))
print("Test R2:", r2_score(y_true=y_test_full_10, y_pred=y_test_pred_full_10cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_10, y_pred=y_train_pred_full_10cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_10, y_pred=y_test_pred_full_10cm)))

XGBoost - moisture_10cm only
Train R2: 0.6449805469496817
Test R2: 0.642650269280671
Train RMSE: 0.02763822053596381
Test RMSE: 0.027930035350903916

XGBoost - moisture_10cm + meteorological features
Train R2: 0.8782609545780536
Test R2: 0.8573094521230693
Train RMSE: 0.016184487392365412
Test RMSE: 0.017649091657265092


##From 20cm

In [None]:
# ==== 1. Separate features and targets ====
# NOTE: the *_20cm_only names are assigned again in another section with a different
# target, so run this cell right before its model cell.

# Single-predictor design: moisture_20cm is the only input, moisture_30cm the target.
X_train_20cm_only, X_test_20cm_only = (part[['moisture_20cm']] for part in (train_base, test_base))
y_train_20cm_only, y_test_20cm_only = (part['moisture_30cm'] for part in (train_base, test_base))

# Full design: drop the target, every other depth sensor and the timestamp, leaving
# moisture_20cm plus the remaining columns (called meteorological features in this notebook).
features_to_exclude_30_from_20 = [
    'moisture_10cm', 'moisture_30cm', 'moisture_40cm',
    'moisture_50cm', 'moisture_60cm', 'timestamp_device',
]
X_train_full_20, X_test_full_20 = (
    part.drop(columns=features_to_exclude_30_from_20) for part in (train_base, test_base)
)
y_train_full_20, y_test_full_20 = (part['moisture_30cm'] for part in (train_base, test_base))


In [None]:
# Single-predictor model: XGBoost fitted on moisture_20cm alone
# (features and targets come from the setup cell above).
xgb_20cm = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_20cm.fit(X_train_20cm_only, y_train_20cm_only)
y_train_pred_20cm, y_test_pred_20cm = (
    xgb_20cm.predict(design) for design in (X_train_20cm_only, X_test_20cm_only)
)

# R2 and RMSE (square root of the MSE) on the training rows and the held-out rows.
print("XGBoost - moisture_20cm only")
print("Train R2:", r2_score(y_true=y_train_20cm_only, y_pred=y_train_pred_20cm))
print("Test R2:", r2_score(y_true=y_test_20cm_only, y_pred=y_test_pred_20cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_20cm_only, y_pred=y_train_pred_20cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_20cm_only, y_pred=y_test_pred_20cm)))

# Same estimator settings, now with moisture_20cm plus the meteorological columns.
xgb_full_20cm = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_full_20cm.fit(X_train_full_20, y_train_full_20)
y_train_pred_full_20cm, y_test_pred_full_20cm = (
    xgb_full_20cm.predict(design) for design in (X_train_full_20, X_test_full_20)
)

print("\nXGBoost - moisture_20cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_20, y_pred=y_train_pred_full_20cm))
print("Test R2:", r2_score(y_true=y_test_full_20, y_pred=y_test_pred_full_20cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_20, y_pred=y_train_pred_full_20cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_20, y_pred=y_test_pred_full_20cm)))

XGBoost - moisture_20cm only
Train R2: 0.8186266521671017
Test R2: 0.8220007814883944
Train RMSE: 0.019754713580338253
Test RMSE: 0.019712141299914273

XGBoost - moisture_20cm + meteorological features
Train R2: 0.9558133284142311
Test R2: 0.9496639025960842
Train RMSE: 0.009750561977192344
Test RMSE: 0.010482491682201295


##from 40cm

In [None]:
# ==== 1. Separate features and targets ====
# NOTE: some of these names (e.g. X_train_40cm_only, X_train_full_40) are assigned
# again in other sections with a different target, so run this cell right before
# its model cell.

# Single-predictor design: moisture_40cm is the only input, moisture_30cm the target.
X_train_40cm_only, X_test_40cm_only = (part[['moisture_40cm']] for part in (train_base, test_base))
y_train_40cm_only, y_test_40cm_only = (part['moisture_30cm'] for part in (train_base, test_base))

# Full design: drop the target, every other depth sensor and the timestamp, leaving
# moisture_40cm plus the remaining columns (called meteorological features in this notebook).
features_to_exclude_30_from_40 = [
    'moisture_10cm', 'moisture_20cm', 'moisture_30cm',
    'moisture_50cm', 'moisture_60cm', 'timestamp_device',
]
X_train_full_40, X_test_full_40 = (
    part.drop(columns=features_to_exclude_30_from_40) for part in (train_base, test_base)
)
y_train_full_40, y_test_full_40 = (part['moisture_30cm'] for part in (train_base, test_base))

In [None]:
# Single-predictor model: XGBoost fitted on moisture_40cm alone
# (features and targets come from the setup cell above).
xgb_40cm = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_40cm.fit(X_train_40cm_only, y_train_40cm_only)
y_train_pred_40cm, y_test_pred_40cm = (
    xgb_40cm.predict(design) for design in (X_train_40cm_only, X_test_40cm_only)
)

# R2 and RMSE (square root of the MSE) on the training rows and the held-out rows.
print("XGBoost - moisture_40cm only")
print("Train R2:", r2_score(y_true=y_train_40cm_only, y_pred=y_train_pred_40cm))
print("Test R2:", r2_score(y_true=y_test_40cm_only, y_pred=y_test_pred_40cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_40cm_only, y_pred=y_train_pred_40cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_40cm_only, y_pred=y_test_pred_40cm)))

# Same estimator settings, now with moisture_40cm plus the meteorological columns.
xgb_full_40 = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_full_40.fit(X_train_full_40, y_train_full_40)
y_train_pred_full_40, y_test_pred_full_40 = (
    xgb_full_40.predict(design) for design in (X_train_full_40, X_test_full_40)
)

print("\nXGBoost - moisture_40cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_40, y_pred=y_train_pred_full_40))
print("Test R2:", r2_score(y_true=y_test_full_40, y_pred=y_test_pred_full_40))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_40, y_pred=y_train_pred_full_40)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_40, y_pred=y_test_pred_full_40)))

XGBoost - moisture_40cm only
Train R2: 0.9275114380784053
Test R2: 0.9302470992392351
Train RMSE: 0.012488743462015088
Test RMSE: 0.01233973667978967

XGBoost - moisture_40cm + meteorological features
Train R2: 0.9750806442090919
Test R2: 0.9713036974376122
Train RMSE: 0.007322382443159747
Test RMSE: 0.007914759579234716


##From 50cm

In [None]:
# ==== 1. Separate features and targets ====
# NOTE: some of these names (e.g. X_train_50cm_only, X_train_full_50) are assigned
# again in other sections with a different target, so run this cell right before
# its model cell.

# Single-predictor design: moisture_50cm is the only input, moisture_30cm the target.
X_train_50cm_only, X_test_50cm_only = (part[['moisture_50cm']] for part in (train_base, test_base))
y_train_50cm_only, y_test_50cm_only = (part['moisture_30cm'] for part in (train_base, test_base))

# Full design: drop the target, every other depth sensor and the timestamp, leaving
# moisture_50cm plus the remaining columns (called meteorological features in this notebook).
features_to_exclude_30_from_50 = [
    'moisture_10cm', 'moisture_20cm', 'moisture_30cm',
    'moisture_40cm', 'moisture_60cm', 'timestamp_device',
]
X_train_full_50, X_test_full_50 = (
    part.drop(columns=features_to_exclude_30_from_50) for part in (train_base, test_base)
)
y_train_full_50, y_test_full_50 = (part['moisture_30cm'] for part in (train_base, test_base))

In [None]:
# Baseline regressor: moisture_50cm alone as predictor of moisture_30cm.
xgb_50cm = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_50cm.fit(X_train_50cm_only, y_train_50cm_only)
y_train_pred_50cm, y_test_pred_50cm = (
    xgb_50cm.predict(features) for features in (X_train_50cm_only, X_test_50cm_only)
)

# R2 and RMSE (square root of the MSE) on the train and test splits.
print("XGBoost - moisture_50cm only")
print("Train R2:", r2_score(y_true=y_train_50cm_only, y_pred=y_train_pred_50cm))
print("Test R2:", r2_score(y_true=y_test_50cm_only, y_pred=y_test_pred_50cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_50cm_only, y_pred=y_train_pred_50cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_50cm_only, y_pred=y_test_pred_50cm)))

# Richer regressor: moisture_50cm + meteorological features, same target.
xgb_full_50 = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_full_50.fit(X_train_full_50, y_train_full_50)
y_train_pred_full_50, y_test_pred_full_50 = (
    xgb_full_50.predict(features) for features in (X_train_full_50, X_test_full_50)
)

# Same four metrics for the richer model.
print("\nXGBoost - moisture_50cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_50, y_pred=y_train_pred_full_50))
print("Test R2:", r2_score(y_true=y_test_full_50, y_pred=y_test_pred_full_50))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_50, y_pred=y_train_pred_full_50)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_50, y_pred=y_test_pred_full_50)))

XGBoost - moisture_50cm only
Train R2: 0.8657714869533557
Test R2: 0.8697638472240278
Train RMSE: 0.01699442176162091
Test RMSE: 0.016861279793067917

XGBoost - moisture_50cm + meteorological features
Train R2: 0.9543613244087196
Test R2: 0.9496431277569226
Train RMSE: 0.009909472096407726
Test RMSE: 0.01048465463905253


##From 60cm

In [None]:
# ---- Features/target for estimating moisture_30cm from moisture_60cm ----

# Single-sensor inputs: moisture_60cm is the only feature column.
X_train_60cm_only, X_test_60cm_only = train_base[['moisture_60cm']], test_base[['moisture_60cm']]
y_train_60cm_only, y_test_60cm_only = train_base['moisture_30cm'], test_base['moisture_30cm']

# Full inputs (moisture_60cm + meteorological features): drop the target,
# the other moisture depths and the timestamp from the feature matrix.
features_to_exclude_30_from_60 = [
    'moisture_10cm', 'moisture_20cm', 'moisture_30cm',
    'moisture_40cm', 'moisture_50cm', 'timestamp_device',
]
X_train_full_60, X_test_full_60 = (
    split.drop(columns=features_to_exclude_30_from_60) for split in (train_base, test_base)
)
y_train_full_60, y_test_full_60 = train_base['moisture_30cm'], test_base['moisture_30cm']


In [None]:
# Baseline regressor: moisture_60cm alone as predictor of moisture_30cm.
xgb_60cm = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_60cm.fit(X_train_60cm_only, y_train_60cm_only)
y_train_pred_60cm, y_test_pred_60cm = (
    xgb_60cm.predict(features) for features in (X_train_60cm_only, X_test_60cm_only)
)

# R2 and RMSE (square root of the MSE) on the train and test splits.
print("XGBoost - moisture_60cm only")
print("Train R2:", r2_score(y_true=y_train_60cm_only, y_pred=y_train_pred_60cm))
print("Test R2:", r2_score(y_true=y_test_60cm_only, y_pred=y_test_pred_60cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_60cm_only, y_pred=y_train_pred_60cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_60cm_only, y_pred=y_test_pred_60cm)))

# Richer regressor: moisture_60cm + meteorological features, same target.
xgb_full_60 = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_full_60.fit(X_train_full_60, y_train_full_60)
y_train_pred_full_60, y_test_pred_full_60 = (
    xgb_full_60.predict(features) for features in (X_train_full_60, X_test_full_60)
)

# Same four metrics for the richer model.
print("\nXGBoost - moisture_60cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_60, y_pred=y_train_pred_full_60))
print("Test R2:", r2_score(y_true=y_test_full_60, y_pred=y_test_pred_full_60))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_60, y_pred=y_train_pred_full_60)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_60, y_pred=y_test_pred_full_60)))

XGBoost - moisture_60cm only
Train R2: 0.7042918534251457
Test R2: 0.7086898035411516
Train RMSE: 0.025224098126882245
Test RMSE: 0.025217533059400495

XGBoost - moisture_60cm + meteorological features
Train R2: 0.9211685613422061
Test R2: 0.9093724943910583
Train RMSE: 0.013023680361165304
Test RMSE: 0.014065486855717154


#Predicting moisture at 40cm depth

##from 10cm

In [None]:
# ---- Features/target for estimating moisture_40cm from moisture_10cm ----

# Single-sensor inputs: moisture_10cm is the only feature column.
X_train_10cm_only, X_test_10cm_only = train_base[['moisture_10cm']], test_base[['moisture_10cm']]
y_train_10cm_only, y_test_10cm_only = train_base['moisture_40cm'], test_base['moisture_40cm']

# Full inputs (moisture_10cm + meteorological features): drop the target,
# the other moisture depths and the timestamp from the feature matrix.
features_to_exclude_40_from_10 = [
    'moisture_20cm', 'moisture_30cm', 'moisture_40cm',
    'moisture_50cm', 'moisture_60cm', 'timestamp_device',
]
X_train_full_10, X_test_full_10 = (
    split.drop(columns=features_to_exclude_40_from_10) for split in (train_base, test_base)
)
y_train_full_10, y_test_full_10 = train_base['moisture_40cm'], test_base['moisture_40cm']

In [None]:
# Baseline regressor: moisture_10cm alone as predictor of moisture_40cm.
xgb_10cm = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_10cm.fit(X_train_10cm_only, y_train_10cm_only)
y_train_pred_10cm, y_test_pred_10cm = (
    xgb_10cm.predict(features) for features in (X_train_10cm_only, X_test_10cm_only)
)

# R2 and RMSE (square root of the MSE) on the train and test splits.
print("XGBoost - moisture_10cm only")
print("Train R2:", r2_score(y_true=y_train_10cm_only, y_pred=y_train_pred_10cm))
print("Test R2:", r2_score(y_true=y_test_10cm_only, y_pred=y_test_pred_10cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_10cm_only, y_pred=y_train_pred_10cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_10cm_only, y_pred=y_test_pred_10cm)))

# Richer regressor: moisture_10cm + meteorological features, same target.
xgb_full_10 = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_full_10.fit(X_train_full_10, y_train_full_10)
y_train_pred_full_10, y_test_pred_full_10 = (
    xgb_full_10.predict(features) for features in (X_train_full_10, X_test_full_10)
)

# Same four metrics for the richer model.
print("\nXGBoost - moisture_10cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_10, y_pred=y_train_pred_full_10))
print("Test R2:", r2_score(y_true=y_test_full_10, y_pred=y_test_pred_full_10))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_10, y_pred=y_train_pred_full_10)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_10, y_pred=y_test_pred_full_10)))

XGBoost - moisture_10cm only
Train R2: 0.44549845671899146
Test R2: 0.44552995744595136
Train RMSE: 0.025575870020341517
Test RMSE: 0.025799748295627215

XGBoost - moisture_10cm + meteorological features
Train R2: 0.8009716780629301
Test R2: 0.7776218574947446
Train RMSE: 0.015322741579911172
Test RMSE: 0.016338887211139273


##from 20cm

In [None]:
# ---- Features/target for estimating moisture_40cm from moisture_20cm ----

# Single-sensor inputs: moisture_20cm is the only feature column.
X_train_20cm_only, X_test_20cm_only = train_base[['moisture_20cm']], test_base[['moisture_20cm']]
y_train_20cm_only, y_test_20cm_only = train_base['moisture_40cm'], test_base['moisture_40cm']

# Full inputs (moisture_20cm + meteorological features): drop the target,
# the other moisture depths and the timestamp from the feature matrix.
features_to_exclude_40_from_20 = [
    'moisture_10cm', 'moisture_40cm', 'moisture_30cm',
    'moisture_50cm', 'moisture_60cm', 'timestamp_device',
]
X_train_full_20, X_test_full_20 = (
    split.drop(columns=features_to_exclude_40_from_20) for split in (train_base, test_base)
)
y_train_full_20, y_test_full_20 = train_base['moisture_40cm'], test_base['moisture_40cm']

In [None]:
# Baseline regressor: moisture_20cm alone as predictor of moisture_40cm.
xgb_20cm = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_20cm.fit(X_train_20cm_only, y_train_20cm_only)
y_train_pred_20cm, y_test_pred_20cm = (
    xgb_20cm.predict(features) for features in (X_train_20cm_only, X_test_20cm_only)
)

# R2 and RMSE (square root of the MSE) on the train and test splits.
print("XGBoost - moisture_20cm only")
print("Train R2:", r2_score(y_true=y_train_20cm_only, y_pred=y_train_pred_20cm))
print("Test R2:", r2_score(y_true=y_test_20cm_only, y_pred=y_test_pred_20cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_20cm_only, y_pred=y_train_pred_20cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_20cm_only, y_pred=y_test_pred_20cm)))

# Richer regressor: moisture_20cm + meteorological features, same target.
xgb_full_20 = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_full_20.fit(X_train_full_20, y_train_full_20)
y_train_pred_full_20, y_test_pred_full_20 = (
    xgb_full_20.predict(features) for features in (X_train_full_20, X_test_full_20)
)

# Same four metrics for the richer model.
print("\nXGBoost - moisture_20cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_20, y_pred=y_train_pred_full_20))
print("Test R2:", r2_score(y_true=y_test_full_20, y_pred=y_test_pred_full_20))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_20, y_pred=y_train_pred_full_20)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_20, y_pred=y_test_pred_full_20)))

XGBoost - moisture_20cm only
Train R2: 0.6587012540235693
Test R2: 0.6663071141953563
Train RMSE: 0.020065333518498943
Test RMSE: 0.02001473754522903

XGBoost - moisture_20cm + meteorological features
Train R2: 0.8949694202081094
Test R2: 0.8848903052144843
Train RMSE: 0.011131070104186295
Test RMSE: 0.011755263726095023


##from 30cm

In [None]:
# ---- Features/target for estimating moisture_40cm from moisture_30cm ----

# Single-sensor inputs: moisture_30cm is the only feature column.
X_train_30cm_only, X_test_30cm_only = train_base[['moisture_30cm']], test_base[['moisture_30cm']]
y_train_30cm_only, y_test_30cm_only = train_base['moisture_40cm'], test_base['moisture_40cm']

# Full inputs (moisture_30cm + meteorological features): drop the target,
# the other moisture depths and the timestamp from the feature matrix.
features_to_exclude_40_from_30 = [
    'moisture_10cm', 'moisture_20cm', 'moisture_40cm',
    'moisture_50cm', 'moisture_60cm', 'timestamp_device',
]
X_train_full_30, X_test_full_30 = (
    split.drop(columns=features_to_exclude_40_from_30) for split in (train_base, test_base)
)
y_train_full_30, y_test_full_30 = train_base['moisture_40cm'], test_base['moisture_40cm']


In [None]:
# Baseline regressor: moisture_30cm alone as predictor of moisture_40cm.
xgb_30cm = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_30cm.fit(X_train_30cm_only, y_train_30cm_only)
y_train_pred_30cm, y_test_pred_30cm = (
    xgb_30cm.predict(features) for features in (X_train_30cm_only, X_test_30cm_only)
)

# R2 and RMSE (square root of the MSE) on the train and test splits.
print("XGBoost - moisture_30cm only")
print("Train R2:", r2_score(y_true=y_train_30cm_only, y_pred=y_train_pred_30cm))
print("Test R2:", r2_score(y_true=y_test_30cm_only, y_pred=y_test_pred_30cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_30cm_only, y_pred=y_train_pred_30cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_30cm_only, y_pred=y_test_pred_30cm)))

# Richer regressor: moisture_30cm + meteorological features, same target.
xgb_full_30 = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_full_30.fit(X_train_full_30, y_train_full_30)
y_train_pred_full_30, y_test_pred_full_30 = (
    xgb_full_30.predict(features) for features in (X_train_full_30, X_test_full_30)
)

# Same four metrics for the richer model.
print("\nXGBoost - moisture_30cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_30, y_pred=y_train_pred_full_30))
print("Test R2:", r2_score(y_true=y_test_full_30, y_pred=y_test_pred_full_30))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_30, y_pred=y_train_pred_full_30)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_30, y_pred=y_test_pred_full_30)))

XGBoost - moisture_30cm only
Train R2: 0.8993514028097555
Test R2: 0.9018227035568848
Train RMSE: 0.010896396539839122
Test RMSE: 0.010856303098119541

XGBoost - moisture_30cm + meteorological features
Train R2: 0.9681300355309099
Test R2: 0.9626889060074434
Train RMSE: 0.006131543676171983
Test RMSE: 0.006692608446787428


##from 50cm

In [None]:
# ---- Features/target for estimating moisture_40cm from moisture_50cm ----

# Single-sensor inputs: moisture_50cm is the only feature column.
X_train_50cm_only, X_test_50cm_only = train_base[['moisture_50cm']], test_base[['moisture_50cm']]
y_train_50cm_only, y_test_50cm_only = train_base['moisture_40cm'], test_base['moisture_40cm']

# Full inputs (moisture_50cm + meteorological features): drop the target,
# the other moisture depths and the timestamp from the feature matrix.
features_to_exclude_40_from_50 = [
    'moisture_10cm', 'moisture_20cm', 'moisture_30cm',
    'moisture_40cm', 'moisture_60cm', 'timestamp_device',
]
X_train_full_50, X_test_full_50 = (
    split.drop(columns=features_to_exclude_40_from_50) for split in (train_base, test_base)
)
y_train_full_50, y_test_full_50 = train_base['moisture_40cm'], test_base['moisture_40cm']


In [None]:
# Baseline regressor: moisture_50cm alone as predictor of moisture_40cm.
xgb_50cm = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_50cm.fit(X_train_50cm_only, y_train_50cm_only)
y_train_pred_50cm, y_test_pred_50cm = (
    xgb_50cm.predict(features) for features in (X_train_50cm_only, X_test_50cm_only)
)

# R2 and RMSE (square root of the MSE) on the train and test splits.
print("XGBoost - moisture_50cm only")
print("Train R2:", r2_score(y_true=y_train_50cm_only, y_pred=y_train_pred_50cm))
print("Test R2:", r2_score(y_true=y_test_50cm_only, y_pred=y_test_pred_50cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_50cm_only, y_pred=y_train_pred_50cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_50cm_only, y_pred=y_test_pred_50cm)))

# Richer regressor: moisture_50cm + meteorological features, same target.
xgb_full_50 = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_full_50.fit(X_train_full_50, y_train_full_50)
y_train_pred_full_50, y_test_pred_full_50 = (
    xgb_full_50.predict(features) for features in (X_train_full_50, X_test_full_50)
)

# Same four metrics for the richer model.
print("\nXGBoost - moisture_50cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_50, y_pred=y_train_pred_full_50))
print("Test R2:", r2_score(y_true=y_test_full_50, y_pred=y_test_pred_full_50))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_50, y_pred=y_train_pred_full_50)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_50, y_pred=y_test_pred_full_50)))

XGBoost - moisture_50cm only
Train R2: 0.8768787340054282
Test R2: 0.8815734418214981
Train RMSE: 0.0120516239766429
Test RMSE: 0.011923423769448051

XGBoost - moisture_50cm + meteorological features
Train R2: 0.9553719036973998
Test R2: 0.9506113838044679
Train RMSE: 0.007255765150003851
Test RMSE: 0.007699984565547367


##from 60cm

In [None]:
# ---- Features/target for estimating moisture_40cm from moisture_60cm ----

# Single-sensor inputs: moisture_60cm is the only feature column.
X_train_60cm_only, X_test_60cm_only = train_base[['moisture_60cm']], test_base[['moisture_60cm']]
y_train_60cm_only, y_test_60cm_only = train_base['moisture_40cm'], test_base['moisture_40cm']

# Full inputs (moisture_60cm + meteorological features): drop the target,
# the other moisture depths and the timestamp from the feature matrix.
features_to_exclude_40_from_60 = [
    'moisture_10cm', 'moisture_20cm', 'moisture_30cm',
    'moisture_40cm', 'moisture_50cm', 'timestamp_device',
]
X_train_full_60, X_test_full_60 = (
    split.drop(columns=features_to_exclude_40_from_60) for split in (train_base, test_base)
)
y_train_full_60, y_test_full_60 = train_base['moisture_40cm'], test_base['moisture_40cm']

In [None]:
# Baseline regressor: moisture_60cm alone as predictor of moisture_40cm.
xgb_60cm = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_60cm.fit(X_train_60cm_only, y_train_60cm_only)
y_train_pred_60cm, y_test_pred_60cm = (
    xgb_60cm.predict(features) for features in (X_train_60cm_only, X_test_60cm_only)
)

# R2 and RMSE (square root of the MSE) on the train and test splits.
print("XGBoost - moisture_60cm only")
print("Train R2:", r2_score(y_true=y_train_60cm_only, y_pred=y_train_pred_60cm))
print("Test R2:", r2_score(y_true=y_test_60cm_only, y_pred=y_test_pred_60cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_60cm_only, y_pred=y_train_pred_60cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_60cm_only, y_pred=y_test_pred_60cm)))

# Richer regressor: moisture_60cm + meteorological features, same target.
xgb_full_60 = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_full_60.fit(X_train_full_60, y_train_full_60)
y_train_pred_full_60, y_test_pred_full_60 = (
    xgb_full_60.predict(features) for features in (X_train_full_60, X_test_full_60)
)

# Same four metrics for the richer model.
print("\nXGBoost - moisture_60cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_60, y_pred=y_train_pred_full_60))
print("Test R2:", r2_score(y_true=y_test_full_60, y_pred=y_test_pred_full_60))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_60, y_pred=y_train_pred_full_60)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_60, y_pred=y_test_pred_full_60)))

XGBoost - moisture_60cm only
Train R2: 0.6736854775508423
Test R2: 0.6828035138284276
Train RMSE: 0.019619920245713084
Test RMSE: 0.019513744282403524

XGBoost - moisture_60cm + meteorological features
Train R2: 0.8938833491819476
Test R2: 0.8835236379684898
Train RMSE: 0.011188472630729009
Test RMSE: 0.011824841399196033


#Predicting moisture at 50cm depth

##from 10cm

In [None]:
# ---- Features/target for estimating moisture_50cm from moisture_10cm ----

# Single-sensor inputs: moisture_10cm is the only feature column.
X_train_10cm_only, X_test_10cm_only = train_base[['moisture_10cm']], test_base[['moisture_10cm']]
y_train_10cm_only, y_test_10cm_only = train_base['moisture_50cm'], test_base['moisture_50cm']

# Full inputs (moisture_10cm + meteorological features): drop the target,
# the other moisture depths and the timestamp from the feature matrix.
features_to_exclude_50_from_10 = [
    'moisture_20cm', 'moisture_30cm', 'moisture_40cm',
    'moisture_50cm', 'moisture_60cm', 'timestamp_device',
]
X_train_full_10, X_test_full_10 = (
    split.drop(columns=features_to_exclude_50_from_10) for split in (train_base, test_base)
)
y_train_full_10, y_test_full_10 = train_base['moisture_50cm'], test_base['moisture_50cm']

In [None]:
# Baseline regressor: moisture_10cm alone as predictor of moisture_50cm.
xgb_10cm = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_10cm.fit(X_train_10cm_only, y_train_10cm_only)
y_train_pred_10cm, y_test_pred_10cm = (
    xgb_10cm.predict(features) for features in (X_train_10cm_only, X_test_10cm_only)
)

# R2 and RMSE (square root of the MSE) on the train and test splits.
print("XGBoost - moisture_10cm only")
print("Train R2:", r2_score(y_true=y_train_10cm_only, y_pred=y_train_pred_10cm))
print("Test R2:", r2_score(y_true=y_test_10cm_only, y_pred=y_test_pred_10cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_10cm_only, y_pred=y_train_pred_10cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_10cm_only, y_pred=y_test_pred_10cm)))

# Richer regressor: moisture_10cm + meteorological features, same target.
xgb_full_10 = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_full_10.fit(X_train_full_10, y_train_full_10)
y_train_pred_full_10, y_test_pred_full_10 = (
    xgb_full_10.predict(features) for features in (X_train_full_10, X_test_full_10)
)

# Same four metrics for the richer model.
print("\nXGBoost - moisture_10cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_10, y_pred=y_train_pred_full_10))
print("Test R2:", r2_score(y_true=y_test_full_10, y_pred=y_test_pred_full_10))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_10, y_pred=y_train_pred_full_10)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_10, y_pred=y_test_pred_full_10)))

XGBoost - moisture_10cm only
Train R2: 0.4530139352768382
Test R2: 0.4497012232894845
Train RMSE: 0.01043142354383556
Test RMSE: 0.010596555208907772

XGBoost - moisture_10cm + meteorological features
Train R2: 0.7899574779208437
Test R2: 0.7654499619233852
Train RMSE: 0.006464114795241913
Test RMSE: 0.006918041857444035


##from 20cm

In [None]:
# ---- Features/target for estimating moisture_50cm from moisture_20cm ----

# Single-sensor inputs: moisture_20cm is the only feature column.
X_train_20cm_only, X_test_20cm_only = train_base[['moisture_20cm']], test_base[['moisture_20cm']]
y_train_20cm_only, y_test_20cm_only = train_base['moisture_50cm'], test_base['moisture_50cm']

# Full inputs (moisture_20cm + meteorological features): drop the target,
# the other moisture depths and the timestamp from the feature matrix.
features_to_exclude_50_from_20 = [
    'moisture_10cm', 'moisture_30cm', 'moisture_40cm',
    'moisture_50cm', 'moisture_60cm', 'timestamp_device',
]
X_train_full_20, X_test_full_20 = (
    split.drop(columns=features_to_exclude_50_from_20) for split in (train_base, test_base)
)
y_train_full_20, y_test_full_20 = train_base['moisture_50cm'], test_base['moisture_50cm']

In [None]:
# Baseline regressor: moisture_20cm alone as predictor of moisture_50cm.
xgb_20cm = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_20cm.fit(X_train_20cm_only, y_train_20cm_only)
y_train_pred_20cm, y_test_pred_20cm = (
    xgb_20cm.predict(features) for features in (X_train_20cm_only, X_test_20cm_only)
)

# R2 and RMSE (square root of the MSE) on the train and test splits.
print("XGBoost - moisture_20cm only")
print("Train R2:", r2_score(y_true=y_train_20cm_only, y_pred=y_train_pred_20cm))
print("Test R2:", r2_score(y_true=y_test_20cm_only, y_pred=y_test_pred_20cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_20cm_only, y_pred=y_train_pred_20cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_20cm_only, y_pred=y_test_pred_20cm)))

# Richer regressor: moisture_20cm + meteorological features, same target.
xgb_full_20 = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_full_20.fit(X_train_full_20, y_train_full_20)
y_train_pred_full_20, y_test_pred_full_20 = (
    xgb_full_20.predict(features) for features in (X_train_full_20, X_test_full_20)
)

# Same four metrics for the richer model.
print("\nXGBoost - moisture_20cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_20, y_pred=y_train_pred_full_20))
print("Test R2:", r2_score(y_true=y_test_full_20, y_pred=y_test_pred_full_20))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_20, y_pred=y_train_pred_full_20)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_20, y_pred=y_test_pred_full_20)))

XGBoost - moisture_20cm only
Train R2: 0.670775621107141
Test R2: 0.6751142043015849
Train RMSE: 0.008092849083688571
Test RMSE: 0.008141994290289776

XGBoost - moisture_20cm + meteorological features
Train R2: 0.9001571662558124
Test R2: 0.8899106333366333
Train RMSE: 0.004456705097386881
Test RMSE: 0.004739561010197611


##from 30cm

In [None]:
# ---- Features/target for estimating moisture_50cm from moisture_30cm ----

# Single-sensor inputs: moisture_30cm is the only feature column.
X_train_30cm_only, X_test_30cm_only = train_base[['moisture_30cm']], test_base[['moisture_30cm']]
y_train_30cm_only, y_test_30cm_only = train_base['moisture_50cm'], test_base['moisture_50cm']

# Full inputs (moisture_30cm + meteorological features): drop the target,
# the other moisture depths and the timestamp from the feature matrix.
features_to_exclude_50_from_30 = [
    'moisture_10cm', 'moisture_20cm', 'moisture_40cm',
    'moisture_50cm', 'moisture_60cm', 'timestamp_device',
]
X_train_full_30, X_test_full_30 = (
    split.drop(columns=features_to_exclude_50_from_30) for split in (train_base, test_base)
)
y_train_full_30, y_test_full_30 = train_base['moisture_50cm'], test_base['moisture_50cm']

In [None]:
# Baseline regressor: moisture_30cm alone as predictor of moisture_50cm.
xgb_30cm = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_30cm.fit(X_train_30cm_only, y_train_30cm_only)
y_train_pred_30cm, y_test_pred_30cm = (
    xgb_30cm.predict(features) for features in (X_train_30cm_only, X_test_30cm_only)
)

# R2 and RMSE (square root of the MSE) on the train and test splits.
print("XGBoost - moisture_30cm only")
print("Train R2:", r2_score(y_true=y_train_30cm_only, y_pred=y_train_pred_30cm))
print("Test R2:", r2_score(y_true=y_test_30cm_only, y_pred=y_test_pred_30cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_30cm_only, y_pred=y_train_pred_30cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_30cm_only, y_pred=y_test_pred_30cm)))

# Richer regressor: moisture_30cm + meteorological features, same target.
xgb_full_30 = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_full_30.fit(X_train_full_30, y_train_full_30)
y_train_pred_full_30, y_test_pred_full_30 = (
    xgb_full_30.predict(features) for features in (X_train_full_30, X_test_full_30)
)

# Same four metrics for the richer model.
print("\nXGBoost - moisture_30cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_30, y_pred=y_train_pred_full_30))
print("Test R2:", r2_score(y_true=y_test_full_30, y_pred=y_test_pred_full_30))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_30, y_pred=y_train_pred_full_30)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_30, y_pred=y_test_pred_full_30)))

XGBoost - moisture_30cm only
Train R2: 0.8125145737255424
Test R2: 0.8173010770468401
Train RMSE: 0.0061071586970745375
Test RMSE: 0.006105671367070302

XGBoost - moisture_30cm + meteorological features
Train R2: 0.9291289425729079
Test R2: 0.9218402542339095
Train RMSE: 0.0037548267615859455
Test RMSE: 0.003993530452842434


##from 40cm

In [None]:
# ---- Features/target for estimating moisture_50cm from moisture_40cm ----

# Single-sensor inputs: moisture_40cm is the only feature column.
X_train_40cm_only, X_test_40cm_only = train_base[['moisture_40cm']], test_base[['moisture_40cm']]
y_train_40cm_only, y_test_40cm_only = train_base['moisture_50cm'], test_base['moisture_50cm']

# Full inputs (moisture_40cm + meteorological features): drop the target,
# the other moisture depths and the timestamp from the feature matrix.
features_to_exclude_50_from_40 = [
    'moisture_10cm', 'moisture_20cm', 'moisture_30cm',
    'moisture_50cm', 'moisture_60cm', 'timestamp_device',
]
X_train_full_40, X_test_full_40 = (
    split.drop(columns=features_to_exclude_50_from_40) for split in (train_base, test_base)
)
y_train_full_40, y_test_full_40 = train_base['moisture_50cm'], test_base['moisture_50cm']

In [None]:
# Baseline regressor: moisture_40cm alone as predictor of moisture_50cm.
xgb_40cm = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_40cm.fit(X_train_40cm_only, y_train_40cm_only)
y_train_pred_40cm, y_test_pred_40cm = (
    xgb_40cm.predict(features) for features in (X_train_40cm_only, X_test_40cm_only)
)

# R2 and RMSE (square root of the MSE) on the train and test splits.
print("XGBoost - moisture_40cm only")
print("Train R2:", r2_score(y_true=y_train_40cm_only, y_pred=y_train_pred_40cm))
print("Test R2:", r2_score(y_true=y_test_40cm_only, y_pred=y_test_pred_40cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_40cm_only, y_pred=y_train_pred_40cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_40cm_only, y_pred=y_test_pred_40cm)))

# Richer regressor: moisture_40cm + meteorological features, same target.
xgb_full_40 = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_full_40.fit(X_train_full_40, y_train_full_40)
y_train_pred_full_40, y_test_pred_full_40 = (
    xgb_full_40.predict(features) for features in (X_train_full_40, X_test_full_40)
)

# Same four metrics for the richer model.
print("\nXGBoost - moisture_40cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_40, y_pred=y_train_pred_full_40))
print("Test R2:", r2_score(y_true=y_test_full_40, y_pred=y_test_pred_full_40))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_40, y_pred=y_train_pred_full_40)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_40, y_pred=y_test_pred_full_40)))

XGBoost - moisture_40cm only
Train R2: 0.8839117524346103
Test R2: 0.8869409490748102
Train RMSE: 0.004805621653568976
Test RMSE: 0.004803060965625031

XGBoost - moisture_40cm + meteorological features
Train R2: 0.9557961738874915
Test R2: 0.9512812806788723
Train RMSE: 0.0029654143109154544
Test RMSE: 0.0031529229894240764


##from 60cm

In [None]:
# ---- Features/target for estimating moisture_50cm from moisture_60cm ----

# Single-sensor inputs: moisture_60cm is the only feature column.
X_train_60cm_only, X_test_60cm_only = train_base[['moisture_60cm']], test_base[['moisture_60cm']]
y_train_60cm_only, y_test_60cm_only = train_base['moisture_50cm'], test_base['moisture_50cm']

# Full inputs (moisture_60cm + meteorological features): drop the target,
# the other moisture depths and the timestamp from the feature matrix.
features_to_exclude_50_from_60 = [
    'moisture_10cm', 'moisture_20cm', 'moisture_30cm',
    'moisture_40cm', 'moisture_50cm', 'timestamp_device',
]
X_train_full_60, X_test_full_60 = (
    split.drop(columns=features_to_exclude_50_from_60) for split in (train_base, test_base)
)
y_train_full_60, y_test_full_60 = train_base['moisture_50cm'], test_base['moisture_50cm']

In [None]:
# Baseline regressor: moisture_60cm alone as predictor of moisture_50cm.
xgb_60cm = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_60cm.fit(X_train_60cm_only, y_train_60cm_only)
y_train_pred_60cm, y_test_pred_60cm = (
    xgb_60cm.predict(features) for features in (X_train_60cm_only, X_test_60cm_only)
)

# R2 and RMSE (square root of the MSE) on the train and test splits.
print("XGBoost - moisture_60cm only")
print("Train R2:", r2_score(y_true=y_train_60cm_only, y_pred=y_train_pred_60cm))
print("Test R2:", r2_score(y_true=y_test_60cm_only, y_pred=y_test_pred_60cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_60cm_only, y_pred=y_train_pred_60cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_60cm_only, y_pred=y_test_pred_60cm)))

# Richer regressor: moisture_60cm + meteorological features, same target.
xgb_full_60 = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_full_60.fit(X_train_full_60, y_train_full_60)
y_train_pred_full_60, y_test_pred_full_60 = (
    xgb_full_60.predict(features) for features in (X_train_full_60, X_test_full_60)
)

# Same four metrics for the richer model.
print("\nXGBoost - moisture_60cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_60, y_pred=y_train_pred_full_60))
print("Test R2:", r2_score(y_true=y_test_full_60, y_pred=y_test_pred_full_60))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_60, y_pred=y_train_pred_full_60)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_60, y_pred=y_test_pred_full_60)))

XGBoost - moisture_60cm only
Train R2: 0.8941191872866118
Test R2: 0.8979429014704737
Train RMSE: 0.004589486336879878
Test RMSE: 0.0045633842599741795

XGBoost - moisture_60cm + meteorological features
Train R2: 0.9687843758166279
Test R2: 0.9658974805729039
Train RMSE: 0.0024919622550433193
Test RMSE: 0.002637902041491886


#Predicting moisture at 60cm depth

##from 10cm

In [None]:
# ---- Features/target for estimating moisture_60cm from moisture_10cm ----

# Single-sensor inputs: moisture_10cm is the only feature column.
X_train_10cm_only, X_test_10cm_only = train_base[['moisture_10cm']], test_base[['moisture_10cm']]
y_train_10cm_only, y_test_10cm_only = train_base['moisture_60cm'], test_base['moisture_60cm']

# Full inputs (moisture_10cm + meteorological features): drop the target,
# the other moisture depths and the timestamp from the feature matrix.
features_to_exclude_60_from_10 = [
    'moisture_20cm', 'moisture_30cm', 'moisture_40cm',
    'moisture_50cm', 'moisture_60cm', 'timestamp_device',
]
X_train_full_10, X_test_full_10 = (
    split.drop(columns=features_to_exclude_60_from_10) for split in (train_base, test_base)
)
y_train_full_10, y_test_full_10 = train_base['moisture_60cm'], test_base['moisture_60cm']

In [None]:
# Baseline regressor: moisture_10cm alone as predictor of moisture_60cm.
xgb_10cm = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_10cm.fit(X_train_10cm_only, y_train_10cm_only)
y_train_pred_10cm, y_test_pred_10cm = (
    xgb_10cm.predict(features) for features in (X_train_10cm_only, X_test_10cm_only)
)

# R2 and RMSE (square root of the MSE) on the train and test splits.
print("XGBoost - moisture_10cm only")
print("Train R2:", r2_score(y_true=y_train_10cm_only, y_pred=y_train_pred_10cm))
print("Test R2:", r2_score(y_true=y_test_10cm_only, y_pred=y_test_pred_10cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_10cm_only, y_pred=y_train_pred_10cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_10cm_only, y_pred=y_test_pred_10cm)))

# Richer regressor: moisture_10cm + meteorological features, same target.
xgb_full_10 = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_full_10.fit(X_train_full_10, y_train_full_10)
y_train_pred_full_10, y_test_pred_full_10 = (
    xgb_full_10.predict(features) for features in (X_train_full_10, X_test_full_10)
)

# Same four metrics for the richer model.
print("\nXGBoost - moisture_10cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_10, y_pred=y_train_pred_full_10))
print("Test R2:", r2_score(y_true=y_test_full_10, y_pred=y_test_pred_full_10))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_10, y_pred=y_train_pred_full_10)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_10, y_pred=y_test_pred_full_10)))

XGBoost - moisture_10cm only
Train R2: 0.3956598495595245
Test R2: 0.38831141262714053
Train RMSE: 0.00947061615713289
Test RMSE: 0.0096650647232919

XGBoost - moisture_10cm + meteorological features
Train R2: 0.7399452333855252
Test R2: 0.7012987613330692
Train RMSE: 0.00621255033014976
Test RMSE: 0.006753953863285897


##from 20cm

In [None]:
# ---- Features/target for estimating moisture_60cm from moisture_20cm ----

# Single-sensor inputs: moisture_20cm is the only feature column.
X_train_20cm_only, X_test_20cm_only = train_base[['moisture_20cm']], test_base[['moisture_20cm']]
y_train_20cm_only, y_test_20cm_only = train_base['moisture_60cm'], test_base['moisture_60cm']

# Full inputs (moisture_20cm + meteorological features): drop the target,
# the other moisture depths and the timestamp from the feature matrix.
features_to_exclude_60_from_20 = [
    'moisture_10cm', 'moisture_30cm', 'moisture_40cm',
    'moisture_50cm', 'moisture_60cm', 'timestamp_device',
]
X_train_full_20, X_test_full_20 = (
    split.drop(columns=features_to_exclude_60_from_20) for split in (train_base, test_base)
)
y_train_full_20, y_test_full_20 = train_base['moisture_60cm'], test_base['moisture_60cm']


In [None]:
# Baseline regressor: moisture_20cm alone as predictor of moisture_60cm.
xgb_20cm = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_20cm.fit(X_train_20cm_only, y_train_20cm_only)
y_train_pred_20cm, y_test_pred_20cm = (
    xgb_20cm.predict(features) for features in (X_train_20cm_only, X_test_20cm_only)
)

# R2 and RMSE (square root of the MSE) on the train and test splits.
print("XGBoost - moisture_20cm only")
print("Train R2:", r2_score(y_true=y_train_20cm_only, y_pred=y_train_pred_20cm))
print("Test R2:", r2_score(y_true=y_test_20cm_only, y_pred=y_test_pred_20cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_20cm_only, y_pred=y_train_pred_20cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_20cm_only, y_pred=y_test_pred_20cm)))

# Richer regressor: moisture_20cm + meteorological features, same target.
xgb_full_20 = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
xgb_full_20.fit(X_train_full_20, y_train_full_20)
y_train_pred_full_20, y_test_pred_full_20 = (
    xgb_full_20.predict(features) for features in (X_train_full_20, X_test_full_20)
)

# Same four metrics for the richer model.
print("\nXGBoost - moisture_20cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_20, y_pred=y_train_pred_full_20))
print("Test R2:", r2_score(y_true=y_test_full_20, y_pred=y_test_pred_full_20))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_20, y_pred=y_train_pred_full_20)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_20, y_pred=y_test_pred_full_20)))

XGBoost - moisture_20cm only
Train R2: 0.5848255647990588
Test R2: 0.589098958290611
Train RMSE: 0.007849695544052718
Test RMSE: 0.007921514089095107

XGBoost - moisture_20cm + meteorological features
Train R2: 0.864017316129347
Test R2: 0.8452008035931428
Train RMSE: 0.0044924092743937865
Test RMSE: 0.004862100469283905


## From 30cm

In [None]:
# ==== 1. Separate features and targets (target: moisture_60cm) ====

# Single-feature baseline: moisture_30cm is the only predictor.
X_train_30cm_only, X_test_30cm_only = (
    split[['moisture_30cm']] for split in (train_base, test_base)
)
y_train_30cm_only, y_test_30cm_only = (
    split['moisture_60cm'] for split in (train_base, test_base)
)

# Full model: keep every column except the target, the other moisture depths
# and the device timestamp. What remains is moisture_30cm plus whatever other
# columns filtered_df.csv carries (presumably the meteorological features).
features_to_exclude_60_from_30 = [
    'moisture_10cm',
    'moisture_20cm',
    'moisture_40cm',
    'moisture_50cm',
    'moisture_60cm',
    'timestamp_device',
]
X_train_full_30, X_test_full_30 = (
    split.drop(columns=features_to_exclude_60_from_30)
    for split in (train_base, test_base)
)
y_train_full_30, y_test_full_30 = (
    split['moisture_60cm'] for split in (train_base, test_base)
)


In [None]:
# Baseline: XGBoost trained on moisture_30cm alone to predict moisture_60cm.
xgb_30cm = XGBRegressor(
    objective='reg:squarederror',
    n_estimators=100,
    random_state=42,
).fit(X_train_30cm_only, y_train_30cm_only)  # fit() returns the estimator itself
y_train_pred_30cm = xgb_30cm.predict(X_train_30cm_only)
y_test_pred_30cm = xgb_30cm.predict(X_test_30cm_only)

# Report R2 and RMSE (square root of the MSE) on both splits.
print("XGBoost - moisture_30cm only")
print("Train R2:", r2_score(y_true=y_train_30cm_only, y_pred=y_train_pred_30cm))
print("Test R2:", r2_score(y_true=y_test_30cm_only, y_pred=y_test_pred_30cm))
print(
    "Train RMSE:",
    np.sqrt(mean_squared_error(y_true=y_train_30cm_only, y_pred=y_train_pred_30cm)),
)
print(
    "Test RMSE:",
    np.sqrt(mean_squared_error(y_true=y_test_30cm_only, y_pred=y_test_pred_30cm)),
)

# Full model: same hyper-parameters, fitted on the wider X_*_full_30 feature frames.
xgb_full_30 = XGBRegressor(
    objective='reg:squarederror',
    n_estimators=100,
    random_state=42,
).fit(X_train_full_30, y_train_full_30)
y_train_pred_full_30 = xgb_full_30.predict(X_train_full_30)
y_test_pred_full_30 = xgb_full_30.predict(X_test_full_30)

print("\nXGBoost - moisture_30cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_30, y_pred=y_train_pred_full_30))
print("Test R2:", r2_score(y_true=y_test_full_30, y_pred=y_test_pred_full_30))
print(
    "Train RMSE:",
    np.sqrt(mean_squared_error(y_true=y_train_full_30, y_pred=y_train_pred_full_30)),
)
print(
    "Test RMSE:",
    np.sqrt(mean_squared_error(y_true=y_test_full_30, y_pred=y_test_pred_full_30)),
)

XGBoost - moisture_30cm only
Train R2: 0.6213546807128726
Test R2: 0.630720331089548
Train RMSE: 0.007496418213565153
Test RMSE: 0.007509608159118429

XGBoost - moisture_30cm + meteorological features
Train R2: 0.8686734417558442
Test R2: 0.8535408052519149
Train RMSE: 0.004414828039540661
Test RMSE: 0.004729311268196873


## From 40cm

In [None]:
# ==== 1. Separate features and targets (target: moisture_60cm) ====

# Single-feature baseline: moisture_40cm is the only predictor.
X_train_40cm_only, X_test_40cm_only = (
    split[['moisture_40cm']] for split in (train_base, test_base)
)
y_train_40cm_only, y_test_40cm_only = (
    split['moisture_60cm'] for split in (train_base, test_base)
)

# Full model: keep every column except the target, the other moisture depths
# and the device timestamp. What remains is moisture_40cm plus whatever other
# columns filtered_df.csv carries (presumably the meteorological features).
features_to_exclude_60_from_40 = [
    'moisture_10cm',
    'moisture_20cm',
    'moisture_30cm',
    'moisture_50cm',
    'moisture_60cm',
    'timestamp_device',
]
X_train_full_40, X_test_full_40 = (
    split.drop(columns=features_to_exclude_60_from_40)
    for split in (train_base, test_base)
)
y_train_full_40, y_test_full_40 = (
    split['moisture_60cm'] for split in (train_base, test_base)
)

In [None]:
# Baseline: XGBoost trained on moisture_40cm alone to predict moisture_60cm.
xgb_40cm = XGBRegressor(
    objective='reg:squarederror',
    n_estimators=100,
    random_state=42,
).fit(X_train_40cm_only, y_train_40cm_only)  # fit() returns the estimator itself
y_train_pred_40cm = xgb_40cm.predict(X_train_40cm_only)
y_test_pred_40cm = xgb_40cm.predict(X_test_40cm_only)

# Report R2 and RMSE (square root of the MSE) on both splits.
print("XGBoost - moisture_40cm only")
print("Train R2:", r2_score(y_true=y_train_40cm_only, y_pred=y_train_pred_40cm))
print("Test R2:", r2_score(y_true=y_test_40cm_only, y_pred=y_test_pred_40cm))
print(
    "Train RMSE:",
    np.sqrt(mean_squared_error(y_true=y_train_40cm_only, y_pred=y_train_pred_40cm)),
)
print(
    "Test RMSE:",
    np.sqrt(mean_squared_error(y_true=y_test_40cm_only, y_pred=y_test_pred_40cm)),
)

# Full model: same hyper-parameters, fitted on the wider X_*_full_40 feature frames.
xgb_full_40 = XGBRegressor(
    objective='reg:squarederror',
    n_estimators=100,
    random_state=42,
).fit(X_train_full_40, y_train_full_40)
y_train_pred_full_40 = xgb_full_40.predict(X_train_full_40)
y_test_pred_full_40 = xgb_full_40.predict(X_test_full_40)

print("\nXGBoost - moisture_40cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_40, y_pred=y_train_pred_full_40))
print("Test R2:", r2_score(y_true=y_test_full_40, y_pred=y_test_pred_full_40))
print(
    "Train RMSE:",
    np.sqrt(mean_squared_error(y_true=y_train_full_40, y_pred=y_train_pred_full_40)),
)
print(
    "Test RMSE:",
    np.sqrt(mean_squared_error(y_true=y_test_full_40, y_pred=y_test_pred_full_40)),
)

XGBoost - moisture_40cm only
Train R2: 0.6754427364155706
Test R2: 0.6826390491405847
Train RMSE: 0.006940378846398856
Test RMSE: 0.006961716315915721

XGBoost - moisture_40cm + meteorological features
Train R2: 0.8849855398761146
Test R2: 0.871616364786077
Train RMSE: 0.004131556854653305
Test RMSE: 0.004427865416530157


## From 50cm

In [None]:
# ==== 1. Separate features and targets (target: moisture_60cm) ====

# Single-feature baseline: moisture_50cm is the only predictor.
X_train_50cm_only, X_test_50cm_only = (
    split[['moisture_50cm']] for split in (train_base, test_base)
)
y_train_50cm_only, y_test_50cm_only = (
    split['moisture_60cm'] for split in (train_base, test_base)
)

# Full model: keep every column except the target, the other moisture depths
# and the device timestamp. What remains is moisture_50cm plus whatever other
# columns filtered_df.csv carries (presumably the meteorological features).
features_to_exclude_60_from_50 = [
    'moisture_10cm',
    'moisture_20cm',
    'moisture_30cm',
    'moisture_40cm',
    'moisture_60cm',
    'timestamp_device',
]
X_train_full_50, X_test_full_50 = (
    split.drop(columns=features_to_exclude_60_from_50)
    for split in (train_base, test_base)
)
y_train_full_50, y_test_full_50 = (
    split['moisture_60cm'] for split in (train_base, test_base)
)

In [None]:
# Baseline: XGBoost trained on moisture_50cm alone to predict moisture_60cm.
xgb_50cm = XGBRegressor(
    objective='reg:squarederror',
    n_estimators=100,
    random_state=42,
).fit(X_train_50cm_only, y_train_50cm_only)  # fit() returns the estimator itself
y_train_pred_50cm = xgb_50cm.predict(X_train_50cm_only)
y_test_pred_50cm = xgb_50cm.predict(X_test_50cm_only)

# Report R2 and RMSE (square root of the MSE) on both splits.
print("XGBoost - moisture_50cm only")
print("Train R2:", r2_score(y_true=y_train_50cm_only, y_pred=y_train_pred_50cm))
print("Test R2:", r2_score(y_true=y_test_50cm_only, y_pred=y_test_pred_50cm))
print(
    "Train RMSE:",
    np.sqrt(mean_squared_error(y_true=y_train_50cm_only, y_pred=y_train_pred_50cm)),
)
print(
    "Test RMSE:",
    np.sqrt(mean_squared_error(y_true=y_test_50cm_only, y_pred=y_test_pred_50cm)),
)

# Full model: same hyper-parameters, fitted on the wider X_*_full_50 feature frames.
xgb_full_50 = XGBRegressor(
    objective='reg:squarederror',
    n_estimators=100,
    random_state=42,
).fit(X_train_full_50, y_train_full_50)
y_train_pred_full_50 = xgb_full_50.predict(X_train_full_50)
y_test_pred_full_50 = xgb_full_50.predict(X_test_full_50)

print("\nXGBoost - moisture_50cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_50, y_pred=y_train_pred_full_50))
print("Test R2:", r2_score(y_true=y_test_full_50, y_pred=y_test_pred_full_50))
print(
    "Train RMSE:",
    np.sqrt(mean_squared_error(y_true=y_train_full_50, y_pred=y_train_pred_full_50)),
)
print(
    "Test RMSE:",
    np.sqrt(mean_squared_error(y_true=y_test_full_50, y_pred=y_test_pred_full_50)),
)

XGBoost - moisture_50cm only
Train R2: 0.8992255992379774
Test R2: 0.9029779035462617
Train RMSE: 0.0038673423866247927
Test RMSE: 0.0038492392462945715

XGBoost - moisture_50cm + meteorological features
Train R2: 0.9695450224072157
Test R2: 0.9672098752615821
Train RMSE: 0.0021260166091066615
Test RMSE: 0.0022377476629370694
