In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import ExtraTreesRegressor

In [None]:
# Load the input table from a CSV in the working directory; the train/test split below is drawn from this frame.
base_df_resampled = pd.read_csv("filtered_df.csv")

In [None]:
# Train-test split: hold out 20% of the rows with a fixed seed so the partition is repeatable,
# then renumber each part from 0 so positional and label indexing agree.
# NOTE(review): rows are assigned to the two parts at random. If this table is a time series
# (a `timestamp_device` column is dropped from the features further down), neighbouring
# timestamps can land on both sides of the split and inflate test scores — confirm whether
# a chronological split would be more appropriate.
train_base, test_base = (
    part.reset_index(drop=True)
    for part in train_test_split(base_df_resampled, test_size=0.2, random_state=123)
)

# The role of meteorological variables

# Predicting moisture at 10 cm depth

## From 20 cm

In [None]:
# ==== 1. Separate features and targets (target: moisture_10cm, source depth: 20 cm) ====

# Single-predictor inputs: only the moisture_20cm column, kept as a one-column DataFrame.
X_train_20cm_only, X_test_20cm_only = train_base[['moisture_20cm']], test_base[['moisture_20cm']]
y_train_20cm_only, y_test_20cm_only = train_base['moisture_10cm'], test_base['moisture_10cm']

# Full inputs: drop the target, the other depth readings and the timestamp. moisture_20cm and
# every remaining column (treated by this notebook as the meteorological features) stay in.
features_to_exclude = [f'moisture_{depth}cm' for depth in (10, 30, 40, 50, 60)] + ['timestamp_device']
X_train_full, X_test_full = (
    frame.drop(columns=features_to_exclude) for frame in (train_base, test_base)
)
y_train_full, y_test_full = train_base['moisture_10cm'], test_base['moisture_10cm']

# ==== 3. Baseline Extra Trees using moisture_20cm + meteorological features ====
# Same exclusion list as above, held under its own name. The resulting frames are separate
# objects with the same columns as X_train_full / X_test_full.
features_to_exclude_partial = list(features_to_exclude)
X_train_20cm_meteo, X_test_20cm_meteo = (
    frame.drop(columns=features_to_exclude_partial) for frame in (train_base, test_base)
)

In [None]:
# Two unconstrained Extra Trees models for moisture_10cm: one fed only moisture_20cm,
# one fed the full predictor set. fit() returns the fitted estimator, so it is bound directly.

# --- moisture_20cm only ---
et_20cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_20cm_only, y_train_20cm_only
)
y_train_pred_20cm, y_test_pred_20cm = (
    et_20cm.predict(inputs) for inputs in (X_train_20cm_only, X_test_20cm_only)
)

print("Extra Trees - moisture_20cm only")
for metric_label, metric_value in (
    ("Train R2:", r2_score(y_train_20cm_only, y_train_pred_20cm)),
    ("Test R2:", r2_score(y_test_20cm_only, y_test_pred_20cm)),
    ("Train RMSE:", np.sqrt(mean_squared_error(y_train_20cm_only, y_train_pred_20cm))),
    ("Test RMSE:", np.sqrt(mean_squared_error(y_test_20cm_only, y_test_pred_20cm))),
):
    print(metric_label, metric_value)

# --- moisture_20cm + meteorological features ---
et_full = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(X_train_full, y_train_full)
y_train_pred_full, y_test_pred_full = (
    et_full.predict(inputs) for inputs in (X_train_full, X_test_full)
)

print("\nExtra Trees - moisture_20cm + meteorological features")
for metric_label, metric_value in (
    ("Train R2:", r2_score(y_train_full, y_train_pred_full)),
    ("Test R2:", r2_score(y_test_full, y_test_pred_full)),
    ("Train RMSE:", np.sqrt(mean_squared_error(y_train_full, y_train_pred_full))),
    ("Test RMSE:", np.sqrt(mean_squared_error(y_test_full, y_test_pred_full))),
):
    print(metric_label, metric_value)


Extra Trees - moisture_20cm only
Train R2: 0.7485638805925896
Test R2: 0.7475821019526077
Train RMSE: 0.04327202080488243
Test RMSE: 0.04330771927469991

Extra Trees - moisture_20cm + meteorological features
Train R2: 0.9999840742413926
Test R2: 0.9553976852894436
Train RMSE: 0.0003443843512136679
Test RMSE: 0.01820471728669711


In [None]:
# Regulated Extra Trees model: same predictors and target as the full baseline,
# but with tree depth capped at 12.
et_reg = ExtraTreesRegressor(n_estimators=100, max_depth=12, random_state=42).fit(
    X_train_full, y_train_full
)

# Predictions on both splits
y_train_pred_reg, y_test_pred_reg = (
    et_reg.predict(inputs) for inputs in (X_train_full, X_test_full)
)

# Report R2 then RMSE, train before test
print("\nRegulated Extra Trees - moisture_20cm + meteorological features")
for metric_label, metric_value in (
    ("Train R2:", r2_score(y_train_full, y_train_pred_reg)),
    ("Test R2:", r2_score(y_test_full, y_test_pred_reg)),
    ("Train RMSE:", np.sqrt(mean_squared_error(y_train_full, y_train_pred_reg))),
    ("Test RMSE:", np.sqrt(mean_squared_error(y_test_full, y_test_pred_reg))),
):
    print(metric_label, metric_value)



Regulated Extra Trees - moisture_20cm + meteorological features
Train R2: 0.8471808769007156
Test R2: 0.8420665022431827
Train RMSE: 0.03373511509969707
Test RMSE: 0.03425645094187573


## From 30 cm

In [None]:
# ==== 1. Separate features and targets (target: moisture_10cm, source depth: 30 cm) ====

# Single-predictor inputs: only the moisture_30cm column, kept as a one-column DataFrame.
X_train_30cm_only, X_test_30cm_only = train_base[['moisture_30cm']], test_base[['moisture_30cm']]
y_train_30cm_only, y_test_30cm_only = train_base['moisture_10cm'], test_base['moisture_10cm']

# Full inputs: drop the target, the other depth readings and the timestamp, so moisture_30cm
# and the remaining columns are the predictors. This rebinds the shared names
# features_to_exclude / X_*_full / y_*_full used by the 20 cm section.
features_to_exclude = [f'moisture_{depth}cm' for depth in (10, 20, 40, 50, 60)] + ['timestamp_device']
X_train_full, X_test_full = (
    frame.drop(columns=features_to_exclude) for frame in (train_base, test_base)
)
y_train_full, y_test_full = train_base['moisture_10cm'], test_base['moisture_10cm']


In [None]:
# ==== 2. Baseline: Extra Trees with moisture_30cm as the only predictor ====
# fit() returns the fitted estimator, so it is bound directly.
et_30cm_only = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_30cm_only, y_train_30cm_only
)

# ==== 3. Predictions on both splits ====
y_train_pred_30cm_only, y_test_pred_30cm_only = (
    et_30cm_only.predict(inputs) for inputs in (X_train_30cm_only, X_test_30cm_only)
)

# ==== 4. R2 then RMSE, train before test ====
print("Extra Trees - using only moisture_30cm")
for metric_label, metric_value in (
    ("Train R2:", r2_score(y_train_30cm_only, y_train_pred_30cm_only)),
    ("Test R2:", r2_score(y_test_30cm_only, y_test_pred_30cm_only)),
    ("Train RMSE:", np.sqrt(mean_squared_error(y_train_30cm_only, y_train_pred_30cm_only))),
    ("Test RMSE:", np.sqrt(mean_squared_error(y_test_30cm_only, y_test_pred_30cm_only))),
):
    print(metric_label, metric_value)

Extra Trees - using only moisture_30cm
Train R2: 0.5069343362602743
Test R2: 0.5081510002313354
Train RMSE: 0.06059624664375811
Test RMSE: 0.06045345636853849


In [None]:
# Regulated Extra Trees: depth-capped model on the full predictor set
# (moisture_30cm plus the remaining columns), target moisture_10cm.
et_reg = ExtraTreesRegressor(
    random_state=42,
    n_estimators=100,
    max_depth=12,  # depth cap is what makes this the "regulated" variant
)

# Fit regulated model
et_reg.fit(X_train_full, y_train_full)

# Predict on both splits
y_train_pred_reg = et_reg.predict(X_train_full)
y_test_pred_reg = et_reg.predict(X_test_full)

# Evaluate. The header names the model as regulated so these scores are not read as an
# unconstrained baseline; the 20 cm section labels its depth-capped model the same way.
print("Regulated Extra Trees - moisture_30cm + meteorological features")
print("Train R2:", r2_score(y_train_full, y_train_pred_reg))
print("Test R2:", r2_score(y_test_full, y_test_pred_reg))
print("Train RMSE:", np.sqrt(mean_squared_error(y_train_full, y_train_pred_reg)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_test_full, y_test_pred_reg)))

Extra Trees - moisture_30cm + meteorological features
Train R2: 0.6831182561837047
Test R2: 0.679218763907139
Train RMSE: 0.04857825992129111
Test RMSE: 0.04882134258068874


## From 40 cm

In [None]:
# ==== 1. Separate features and targets ====

# Model using only moisture_40cm
X_train_40cm_only = train_base[['moisture_40cm']]
y_train_40cm_only = train_base['moisture_10cm']
X_test_40cm_only = test_base[['moisture_40cm']]
y_test_40cm_only = test_base['moisture_10cm']

# Full model (with meteorological and moisture_40cm features)
features_to_exclude = ['moisture_10cm', 'moisture_20cm', 'moisture_30cm', 'moisture_50cm', 'moisture_60cm', 'timestamp_device']
X_train_full = train_base.drop(columns=features_to_exclude)
y_train_full = train_base['moisture_10cm']
X_test_full = test_base.drop(columns=features_to_exclude)
y_test_full = test_base['moisture_10cm']

In [None]:
# Two unconstrained Extra Trees models for moisture_10cm: one fed only moisture_40cm,
# one fed the full predictor set. fit() returns the fitted estimator, so it is bound directly.

# --- moisture_40cm only ---
et_40cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_40cm_only, y_train_40cm_only
)
y_train_pred_40cm, y_test_pred_40cm = (
    et_40cm.predict(inputs) for inputs in (X_train_40cm_only, X_test_40cm_only)
)

print("\nExtra Trees - moisture_40cm only")
for metric_label, metric_value in (
    ("Train R2:", r2_score(y_train_40cm_only, y_train_pred_40cm)),
    ("Test R2:", r2_score(y_test_40cm_only, y_test_pred_40cm)),
    ("Train RMSE:", np.sqrt(mean_squared_error(y_train_40cm_only, y_train_pred_40cm))),
    ("Test RMSE:", np.sqrt(mean_squared_error(y_test_40cm_only, y_test_pred_40cm))),
):
    print(metric_label, metric_value)

# --- moisture_40cm + meteorological features ---
et_full = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(X_train_full, y_train_full)
y_train_pred_full, y_test_pred_full = (
    et_full.predict(inputs) for inputs in (X_train_full, X_test_full)
)

print("\nExtra Trees - moisture_40cm + meteorological features")
for metric_label, metric_value in (
    ("Train R2:", r2_score(y_train_full, y_train_pred_full)),
    ("Test R2:", r2_score(y_test_full, y_test_pred_full)),
    ("Train RMSE:", np.sqrt(mean_squared_error(y_train_full, y_train_pred_full))),
    ("Test RMSE:", np.sqrt(mean_squared_error(y_test_full, y_test_pred_full))),
):
    print(metric_label, metric_value)


Extra Trees - moisture_40cm only
Train R2: 0.44008561437898797
Test R2: 0.4391921964187855
Train RMSE: 0.0645734748561407
Test RMSE: 0.06455238075303071

Extra Trees - moisture_40cm + meteorological features
Train R2: 0.9998687780321516
Test R2: 0.9037832110048253
Train RMSE: 0.0009885453959038552
Test RMSE: 0.026738107784213472


In [None]:
# ==== Regulated Extra Trees - moisture_40cm + meteorological features ====
# Depth capped at 12 and 150 trees (the unconstrained model above uses 100).
et_full_reg = ExtraTreesRegressor(n_estimators=150, max_depth=12, random_state=42).fit(
    X_train_full, y_train_full
)
y_train_pred_full_reg, y_test_pred_full_reg = (
    et_full_reg.predict(inputs) for inputs in (X_train_full, X_test_full)
)

print("\n[Regulated] Extra Trees - moisture_40cm + meteorological features")
for metric_label, metric_value in (
    ("Train R2:", r2_score(y_train_full, y_train_pred_full_reg)),
    ("Test R2:", r2_score(y_test_full, y_test_pred_full_reg)),
    ("Train RMSE:", np.sqrt(mean_squared_error(y_train_full, y_train_pred_full_reg))),
    ("Test RMSE:", np.sqrt(mean_squared_error(y_test_full, y_test_pred_full_reg))),
):
    print(metric_label, metric_value)


[Regulated] Extra Trees - moisture_40cm + meteorological features
Train R2: 0.6876328724691769
Test R2: 0.6795846678650579
Train RMSE: 0.048230971133398984
Test RMSE: 0.04879349023527963


## From 50 cm

In [None]:
# ==== 1. Separate features and targets (target: moisture_10cm, source depth: 50 cm) ====

# Single-predictor inputs: only the moisture_50cm column, kept as a one-column DataFrame.
X_train_50cm_only, X_test_50cm_only = train_base[['moisture_50cm']], test_base[['moisture_50cm']]
y_train_50cm_only, y_test_50cm_only = train_base['moisture_10cm'], test_base['moisture_10cm']

# Full inputs: drop the target, the other depth readings and the timestamp, so moisture_50cm
# and the remaining columns are the predictors.
features_to_exclude_50cm = [f'moisture_{depth}cm' for depth in (10, 20, 30, 40, 60)] + ['timestamp_device']
X_train_full_50cm, X_test_full_50cm = (
    frame.drop(columns=features_to_exclude_50cm) for frame in (train_base, test_base)
)
y_train_full_50cm, y_test_full_50cm = train_base['moisture_10cm'], test_base['moisture_10cm']

In [None]:
# Two unconstrained Extra Trees models for moisture_10cm: one fed only moisture_50cm,
# one fed the full predictor set. fit() returns the fitted estimator, so it is bound directly.

# --- moisture_50cm only ---
et_50cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_50cm_only, y_train_50cm_only
)
y_train_pred_50cm, y_test_pred_50cm = (
    et_50cm.predict(inputs) for inputs in (X_train_50cm_only, X_test_50cm_only)
)

print("\nExtra Trees - moisture_50cm only")
for metric_label, metric_value in (
    ("Train R2:", r2_score(y_train_50cm_only, y_train_pred_50cm)),
    ("Test R2:", r2_score(y_test_50cm_only, y_test_pred_50cm)),
    ("Train RMSE:", np.sqrt(mean_squared_error(y_train_50cm_only, y_train_pred_50cm))),
    ("Test RMSE:", np.sqrt(mean_squared_error(y_test_50cm_only, y_test_pred_50cm))),
):
    print(metric_label, metric_value)

# --- moisture_50cm + meteorological features ---
et_full_50cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_full_50cm, y_train_full_50cm
)
y_train_pred_full_50cm, y_test_pred_full_50cm = (
    et_full_50cm.predict(inputs) for inputs in (X_train_full_50cm, X_test_full_50cm)
)

print("\nExtra Trees - moisture_50cm + meteorological features")
for metric_label, metric_value in (
    ("Train R2:", r2_score(y_train_full_50cm, y_train_pred_full_50cm)),
    ("Test R2:", r2_score(y_test_full_50cm, y_test_pred_full_50cm)),
    ("Train RMSE:", np.sqrt(mean_squared_error(y_train_full_50cm, y_train_pred_full_50cm))),
    ("Test RMSE:", np.sqrt(mean_squared_error(y_test_full_50cm, y_test_pred_full_50cm))),
):
    print(metric_label, metric_value)


Extra Trees - moisture_50cm only
Train R2: 0.3623913992523453
Test R2: 0.36867224342636273
Train RMSE: 0.0689081242170287
Test RMSE: 0.06849086984379052

Extra Trees - moisture_50cm + meteorological features
Train R2: 0.999755121795677
Test R2: 0.8667693950817099
Train RMSE: 0.0013504185984114395
Test RMSE: 0.031463515068613


In [None]:
# Regulated Extra Trees for moisture_10cm from moisture_50cm + the remaining predictors.
# Uses 150 trees (the unconstrained model uses 100), caps depth at 12, and requires
# at least 12 samples in a node before it may be split.
# NOTE(review): min_samples_split=12 appears only in this regulated model; the other regulated
# models visible in this notebook constrain max_depth alone. Confirm this is intentional, since
# it makes the 50 cm score not strictly comparable with the other depths.
et_full_50cm_reg = ExtraTreesRegressor(
    n_estimators=150,
    max_depth=12,
    min_samples_split=12,
    random_state=42,
).fit(X_train_full_50cm, y_train_full_50cm)

# Predictions on both splits
y_train_pred_50cm_reg, y_test_pred_50cm_reg = (
    et_full_50cm_reg.predict(inputs) for inputs in (X_train_full_50cm, X_test_full_50cm)
)

# R2 then RMSE, train before test
print("\nExtra Trees (Regulated) - moisture_50cm + meteorological features")
for metric_label, metric_value in (
    ("Train R2:", r2_score(y_train_full_50cm, y_train_pred_50cm_reg)),
    ("Test R2:", r2_score(y_test_full_50cm, y_test_pred_50cm_reg)),
    ("Train RMSE:", np.sqrt(mean_squared_error(y_train_full_50cm, y_train_pred_50cm_reg))),
    ("Test RMSE:", np.sqrt(mean_squared_error(y_test_full_50cm, y_test_pred_50cm_reg))),
):
    print(metric_label, metric_value)


Extra Trees (Regulated) - moisture_50cm + meteorological features
Train R2: 0.637567265226753
Test R2: 0.6307393779776209
Train RMSE: 0.051952571967056056
Test RMSE: 0.052380753136810225


## From 60 cm

In [None]:
# ==== 1. Separate features and targets (target: moisture_10cm, source depth: 60 cm) ====

# Single-predictor inputs: only the moisture_60cm column, kept as a one-column DataFrame.
X_train_60cm_only, X_test_60cm_only = train_base[['moisture_60cm']], test_base[['moisture_60cm']]
y_train_60cm_only, y_test_60cm_only = train_base['moisture_10cm'], test_base['moisture_10cm']

# Full inputs: drop the target, the other depth readings and the timestamp, so moisture_60cm
# and the remaining columns are the predictors. Note this rebinds the shared name
# features_to_exclude that earlier sections also assign.
features_to_exclude = [f'moisture_{depth}cm' for depth in (10, 20, 30, 40, 50)] + ['timestamp_device']
X_train_full_60, X_test_full_60 = (
    frame.drop(columns=features_to_exclude) for frame in (train_base, test_base)
)
y_train_full_60, y_test_full_60 = train_base['moisture_10cm'], test_base['moisture_10cm']

In [None]:
# Two unconstrained Extra Trees models for moisture_10cm: one fed only moisture_60cm,
# one fed the full predictor set. fit() returns the fitted estimator, so it is bound directly.

# --- moisture_60cm only ---
et_60cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_60cm_only, y_train_60cm_only
)
y_train_pred_60cm, y_test_pred_60cm = (
    et_60cm.predict(inputs) for inputs in (X_train_60cm_only, X_test_60cm_only)
)

print("\nExtra Trees - moisture_60cm only")
for metric_label, metric_value in (
    ("Train R2:", r2_score(y_train_60cm_only, y_train_pred_60cm)),
    ("Test R2:", r2_score(y_test_60cm_only, y_test_pred_60cm)),
    ("Train RMSE:", np.sqrt(mean_squared_error(y_train_60cm_only, y_train_pred_60cm))),
    ("Test RMSE:", np.sqrt(mean_squared_error(y_test_60cm_only, y_test_pred_60cm))),
):
    print(metric_label, metric_value)

# --- moisture_60cm + meteorological features ---
et_full_60cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_full_60, y_train_full_60
)
y_train_pred_full_60cm, y_test_pred_full_60cm = (
    et_full_60cm.predict(inputs) for inputs in (X_train_full_60, X_test_full_60)
)

print("\nExtra Trees - moisture_60cm + meteorological features")
for metric_label, metric_value in (
    ("Train R2:", r2_score(y_train_full_60, y_train_pred_full_60cm)),
    ("Test R2:", r2_score(y_test_full_60, y_test_pred_full_60cm)),
    ("Train RMSE:", np.sqrt(mean_squared_error(y_train_full_60, y_train_pred_full_60cm))),
    ("Test RMSE:", np.sqrt(mean_squared_error(y_test_full_60, y_test_pred_full_60cm))),
):
    print(metric_label, metric_value)


Extra Trees - moisture_60cm only
Train R2: 0.39562751519014383
Test R2: 0.39905403027217456
Train RMSE: 0.06708813000095479
Test RMSE: 0.0668225359723185

Extra Trees - moisture_60cm + meteorological features
Train R2: 0.999881981206948
Test R2: 0.8861033198753994
Train RMSE: 0.0009374949328529688
Test RMSE: 0.02909114216656217


In [None]:
# Regulated Extra Trees Regressor for moisture_10cm from moisture_60cm + the remaining
# predictors: 150 trees (the unconstrained model uses 100) with depth capped at 12.
et_full_60cm_reg = ExtraTreesRegressor(n_estimators=150, max_depth=12, random_state=42).fit(
    X_train_full_60, y_train_full_60
)

# Predictions on both splits
y_train_pred_full_60cm_reg, y_test_pred_full_60cm_reg = (
    et_full_60cm_reg.predict(inputs) for inputs in (X_train_full_60, X_test_full_60)
)

# R2 then RMSE, train before test
print("\nExtra Trees (Regulated) - moisture_60cm + meteorological features")
for metric_label, metric_value in (
    ("Train R2:", r2_score(y_train_full_60, y_train_pred_full_60cm_reg)),
    ("Test R2:", r2_score(y_test_full_60, y_test_pred_full_60cm_reg)),
    ("Train RMSE:", np.sqrt(mean_squared_error(y_train_full_60, y_train_pred_full_60cm_reg))),
    ("Test RMSE:", np.sqrt(mean_squared_error(y_test_full_60, y_test_pred_full_60cm_reg))),
):
    print(metric_label, metric_value)


Extra Trees (Regulated) - moisture_60cm + meteorological features
Train R2: 0.6718821493182013
Test R2: 0.6642291471776653
Train RMSE: 0.049432010405944936
Test RMSE: 0.04994899298317372


# Predicting moisture at 20 cm depth

## From 10 cm depth

In [None]:
# Features and targets for predicting moisture_20cm from the 10 cm reading.

# Single-predictor inputs: only the moisture_10cm column, kept as a one-column DataFrame.
X_train_10cm_only, X_test_10cm_only = train_base[['moisture_10cm']], test_base[['moisture_10cm']]
y_train_10cm_only, y_test_10cm_only = train_base['moisture_20cm'], test_base['moisture_20cm']

# Full inputs: drop the target, the deeper readings and the timestamp, so moisture_10cm
# and the remaining columns are the predictors.
features_to_exclude_20 = [f'moisture_{depth}cm' for depth in (20, 30, 40, 50, 60)] + ['timestamp_device']
X_train_full_10, X_test_full_10 = (
    frame.drop(columns=features_to_exclude_20) for frame in (train_base, test_base)
)
y_train_full_10, y_test_full_10 = train_base['moisture_20cm'], test_base['moisture_20cm']


In [None]:
# Two unconstrained Extra Trees models for moisture_20cm: one fed only moisture_10cm,
# one fed the full predictor set. fit() returns the fitted estimator, so it is bound directly.

# --- moisture_10cm only ---
et_10cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_10cm_only, y_train_10cm_only
)
y_train_pred_10cm, y_test_pred_10cm = (
    et_10cm.predict(inputs) for inputs in (X_train_10cm_only, X_test_10cm_only)
)

print("\nExtra Trees - moisture_10cm only")
for metric_label, metric_value in (
    ("Train R2:", r2_score(y_train_10cm_only, y_train_pred_10cm)),
    ("Test R2:", r2_score(y_test_10cm_only, y_test_pred_10cm)),
    ("Train RMSE:", np.sqrt(mean_squared_error(y_train_10cm_only, y_train_pred_10cm))),
    ("Test RMSE:", np.sqrt(mean_squared_error(y_test_10cm_only, y_test_pred_10cm))),
):
    print(metric_label, metric_value)

# --- moisture_10cm + meteorological features ---
et_full_10cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_full_10, y_train_full_10
)
y_train_pred_full_10cm, y_test_pred_full_10cm = (
    et_full_10cm.predict(inputs) for inputs in (X_train_full_10, X_test_full_10)
)

print("\nExtra Trees - moisture_10cm + meteorological features")
for metric_label, metric_value in (
    ("Train R2:", r2_score(y_train_full_10, y_train_pred_full_10cm)),
    ("Test R2:", r2_score(y_test_full_10, y_test_pred_full_10cm)),
    ("Train RMSE:", np.sqrt(mean_squared_error(y_train_full_10, y_train_pred_full_10cm))),
    ("Test RMSE:", np.sqrt(mean_squared_error(y_test_full_10, y_test_pred_full_10cm))),
):
    print(metric_label, metric_value)


Extra Trees - moisture_10cm only
Train R2: 0.7999740190979374
Test R2: 0.7964980852160346
Train RMSE: 0.028191474708287208
Test RMSE: 0.028558026514832104

Extra Trees - moisture_10cm + meteorological features
Train R2: 0.9999679855569004
Test R2: 0.9689041698957475
Train RMSE: 0.00035665438336357323
Test RMSE: 0.011163362305015375


In [None]:
# Regulated Extra Trees Model for moisture_20cm from moisture_10cm + the remaining
# predictors: 150 trees (the unconstrained model uses 100) with depth capped at 12.
et_full_10cm_reg = ExtraTreesRegressor(n_estimators=150, max_depth=12, random_state=42).fit(
    X_train_full_10, y_train_full_10
)

# Predictions on both splits
y_train_pred_full_10cm_reg, y_test_pred_full_10cm_reg = (
    et_full_10cm_reg.predict(inputs) for inputs in (X_train_full_10, X_test_full_10)
)

# R2 then RMSE, train before test
print("\nExtra Trees (Regulated) - moisture_10cm + meteorological features")
for metric_label, metric_value in (
    ("Train R2:", r2_score(y_train_full_10, y_train_pred_full_10cm_reg)),
    ("Test R2:", r2_score(y_test_full_10, y_test_pred_full_10cm_reg)),
    ("Train RMSE:", np.sqrt(mean_squared_error(y_train_full_10, y_train_pred_full_10cm_reg))),
    ("Test RMSE:", np.sqrt(mean_squared_error(y_test_full_10, y_test_pred_full_10cm_reg))),
):
    print(metric_label, metric_value)


Extra Trees (Regulated) - moisture_10cm + meteorological features
Train R2: 0.8787829024748693
Test R2: 0.8744791320480519
Train RMSE: 0.021946058403581527
Test RMSE: 0.022428586764054424


## From 30 cm depth

In [None]:
# ==== 1. Separate features and targets (target: moisture_20cm, source depth: 30 cm) ====

# Single-predictor inputs: only the moisture_30cm column. The *_30cm_only names are rebound
# here with moisture_20cm as the target (the earlier section used moisture_10cm).
X_train_30cm_only, X_test_30cm_only = train_base[['moisture_30cm']], test_base[['moisture_30cm']]
y_train_30cm_only, y_test_30cm_only = train_base['moisture_20cm'], test_base['moisture_20cm']

# Full inputs: drop the target, the other depth readings and the timestamp, so moisture_30cm
# and the remaining columns are the predictors.
features_to_exclude_20_from_30 = [f'moisture_{depth}cm' for depth in (10, 20, 40, 50, 60)] + ['timestamp_device']
X_train_full_30, X_test_full_30 = (
    frame.drop(columns=features_to_exclude_20_from_30) for frame in (train_base, test_base)
)
y_train_full_30, y_test_full_30 = train_base['moisture_20cm'], test_base['moisture_20cm']


In [None]:
# Two unconstrained Extra Trees models for moisture_20cm: one fed only moisture_30cm,
# one fed the full predictor set. fit() returns the fitted estimator, so it is bound directly.

# --- moisture_30cm only ---
et_30cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_30cm_only, y_train_30cm_only
)
y_train_pred_30cm, y_test_pred_30cm = (
    et_30cm.predict(inputs) for inputs in (X_train_30cm_only, X_test_30cm_only)
)

print("\nExtra Trees - moisture_30cm only")
for metric_label, metric_value in (
    ("Train R2:", r2_score(y_train_30cm_only, y_train_pred_30cm)),
    ("Test R2:", r2_score(y_test_30cm_only, y_test_pred_30cm)),
    ("Train RMSE:", np.sqrt(mean_squared_error(y_train_30cm_only, y_train_pred_30cm))),
    ("Test RMSE:", np.sqrt(mean_squared_error(y_test_30cm_only, y_test_pred_30cm))),
):
    print(metric_label, metric_value)

# --- moisture_30cm + meteorological features ---
et_full_30cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_full_30, y_train_full_30
)
y_train_pred_full_30cm, y_test_pred_full_30cm = (
    et_full_30cm.predict(inputs) for inputs in (X_train_full_30, X_test_full_30)
)

print("\nExtra Trees - moisture_30cm + meteorological features")
for metric_label, metric_value in (
    ("Train R2:", r2_score(y_train_full_30, y_train_pred_full_30cm)),
    ("Test R2:", r2_score(y_test_full_30, y_test_pred_full_30cm)),
    ("Train RMSE:", np.sqrt(mean_squared_error(y_train_full_30, y_train_pred_full_30cm))),
    ("Test RMSE:", np.sqrt(mean_squared_error(y_test_full_30, y_test_pred_full_30cm))),
):
    print(metric_label, metric_value)


Extra Trees - moisture_30cm only
Train R2: 0.7551107893281641
Test R2: 0.7569383331796649
Train RMSE: 0.031193163090367553
Test RMSE: 0.03121060436149733

Extra Trees - moisture_30cm + meteorological features
Train R2: 0.9999389926471678
Test R2: 0.9665022326935357
Train RMSE: 0.0004923406341082033
Test RMSE: 0.011586489521870996


In [None]:
# Regulated Extra Trees model for moisture_20cm from moisture_30cm + the remaining
# predictors: 150 trees (the unconstrained model uses 100) with depth capped at 12.
et_full_30cm_reg = ExtraTreesRegressor(n_estimators=150, max_depth=12, random_state=42).fit(
    X_train_full_30, y_train_full_30
)

# Predictions on both splits
y_train_pred_full_30cm_reg, y_test_pred_full_30cm_reg = (
    et_full_30cm_reg.predict(inputs) for inputs in (X_train_full_30, X_test_full_30)
)

# R2 then RMSE, train before test
print("\nExtra Trees (Regulated) - moisture_30cm + meteorological features")
for metric_label, metric_value in (
    ("Train R2:", r2_score(y_train_full_30, y_train_pred_full_30cm_reg)),
    ("Test R2:", r2_score(y_test_full_30, y_test_pred_full_30cm_reg)),
    ("Train RMSE:", np.sqrt(mean_squared_error(y_train_full_30, y_train_pred_full_30cm_reg))),
    ("Test RMSE:", np.sqrt(mean_squared_error(y_test_full_30, y_test_pred_full_30cm_reg))),
):
    print(metric_label, metric_value)


Extra Trees (Regulated) - moisture_30cm + meteorological features
Train R2: 0.885347639482344
Test R2: 0.8824918518782079
Train RMSE: 0.0213435222173853
Test RMSE: 0.02170090938613984


## From 40 cm depth

In [None]:
# ==== 1. Separate features and targets (target: moisture_20cm, source depth: 40 cm) ====

# Single-predictor inputs: only the moisture_40cm column. The *_40cm_only names are rebound
# here with moisture_20cm as the target (the earlier section used moisture_10cm).
X_train_40cm_only, X_test_40cm_only = train_base[['moisture_40cm']], test_base[['moisture_40cm']]
y_train_40cm_only, y_test_40cm_only = train_base['moisture_20cm'], test_base['moisture_20cm']

# Full inputs: drop the target, the other depth readings and the timestamp, so moisture_40cm
# and the remaining columns are the predictors.
features_to_exclude_20_from_40 = [f'moisture_{depth}cm' for depth in (10, 20, 30, 50, 60)] + ['timestamp_device']
X_train_full_40, X_test_full_40 = (
    frame.drop(columns=features_to_exclude_20_from_40) for frame in (train_base, test_base)
)
y_train_full_40, y_test_full_40 = train_base['moisture_20cm'], test_base['moisture_20cm']


In [None]:
# Two unconstrained Extra Trees models for moisture_20cm: one fed only moisture_40cm,
# one fed the full predictor set. fit() returns the fitted estimator, so it is bound directly.

# --- moisture_40cm only ---
et_40cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_40cm_only, y_train_40cm_only
)
y_train_pred_40cm, y_test_pred_40cm = (
    et_40cm.predict(inputs) for inputs in (X_train_40cm_only, X_test_40cm_only)
)

print("\nExtra Trees - moisture_40cm only")
for metric_label, metric_value in (
    ("Train R2:", r2_score(y_train_40cm_only, y_train_pred_40cm)),
    ("Test R2:", r2_score(y_test_40cm_only, y_test_pred_40cm)),
    ("Train RMSE:", np.sqrt(mean_squared_error(y_train_40cm_only, y_train_pred_40cm))),
    ("Test RMSE:", np.sqrt(mean_squared_error(y_test_40cm_only, y_test_pred_40cm))),
):
    print(metric_label, metric_value)

# --- moisture_40cm + meteorological features ---
et_full_40cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_full_40, y_train_full_40
)
y_train_pred_full_40cm, y_test_pred_full_40cm = (
    et_full_40cm.predict(inputs) for inputs in (X_train_full_40, X_test_full_40)
)

print("\nExtra Trees - moisture_40cm + meteorological features")
for metric_label, metric_value in (
    ("Train R2:", r2_score(y_train_full_40, y_train_pred_full_40cm)),
    ("Test R2:", r2_score(y_test_full_40, y_test_pred_full_40cm)),
    ("Train RMSE:", np.sqrt(mean_squared_error(y_train_full_40, y_train_pred_full_40cm))),
    ("Test RMSE:", np.sqrt(mean_squared_error(y_test_full_40, y_test_pred_full_40cm))),
):
    print(metric_label, metric_value)


Extra Trees - moisture_40cm only
Train R2: 0.6971895534699853
Test R2: 0.7022056750156898
Train RMSE: 0.03468646301464208
Test RMSE: 0.03454634860354316

Extra Trees - moisture_40cm + meteorological features
Train R2: 0.9999590875237251
Test R2: 0.9542567676200698
Train RMSE: 0.0004031832426771406
Test RMSE: 0.01353964780561778


In [None]:
# Regulated Extra Trees model for moisture_20cm from moisture_40cm + the remaining
# predictors: 150 trees (the unconstrained model uses 100) with depth capped at 12.
et_full_40cm_reg = ExtraTreesRegressor(n_estimators=150, max_depth=12, random_state=42).fit(
    X_train_full_40, y_train_full_40
)

# Predictions on both splits
y_train_pred_full_40cm_reg, y_test_pred_full_40cm_reg = (
    et_full_40cm_reg.predict(inputs) for inputs in (X_train_full_40, X_test_full_40)
)

# R2 then RMSE, train before test
print("\nExtra Trees (Regulated) - moisture_40cm + meteorological features")
for metric_label, metric_value in (
    ("Train R2:", r2_score(y_train_full_40, y_train_pred_full_40cm_reg)),
    ("Test R2:", r2_score(y_test_full_40, y_test_pred_full_40cm_reg)),
    ("Train RMSE:", np.sqrt(mean_squared_error(y_train_full_40, y_train_pred_full_40cm_reg))),
    ("Test RMSE:", np.sqrt(mean_squared_error(y_test_full_40, y_test_pred_full_40cm_reg))),
):
    print(metric_label, metric_value)


Extra Trees (Regulated) - moisture_40cm + meteorological features
Train R2: 0.8509856097161024
Test R2: 0.8486528281747615
Train RMSE: 0.024332609994916845
Test RMSE: 0.024628110724089918


## From 50 cm depth

In [None]:
# ==== 1. Separate features and targets (target: moisture_20cm, source depth: 50 cm) ====

# Single-predictor inputs: only the moisture_50cm column. The *_50cm_only names are rebound
# here with moisture_20cm as the target (the earlier section used moisture_10cm).
X_train_50cm_only, X_test_50cm_only = train_base[['moisture_50cm']], test_base[['moisture_50cm']]
y_train_50cm_only, y_test_50cm_only = train_base['moisture_20cm'], test_base['moisture_20cm']

# Full inputs: drop the target, the other depth readings and the timestamp, so moisture_50cm
# and the remaining columns are the predictors.
features_to_exclude_20_from_50 = [f'moisture_{depth}cm' for depth in (10, 20, 30, 40, 60)] + ['timestamp_device']
X_train_full_50, X_test_full_50 = (
    frame.drop(columns=features_to_exclude_20_from_50) for frame in (train_base, test_base)
)
y_train_full_50, y_test_full_50 = train_base['moisture_20cm'], test_base['moisture_20cm']

In [None]:
# Two unconstrained Extra Trees models for moisture_20cm: one fed only moisture_50cm,
# one fed the full predictor set. fit() returns the fitted estimator, so it is bound directly.

# --- moisture_50cm only ---
et_50cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_50cm_only, y_train_50cm_only
)
y_train_pred_50cm, y_test_pred_50cm = (
    et_50cm.predict(inputs) for inputs in (X_train_50cm_only, X_test_50cm_only)
)

print("\nExtra Trees - moisture_50cm only")
for metric_label, metric_value in (
    ("Train R2:", r2_score(y_train_50cm_only, y_train_pred_50cm)),
    ("Test R2:", r2_score(y_test_50cm_only, y_test_pred_50cm)),
    ("Train RMSE:", np.sqrt(mean_squared_error(y_train_50cm_only, y_train_pred_50cm))),
    ("Test RMSE:", np.sqrt(mean_squared_error(y_test_50cm_only, y_test_pred_50cm))),
):
    print(metric_label, metric_value)

# --- moisture_50cm + meteorological features ---
et_full_50cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_full_50, y_train_full_50
)
y_train_pred_full_50cm, y_test_pred_full_50cm = (
    et_full_50cm.predict(inputs) for inputs in (X_train_full_50, X_test_full_50)
)

print("\nExtra Trees - moisture_50cm + meteorological features")
for metric_label, metric_value in (
    ("Train R2:", r2_score(y_train_full_50, y_train_pred_full_50cm)),
    ("Test R2:", r2_score(y_test_full_50, y_test_pred_full_50cm)),
    ("Train RMSE:", np.sqrt(mean_squared_error(y_train_full_50, y_train_pred_full_50cm))),
    ("Test RMSE:", np.sqrt(mean_squared_error(y_test_full_50, y_test_pred_full_50cm))),
):
    print(metric_label, metric_value)


Extra Trees - moisture_50cm only
Train R2: 0.6361680053928853
Test R2: 0.6407826105109663
Train RMSE: 0.03802113120547627
Test RMSE: 0.037942209591114454

Extra Trees - moisture_50cm + meteorological features
Train R2: 0.9998864901222537
Test R2: 0.9383188293736991
Train RMSE: 0.0006715702022886403
Test RMSE: 0.01572245086773362


In [None]:
# Regulated Extra Trees for moisture_20cm from moisture_50cm + the remaining predictors:
# 150 trees with tree depth capped at 12.
et_full_50cm_reg = ExtraTreesRegressor(
    random_state=42,
    n_estimators=150,
    max_depth=12
)

et_full_50cm_reg.fit(X_train_full_50, y_train_full_50)
# Keep the regulated predictions under their own `_reg` names (as the other regulated cells do)
# so the unconstrained model's y_*_pred_full_50cm arrays are not silently overwritten.
y_train_pred_full_50cm_reg = et_full_50cm_reg.predict(X_train_full_50)
y_test_pred_full_50cm_reg = et_full_50cm_reg.predict(X_test_full_50)

# R2 then RMSE, train before test
print("\nExtra Trees (Regulated) - moisture_50cm + meteorological features")
print("Train R2:", r2_score(y_train_full_50, y_train_pred_full_50cm_reg))
print("Test R2:", r2_score(y_test_full_50, y_test_pred_full_50cm_reg))
print("Train RMSE:", np.sqrt(mean_squared_error(y_train_full_50, y_train_pred_full_50cm_reg)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_test_full_50, y_test_pred_full_50cm_reg)))



Extra Trees (Regulated) - moisture_50cm + meteorological features
Train R2: 0.8339829660615562
Test R2: 0.8293862231009566
Train RMSE: 0.025683305367008396
Test RMSE: 0.026148753500634777


## From 60 cm depth

In [None]:
# ==== 1. Separate features and targets (target: moisture_20cm, source depth: 60 cm) ====

# Single-predictor inputs: only the moisture_60cm column. The *_60cm_only names are rebound
# here with moisture_20cm as the target (the earlier section used moisture_10cm).
X_train_60cm_only, X_test_60cm_only = train_base[['moisture_60cm']], test_base[['moisture_60cm']]
y_train_60cm_only, y_test_60cm_only = train_base['moisture_20cm'], test_base['moisture_20cm']

# Full inputs: drop the target, the other depth readings and the timestamp, so moisture_60cm
# and the remaining columns are the predictors. X_*_full_60 / y_*_full_60 are likewise rebound.
features_to_exclude_20_from_60 = [f'moisture_{depth}cm' for depth in (10, 20, 30, 40, 50)] + ['timestamp_device']
X_train_full_60, X_test_full_60 = (
    frame.drop(columns=features_to_exclude_20_from_60) for frame in (train_base, test_base)
)
y_train_full_60, y_test_full_60 = train_base['moisture_20cm'], test_base['moisture_20cm']

In [None]:
# Two unconstrained Extra Trees models for moisture_20cm: one fed only moisture_60cm,
# one fed the full predictor set. fit() returns the fitted estimator, so it is bound directly.

# --- moisture_60cm only ---
et_60cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_60cm_only, y_train_60cm_only
)
y_train_pred_60cm, y_test_pred_60cm = (
    et_60cm.predict(inputs) for inputs in (X_train_60cm_only, X_test_60cm_only)
)

print("\nExtra Trees - moisture_60cm only")
for metric_label, metric_value in (
    ("Train R2:", r2_score(y_train_60cm_only, y_train_pred_60cm)),
    ("Test R2:", r2_score(y_test_60cm_only, y_test_pred_60cm)),
    ("Train RMSE:", np.sqrt(mean_squared_error(y_train_60cm_only, y_train_pred_60cm))),
    ("Test RMSE:", np.sqrt(mean_squared_error(y_test_60cm_only, y_test_pred_60cm))),
):
    print(metric_label, metric_value)

# --- moisture_60cm + meteorological features ---
et_full_60cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_full_60, y_train_full_60
)
y_train_pred_full_60cm, y_test_pred_full_60cm = (
    et_full_60cm.predict(inputs) for inputs in (X_train_full_60, X_test_full_60)
)

print("\nExtra Trees - moisture_60cm + meteorological features")
for metric_label, metric_value in (
    ("Train R2:", r2_score(y_train_full_60, y_train_pred_full_60cm)),
    ("Test R2:", r2_score(y_test_full_60, y_test_pred_full_60cm)),
    ("Train RMSE:", np.sqrt(mean_squared_error(y_train_full_60, y_train_pred_full_60cm))),
    ("Test RMSE:", np.sqrt(mean_squared_error(y_test_full_60, y_test_pred_full_60cm))),
):
    print(metric_label, metric_value)


Extra Trees - moisture_60cm only
Train R2: 0.6227662107011787
Test R2: 0.6235738497468035
Train RMSE: 0.03871505523803735
Test RMSE: 0.03884041267560795

Extra Trees - moisture_60cm + meteorological features
Train R2: 0.9999385467190461
Test R2: 0.9383749392818077
Train RMSE: 0.0004941367192575566
Test RMSE: 0.01571529806906293


In [None]:
# Regulated Extra Trees for moisture_20cm from moisture_60cm + the remaining predictors:
# 150 trees with tree depth capped at 12.
et_full_60cm_reg = ExtraTreesRegressor(
    random_state=42,
    n_estimators=150,
    max_depth=12
)

et_full_60cm_reg.fit(X_train_full_60, y_train_full_60)
# Keep the regulated predictions under their own `_reg` names (as the other regulated cells do)
# so the unconstrained model's y_*_pred_full_60cm arrays are not silently overwritten.
y_train_pred_full_60cm_reg = et_full_60cm_reg.predict(X_train_full_60)
y_test_pred_full_60cm_reg = et_full_60cm_reg.predict(X_test_full_60)

# R2 then RMSE, train before test
print("\nExtra Trees (Regulated) - moisture_60cm + meteorological features")
print("Train R2:", r2_score(y_train_full_60, y_train_pred_full_60cm_reg))
print("Test R2:", r2_score(y_test_full_60, y_test_pred_full_60cm_reg))
print("Train RMSE:", np.sqrt(mean_squared_error(y_train_full_60, y_train_pred_full_60cm_reg)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_test_full_60, y_test_pred_full_60cm_reg)))



Extra Trees (Regulated) - moisture_60cm + meteorological features
Train R2: 0.8183667097631481
Test R2: 0.8124911533457926
Train RMSE: 0.02686410122662278
Test RMSE: 0.027412890025019274


#Predicting moisture at 30cm depth

##from 10cm

In [None]:
# ==== Features and targets: moisture_30cm predicted from moisture_10cm ====

# Baseline inputs: a one-column frame holding only moisture_10cm.
X_train_10cm_only, X_test_10cm_only = train_base[['moisture_10cm']], test_base[['moisture_10cm']]
y_train_10cm_only, y_test_10cm_only = train_base['moisture_30cm'], test_base['moisture_30cm']

# Full inputs: drop the target, the other depth readings and timestamp_device,
# keeping moisture_10cm plus the remaining columns (labelled meteorological
# features in this notebook).
features_to_exclude_30_from_10 = [
    'moisture_20cm',
    'moisture_30cm',
    'moisture_40cm',
    'moisture_50cm',
    'moisture_60cm',
    'timestamp_device',
]
X_train_full_10 = train_base.drop(columns=features_to_exclude_30_from_10)
X_test_full_10 = test_base.drop(columns=features_to_exclude_30_from_10)
y_train_full_10, y_test_full_10 = train_base['moisture_30cm'], test_base['moisture_30cm']


In [None]:
# Baseline: Extra Trees (100 trees) fitted on moisture_10cm alone.
et_10cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_10cm_only, y_train_10cm_only
)
y_train_pred_10cm = et_10cm.predict(X_train_10cm_only)
y_test_pred_10cm = et_10cm.predict(X_test_10cm_only)

# R2 and RMSE (square root of MSE) on the train and test splits.
print("\nExtra Trees - moisture_10cm only")
print("Train R2:", r2_score(y_true=y_train_10cm_only, y_pred=y_train_pred_10cm))
print("Test R2:", r2_score(y_true=y_test_10cm_only, y_pred=y_test_pred_10cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_10cm_only, y_pred=y_train_pred_10cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_10cm_only, y_pred=y_test_pred_10cm)))

# Same estimator settings, now on moisture_10cm plus the meteorological columns.
et_full_10cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_full_10, y_train_full_10
)
y_train_pred_full_10cm = et_full_10cm.predict(X_train_full_10)
y_test_pred_full_10cm = et_full_10cm.predict(X_test_full_10)

print("\nExtra Trees - moisture_10cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_10, y_pred=y_train_pred_full_10cm))
print("Test R2:", r2_score(y_true=y_test_full_10, y_pred=y_test_pred_full_10cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_10, y_pred=y_train_pred_full_10cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_10, y_pred=y_test_pred_full_10cm)))


Extra Trees - moisture_10cm only
Train R2: 0.6546657276722694
Test R2: 0.6499254579861689
Train RMSE: 0.027258618620886636
Test RMSE: 0.0276442632788375

Extra Trees - moisture_10cm + meteorological features
Train R2: 0.9999846102411517
Test R2: 0.9572778453964594
Train RMSE: 0.00018197004812904854
Test RMSE: 0.009657202306976236


In [None]:
# Depth-limited Extra Trees on the moisture_10cm + meteorological inputs:
# 150 trees, each capped at max_depth=12.
et_full_10cm_reg = ExtraTreesRegressor(n_estimators=150, max_depth=12, random_state=42)
et_full_10cm_reg.fit(X_train_full_10, y_train_full_10)

# NOTE: these two names are also assigned by the unconstrained full model
# (et_full_10cm); its predictions are replaced here.
y_train_pred_full_10cm = et_full_10cm_reg.predict(X_train_full_10)
y_test_pred_full_10cm = et_full_10cm_reg.predict(X_test_full_10)

# R2 and RMSE (square root of MSE) on the train and test splits.
print("\nExtra Trees (Regulated) - moisture_10cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_10, y_pred=y_train_pred_full_10cm))
print("Test R2:", r2_score(y_true=y_test_full_10, y_pred=y_test_pred_full_10cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_10, y_pred=y_train_pred_full_10cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_10, y_pred=y_test_pred_full_10cm)))



Extra Trees (Regulated) - moisture_10cm + meteorological features
Train R2: 0.8127126803241542
Test R2: 0.8045533488217579
Train RMSE: 0.02007419736914141
Test RMSE: 0.020655650308934945


##From 20cm

In [None]:
# ==== Features and targets: moisture_30cm predicted from moisture_20cm ====
# NOTE: the *_20cm_only names are also used by the 10cm-target section at the
# top of the notebook; this cell rebinds them to the moisture_30cm target.

# Baseline inputs: a one-column frame holding only moisture_20cm.
X_train_20cm_only, X_test_20cm_only = train_base[['moisture_20cm']], test_base[['moisture_20cm']]
y_train_20cm_only, y_test_20cm_only = train_base['moisture_30cm'], test_base['moisture_30cm']

# Full inputs: drop the target, the other depth readings and timestamp_device,
# keeping moisture_20cm plus the remaining columns (labelled meteorological
# features in this notebook).
features_to_exclude_30_from_20 = [
    'moisture_10cm',
    'moisture_30cm',
    'moisture_40cm',
    'moisture_50cm',
    'moisture_60cm',
    'timestamp_device',
]
X_train_full_20 = train_base.drop(columns=features_to_exclude_30_from_20)
X_test_full_20 = test_base.drop(columns=features_to_exclude_30_from_20)
y_train_full_20, y_test_full_20 = train_base['moisture_30cm'], test_base['moisture_30cm']


In [None]:
# Baseline: Extra Trees (100 trees) fitted on moisture_20cm alone.
et_20cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_20cm_only, y_train_20cm_only
)
y_train_pred_20cm = et_20cm.predict(X_train_20cm_only)
y_test_pred_20cm = et_20cm.predict(X_test_20cm_only)

# R2 and RMSE (square root of MSE) on the train and test splits.
print("\nExtra Trees - moisture_20cm only")
print("Train R2:", r2_score(y_true=y_train_20cm_only, y_pred=y_train_pred_20cm))
print("Test R2:", r2_score(y_true=y_test_20cm_only, y_pred=y_test_pred_20cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_20cm_only, y_pred=y_train_pred_20cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_20cm_only, y_pred=y_test_pred_20cm)))

# Same estimator settings, now on moisture_20cm plus the meteorological columns.
et_full_20cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_full_20, y_train_full_20
)
y_train_pred_full_20cm = et_full_20cm.predict(X_train_full_20)
y_test_pred_full_20cm = et_full_20cm.predict(X_test_full_20)

print("\nExtra Trees - moisture_20cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_20, y_pred=y_train_pred_full_20cm))
print("Test R2:", r2_score(y_true=y_test_full_20, y_pred=y_test_pred_full_20cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_20, y_pred=y_train_pred_full_20cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_20, y_pred=y_test_pred_full_20cm)))


Extra Trees - moisture_20cm only
Train R2: 0.8248410695010863
Test R2: 0.8255076780675902
Train RMSE: 0.01941333482734624
Test RMSE: 0.01951699346354022

Extra Trees - moisture_20cm + meteorological features
Train R2: 0.9999972230885249
Test R2: 0.9851784187059689
Train RMSE: 7.729742386632724e-05
Test RMSE: 0.005688162907690484


In [None]:
# Depth-limited Extra Trees on the moisture_20cm + meteorological inputs:
# 150 trees, each capped at max_depth=12.
et_full_20cm_reg = ExtraTreesRegressor(n_estimators=150, max_depth=12, random_state=42)
et_full_20cm_reg.fit(X_train_full_20, y_train_full_20)

# NOTE: these two names are also assigned by the unconstrained full model
# (et_full_20cm); its predictions are replaced here.
y_train_pred_full_20cm = et_full_20cm_reg.predict(X_train_full_20)
y_test_pred_full_20cm = et_full_20cm_reg.predict(X_test_full_20)

# R2 and RMSE (square root of MSE) on the train and test splits.
print("\nExtra Trees (Regulated) - moisture_20cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_20, y_pred=y_train_pred_full_20cm))
print("Test R2:", r2_score(y_true=y_test_full_20, y_pred=y_test_pred_full_20cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_20, y_pred=y_train_pred_full_20cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_20, y_pred=y_test_pred_full_20cm)))



Extra Trees (Regulated) - moisture_20cm + meteorological features
Train R2: 0.9264640140410942
Test R2: 0.9253553348796492
Train RMSE: 0.01257864795560779
Test RMSE: 0.012765097690920092


##from 40cm

In [None]:
# ==== Features and targets: moisture_30cm predicted from moisture_40cm ====

# Baseline inputs: a one-column frame holding only moisture_40cm.
X_train_40cm_only, X_test_40cm_only = train_base[['moisture_40cm']], test_base[['moisture_40cm']]
y_train_40cm_only, y_test_40cm_only = train_base['moisture_30cm'], test_base['moisture_30cm']

# Full inputs: drop the target, the other depth readings and timestamp_device,
# keeping moisture_40cm plus the remaining columns (labelled meteorological
# features in this notebook).
features_to_exclude_30_from_40 = [
    'moisture_10cm',
    'moisture_20cm',
    'moisture_30cm',
    'moisture_50cm',
    'moisture_60cm',
    'timestamp_device',
]
X_train_full_40 = train_base.drop(columns=features_to_exclude_30_from_40)
X_test_full_40 = test_base.drop(columns=features_to_exclude_30_from_40)
y_train_full_40, y_test_full_40 = train_base['moisture_30cm'], test_base['moisture_30cm']

In [None]:
# Baseline: Extra Trees (100 trees) fitted on moisture_40cm alone.
et_40cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_40cm_only, y_train_40cm_only
)
y_train_pred_40cm = et_40cm.predict(X_train_40cm_only)
y_test_pred_40cm = et_40cm.predict(X_test_40cm_only)

# R2 and RMSE (square root of MSE) on the train and test splits.
print("\nExtra Trees - moisture_40cm only")
print("Train R2:", r2_score(y_true=y_train_40cm_only, y_pred=y_train_pred_40cm))
print("Test R2:", r2_score(y_true=y_test_40cm_only, y_pred=y_test_pred_40cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_40cm_only, y_pred=y_train_pred_40cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_40cm_only, y_pred=y_test_pred_40cm)))

# Same estimator settings, now on moisture_40cm plus the meteorological columns.
et_full_40 = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_full_40, y_train_full_40
)
y_train_pred_full_40 = et_full_40.predict(X_train_full_40)
y_test_pred_full_40 = et_full_40.predict(X_test_full_40)

print("\nExtra Trees - moisture_40cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_40, y_pred=y_train_pred_full_40))
print("Test R2:", r2_score(y_true=y_test_full_40, y_pred=y_test_pred_full_40))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_40, y_pred=y_train_pred_full_40)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_40, y_pred=y_test_pred_full_40)))


Extra Trees - moisture_40cm only
Train R2: 0.7427129479088999
Test R2: 0.7429090126664543
Train RMSE: 0.01890278816402051
Test RMSE: 0.018841582431613223

Extra Trees - moisture_40cm + meteorological features
Train R2: 0.9999719345020304
Test R2: 0.9658106895115129
Train RMSE: 0.00019742551432044776
Test RMSE: 0.006870986887925795


In [None]:
# Depth-limited Extra Trees on the moisture_40cm + meteorological inputs:
# 150 trees, each capped at max_depth=12.
et_full_40_reg = ExtraTreesRegressor(n_estimators=150, max_depth=12, random_state=42)
et_full_40_reg.fit(X_train_full_40, y_train_full_40)

# NOTE: these two names are also assigned by the unconstrained full model
# (et_full_40); its predictions are replaced here.
y_train_pred_full_40 = et_full_40_reg.predict(X_train_full_40)
y_test_pred_full_40 = et_full_40_reg.predict(X_test_full_40)

# R2 and RMSE (square root of MSE) on the train and test splits.
print("\nExtra Trees (Regulated) - moisture_40cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_40, y_pred=y_train_pred_full_40))
print("Test R2:", r2_score(y_true=y_test_full_40, y_pred=y_test_pred_full_40))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_40, y_pred=y_train_pred_full_40)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_40, y_pred=y_test_pred_full_40)))


Extra Trees (Regulated) - moisture_40cm + meteorological features
Train R2: 0.9613044577459486
Test R2: 0.9604089990667221
Train RMSE: 0.009124613361523237
Test RMSE: 0.009296575413460687


##From 50cm

In [None]:
# ==== Features and targets: moisture_30cm predicted from moisture_50cm ====

# Baseline inputs: a one-column frame holding only moisture_50cm.
X_train_50cm_only, X_test_50cm_only = train_base[['moisture_50cm']], test_base[['moisture_50cm']]
y_train_50cm_only, y_test_50cm_only = train_base['moisture_30cm'], test_base['moisture_30cm']

# Full inputs: drop the target, the other depth readings and timestamp_device,
# keeping moisture_50cm plus the remaining columns (labelled meteorological
# features in this notebook).
features_to_exclude_30_from_50 = [
    'moisture_10cm',
    'moisture_20cm',
    'moisture_30cm',
    'moisture_40cm',
    'moisture_60cm',
    'timestamp_device',
]
X_train_full_50 = train_base.drop(columns=features_to_exclude_30_from_50)
X_test_full_50 = test_base.drop(columns=features_to_exclude_30_from_50)
y_train_full_50, y_test_full_50 = train_base['moisture_30cm'], test_base['moisture_30cm']

In [None]:
# Baseline: Extra Trees (100 trees) fitted on moisture_50cm alone.
et_50cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_50cm_only, y_train_50cm_only
)
y_train_pred_50cm = et_50cm.predict(X_train_50cm_only)
y_test_pred_50cm = et_50cm.predict(X_test_50cm_only)

# R2 and RMSE (square root of MSE) on the train and test splits.
print("\nExtra Trees - moisture_50cm only")
print("Train R2:", r2_score(y_true=y_train_50cm_only, y_pred=y_train_pred_50cm))
print("Test R2:", r2_score(y_true=y_test_50cm_only, y_pred=y_test_pred_50cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_50cm_only, y_pred=y_train_pred_50cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_50cm_only, y_pred=y_test_pred_50cm)))

# Same estimator settings, now on moisture_50cm plus the meteorological columns.
et_full_50 = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_full_50, y_train_full_50
)
y_train_pred_full_50 = et_full_50.predict(X_train_full_50)
y_test_pred_full_50 = et_full_50.predict(X_test_full_50)

print("\nExtra Trees - moisture_50cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_50, y_pred=y_train_pred_full_50))
print("Test R2:", r2_score(y_true=y_test_full_50, y_pred=y_test_pred_full_50))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_50, y_pred=y_train_pred_full_50)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_50, y_pred=y_test_pred_full_50)))


Extra Trees - moisture_50cm only
Train R2: 0.7653293672393644
Test R2: 0.7621181802778315
Train RMSE: 0.018052870802309193
Test RMSE: 0.01812402174885098

Extra Trees - moisture_50cm + meteorological features
Train R2: 0.9998783437620671
Test R2: 0.9549632570222923
Train RMSE: 0.0004110400058094687
Test RMSE: 0.007886012047325537


In [None]:
# Depth-limited Extra Trees on the moisture_50cm + meteorological inputs:
# 150 trees, each capped at max_depth=12.
et_full_50_reg = ExtraTreesRegressor(n_estimators=150, max_depth=12, random_state=42)
et_full_50_reg.fit(X_train_full_50, y_train_full_50)

# NOTE: these two names are also assigned by the unconstrained full model
# (et_full_50); its predictions are replaced here.
y_train_pred_full_50 = et_full_50_reg.predict(X_train_full_50)
y_test_pred_full_50 = et_full_50_reg.predict(X_test_full_50)

# R2 and RMSE (square root of MSE) on the train and test splits.
print("\nExtra Trees (Regulated) - moisture_50cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_50, y_pred=y_train_pred_full_50))
print("Test R2:", r2_score(y_true=y_test_full_50, y_pred=y_test_pred_full_50))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_50, y_pred=y_train_pred_full_50)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_50, y_pred=y_test_pred_full_50)))



Extra Trees (Regulated) - moisture_50cm + meteorological features
Train R2: 0.9290561255815332
Test R2: 0.9294135392946806
Train RMSE: 0.01235496313703177
Test RMSE: 0.012413248768427984


##From 60cm

In [None]:
# ==== Features and targets: moisture_30cm predicted from moisture_60cm ====

# Baseline inputs: a one-column frame holding only moisture_60cm.
X_train_60cm_only, X_test_60cm_only = train_base[['moisture_60cm']], test_base[['moisture_60cm']]
y_train_60cm_only, y_test_60cm_only = train_base['moisture_30cm'], test_base['moisture_30cm']

# Full inputs: drop the target, the other depth readings and timestamp_device,
# keeping moisture_60cm plus the remaining columns (labelled meteorological
# features in this notebook).
features_to_exclude_30_from_60 = [
    'moisture_10cm',
    'moisture_20cm',
    'moisture_30cm',
    'moisture_40cm',
    'moisture_50cm',
    'timestamp_device',
]
X_train_full_60 = train_base.drop(columns=features_to_exclude_30_from_60)
X_test_full_60 = test_base.drop(columns=features_to_exclude_30_from_60)
y_train_full_60, y_test_full_60 = train_base['moisture_30cm'], test_base['moisture_30cm']


In [None]:
# Baseline: Extra Trees (100 trees) fitted on moisture_60cm alone.
et_60cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_60cm_only, y_train_60cm_only
)
y_train_pred_60cm = et_60cm.predict(X_train_60cm_only)
y_test_pred_60cm = et_60cm.predict(X_test_60cm_only)

# R2 and RMSE (square root of MSE) on the train and test splits.
print("\nExtra Trees - moisture_60cm only")
print("Train R2:", r2_score(y_true=y_train_60cm_only, y_pred=y_train_pred_60cm))
print("Test R2:", r2_score(y_true=y_test_60cm_only, y_pred=y_test_pred_60cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_60cm_only, y_pred=y_train_pred_60cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_60cm_only, y_pred=y_test_pred_60cm)))

# Same estimator settings, now on moisture_60cm plus the meteorological columns.
et_full_60 = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_full_60, y_train_full_60
)
y_train_pred_full_60 = et_full_60.predict(X_train_full_60)
y_test_pred_full_60 = et_full_60.predict(X_test_full_60)

print("\nExtra Trees - moisture_60cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_60, y_pred=y_train_pred_full_60))
print("Test R2:", r2_score(y_true=y_test_full_60, y_pred=y_test_pred_full_60))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_60, y_pred=y_train_pred_full_60)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_60, y_pred=y_test_pred_full_60)))


Extra Trees - moisture_60cm only
Train R2: 0.6532266046468012
Test R2: 0.6493361686717392
Train RMSE: 0.02194521233429505
Test RMSE: 0.022004901831087248

Extra Trees - moisture_60cm + meteorological features
Train R2: 0.9998320183128369
Test R2: 0.9391084991557853
Train RMSE: 0.0004830008236472831
Test RMSE: 0.009169640049204652


In [None]:
# Depth-limited Extra Trees on the moisture_60cm + meteorological inputs:
# 150 trees, each capped at max_depth=12.
et_full_60_reg = ExtraTreesRegressor(n_estimators=150, max_depth=12, random_state=42)
et_full_60_reg.fit(X_train_full_60, y_train_full_60)

# NOTE: these two names are also assigned by the unconstrained full model
# (et_full_60); its predictions are replaced here.
y_train_pred_full_60 = et_full_60_reg.predict(X_train_full_60)
y_test_pred_full_60 = et_full_60_reg.predict(X_test_full_60)

# R2 and RMSE (square root of MSE) on the train and test splits.
print("\nExtra Trees (Regulated) - moisture_60cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_60, y_pred=y_train_pred_full_60))
print("Test R2:", r2_score(y_true=y_test_full_60, y_pred=y_test_pred_full_60))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_60, y_pred=y_train_pred_full_60)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_60, y_pred=y_test_pred_full_60)))


Extra Trees (Regulated) - moisture_60cm + meteorological features
Train R2: 0.8868385203011128
Test R2: 0.8868705570444121
Train RMSE: 0.015603905498709991
Test RMSE: 0.015714934678255743


#Predicting moisture at 40cm depth

##from 10cm

In [None]:
# ==== Features and targets: moisture_40cm predicted from moisture_10cm ====
# NOTE: the *_10cm_only and *_full_10 names are also used by the 30cm-target
# section; this cell rebinds them to the moisture_40cm target.

# Baseline inputs: a one-column frame holding only moisture_10cm.
X_train_10cm_only, X_test_10cm_only = train_base[['moisture_10cm']], test_base[['moisture_10cm']]
y_train_10cm_only, y_test_10cm_only = train_base['moisture_40cm'], test_base['moisture_40cm']

# Full inputs: drop the target, the other depth readings and timestamp_device,
# keeping moisture_10cm plus the remaining columns (labelled meteorological
# features in this notebook).
features_to_exclude_40_from_10 = [
    'moisture_20cm',
    'moisture_30cm',
    'moisture_40cm',
    'moisture_50cm',
    'moisture_60cm',
    'timestamp_device',
]
X_train_full_10 = train_base.drop(columns=features_to_exclude_40_from_10)
X_test_full_10 = test_base.drop(columns=features_to_exclude_40_from_10)
y_train_full_10, y_test_full_10 = train_base['moisture_40cm'], test_base['moisture_40cm']

In [None]:
# Baseline: Extra Trees (100 trees) fitted on moisture_10cm alone.
et_10cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_10cm_only, y_train_10cm_only
)
y_train_pred_10cm = et_10cm.predict(X_train_10cm_only)
y_test_pred_10cm = et_10cm.predict(X_test_10cm_only)

# R2 and RMSE (square root of MSE) on the train and test splits.
print("\nExtra Trees - moisture_10cm only")
print("Train R2:", r2_score(y_true=y_train_10cm_only, y_pred=y_train_pred_10cm))
print("Test R2:", r2_score(y_true=y_test_10cm_only, y_pred=y_test_pred_10cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_10cm_only, y_pred=y_train_pred_10cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_10cm_only, y_pred=y_test_pred_10cm)))

# Same estimator settings, now on moisture_10cm plus the meteorological columns.
et_full_10 = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_full_10, y_train_full_10
)
y_train_pred_full_10 = et_full_10.predict(X_train_full_10)
y_test_pred_full_10 = et_full_10.predict(X_test_full_10)

print("\nExtra Trees - moisture_10cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_10, y_pred=y_train_pred_full_10))
print("Test R2:", r2_score(y_true=y_test_full_10, y_pred=y_test_pred_full_10))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_10, y_pred=y_train_pred_full_10)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_10, y_pred=y_test_pred_full_10)))


Extra Trees - moisture_10cm only
Train R2: 0.45318703638430735
Test R2: 0.4488221465751936
Train RMSE: 0.02539793676544404
Test RMSE: 0.02572304071878654

Extra Trees - moisture_10cm + meteorological features
Train R2: 0.9999679821454907
Test R2: 0.932203230284361
Train RMSE: 0.00019434579570324655
Test RMSE: 0.009021545140456212


In [None]:
# Depth-limited Extra Trees on the moisture_10cm + meteorological inputs:
# 150 trees, each capped at max_depth=12.
et_full_10_reg = ExtraTreesRegressor(n_estimators=150, max_depth=12, random_state=42)
et_full_10_reg.fit(X_train_full_10, y_train_full_10)

# NOTE: these two names are also assigned by the unconstrained full model
# (et_full_10); its predictions are replaced here.
y_train_pred_full_10 = et_full_10_reg.predict(X_train_full_10)
y_test_pred_full_10 = et_full_10_reg.predict(X_test_full_10)

# R2 and RMSE (square root of MSE) on the train and test splits.
print("\nExtra Trees (Regulated) - moisture_10cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_10, y_pred=y_train_pred_full_10))
print("Test R2:", r2_score(y_true=y_test_full_10, y_pred=y_test_pred_full_10))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_10, y_pred=y_train_pred_full_10)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_10, y_pred=y_test_pred_full_10)))


Extra Trees (Regulated) - moisture_10cm + meteorological features
Train R2: 0.7275210208671671
Test R2: 0.7183664480698397
Train RMSE: 0.017928565338388013
Test RMSE: 0.018387327942760538


##from 20cm

In [None]:
# ==== Features and targets: moisture_40cm predicted from moisture_20cm ====
# NOTE: the *_20cm_only and *_full_20 names are also used by the 30cm-target
# section; this cell rebinds them to the moisture_40cm target.

# Baseline inputs: a one-column frame holding only moisture_20cm.
X_train_20cm_only, X_test_20cm_only = train_base[['moisture_20cm']], test_base[['moisture_20cm']]
y_train_20cm_only, y_test_20cm_only = train_base['moisture_40cm'], test_base['moisture_40cm']

# Full inputs: drop the target, the other depth readings and timestamp_device,
# keeping moisture_20cm plus the remaining columns (labelled meteorological
# features in this notebook).
features_to_exclude_40_from_20 = [
    'moisture_10cm',
    'moisture_40cm',
    'moisture_30cm',
    'moisture_50cm',
    'moisture_60cm',
    'timestamp_device',
]
X_train_full_20 = train_base.drop(columns=features_to_exclude_40_from_20)
X_test_full_20 = test_base.drop(columns=features_to_exclude_40_from_20)
y_train_full_20, y_test_full_20 = train_base['moisture_40cm'], test_base['moisture_40cm']

In [None]:
# Baseline: Extra Trees (100 trees) fitted on moisture_20cm alone.
et_20cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_20cm_only, y_train_20cm_only
)
y_train_pred_20cm = et_20cm.predict(X_train_20cm_only)
y_test_pred_20cm = et_20cm.predict(X_test_20cm_only)

# R2 and RMSE (square root of MSE) on the train and test splits.
print("\nExtra Trees - moisture_20cm only")
print("Train R2:", r2_score(y_true=y_train_20cm_only, y_pred=y_train_pred_20cm))
print("Test R2:", r2_score(y_true=y_test_20cm_only, y_pred=y_test_pred_20cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_20cm_only, y_pred=y_train_pred_20cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_20cm_only, y_pred=y_test_pred_20cm)))

# Same estimator settings, now on moisture_20cm plus the meteorological columns.
et_full_20 = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_full_20, y_train_full_20
)
y_train_pred_full_20 = et_full_20.predict(X_train_full_20)
y_test_pred_full_20 = et_full_20.predict(X_test_full_20)

print("\nExtra Trees - moisture_20cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_20, y_pred=y_train_pred_full_20))
print("Test R2:", r2_score(y_true=y_test_full_20, y_pred=y_test_pred_full_20))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_20, y_pred=y_train_pred_full_20)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_20, y_pred=y_test_pred_full_20)))


Extra Trees - moisture_20cm only
Train R2: 0.6657515991251386
Test R2: 0.6710290181169696
Train RMSE: 0.019857003191626556
Test RMSE: 0.019872624258123847

Extra Trees - moisture_20cm + meteorological features
Train R2: 0.9999845833673809
Test R2: 0.9612681212089871
Train RMSE: 0.0001348570943702969
Test RMSE: 0.006818843259429237


In [None]:
# Depth-limited Extra Trees on the moisture_20cm + meteorological inputs:
# 150 trees, each capped at max_depth=12.
et_full_20_reg = ExtraTreesRegressor(n_estimators=150, max_depth=12, random_state=42)
et_full_20_reg.fit(X_train_full_20, y_train_full_20)

# NOTE: these two names are also assigned by the unconstrained full model
# (et_full_20); its predictions are replaced here.
y_train_pred_full_20 = et_full_20_reg.predict(X_train_full_20)
y_test_pred_full_20 = et_full_20_reg.predict(X_test_full_20)

# R2 and RMSE (square root of MSE) on the train and test splits.
print("\nExtra Trees (Regulated) - moisture_20cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_20, y_pred=y_train_pred_full_20))
print("Test R2:", r2_score(y_true=y_test_full_20, y_pred=y_test_pred_full_20))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_20, y_pred=y_train_pred_full_20)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_20, y_pred=y_test_pred_full_20)))


Extra Trees (Regulated) - moisture_20cm + meteorological features
Train R2: 0.8360333723068805
Test R2: 0.8341687569036963
Train RMSE: 0.01390774658790287
Test RMSE: 0.014109434371009161


##from 30cm

In [None]:
# ==== Features and targets: moisture_40cm predicted from moisture_30cm ====

# Baseline inputs: a one-column frame holding only moisture_30cm.
X_train_30cm_only, X_test_30cm_only = train_base[['moisture_30cm']], test_base[['moisture_30cm']]
y_train_30cm_only, y_test_30cm_only = train_base['moisture_40cm'], test_base['moisture_40cm']

# Full inputs: drop the target, the other depth readings and timestamp_device,
# keeping moisture_30cm plus the remaining columns (labelled meteorological
# features in this notebook).
features_to_exclude_40_from_30 = [
    'moisture_10cm',
    'moisture_20cm',
    'moisture_40cm',
    'moisture_50cm',
    'moisture_60cm',
    'timestamp_device',
]
X_train_full_30 = train_base.drop(columns=features_to_exclude_40_from_30)
X_test_full_30 = test_base.drop(columns=features_to_exclude_40_from_30)
y_train_full_30, y_test_full_30 = train_base['moisture_40cm'], test_base['moisture_40cm']


In [None]:
# Baseline: Extra Trees (100 trees) fitted on moisture_30cm alone.
et_30cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_30cm_only, y_train_30cm_only
)
y_train_pred_30cm = et_30cm.predict(X_train_30cm_only)
y_test_pred_30cm = et_30cm.predict(X_test_30cm_only)

# R2 and RMSE (square root of MSE) on the train and test splits.
print("\nExtra Trees - moisture_30cm only")
print("Train R2:", r2_score(y_true=y_train_30cm_only, y_pred=y_train_pred_30cm))
print("Test R2:", r2_score(y_true=y_test_30cm_only, y_pred=y_test_pred_30cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_30cm_only, y_pred=y_train_pred_30cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_30cm_only, y_pred=y_test_pred_30cm)))

# Same estimator settings, now on moisture_30cm plus the meteorological columns.
et_full_30 = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_full_30, y_train_full_30
)
y_train_pred_full_30 = et_full_30.predict(X_train_full_30)
y_test_pred_full_30 = et_full_30.predict(X_test_full_30)

print("\nExtra Trees - moisture_30cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_30, y_pred=y_train_pred_full_30))
print("Test R2:", r2_score(y_true=y_test_full_30, y_pred=y_test_pred_full_30))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_30, y_pred=y_train_pred_full_30)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_30, y_pred=y_test_pred_full_30)))


Extra Trees - moisture_30cm only
Train R2: 0.9019411320289267
Test R2: 0.9030746867895451
Train RMSE: 0.010755298647848872
Test RMSE: 0.010786859752392783

Extra Trees - moisture_30cm + meteorological features
Train R2: 0.9999776436669012
Test R2: 0.9851362750508259
Train RMSE: 0.0001623974936911563
Test RMSE: 0.00422415898825016


In [None]:
# Depth-limited Extra Trees on the moisture_30cm + meteorological inputs:
# 150 trees, each capped at max_depth=12.
et_full_30_reg = ExtraTreesRegressor(n_estimators=150, max_depth=12, random_state=42)
et_full_30_reg.fit(X_train_full_30, y_train_full_30)

# NOTE: these two names are also assigned by the unconstrained full model
# (et_full_30); its predictions are replaced here.
y_train_pred_full_30 = et_full_30_reg.predict(X_train_full_30)
y_test_pred_full_30 = et_full_30_reg.predict(X_test_full_30)

# R2 and RMSE (square root of MSE) on the train and test splits.
print("\nExtra Trees (Regulated) - moisture_30cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_30, y_pred=y_train_pred_full_30))
print("Test R2:", r2_score(y_true=y_test_full_30, y_pred=y_test_pred_full_30))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_30, y_pred=y_train_pred_full_30)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_30, y_pred=y_test_pred_full_30)))


Extra Trees (Regulated) - moisture_30cm + meteorological features
Train R2: 0.9565925705976374
Test R2: 0.9542196980577994
Train RMSE: 0.007155847395878144
Test RMSE: 0.007413371293748791


##from 50cm

In [None]:
# ==== Features and targets: moisture_40cm predicted from moisture_50cm ====
# NOTE: the *_50cm_only and *_full_50 names are also used by the 30cm-target
# section; this cell rebinds them to the moisture_40cm target.

# Baseline inputs: a one-column frame holding only moisture_50cm.
X_train_50cm_only, X_test_50cm_only = train_base[['moisture_50cm']], test_base[['moisture_50cm']]
y_train_50cm_only, y_test_50cm_only = train_base['moisture_40cm'], test_base['moisture_40cm']

# Full inputs: drop the target, the other depth readings and timestamp_device,
# keeping moisture_50cm plus the remaining columns (labelled meteorological
# features in this notebook).
features_to_exclude_40_from_50 = [
    'moisture_10cm',
    'moisture_20cm',
    'moisture_30cm',
    'moisture_40cm',
    'moisture_60cm',
    'timestamp_device',
]
X_train_full_50 = train_base.drop(columns=features_to_exclude_40_from_50)
X_test_full_50 = test_base.drop(columns=features_to_exclude_40_from_50)
y_train_full_50, y_test_full_50 = train_base['moisture_40cm'], test_base['moisture_40cm']


In [None]:
# Baseline: Extra Trees (100 trees) fitted on moisture_50cm alone.
et_50cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_50cm_only, y_train_50cm_only
)
y_train_pred_50cm = et_50cm.predict(X_train_50cm_only)
y_test_pred_50cm = et_50cm.predict(X_test_50cm_only)

# R2 and RMSE (square root of MSE) on the train and test splits.
print("\nExtra Trees - moisture_50cm only")
print("Train R2:", r2_score(y_true=y_train_50cm_only, y_pred=y_train_pred_50cm))
print("Test R2:", r2_score(y_true=y_test_50cm_only, y_pred=y_test_pred_50cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_50cm_only, y_pred=y_train_pred_50cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_50cm_only, y_pred=y_test_pred_50cm)))

# Same estimator settings, now on moisture_50cm plus the meteorological columns.
et_full_50 = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_full_50, y_train_full_50
)
y_train_pred_full_50 = et_full_50.predict(X_train_full_50)
y_test_pred_full_50 = et_full_50.predict(X_test_full_50)

print("\nExtra Trees - moisture_50cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_50, y_pred=y_train_pred_full_50))
print("Test R2:", r2_score(y_true=y_test_full_50, y_pred=y_test_pred_full_50))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_50, y_pred=y_train_pred_full_50)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_50, y_pred=y_test_pred_full_50)))



Extra Trees - moisture_50cm only
Train R2: 0.8768940179424155
Test R2: 0.8815573682972084
Train RMSE: 0.012050875925618869
Test RMSE: 0.011924232899350828

Extra Trees - moisture_50cm + meteorological features
Train R2: 0.9999747745187693
Test R2: 0.9769413778566464
Train RMSE: 0.00017250383980189018
Test RMSE: 0.005261300435705153


In [None]:
# Depth-limited Extra Trees on the moisture_50cm + meteorological inputs:
# 150 trees, each capped at max_depth=12.
et_full_50_reg = ExtraTreesRegressor(n_estimators=150, max_depth=12, random_state=42)
et_full_50_reg.fit(X_train_full_50, y_train_full_50)

# NOTE: these two names are also assigned by the unconstrained full model
# (et_full_50); its predictions are replaced here.
y_train_pred_full_50 = et_full_50_reg.predict(X_train_full_50)
y_test_pred_full_50 = et_full_50_reg.predict(X_test_full_50)

# R2 and RMSE (square root of MSE) on the train and test splits.
print("\nExtra Trees (Regulated) - moisture_50cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_50, y_pred=y_train_pred_full_50))
print("Test R2:", r2_score(y_true=y_test_full_50, y_pred=y_test_pred_full_50))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_50, y_pred=y_train_pred_full_50)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_50, y_pred=y_test_pred_full_50)))


Extra Trees (Regulated) - moisture_50cm + meteorological features
Train R2: 0.9401140545867588
Test R2: 0.9399791282577149
Train RMSE: 0.008405073035643916
Test RMSE: 0.008488433977046104


##from 60cm

In [None]:
# ==== Features and targets: moisture_40cm predicted from moisture_60cm ====
# NOTE: the *_60cm_only and *_full_60 names are also used by the 30cm-target
# section; this cell rebinds them to the moisture_40cm target.

# Baseline inputs: a one-column frame holding only moisture_60cm.
X_train_60cm_only, X_test_60cm_only = train_base[['moisture_60cm']], test_base[['moisture_60cm']]
y_train_60cm_only, y_test_60cm_only = train_base['moisture_40cm'], test_base['moisture_40cm']

# Full inputs: drop the target, the other depth readings and timestamp_device,
# keeping moisture_60cm plus the remaining columns (labelled meteorological
# features in this notebook).
features_to_exclude_40_from_60 = [
    'moisture_10cm',
    'moisture_20cm',
    'moisture_30cm',
    'moisture_40cm',
    'moisture_50cm',
    'timestamp_device',
]
X_train_full_60 = train_base.drop(columns=features_to_exclude_40_from_60)
X_test_full_60 = test_base.drop(columns=features_to_exclude_40_from_60)
y_train_full_60, y_test_full_60 = train_base['moisture_40cm'], test_base['moisture_40cm']

In [None]:
# Baseline: Extra Trees (100 trees) fitted on moisture_60cm alone.
et_60cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_60cm_only, y_train_60cm_only
)
y_train_pred_60cm = et_60cm.predict(X_train_60cm_only)
y_test_pred_60cm = et_60cm.predict(X_test_60cm_only)

# R2 and RMSE (square root of MSE) on the train and test splits.
print("\nExtra Trees - moisture_60cm only")
print("Train R2:", r2_score(y_true=y_train_60cm_only, y_pred=y_train_pred_60cm))
print("Test R2:", r2_score(y_true=y_test_60cm_only, y_pred=y_test_pred_60cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_60cm_only, y_pred=y_train_pred_60cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_60cm_only, y_pred=y_test_pred_60cm)))

# Same estimator settings, now on moisture_60cm plus the meteorological columns.
et_full_60 = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_full_60, y_train_full_60
)
y_train_pred_full_60 = et_full_60.predict(X_train_full_60)
y_test_pred_full_60 = et_full_60.predict(X_test_full_60)

print("\nExtra Trees - moisture_60cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_60, y_pred=y_train_pred_full_60))
print("Test R2:", r2_score(y_true=y_test_full_60, y_pred=y_test_pred_full_60))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_60, y_pred=y_train_pred_full_60)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_60, y_pred=y_test_pred_full_60)))



Extra Trees - moisture_60cm only
Train R2: 0.6737222867781687
Test R2: 0.6827974890670199
Train RMSE: 0.019618813622467236
Test RMSE: 0.01951392960144133

Extra Trees - moisture_60cm + meteorological features
Train R2: 0.9999373796535803
Test R2: 0.948744013770667
Train RMSE: 0.0002717920625832644
Test RMSE: 0.007844201167914424


In [None]:
# Depth-limited Extra Trees on the moisture_60cm + meteorological inputs:
# 150 trees, each capped at max_depth=12. max_depth only bounds how deep each
# tree may grow; it has no effect on parallelism (that is what n_jobs controls,
# and n_jobs is left at its default here).
et_full_60_reg = ExtraTreesRegressor(n_estimators=150, max_depth=12, random_state=42)
et_full_60_reg.fit(X_train_full_60, y_train_full_60)

# NOTE: these two names are also assigned by the unconstrained full model
# (et_full_60); its predictions are replaced here.
y_train_pred_full_60 = et_full_60_reg.predict(X_train_full_60)
y_test_pred_full_60 = et_full_60_reg.predict(X_test_full_60)

# R2 and RMSE (square root of MSE) on the train and test splits.
print("\nExtra Trees (Regulated) - moisture_60cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_60, y_pred=y_train_pred_full_60))
print("Test R2:", r2_score(y_true=y_test_full_60, y_pred=y_test_pred_full_60))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_60, y_pred=y_train_pred_full_60)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_60, y_pred=y_test_pred_full_60)))


Extra Trees (Regulated) - moisture_60cm + meteorological features
Train R2: 0.8589952578289766
Test R2: 0.8588908015818598
Train RMSE: 0.012897213699809972
Test RMSE: 0.013015297156202345


## Predicting moisture at 50 cm depth

## From 10cm

In [None]:
# ==== 1. Separate features and targets ====

# Single-feature baseline: moisture_10cm is the only predictor, moisture_50cm the target
X_train_10cm_only, X_test_10cm_only = (
    split[['moisture_10cm']] for split in (train_base, test_base)
)
y_train_10cm_only, y_test_10cm_only = (
    split['moisture_50cm'] for split in (train_base, test_base)
)

# Full model: drop the target, the other depth sensors and the timestamp,
# which leaves moisture_10cm + meteorological features
features_to_exclude_50_from_10 = [
    'moisture_20cm',
    'moisture_30cm',
    'moisture_40cm',
    'moisture_50cm',
    'moisture_60cm',
    'timestamp_device',
]
X_train_full_10, X_test_full_10 = (
    split.drop(columns=features_to_exclude_50_from_10) for split in (train_base, test_base)
)
y_train_full_10, y_test_full_10 = (
    split['moisture_50cm'] for split in (train_base, test_base)
)

In [None]:
# Baseline: Extra Trees (100 trees, fixed seed) fitted on moisture_10cm alone
et_10cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_10cm_only, y_train_10cm_only
)
y_train_pred_10cm, y_test_pred_10cm = (
    et_10cm.predict(features) for features in (X_train_10cm_only, X_test_10cm_only)
)

# R2 and RMSE on both splits for the single-feature model
print("\nExtra Trees - moisture_10cm only")
print("Train R2:", r2_score(y_true=y_train_10cm_only, y_pred=y_train_pred_10cm))
print("Test R2:", r2_score(y_true=y_test_10cm_only, y_pred=y_test_pred_10cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_10cm_only, y_pred=y_train_pred_10cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_10cm_only, y_pred=y_test_pred_10cm)))

# Same estimator settings, fitted on moisture_10cm + meteorological features
et_full_10 = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_full_10, y_train_full_10
)
y_train_pred_full_10, y_test_pred_full_10 = (
    et_full_10.predict(features) for features in (X_train_full_10, X_test_full_10)
)

# R2 and RMSE on both splits for the full-feature model
print("\nExtra Trees - moisture_10cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_10, y_pred=y_train_pred_full_10))
print("Test R2:", r2_score(y_true=y_test_full_10, y_pred=y_test_pred_full_10))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_10, y_pred=y_train_pred_full_10)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_10, y_pred=y_test_pred_full_10)))



Extra Trees - moisture_10cm only
Train R2: 0.4611237001811356
Test R2: 0.4537916476768551
Train RMSE: 0.010353805203452153
Test RMSE: 0.010557099138643972

Extra Trees - moisture_10cm + meteorological features
Train R2: 0.9999501655195508
Test R2: 0.9273957907641875
Train RMSE: 9.956814489207793e-05
Test RMSE: 0.0038489859991071174


In [None]:
# Depth-limited ("regulated") Extra Trees on moisture_10cm + meteorological features.
# NOTE: predictions are stored under the same y_*_pred_full_10 names used for the
# unconstrained et_full_10 model, so they replace those arrays.
et_full_10_reg = ExtraTreesRegressor(
    n_estimators=150,   # 150 trees in the ensemble
    max_depth=12,       # cap every tree at depth 12 to limit model complexity
    random_state=42,    # fixed seed for reproducible fits
).fit(X_train_full_10, y_train_full_10)

# Predict on both splits with the fitted model
y_train_pred_full_10, y_test_pred_full_10 = (
    et_full_10_reg.predict(features) for features in (X_train_full_10, X_test_full_10)
)

# Report R2 and RMSE for train and test
print("\nExtra Trees (Regulated) - moisture_10cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_10, y_pred=y_train_pred_full_10))
print("Test R2:", r2_score(y_true=y_test_full_10, y_pred=y_test_pred_full_10))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_10, y_pred=y_train_pred_full_10)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_10, y_pred=y_test_pred_full_10)))


Extra Trees (Regulated) - moisture_10cm + meteorological features
Train R2: 0.7117287058404002
Test R2: 0.6992648559058406
Train RMSE: 0.007572794071065157
Test RMSE: 0.007833530216377496


## From 20cm

In [None]:
# ==== 1. Separate features and targets ====

# Single-feature baseline: moisture_20cm is the only predictor, moisture_50cm the target
X_train_20cm_only, X_test_20cm_only = (
    split[['moisture_20cm']] for split in (train_base, test_base)
)
y_train_20cm_only, y_test_20cm_only = (
    split['moisture_50cm'] for split in (train_base, test_base)
)

# Full model: drop the target, the other depth sensors and the timestamp,
# which leaves moisture_20cm + meteorological features
features_to_exclude_50_from_20 = [
    'moisture_10cm',
    'moisture_30cm',
    'moisture_40cm',
    'moisture_50cm',
    'moisture_60cm',
    'timestamp_device',
]
X_train_full_20, X_test_full_20 = (
    split.drop(columns=features_to_exclude_50_from_20) for split in (train_base, test_base)
)
y_train_full_20, y_test_full_20 = (
    split['moisture_50cm'] for split in (train_base, test_base)
)

In [None]:
# Baseline: Extra Trees (100 trees, fixed seed) fitted on moisture_20cm alone
et_20cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_20cm_only, y_train_20cm_only
)
y_train_pred_20cm, y_test_pred_20cm = (
    et_20cm.predict(features) for features in (X_train_20cm_only, X_test_20cm_only)
)

# R2 and RMSE on both splits for the single-feature model
print("\nExtra Trees - moisture_20cm only")
print("Train R2:", r2_score(y_true=y_train_20cm_only, y_pred=y_train_pred_20cm))
print("Test R2:", r2_score(y_true=y_test_20cm_only, y_pred=y_test_pred_20cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_20cm_only, y_pred=y_train_pred_20cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_20cm_only, y_pred=y_test_pred_20cm)))

# Same estimator settings, fitted on moisture_20cm + meteorological features
et_full_20 = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_full_20, y_train_full_20
)
y_train_pred_full_20, y_test_pred_full_20 = (
    et_full_20.predict(features) for features in (X_train_full_20, X_test_full_20)
)

# R2 and RMSE on both splits for the full-feature model
print("\nExtra Trees - moisture_20cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_20, y_pred=y_train_pred_full_20))
print("Test R2:", r2_score(y_true=y_test_full_20, y_pred=y_test_pred_full_20))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_20, y_pred=y_train_pred_full_20)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_20, y_pred=y_test_pred_full_20)))



Extra Trees - moisture_20cm only
Train R2: 0.6770211112566744
Test R2: 0.6788120181220254
Train RMSE: 0.008015719591656154
Test RMSE: 0.008095526053253628

Extra Trees - moisture_20cm + meteorological features
Train R2: 0.9999800508318412
Test R2: 0.9622199283427078
Train RMSE: 6.299670633608555e-05
Test RMSE: 0.0027764945168011904


In [None]:
# Depth-limited ("regulated") Extra Trees on moisture_20cm + meteorological features.
# NOTE: predictions are stored under the same y_*_pred_full_20 names used for the
# unconstrained et_full_20 model, so they replace those arrays.
et_full_20_reg = ExtraTreesRegressor(
    n_estimators=150,   # 150 trees in the ensemble
    max_depth=12,       # cap every tree at depth 12 to limit model complexity
    random_state=42,    # fixed seed for reproducible fits
).fit(X_train_full_20, y_train_full_20)

# Predict on both splits with the fitted model
y_train_pred_full_20, y_test_pred_full_20 = (
    et_full_20_reg.predict(features) for features in (X_train_full_20, X_test_full_20)
)

# Report R2 and RMSE for train and test
print("\nExtra Trees (Regulated) - moisture_20cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_20, y_pred=y_train_pred_full_20))
print("Test R2:", r2_score(y_true=y_test_full_20, y_pred=y_test_pred_full_20))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_20, y_pred=y_train_pred_full_20)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_20, y_pred=y_test_pred_full_20)))


Extra Trees (Regulated) - moisture_20cm + meteorological features
Train R2: 0.8581007638279685
Test R2: 0.8569410863017279
Train RMSE: 0.0053130692567181816
Test RMSE: 0.005402849783836585


## From 30cm

In [None]:
# ==== 1. Separate features and targets ====

# Single-feature baseline: moisture_30cm is the only predictor, moisture_50cm the target
X_train_30cm_only, X_test_30cm_only = (
    split[['moisture_30cm']] for split in (train_base, test_base)
)
y_train_30cm_only, y_test_30cm_only = (
    split['moisture_50cm'] for split in (train_base, test_base)
)

# Full model: drop the target, the other depth sensors and the timestamp,
# which leaves moisture_30cm + meteorological features
features_to_exclude_50_from_30 = [
    'moisture_10cm',
    'moisture_20cm',
    'moisture_40cm',
    'moisture_50cm',
    'moisture_60cm',
    'timestamp_device',
]
X_train_full_30, X_test_full_30 = (
    split.drop(columns=features_to_exclude_50_from_30) for split in (train_base, test_base)
)
y_train_full_30, y_test_full_30 = (
    split['moisture_50cm'] for split in (train_base, test_base)
)

In [None]:
# Baseline: Extra Trees (100 trees, fixed seed) fitted on moisture_30cm alone
et_30cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_30cm_only, y_train_30cm_only
)
y_train_pred_30cm, y_test_pred_30cm = (
    et_30cm.predict(features) for features in (X_train_30cm_only, X_test_30cm_only)
)

# R2 and RMSE on both splits for the single-feature model
print("\nExtra Trees - moisture_30cm only")
print("Train R2:", r2_score(y_true=y_train_30cm_only, y_pred=y_train_pred_30cm))
print("Test R2:", r2_score(y_true=y_test_30cm_only, y_pred=y_test_pred_30cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_30cm_only, y_pred=y_train_pred_30cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_30cm_only, y_pred=y_test_pred_30cm)))

# Same estimator settings, fitted on moisture_30cm + meteorological features
et_full_30 = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_full_30, y_train_full_30
)
y_train_pred_full_30, y_test_pred_full_30 = (
    et_full_30.predict(features) for features in (X_train_full_30, X_test_full_30)
)

# R2 and RMSE on both splits for the full-feature model
print("\nExtra Trees - moisture_30cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_30, y_pred=y_train_pred_full_30))
print("Test R2:", r2_score(y_true=y_test_full_30, y_pred=y_test_pred_full_30))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_30, y_pred=y_train_pred_full_30)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_30, y_pred=y_test_pred_full_30)))



Extra Trees - moisture_30cm only
Train R2: 0.8155139636730089
Test R2: 0.8179432513714772
Train RMSE: 0.006058110607012413
Test RMSE: 0.006094931410805464

Extra Trees - moisture_30cm + meteorological features
Train R2: 0.9999640877698039
Test R2: 0.9661583024245571
Train RMSE: 8.452333729958192e-05
Test RMSE: 0.0026277951184614295


In [None]:
# Depth-limited ("regulated") Extra Trees on moisture_30cm + meteorological features.
# NOTE: predictions are stored under the same y_*_pred_full_30 names used for the
# unconstrained et_full_30 model, so they replace those arrays.
et_full_30_reg = ExtraTreesRegressor(
    n_estimators=150,   # 150 trees in the ensemble
    max_depth=12,       # cap every tree at depth 12 to limit model complexity
    random_state=42,    # fixed seed for reproducible fits
).fit(X_train_full_30, y_train_full_30)

# Predict on both splits with the fitted model
y_train_pred_full_30, y_test_pred_full_30 = (
    et_full_30_reg.predict(features) for features in (X_train_full_30, X_test_full_30)
)

# Report R2 and RMSE for train and test
print("\nExtra Trees (Regulated) - moisture_30cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_30, y_pred=y_train_pred_full_30))
print("Test R2:", r2_score(y_true=y_test_full_30, y_pred=y_test_pred_full_30))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_30, y_pred=y_train_pred_full_30)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_30, y_pred=y_test_pred_full_30)))


Extra Trees (Regulated) - moisture_30cm + meteorological features
Train R2: 0.9091028586814016
Test R2: 0.9075857377020661
Train RMSE: 0.0042523652523460895
Test RMSE: 0.004342451054326144


## From 40cm

In [None]:
# ==== 1. Separate features and targets ====

# Single-feature baseline: moisture_40cm is the only predictor, moisture_50cm the target
X_train_40cm_only, X_test_40cm_only = (
    split[['moisture_40cm']] for split in (train_base, test_base)
)
y_train_40cm_only, y_test_40cm_only = (
    split['moisture_50cm'] for split in (train_base, test_base)
)

# Full model: drop the target, the other depth sensors and the timestamp,
# which leaves moisture_40cm + meteorological features
features_to_exclude_50_from_40 = [
    'moisture_10cm',
    'moisture_20cm',
    'moisture_30cm',
    'moisture_50cm',
    'moisture_60cm',
    'timestamp_device',
]
X_train_full_40, X_test_full_40 = (
    split.drop(columns=features_to_exclude_50_from_40) for split in (train_base, test_base)
)
y_train_full_40, y_test_full_40 = (
    split['moisture_50cm'] for split in (train_base, test_base)
)

In [None]:
# Baseline: Extra Trees (100 trees, fixed seed) fitted on moisture_40cm alone
et_40cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_40cm_only, y_train_40cm_only
)
y_train_pred_40cm, y_test_pred_40cm = (
    et_40cm.predict(features) for features in (X_train_40cm_only, X_test_40cm_only)
)

# R2 and RMSE on both splits for the single-feature model
print("\nExtra Trees - moisture_40cm only")
print("Train R2:", r2_score(y_true=y_train_40cm_only, y_pred=y_train_pred_40cm))
print("Test R2:", r2_score(y_true=y_test_40cm_only, y_pred=y_test_pred_40cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_40cm_only, y_pred=y_train_pred_40cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_40cm_only, y_pred=y_test_pred_40cm)))

# Same estimator settings, fitted on moisture_40cm + meteorological features
et_full_40 = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_full_40, y_train_full_40
)
y_train_pred_full_40, y_test_pred_full_40 = (
    et_full_40.predict(features) for features in (X_train_full_40, X_test_full_40)
)

# R2 and RMSE on both splits for the full-feature model
print("\nExtra Trees - moisture_40cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_40, y_pred=y_train_pred_full_40))
print("Test R2:", r2_score(y_true=y_test_full_40, y_pred=y_test_pred_full_40))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_40, y_pred=y_train_pred_full_40)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_40, y_pred=y_test_pred_full_40)))



Extra Trees - moisture_40cm only
Train R2: 0.8854410762813598
Test R2: 0.8883193607361667
Train RMSE: 0.004773862550311442
Test RMSE: 0.004773691805607433

Extra Trees - moisture_40cm + meteorological features
Train R2: 0.9999926964098823
Test R2: 0.979513873074121
Train RMSE: 3.8117432890455633e-05
Test RMSE: 0.002044538820304147


In [None]:
# Depth-limited ("regulated") Extra Trees on moisture_40cm + meteorological features.
# NOTE: predictions are stored under the same y_*_pred_full_40 names used for the
# unconstrained et_full_40 model, so they replace those arrays.
et_full_40_reg = ExtraTreesRegressor(
    n_estimators=150,   # 150 trees in the ensemble
    max_depth=12,       # cap every tree at depth 12 to limit model complexity
    random_state=42,    # fixed seed for reproducible fits
).fit(X_train_full_40, y_train_full_40)

# Predict on both splits with the fitted model
y_train_pred_full_40, y_test_pred_full_40 = (
    et_full_40_reg.predict(features) for features in (X_train_full_40, X_test_full_40)
)

# Report R2 and RMSE for train and test
print("\nExtra Trees (Regulated) - moisture_40cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_40, y_pred=y_train_pred_full_40))
print("Test R2:", r2_score(y_true=y_test_full_40, y_pred=y_test_pred_full_40))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_40, y_pred=y_train_pred_full_40)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_40, y_pred=y_test_pred_full_40)))


Extra Trees (Regulated) - moisture_40cm + meteorological features
Train R2: 0.9417231689423226
Test R2: 0.9414677803343352
Train RMSE: 0.0034048925575113883
Test RMSE: 0.003455914014337709


## From 60cm

In [None]:
# ==== 1. Separate features and targets ====

# Single-feature baseline: moisture_60cm is the only predictor, moisture_50cm the target
X_train_60cm_only, X_test_60cm_only = (
    split[['moisture_60cm']] for split in (train_base, test_base)
)
y_train_60cm_only, y_test_60cm_only = (
    split['moisture_50cm'] for split in (train_base, test_base)
)

# Full model: drop the target, the other depth sensors and the timestamp,
# which leaves moisture_60cm + meteorological features
features_to_exclude_50_from_60 = [
    'moisture_10cm',
    'moisture_20cm',
    'moisture_30cm',
    'moisture_40cm',
    'moisture_50cm',
    'timestamp_device',
]
X_train_full_60, X_test_full_60 = (
    split.drop(columns=features_to_exclude_50_from_60) for split in (train_base, test_base)
)
y_train_full_60, y_test_full_60 = (
    split['moisture_50cm'] for split in (train_base, test_base)
)

In [None]:
# Baseline: Extra Trees (100 trees, fixed seed) fitted on moisture_60cm alone
et_60cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_60cm_only, y_train_60cm_only
)
y_train_pred_60cm, y_test_pred_60cm = (
    et_60cm.predict(features) for features in (X_train_60cm_only, X_test_60cm_only)
)

# R2 and RMSE on both splits for the single-feature model
print("\nExtra Trees - moisture_60cm only")
print("Train R2:", r2_score(y_true=y_train_60cm_only, y_pred=y_train_pred_60cm))
print("Test R2:", r2_score(y_true=y_test_60cm_only, y_pred=y_test_pred_60cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_60cm_only, y_pred=y_train_pred_60cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_60cm_only, y_pred=y_test_pred_60cm)))

# Same estimator settings, fitted on moisture_60cm + meteorological features
et_full_60 = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_full_60, y_train_full_60
)
y_train_pred_full_60, y_test_pred_full_60 = (
    et_full_60.predict(features) for features in (X_train_full_60, X_test_full_60)
)

# R2 and RMSE on both splits for the full-feature model
print("\nExtra Trees - moisture_60cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_60, y_pred=y_train_pred_full_60))
print("Test R2:", r2_score(y_true=y_test_full_60, y_pred=y_test_pred_full_60))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_60, y_pred=y_train_pred_full_60)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_60, y_pred=y_test_pred_full_60)))



Extra Trees - moisture_60cm only
Train R2: 0.8941742133316004
Test R2: 0.89795040860385
Train RMSE: 0.004588293608494997
Test RMSE: 0.00456321641979064

Extra Trees - moisture_60cm + meteorological features
Train R2: 0.9999689894455959
Test R2: 0.985061626074424
Train RMSE: 7.85434933802659e-05
Test RMSE: 0.001745890872760278


In [None]:
# Depth-limited ("regulated") Extra Trees on moisture_60cm + meteorological features.
# NOTE: predictions are stored under the same y_*_pred_full_60 names used for the
# unconstrained et_full_60 model, so they replace those arrays.
et_full_60_reg = ExtraTreesRegressor(
    n_estimators=150,   # 150 trees in the ensemble
    max_depth=12,       # cap every tree at depth 12 to limit model complexity
    random_state=42,    # fixed seed for reproducible fits
).fit(X_train_full_60, y_train_full_60)

# Predict on both splits with the fitted model
y_train_pred_full_60, y_test_pred_full_60 = (
    et_full_60_reg.predict(features) for features in (X_train_full_60, X_test_full_60)
)

# Report R2 and RMSE for train and test
print("\nExtra Trees (Regulated) - moisture_60cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_60, y_pred=y_train_pred_full_60))
print("Test R2:", r2_score(y_true=y_test_full_60, y_pred=y_test_pred_full_60))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_60, y_pred=y_train_pred_full_60)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_60, y_pred=y_test_pred_full_60)))



Extra Trees (Regulated) - moisture_60cm + meteorological features
Train R2: 0.9592159886110925
Test R2: 0.9594977668999797
Train RMSE: 0.0028483963323630884
Test RMSE: 0.0028747819930921566


## Predicting moisture at 60 cm depth

## From 10cm

In [None]:
# ==== 1. Separate features and targets ====

# Single-feature baseline: moisture_10cm is the only predictor, moisture_60cm the target
X_train_10cm_only, X_test_10cm_only = (
    split[['moisture_10cm']] for split in (train_base, test_base)
)
y_train_10cm_only, y_test_10cm_only = (
    split['moisture_60cm'] for split in (train_base, test_base)
)

# Full model: drop the target, the other depth sensors and the timestamp,
# which leaves moisture_10cm + meteorological features
features_to_exclude_60_from_10 = [
    'moisture_20cm',
    'moisture_30cm',
    'moisture_40cm',
    'moisture_50cm',
    'moisture_60cm',
    'timestamp_device',
]
X_train_full_10, X_test_full_10 = (
    split.drop(columns=features_to_exclude_60_from_10) for split in (train_base, test_base)
)
y_train_full_10, y_test_full_10 = (
    split['moisture_60cm'] for split in (train_base, test_base)
)

In [None]:
# Baseline: Extra Trees (100 trees, fixed seed) fitted on moisture_10cm alone
et_10cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_10cm_only, y_train_10cm_only
)
y_train_pred_10cm, y_test_pred_10cm = (
    et_10cm.predict(features) for features in (X_train_10cm_only, X_test_10cm_only)
)

# R2 and RMSE on both splits for the single-feature model
print("\nExtra Trees - moisture_10cm only")
print("Train R2:", r2_score(y_true=y_train_10cm_only, y_pred=y_train_pred_10cm))
print("Test R2:", r2_score(y_true=y_test_10cm_only, y_pred=y_test_pred_10cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_10cm_only, y_pred=y_train_pred_10cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_10cm_only, y_pred=y_test_pred_10cm)))

# Same estimator settings, fitted on moisture_10cm + meteorological features
et_full_10 = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_full_10, y_train_full_10
)
y_train_pred_full_10, y_test_pred_full_10 = (
    et_full_10.predict(features) for features in (X_train_full_10, X_test_full_10)
)

# R2 and RMSE on both splits for the full-feature model
print("\nExtra Trees - moisture_10cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_10, y_pred=y_train_pred_full_10))
print("Test R2:", r2_score(y_true=y_test_full_10, y_pred=y_test_pred_full_10))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_10, y_pred=y_train_pred_full_10)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_10, y_pred=y_test_pred_full_10)))



Extra Trees - moisture_10cm only
Train R2: 0.4043409099108375
Test R2: 0.3932352317244199
Train RMSE: 0.009402349656072052
Test RMSE: 0.009626086404722494

Extra Trees - moisture_10cm + meteorological features
Train R2: 0.9999269586726164
Test R2: 0.9129406364657291
Train RMSE: 0.00010411704637697742
Test RMSE: 0.0036462573851354977


In [None]:
# Depth-limited ("regulated") Extra Trees on moisture_10cm + meteorological features.
# NOTE: predictions are stored under the same y_*_pred_full_10 names used for the
# unconstrained et_full_10 model, so they replace those arrays.
et_full_10_reg = ExtraTreesRegressor(
    n_estimators=150,   # 150 trees in the ensemble
    max_depth=12,       # cap every tree at depth 12 to limit model complexity
    random_state=42,    # fixed seed for reproducible fits
).fit(X_train_full_10, y_train_full_10)

# Predict on both splits with the fitted model
y_train_pred_full_10, y_test_pred_full_10 = (
    et_full_10_reg.predict(features) for features in (X_train_full_10, X_test_full_10)
)

# Report R2 and RMSE for train and test
print("\nExtra Trees (Regulated) - moisture_10cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_10, y_pred=y_train_pred_full_10))
print("Test R2:", r2_score(y_true=y_test_full_10, y_pred=y_test_pred_full_10))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_10, y_pred=y_train_pred_full_10)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_10, y_pred=y_test_pred_full_10)))


Extra Trees (Regulated) - moisture_10cm + meteorological features
Train R2: 0.6202640694482378
Test R2: 0.6056546454209968
Train RMSE: 0.0075072064091378735
Test RMSE: 0.00776028984609143


## From 20cm

In [None]:
# ==== 1. Separate features and targets ====

# Single-feature baseline: moisture_20cm is the only predictor, moisture_60cm the target
X_train_20cm_only, X_test_20cm_only = (
    split[['moisture_20cm']] for split in (train_base, test_base)
)
y_train_20cm_only, y_test_20cm_only = (
    split['moisture_60cm'] for split in (train_base, test_base)
)

# Full model: drop the target, the other depth sensors and the timestamp,
# which leaves moisture_20cm + meteorological features
features_to_exclude_60_from_20 = [
    'moisture_10cm',
    'moisture_30cm',
    'moisture_40cm',
    'moisture_50cm',
    'moisture_60cm',
    'timestamp_device',
]
X_train_full_20, X_test_full_20 = (
    split.drop(columns=features_to_exclude_60_from_20) for split in (train_base, test_base)
)
y_train_full_20, y_test_full_20 = (
    split['moisture_60cm'] for split in (train_base, test_base)
)


In [None]:
# Baseline: Extra Trees (100 trees, fixed seed) fitted on moisture_20cm alone
et_20cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_20cm_only, y_train_20cm_only
)
y_train_pred_20cm, y_test_pred_20cm = (
    et_20cm.predict(features) for features in (X_train_20cm_only, X_test_20cm_only)
)

# R2 and RMSE on both splits for the single-feature model
print("\nExtra Trees - moisture_20cm only")
print("Train R2:", r2_score(y_true=y_train_20cm_only, y_pred=y_train_pred_20cm))
print("Test R2:", r2_score(y_true=y_test_20cm_only, y_pred=y_test_pred_20cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_20cm_only, y_pred=y_train_pred_20cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_20cm_only, y_pred=y_test_pred_20cm)))

# Same estimator settings, fitted on moisture_20cm + meteorological features
et_full_20 = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_full_20, y_train_full_20
)
y_train_pred_full_20, y_test_pred_full_20 = (
    et_full_20.predict(features) for features in (X_train_full_20, X_test_full_20)
)

# R2 and RMSE on both splits for the full-feature model
print("\nExtra Trees - moisture_20cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_20, y_pred=y_train_pred_full_20))
print("Test R2:", r2_score(y_true=y_test_full_20, y_pred=y_test_pred_full_20))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_20, y_pred=y_train_pred_full_20)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_20, y_pred=y_test_pred_full_20)))



Extra Trees - moisture_20cm only
Train R2: 0.5919017555386243
Test R2: 0.5937396621267004
Train RMSE: 0.0077825133469408255
Test RMSE: 0.007876654399150463

Extra Trees - moisture_20cm + meteorological features
Train R2: 0.9999814690834212
Test R2: 0.9454485225392841
Train RMSE: 5.2442810881208016e-05
Test RMSE: 0.0028863090862228003


In [None]:
# Depth-limited ("regulated") Extra Trees on moisture_20cm + meteorological features.
# NOTE: predictions are stored under the same y_*_pred_full_20 names used for the
# unconstrained et_full_20 model, so they replace those arrays.
et_full_20_reg = ExtraTreesRegressor(
    n_estimators=150,   # 150 trees in the ensemble
    max_depth=12,       # cap every tree at depth 12 to limit model complexity
    random_state=42,    # fixed seed for reproducible fits
).fit(X_train_full_20, y_train_full_20)

# Predict on both splits with the fitted model
y_train_pred_full_20, y_test_pred_full_20 = (
    et_full_20_reg.predict(features) for features in (X_train_full_20, X_test_full_20)
)

# Report R2 and RMSE for train and test
print("\nExtra Trees (Regulated) - moisture_20cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_20, y_pred=y_train_pred_full_20))
print("Test R2:", r2_score(y_true=y_test_full_20, y_pred=y_test_pred_full_20))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_20, y_pred=y_train_pred_full_20)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_20, y_pred=y_test_pred_full_20)))


Extra Trees (Regulated) - moisture_20cm + meteorological features
Train R2: 0.7987563532383684
Test R2: 0.7980462318706147
Train RMSE: 0.005465106130355739
Test RMSE: 0.005553484425440124


## From 30cm

In [None]:
# ==== 1. Separate features and targets ====

# Single-feature baseline: moisture_30cm is the only predictor, moisture_60cm the target
X_train_30cm_only, X_test_30cm_only = (
    split[['moisture_30cm']] for split in (train_base, test_base)
)
y_train_30cm_only, y_test_30cm_only = (
    split['moisture_60cm'] for split in (train_base, test_base)
)

# Full model: drop the target, the other depth sensors and the timestamp,
# which leaves moisture_30cm + meteorological features
features_to_exclude_60_from_30 = [
    'moisture_10cm',
    'moisture_20cm',
    'moisture_40cm',
    'moisture_50cm',
    'moisture_60cm',
    'timestamp_device',
]
X_train_full_30, X_test_full_30 = (
    split.drop(columns=features_to_exclude_60_from_30) for split in (train_base, test_base)
)
y_train_full_30, y_test_full_30 = (
    split['moisture_60cm'] for split in (train_base, test_base)
)


In [None]:
# Baseline: Extra Trees (100 trees, fixed seed) fitted on moisture_30cm alone
et_30cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_30cm_only, y_train_30cm_only
)
y_train_pred_30cm, y_test_pred_30cm = (
    et_30cm.predict(features) for features in (X_train_30cm_only, X_test_30cm_only)
)

# R2 and RMSE on both splits for the single-feature model
print("\nExtra Trees - moisture_30cm only")
print("Train R2:", r2_score(y_true=y_train_30cm_only, y_pred=y_train_pred_30cm))
print("Test R2:", r2_score(y_true=y_test_30cm_only, y_pred=y_test_pred_30cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_30cm_only, y_pred=y_train_pred_30cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_30cm_only, y_pred=y_test_pred_30cm)))

# Same estimator settings, fitted on moisture_30cm + meteorological features
et_full_30 = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_full_30, y_train_full_30
)
y_train_pred_full_30, y_test_pred_full_30 = (
    et_full_30.predict(features) for features in (X_train_full_30, X_test_full_30)
)

# R2 and RMSE on both splits for the full-feature model
print("\nExtra Trees - moisture_30cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_30, y_pred=y_train_pred_full_30))
print("Test R2:", r2_score(y_true=y_test_full_30, y_pred=y_test_pred_full_30))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_30, y_pred=y_train_pred_full_30)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_30, y_pred=y_test_pred_full_30)))


Extra Trees - moisture_30cm only
Train R2: 0.6260743551267063
Test R2: 0.6310772169188034
Train RMSE: 0.007449551666356206
Test RMSE: 0.00750597849736756

Extra Trees - moisture_30cm + meteorological features
Train R2: 0.9999385627449441
Test R2: 0.937141405448454
Train RMSE: 9.548901483771627e-05
Test RMSE: 0.003098288899653602


In [None]:
# Depth-limited ("regulated") Extra Trees on moisture_30cm + meteorological features.
# NOTE: predictions are stored under the same y_*_pred_full_30 names used for the
# unconstrained et_full_30 model, so they replace those arrays.
et_full_30_reg = ExtraTreesRegressor(
    n_estimators=150,   # 150 trees in the ensemble
    max_depth=12,       # cap every tree at depth 12 to limit model complexity
    random_state=42,    # fixed seed for reproducible fits
).fit(X_train_full_30, y_train_full_30)

# Predict on both splits with the fitted model
y_train_pred_full_30, y_test_pred_full_30 = (
    et_full_30_reg.predict(features) for features in (X_train_full_30, X_test_full_30)
)

# Report R2 and RMSE for train and test
print("\nExtra Trees (Regulated) - moisture_30cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_30, y_pred=y_train_pred_full_30))
print("Test R2:", r2_score(y_true=y_test_full_30, y_pred=y_test_pred_full_30))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_30, y_pred=y_train_pred_full_30)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_30, y_pred=y_test_pred_full_30)))


Extra Trees (Regulated) - moisture_30cm + meteorological features
Train R2: 0.820586478579324
Test R2: 0.8164389888720531
Train RMSE: 0.005160182909817638
Test RMSE: 0.005294559076183959


## From 40cm

In [None]:
# ==== 1. Separate features and targets ====

# Single-feature baseline: moisture_40cm is the only predictor, moisture_60cm the target
X_train_40cm_only, X_test_40cm_only = (
    split[['moisture_40cm']] for split in (train_base, test_base)
)
y_train_40cm_only, y_test_40cm_only = (
    split['moisture_60cm'] for split in (train_base, test_base)
)

# Full model: drop the target, the other depth sensors and the timestamp,
# which leaves moisture_40cm + meteorological features
features_to_exclude_60_from_40 = [
    'moisture_10cm',
    'moisture_20cm',
    'moisture_30cm',
    'moisture_50cm',
    'moisture_60cm',
    'timestamp_device',
]
X_train_full_40, X_test_full_40 = (
    split.drop(columns=features_to_exclude_60_from_40) for split in (train_base, test_base)
)
y_train_full_40, y_test_full_40 = (
    split['moisture_60cm'] for split in (train_base, test_base)
)

In [None]:
# Baseline: Extra Trees (100 trees, fixed seed) fitted on moisture_40cm alone
et_40cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_40cm_only, y_train_40cm_only
)
y_train_pred_40cm, y_test_pred_40cm = (
    et_40cm.predict(features) for features in (X_train_40cm_only, X_test_40cm_only)
)

# R2 and RMSE on both splits for the single-feature model
print("\nExtra Trees - moisture_40cm only")
print("Train R2:", r2_score(y_true=y_train_40cm_only, y_pred=y_train_pred_40cm))
print("Test R2:", r2_score(y_true=y_test_40cm_only, y_pred=y_test_pred_40cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_40cm_only, y_pred=y_train_pred_40cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_40cm_only, y_pred=y_test_pred_40cm)))

# Same estimator settings, fitted on moisture_40cm + meteorological features
et_full_40 = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_full_40, y_train_full_40
)
y_train_pred_full_40, y_test_pred_full_40 = (
    et_full_40.predict(features) for features in (X_train_full_40, X_test_full_40)
)

# R2 and RMSE on both splits for the full-feature model
print("\nExtra Trees - moisture_40cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_40, y_pred=y_train_pred_full_40))
print("Test R2:", r2_score(y_true=y_test_full_40, y_pred=y_test_pred_full_40))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_40, y_pred=y_train_pred_full_40)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_40, y_pred=y_test_pred_full_40)))



Extra Trees - moisture_40cm only
Train R2: 0.6783700637470922
Test R2: 0.6852274875261417
Train RMSE: 0.006909008758230661
Test RMSE: 0.006933267844810687

Extra Trees - moisture_40cm + meteorological features
Train R2: 0.9999690256207164
Test R2: 0.9494178622826452
Train RMSE: 6.78014242596474e-05
Test RMSE: 0.002779317536505113


In [None]:
# Depth-limited ("regulated") Extra Trees on moisture_40cm + meteorological features.
# NOTE: predictions are stored under the same y_*_pred_full_40 names used for the
# unconstrained et_full_40 model, so they replace those arrays.
et_full_40_reg = ExtraTreesRegressor(
    n_estimators=150,   # 150 trees in the ensemble
    max_depth=12,       # cap every tree at depth 12 to limit model complexity
    random_state=42,    # fixed seed for reproducible fits
).fit(X_train_full_40, y_train_full_40)

# Predict on both splits with the fitted model
y_train_pred_full_40, y_test_pred_full_40 = (
    et_full_40_reg.predict(features) for features in (X_train_full_40, X_test_full_40)
)

# Report R2 and RMSE for train and test
print("\nExtra Trees (Regulated) - moisture_40cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_40, y_pred=y_train_pred_full_40))
print("Test R2:", r2_score(y_true=y_test_full_40, y_pred=y_test_pred_full_40))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_40, y_pred=y_train_pred_full_40)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_40, y_pred=y_test_pred_full_40)))


Extra Trees (Regulated) - moisture_40cm + meteorological features
Train R2: 0.8391168418621644
Test R2: 0.838742710394037
Train RMSE: 0.004886442608538017
Test RMSE: 0.004962485557289526


## From 50cm

In [None]:
# ==== 1. Separate features and targets ====

# Single-feature baseline: moisture_50cm is the only predictor, moisture_60cm the target
X_train_50cm_only, X_test_50cm_only = (
    split[['moisture_50cm']] for split in (train_base, test_base)
)
y_train_50cm_only, y_test_50cm_only = (
    split['moisture_60cm'] for split in (train_base, test_base)
)

# Full model: drop the target, the other depth sensors and the timestamp,
# which leaves moisture_50cm + meteorological features
features_to_exclude_60_from_50 = [
    'moisture_10cm',
    'moisture_20cm',
    'moisture_30cm',
    'moisture_40cm',
    'moisture_60cm',
    'timestamp_device',
]
X_train_full_50, X_test_full_50 = (
    split.drop(columns=features_to_exclude_60_from_50) for split in (train_base, test_base)
)
y_train_full_50, y_test_full_50 = (
    split['moisture_60cm'] for split in (train_base, test_base)
)

In [None]:
# Baseline: Extra Trees (100 trees, fixed seed) fitted on moisture_50cm alone
et_50cm = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_50cm_only, y_train_50cm_only
)
y_train_pred_50cm, y_test_pred_50cm = (
    et_50cm.predict(features) for features in (X_train_50cm_only, X_test_50cm_only)
)

# R2 and RMSE on both splits for the single-feature model
print("\nExtra Trees - moisture_50cm only")
print("Train R2:", r2_score(y_true=y_train_50cm_only, y_pred=y_train_pred_50cm))
print("Test R2:", r2_score(y_true=y_test_50cm_only, y_pred=y_test_pred_50cm))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_50cm_only, y_pred=y_train_pred_50cm)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_50cm_only, y_pred=y_test_pred_50cm)))

# Same estimator settings, fitted on moisture_50cm + meteorological features
et_full_50 = ExtraTreesRegressor(n_estimators=100, random_state=42).fit(
    X_train_full_50, y_train_full_50
)
y_train_pred_full_50, y_test_pred_full_50 = (
    et_full_50.predict(features) for features in (X_train_full_50, X_test_full_50)
)

# R2 and RMSE on both splits for the full-feature model
print("\nExtra Trees - moisture_50cm + meteorological features")
print("Train R2:", r2_score(y_true=y_train_full_50, y_pred=y_train_pred_full_50))
print("Test R2:", r2_score(y_true=y_test_full_50, y_pred=y_test_pred_full_50))
print("Train RMSE:", np.sqrt(mean_squared_error(y_true=y_train_full_50, y_pred=y_train_pred_full_50)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_true=y_test_full_50, y_pred=y_test_pred_full_50)))



Extra Trees - moisture_50cm only
Train R2: 0.8992701646493689
Test R2: 0.9029222967297936
Train RMSE: 0.0038664871656465816
Test RMSE: 0.003850342156290689

Extra Trees - moisture_50cm + meteorological features
Train R2: 0.9999859002402116
Test R2: 0.9846378433938613
Train RMSE: 4.574497217926834e-05
Test RMSE: 0.0015316716425970726


In [None]:
# Depth-limited ("regulated") Extra Trees model for the 50cm feature set
et_full_50_reg = ExtraTreesRegressor(
    n_estimators=150,  # 150 trees in the ensemble
    max_depth=12,      # caps how deep each tree may grow (this does NOT control CPU usage)
    random_state=42,   # fixed seed for reproducible fits
)

# Fit on the training split
et_full_50_reg.fit(X_train_full_50, y_train_full_50)

# NOTE: y_train_pred_full_50 / y_test_pred_full_50 are reassigned here, replacing
# the predictions of the unregulated et_full_50 model stored under the same names.
y_train_pred_full_50 = et_full_50_reg.predict(X_train_full_50)
y_test_pred_full_50 = et_full_50_reg.predict(X_test_full_50)

# Score both splits: R2 first, then RMSE (square root of the mean squared error)
print("\nExtra Trees (Regulated) - moisture_50cm + meteorological features")
r2_train_50_reg = r2_score(y_train_full_50, y_train_pred_full_50)
print("Train R2:", r2_train_50_reg)
r2_test_50_reg = r2_score(y_test_full_50, y_test_pred_full_50)
print("Test R2:", r2_test_50_reg)
rmse_train_50_reg = np.sqrt(mean_squared_error(y_train_full_50, y_train_pred_full_50))
print("Train RMSE:", rmse_train_50_reg)
rmse_test_50_reg = np.sqrt(mean_squared_error(y_test_full_50, y_test_pred_full_50))
print("Test RMSE:", rmse_test_50_reg)


Extra Trees (Regulated) - moisture_50cm + meteorological features
Train R2: 0.9560817263971644
Test R2: 0.9560911539291292
Train RMSE: 0.0025530548238744933
Test RMSE: 0.0025894980589309
