In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import joblib

# Load dataset
df = pd.read_csv("pond_data_interpolated_with_indices (1).csv")

# Parse the collection date; values that cannot be parsed become NaT
# because of errors='coerce'.
df['Date of Data Collection'] = pd.to_datetime(df['Date of Data Collection'], errors='coerce')

# Derived calendar columns. They are stored on `df` but are NOT listed in
# `feature_cols` below, so the model is trained without any date input.
df['Day'] = df['Date of Data Collection'].dt.day
df['Month'] = df['Date of Data Collection'].dt.month
df['DOY'] = df['Date of Data Collection'].dt.dayofyear  # seasonal pattern

# Features (bands + spectral indices + location); 17 columns in total.
# The order here is the order every inference row must follow.
feature_cols = ['B2','B3','B4','B5','B6','B7','B8','B8A','B9','B11','B12',
                'NDVI','NDWI','MNDWI','NDCI',
                'Latitude','Longitude']

X = df[feature_cols]

# Targets (water quality parameters)
Y = df[['Dissolved Oxygen (mg/L)','Ammonia (mg/L)','pH']]

# Train-test split (fixed seed so the split is reproducible)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Random Forest as base model
rf = RandomForestRegressor(n_estimators=300, random_state=42)

# Multi-output wrapper: fits one independent forest per target column
model = MultiOutputRegressor(rf)
model.fit(X_train, Y_train)

# Predict
Y_pred = model.predict(X_test)

# Evaluate
r2 = r2_score(Y_test, Y_pred, multioutput='raw_values')

# Aggregate errors: uniform average over the three targets. These mix
# quantities on different scales (mg/L and pH), so they are kept only for
# continuity with earlier runs.
rmse = np.sqrt(mean_squared_error(Y_test, Y_pred))  # works in all sklearn versions
mae = mean_absolute_error(Y_test, Y_pred)

# Per-target errors, in the same (DO, Ammonia, pH) order as `r2`, so each
# value can be read in the unit of its own target.
rmse_per_target = np.sqrt(mean_squared_error(Y_test, Y_pred, multioutput='raw_values'))
mae_per_target = mean_absolute_error(Y_test, Y_pred, multioutput='raw_values')

# NOTE(review): the recorded run of this cell reports negative R² for DO and
# Ammonia, i.e. worse than predicting the test-set mean. Treat predictions
# for those targets with caution until the model is revisited.
print("R² scores (DO, Ammonia, pH):", r2)
print("RMSE:", rmse)
print("MAE:", mae)
print("RMSE per target (DO, Ammonia, pH):", rmse_per_target)
print("MAE per target (DO, Ammonia, pH):", mae_per_target)

# Save model
joblib.dump(model, "rf_water_quality_model.pkl")
print("✅ Random Forest model trained and saved as rf_water_quality_model.pkl")

R² scores (DO, Ammonia, pH): [-0.73952811 -3.65262606  0.20842171]
RMSE: 3.2029100520986384
MAE: 1.6313742333524932
✅ Random Forest model trained and saved as rf_water_quality_model.pkl


In [6]:
# Load trained model
model = joblib.load("rf_water_quality_model.pkl")

# One input row (labelled RRO-1 below), in the column order of `feature_cols`.
# Date features are not model inputs, so none are supplied.
new_data = [[
    # B2, B3, B4, B5, B6, B7, B8, B8A, B9, B11, B12
    4098.6994, 3896.2292, 4143.4164, 4194.8789, 4123.6345, 4057.5229,
    4111.7531, 3945.4866, 2564.0069, 1899.2133, 1693.0121,
    # NDVI, NDWI, MNDWI, NDCI
    -0.004, -0.027, 0.345, 0.006,
    # Latitude, Longitude
    16.66561, 81.1385,
]]

# The model was fitted on a DataFrame with named columns. Passing named
# columns keeps inference aligned with the training schema and avoids
# scikit-learn's feature-name mismatch warning for unnamed input.
new_row = pd.DataFrame(new_data, columns=feature_cols)

# Predict DO, Ammonia, pH
prediction = model.predict(new_row)
print("Predicted DO, Ammonia, pH:", prediction)
print("RRO-1",1)

Predicted DO, Ammonia, pH: [[6.33498239 0.18779401 8.26545222]]
RRO-1 1




In [7]:
# Load trained model
model = joblib.load("rf_water_quality_model.pkl")

# One input row (labelled BSK-2 below), in the column order of `feature_cols`.
# Date features are not model inputs, so none are supplied.
new_data = [[
    # B2, B3, B4, B5, B6, B7, B8, B8A, B9, B11, B12
    3637.3840, 3586.4735, 3648.3408, 3599.1971, 3518.4111, 3419.9949,
    3386.1331, 3286.6409, 1851.9159, 1484.9786, 1305.5754,
    # NDVI, NDWI, MNDWI, NDCI
    -0.037, 0.029, 0.414, -0.007,
    # Latitude, Longitude
    16.66566, 81.14022,
]]

# The model was fitted on a DataFrame with named columns. Passing named
# columns keeps inference aligned with the training schema and avoids
# scikit-learn's feature-name mismatch warning for unnamed input.
new_row = pd.DataFrame(new_data, columns=feature_cols)

# Predict DO, Ammonia, pH
prediction = model.predict(new_row)
print("Predicted DO, Ammonia, pH:", prediction)
print("BSK-2",2)

Predicted DO, Ammonia, pH: [[6.15460756 0.17044846 8.22989651]]
BSK-2 2




In [8]:
# Load trained model
model = joblib.load("rf_water_quality_model.pkl")

# One input row (labelled PRA-1 below), in the column order of `feature_cols`.
# Date features are not model inputs, so none are supplied.
# NOTE(review): the band and index values here are identical to those in the
# NRR-1 cell; only Latitude/Longitude differ. Confirm both rows are real
# observations and not a copy-paste leftover.
new_data = [[
    # B2, B3, B4, B5, B6, B7, B8, B8A, B9, B11, B12
    5214.2832, 4666.8344, 5006.3304, 5023.3770, 4959.6395, 4912.6841,
    5157.7651, 4782.6407, 3296.3891, 1831.0283, 1683.0752,
    # NDVI, NDWI, MNDWI, NDCI
    0.015, -0.050, 0.436, 0.002,
    # Latitude, Longitude
    16.65471, 81.15165,
]]

# The model was fitted on a DataFrame with named columns. Passing named
# columns keeps inference aligned with the training schema and avoids
# scikit-learn's feature-name mismatch warning for unnamed input.
new_row = pd.DataFrame(new_data, columns=feature_cols)

# Predict DO, Ammonia, pH
prediction = model.predict(new_row)
print("Predicted DO, Ammonia, pH:", prediction)
print("PRA-1",3)

Predicted DO, Ammonia, pH: [[7.02526255 0.21207636 8.25519422]]
PRA-1 3




In [9]:
# Load trained model
model = joblib.load("rf_water_quality_model.pkl")

# One input row (labelled SRV-1 below), in the column order of `feature_cols`.
# Date features are not model inputs, so none are supplied.
new_data = [[
    # B2, B3, B4, B5, B6, B7, B8, B8A, B9, B11, B12
    3387.7761, 3650.7578, 4144.3326, 4339.2387, 4320.4035, 4330.1200,
    4375.8919, 4265.7768, 3494.4766, 3172.3723, 2833.8446,
    # NDVI, NDWI, MNDWI, NDCI
    0.027, -0.090, 0.070, 0.023,
    # Latitude, Longitude
    16.65365, 81.1481,
]]

# The model was fitted on a DataFrame with named columns. Passing named
# columns keeps inference aligned with the training schema and avoids
# scikit-learn's feature-name mismatch warning for unnamed input.
new_row = pd.DataFrame(new_data, columns=feature_cols)

# Predict DO, Ammonia, pH
prediction = model.predict(new_row)
print("Predicted DO, Ammonia, pH:", prediction)
print("SRV-1",4)

Predicted DO, Ammonia, pH: [[6.53864717 0.1974223  8.21868543]]
SRV-1 4




In [10]:
# Load trained model
model = joblib.load("rf_water_quality_model.pkl")

# One input row (labelled GOW-2 below), in the column order of `feature_cols`.
# Date features are not model inputs, so none are supplied.
new_data = [[
    # B2, B3, B4, B5, B6, B7, B8, B8A, B9, B11, B12
    4468.9337, 4015.7645, 4323.8741, 4394.1953, 4326.4799, 4262.4145,
    4392.4848, 4114.9372, 2910.8076, 1482.2896, 1359.9206,
    # NDVI, NDWI, MNDWI, NDCI
    0.008, -0.045, 0.461, 0.008,
    # Latitude, Longitude
    16.65121, 81.15147,
]]

# The model was fitted on a DataFrame with named columns. Passing named
# columns keeps inference aligned with the training schema and avoids
# scikit-learn's feature-name mismatch warning for unnamed input.
new_row = pd.DataFrame(new_data, columns=feature_cols)

# Predict DO, Ammonia, pH
prediction = model.predict(new_row)
print("Predicted DO, Ammonia, pH:", prediction)
print("GOW-2",5)

Predicted DO, Ammonia, pH: [[6.7252135  0.17077949 8.25128611]]
GOW-2 5




In [11]:
# Load trained model
model = joblib.load("rf_water_quality_model.pkl")

# One input row (labelled NRO-1 below), in the column order of `feature_cols`.
# Date features are not model inputs, so none are supplied.
new_data = [[
    # B2, B3, B4, B5, B6, B7, B8, B8A, B9, B11, B12
    3603.7269, 3400.7966, 3656.5136, 3717.3296, 3654.9173, 3600.7114,
    3720.2049, 3489.7323, 2736.6864, 1466.2095, 1351.1964,
    # NDVI, NDWI, MNDWI, NDCI
    0.009, -0.045, 0.397, 0.008,
    # Latitude, Longitude
    16.62862, 81.14603,
]]

# The model was fitted on a DataFrame with named columns. Passing named
# columns keeps inference aligned with the training schema and avoids
# scikit-learn's feature-name mismatch warning for unnamed input.
new_row = pd.DataFrame(new_data, columns=feature_cols)

# Predict DO, Ammonia, pH
prediction = model.predict(new_row)
print("Predicted DO, Ammonia, pH:", prediction)
print("NRO-1",6)

Predicted DO, Ammonia, pH: [[5.99296952 0.18947199 8.27820421]]
NRO-1 6




In [12]:
# Load trained model
model = joblib.load("rf_water_quality_model.pkl")

# One input row (labelled KVR-3 below), in the column order of `feature_cols`.
# Date features are not model inputs, so none are supplied.
new_data = [[
    # B2, B3, B4, B5, B6, B7, B8, B8A, B9, B11, B12
    5792.0163, 5077.9633, 5440.4658, 5527.3326, 5330.8078, 5109.5509,
    5253.9416, 4843.8942, 3637.8371, 1136.6864, 1091.2091,
    # NDVI, NDWI, MNDWI, NDCI
    -0.017, -0.017, 0.634, 0.008,
    # Latitude, Longitude
    16.62167, 81.13806,
]]

# The model was fitted on a DataFrame with named columns. Passing named
# columns keeps inference aligned with the training schema and avoids
# scikit-learn's feature-name mismatch warning for unnamed input.
new_row = pd.DataFrame(new_data, columns=feature_cols)

# Predict DO, Ammonia, pH
prediction = model.predict(new_row)
print("Predicted DO, Ammonia, pH:", prediction)
print("KVR-3",7)

Predicted DO, Ammonia, pH: [[5.21551537 0.13572776 8.34787079]]
KVR-3 7




In [None]:
# Load trained model
model = joblib.load("rf_water_quality_model.pkl")

# One input row (labelled MUR-1 below), in the column order of `feature_cols`.
# Date features are not model inputs, so none are supplied.
# NOTE(review): every value in this row, including Latitude/Longitude, is
# identical to the row in the KVR-3 cell, and this cell has no recorded
# output. The MUR-1 inputs appear never to have been filled in; replace
# them with the real MUR-1 observation before using the result.
new_data = [[
    # B2, B3, B4, B5, B6, B7, B8, B8A, B9, B11, B12
    5792.0163, 5077.9633, 5440.4658, 5527.3326, 5330.8078, 5109.5509,
    5253.9416, 4843.8942, 3637.8371, 1136.6864, 1091.2091,
    # NDVI, NDWI, MNDWI, NDCI
    -0.017, -0.017, 0.634, 0.008,
    # Latitude, Longitude
    16.62167, 81.13806,
]]

# The model was fitted on a DataFrame with named columns. Passing named
# columns keeps inference aligned with the training schema and avoids
# scikit-learn's feature-name mismatch warning for unnamed input.
new_row = pd.DataFrame(new_data, columns=feature_cols)

# Predict DO, Ammonia, pH
prediction = model.predict(new_row)
print("Predicted DO, Ammonia, pH:", prediction)
print("MUR-1",8)

In [13]:
# Load trained model
model = joblib.load("rf_water_quality_model.pkl")

# One input row (labelled NSR-1 below), in the column order of `feature_cols`.
# Date features are not model inputs, so none are supplied.
new_data = [[
    # B2, B3, B4, B5, B6, B7, B8, B8A, B9, B11, B12
    5708.2642, 4956.5670, 5352.3884, 5416.2271, 5301.3591, 5188.0754,
    5413.0737, 4981.8178, 3342.1074, 1386.3713, 1311.6437,
    # NDVI, NDWI, MNDWI, NDCI
    0.006, -0.044, 0.563, 0.006,
    # Latitude, Longitude
    16.61484, 81.13797,
]]

# The model was fitted on a DataFrame with named columns. Passing named
# columns keeps inference aligned with the training schema and avoids
# scikit-learn's feature-name mismatch warning for unnamed input.
new_row = pd.DataFrame(new_data, columns=feature_cols)

# Predict DO, Ammonia, pH
prediction = model.predict(new_row)
print("Predicted DO, Ammonia, pH:", prediction)
print("NSR-1",9)

Predicted DO, Ammonia, pH: [[5.79450579 0.21024251 8.32712937]]
NSR-1 9




In [14]:
# Load trained model
model = joblib.load("rf_water_quality_model.pkl")

# One input row (labelled NSR-2 below), in the column order of `feature_cols`.
# Date features are not model inputs, so none are supplied.
new_data = [[
    # B2, B3, B4, B5, B6, B7, B8, B8A, B9, B11, B12
    4315.0660, 3914.1667, 4267.8925, 4285.0324, 4245.2866, 4214.0268,
    4394.1985, 4107.3024, 2673.8723, 1440.3527, 1332.6376,
    # NDVI, NDWI, MNDWI, NDCI
    0.015, -0.058, 0.462, 0.002,
    # Latitude, Longitude
    16.61598, 81.14353,
]]

# The model was fitted on a DataFrame with named columns. Passing named
# columns keeps inference aligned with the training schema and avoids
# scikit-learn's feature-name mismatch warning for unnamed input.
new_row = pd.DataFrame(new_data, columns=feature_cols)

# Predict DO, Ammonia, pH
prediction = model.predict(new_row)
print("Predicted DO, Ammonia, pH:", prediction)
print("NSR-2",10)

Predicted DO, Ammonia, pH: [[5.80914289 0.1929532  8.3381223 ]]
NSR-2 10




In [15]:
# Load trained model
model = joblib.load("rf_water_quality_model.pkl")

# One input row (labelled NSR-3 below), in the column order of `feature_cols`.
# Date features are not model inputs, so none are supplied.
new_data = [[
    # B2, B3, B4, B5, B6, B7, B8, B8A, B9, B11, B12
    4575.8495, 4071.5544, 4398.6214, 4399.3641, 4354.9571, 4309.5053,
    4492.9840, 4191.6449, 2654.2657, 1242.9070, 1156.7745,
    # NDVI, NDWI, MNDWI, NDCI
    0.011, -0.049, 0.532, 0.000,
    # Latitude, Longitude
    16.60964, 81.14461,
]]

# The model was fitted on a DataFrame with named columns. Passing named
# columns keeps inference aligned with the training schema and avoids
# scikit-learn's feature-name mismatch warning for unnamed input.
new_row = pd.DataFrame(new_data, columns=feature_cols)

# Predict DO, Ammonia, pH
prediction = model.predict(new_row)
print("Predicted DO, Ammonia, pH:", prediction)
print("NSR-3",11)

Predicted DO, Ammonia, pH: [[5.61838161 0.22189237 8.31584286]]
NSR-3 11




In [16]:
# Load trained model
model = joblib.load("rf_water_quality_model.pkl")

# One input row (labelled NRR-1 below), in the column order of `feature_cols`.
# Date features are not model inputs, so none are supplied.
# NOTE(review): the band and index values here are identical to those in the
# PRA-1 cell; only Latitude/Longitude differ. Confirm both rows are real
# observations and not a copy-paste leftover.
new_data = [[
    # B2, B3, B4, B5, B6, B7, B8, B8A, B9, B11, B12
    5214.2832, 4666.8344, 5006.3304, 5023.3770, 4959.6395, 4912.6841,
    5157.7651, 4782.6407, 3296.3891, 1831.0283, 1683.0752,
    # NDVI, NDWI, MNDWI, NDCI
    0.015, -0.050, 0.436, 0.002,
    # Latitude, Longitude
    16.65412, 81.15436,
]]

# The model was fitted on a DataFrame with named columns. Passing named
# columns keeps inference aligned with the training schema and avoids
# scikit-learn's feature-name mismatch warning for unnamed input.
new_row = pd.DataFrame(new_data, columns=feature_cols)

# Predict DO, Ammonia, pH
prediction = model.predict(new_row)
print("Predicted DO, Ammonia, pH:", prediction)
print("NRR-1",12)

Predicted DO, Ammonia, pH: [[7.00709588 0.21207636 8.25504978]]
NRR-1 12




In [17]:
# Load trained model
model = joblib.load("rf_water_quality_model.pkl")

# One input row (labelled NRR-2 below), in the column order of `feature_cols`.
# Date features are not model inputs, so none are supplied.
new_data = [[
    # B2, B3, B4, B5, B6, B7, B8, B8A, B9, B11, B12
    2674.0702, 2979.4659, 3349.5621, 3580.2002, 3540.4587, 3555.9253,
    3538.7520, 3487.8818, 3038.6966, 2906.3157, 2596.1822,
    # NDVI, NDWI, MNDWI, NDCI
    0.027, -0.086, 0.012, 0.033,
    # Latitude, Longitude
    16.64802, 81.14733,
]]

# The model was fitted on a DataFrame with named columns. Passing named
# columns keeps inference aligned with the training schema and avoids
# scikit-learn's feature-name mismatch warning for unnamed input.
new_row = pd.DataFrame(new_data, columns=feature_cols)

# Predict DO, Ammonia, pH
prediction = model.predict(new_row)
print("Predicted DO, Ammonia, pH:", prediction)
print("NRR-2",13)

Predicted DO, Ammonia, pH: [[6.3074982  0.19872626 8.23489767]]
NRR-2 13




In [18]:
# Load trained model
model = joblib.load("rf_water_quality_model.pkl")

# One input row (labelled NRR-2 below), in the column order of `feature_cols`.
# Date features are not model inputs, so none are supplied.
# NOTE(review): this cell repeats the previous NRR-2 cell exactly (same
# inputs, same label and index 13). It is either redundant or was meant to
# hold a different pond's observation; confirm and remove or correct.
new_data = [[
    # B2, B3, B4, B5, B6, B7, B8, B8A, B9, B11, B12
    2674.0702, 2979.4659, 3349.5621, 3580.2002, 3540.4587, 3555.9253,
    3538.7520, 3487.8818, 3038.6966, 2906.3157, 2596.1822,
    # NDVI, NDWI, MNDWI, NDCI
    0.027, -0.086, 0.012, 0.033,
    # Latitude, Longitude
    16.64802, 81.14733,
]]

# The model was fitted on a DataFrame with named columns. Passing named
# columns keeps inference aligned with the training schema and avoids
# scikit-learn's feature-name mismatch warning for unnamed input.
new_row = pd.DataFrame(new_data, columns=feature_cols)

# Predict DO, Ammonia, pH
prediction = model.predict(new_row)
print("Predicted DO, Ammonia, pH:", prediction)
print("NRR-2",13)

Predicted DO, Ammonia, pH: [[6.3074982  0.19872626 8.23489767]]
NRR-2 13




In [19]:
# Load trained model
model = joblib.load("rf_water_quality_model.pkl")

# One input row (labelled PNR-1 below), in the column order of `feature_cols`.
# Date features are not model inputs, so none are supplied.
new_data = [[
    # B2, B3, B4, B5, B6, B7, B8, B8A, B9, B11, B12
    3831.1187, 3692.6655, 3975.7554, 3980.6104, 3938.6512, 3886.0141,
    4011.7685, 3783.4054, 2436.6249, 1729.2840, 1555.6499,
    # NDVI, NDWI, MNDWI, NDCI
    0.005, -0.041, 0.362, 0.001,
    # Latitude, Longitude
    16.6515, 81.13997,
]]

# The model was fitted on a DataFrame with named columns. Passing named
# columns keeps inference aligned with the training schema and avoids
# scikit-learn's feature-name mismatch warning for unnamed input.
new_row = pd.DataFrame(new_data, columns=feature_cols)

# Predict DO, Ammonia, pH
prediction = model.predict(new_row)
print("Predicted DO, Ammonia, pH:", prediction)
print("PNR-1",14)

Predicted DO, Ammonia, pH: [[6.83244845 0.19524461 8.22782722]]
PNR-1 14




In [20]:
# Load trained model
model = joblib.load("rf_water_quality_model.pkl")

# One input row (labelled SRI-1 below), in the column order of `feature_cols`.
# Date features are not model inputs, so none are supplied.
new_data = [[
    # B2, B3, B4, B5, B6, B7, B8, B8A, B9, B11, B12
    4663.9357, 3945.0903, 4227.4903, 4325.4136, 4230.2954, 4230.0875,
    4423.7850, 4105.6287, 2800.4324, 1825.0028, 1670.1542,
    # NDVI, NDWI, MNDWI, NDCI
    0.023, -0.057, 0.367, 0.011,
    # Latitude, Longitude
    16.644157, 81.119572,
]]

# The model was fitted on a DataFrame with named columns. Passing named
# columns keeps inference aligned with the training schema and avoids
# scikit-learn's feature-name mismatch warning for unnamed input.
new_row = pd.DataFrame(new_data, columns=feature_cols)

# Predict DO, Ammonia, pH
prediction = model.predict(new_row)
print("Predicted DO, Ammonia, pH:", prediction)
print("SRI-1",15)

Predicted DO, Ammonia, pH: [[7.09127192 0.16232152 8.31463302]]
SRI-1 15




In [21]:
# Load trained model
model = joblib.load("rf_water_quality_model.pkl")

# One input row (labelled SRI-2 below), in the column order of `feature_cols`.
# Date features are not model inputs, so none are supplied.
new_data = [[
    # B2, B3, B4, B5, B6, B7, B8, B8A, B9, B11, B12
    4659.8304, 3949.7018, 4235.8154, 4332.7328, 4239.6208, 4238.9801,
    4432.2821, 4115.2075, 2794.8803, 1826.6917, 1671.4635,
    # NDVI, NDWI, MNDWI, NDCI
    0.023, -0.058, 0.368, 0.011,
    # Latitude, Longitude
    16.64767, 81.11736,
]]

# The model was fitted on a DataFrame with named columns. Passing named
# columns keeps inference aligned with the training schema and avoids
# scikit-learn's feature-name mismatch warning for unnamed input.
new_row = pd.DataFrame(new_data, columns=feature_cols)

# Predict DO, Ammonia, pH
prediction = model.predict(new_row)
print("Predicted DO, Ammonia, pH:", prediction)
print("SRI-2",16)

Predicted DO, Ammonia, pH: [[7.40507683 0.1460283  8.31161079]]
SRI-2 16




In [22]:
# Load trained model
model = joblib.load("rf_water_quality_model.pkl")

# One input row (labelled VVR-1 below), in the column order of `feature_cols`.
# Date features are not model inputs, so none are supplied.
new_data = [[
    # B2, B3, B4, B5, B6, B7, B8, B8A, B9, B11, B12
    3510.6668, 3470.3968, 3735.7831, 3797.3173, 3745.5880, 3703.1392,
    3747.9541, 3614.3919, 2507.1484, 2042.8272, 1820.6283,
    # NDVI, NDWI, MNDWI, NDCI
    0.002, -0.038, 0.259, 0.008,
    # Latitude, Longitude
    16.66541, 81.13716,
]]

# The model was fitted on a DataFrame with named columns. Passing named
# columns keeps inference aligned with the training schema and avoids
# scikit-learn's feature-name mismatch warning for unnamed input.
new_row = pd.DataFrame(new_data, columns=feature_cols)

# Predict DO, Ammonia, pH
prediction = model.predict(new_row)
print("Predicted DO, Ammonia, pH:", prediction)
print("VVR-1",17)

Predicted DO, Ammonia, pH: [[6.46307001 0.19194231 8.24072667]]
VVR-1 17




In [23]:
# Load trained model
model = joblib.load("rf_water_quality_model.pkl")

# One input row (labelled AKR-1 below), in the column order of `feature_cols`.
# Date features are not model inputs, so none are supplied.
new_data = [[
    # B2, B3, B4, B5, B6, B7, B8, B8A, B9, B11, B12
    1868.3120, 2621.3881, 3190.4114, 3488.9784, 3514.2213, 3596.1666,
    3522.3620, 3605.9387, 3611.1335, 4313.3905, 3826.4941,
    # NDVI, NDWI, MNDWI, NDCI
    0.049, -0.147, -0.244, 0.045,
    # Latitude, Longitude
    16.672, 81.1734,
]]

# The model was fitted on a DataFrame with named columns. Passing named
# columns keeps inference aligned with the training schema and avoids
# scikit-learn's feature-name mismatch warning for unnamed input.
new_row = pd.DataFrame(new_data, columns=feature_cols)

# Predict DO, Ammonia, pH
prediction = model.predict(new_row)
print("Predicted DO, Ammonia, pH:", prediction)
print("AKR-1",18)

Predicted DO, Ammonia, pH: [[4.63979673 0.1356376  8.15996516]]
AKR-1 18




In [24]:
# Load trained model
model = joblib.load("rf_water_quality_model.pkl")

# One input row (labelled AKR-2 below), in the column order of `feature_cols`.
# Date features are not model inputs, so none are supplied.
new_data = [[
    # B2, B3, B4, B5, B6, B7, B8, B8A, B9, B11, B12
    993.2923, 889.5428, 886.0716, 876.9367, 853.4986, 877.6728,
    860.1958, 862.6825, 674.4420, 850.8895, 756.8715,
    # NDVI, NDWI, MNDWI, NDCI
    -0.015, 0.017, 0.022, -0.005,
    # Latitude, Longitude
    16.671, 81.17259,
]]

# The model was fitted on a DataFrame with named columns. Passing named
# columns keeps inference aligned with the training schema and avoids
# scikit-learn's feature-name mismatch warning for unnamed input.
new_row = pd.DataFrame(new_data, columns=feature_cols)

# Predict DO, Ammonia, pH
prediction = model.predict(new_row)
print("Predicted DO, Ammonia, pH:", prediction)
print("AKR-2",19)

Predicted DO, Ammonia, pH: [[4.91521667 0.24547025 8.23466706]]
AKR-2 19




In [25]:
# Load trained model
model = joblib.load("rf_water_quality_model.pkl")

# One input row (labelled SRM-1 below), in the column order of `feature_cols`.
# Date features are not model inputs, so none are supplied.
new_data = [[
    # B2, B3, B4, B5, B6, B7, B8, B8A, B9, B11, B12
    4849.6874, 4632.2757, 4903.1041, 4871.4120, 4784.1784, 4705.9642,
    4911.7397, 4559.0781, 3207.2023, 1668.7977, 1517.2955,
    # NDVI, NDWI, MNDWI, NDCI
    0.001, -0.029, 0.470, -0.003,
    # Latitude, Longitude
    16.67107, 81.16655,
]]

# The model was fitted on a DataFrame with named columns. Passing named
# columns keeps inference aligned with the training schema and avoids
# scikit-learn's feature-name mismatch warning for unnamed input.
new_row = pd.DataFrame(new_data, columns=feature_cols)

# Predict DO, Ammonia, pH
prediction = model.predict(new_row)
print("Predicted DO, Ammonia, pH:", prediction)
print("SRM-1",20)

Predicted DO, Ammonia, pH: [[6.65040401 0.19398284 8.2629111 ]]
SRM-1 20


