# **DATA PREPROCESSING**

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

# Load the datasets
gunao = pd.read_csv('gunao_surface.csv')
tikob = pd.read_csv('tikub_surface_bottom.csv')

# Filter for surface data in tikob dataset
tikob_surface = tikob[tikob['COLLECTION'] == 'Surface']

# Columns to exclude
columns_to_exclude = ['DATE', 'MONTH', 'YEAR', 'STATION', 'REPLICATE', 'COLLECTION', 'Latitude', 'Longtitude']

# Filter columns for both datasets
tikob_fil = tikob_surface.drop(columns=columns_to_exclude)
gunao_fil = gunao.drop(columns=columns_to_exclude)

# Define feature columns and target column
feature_columns = [
    'pH', 'DO (mg/L)', 'TDS (mg/L)', 'Salinity (ppt)', 'Cond (uS/cm)', 'Temp (°C)', 'TSS (mg/L)',
    'NO2 (ppm)', 'NO3 (ppm)', 'PO4  (ppm)', 'NH4 (ppm)', 'TN (ppm)', 'TP (ppm)', 'BGA-PC (ug/L)',
    'Chlorophyll (ug/L)', 'Turbidity (FNU)', 'Coliform (CFU/100ml)', 'Cu (ppm)', 'Fe (ppm)',
    'Mn(ppm)', 'Zn(ppm)', 'Cr(ppm)', 'Cd(ppm)', 'Hg(ppm)', 'As(ppm)', 'Pb(ppm)'
]
target_column = 'BOD (mg/L)'

# Extract features and target from both datasets
X_tikob = tikob_fil[feature_columns]
y_tikob = tikob_fil[target_column]
X_gunao = gunao_fil[feature_columns]
y_gunao = gunao_fil[target_column]

# Combine the datasets
X_combined = pd.concat([X_tikob, X_gunao], axis=0)
y_combined = pd.concat([y_tikob, y_gunao], axis=0)

KeyError: "Registering two gradient with name 'ReduceDataset'! (Previous registration was in register /usr/local/lib/python3.10/dist-packages/tensorflow/python/framework/registry.py:65)"

# **TRAINING THE MODELS**

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

# Load the datasets
gunao = pd.read_csv('gunao_surface.csv')
tikob = pd.read_csv('tikub_surface_bottom.csv')

# Filter for surface data in tikob dataset
tikob_surface = tikob[tikob['COLLECTION'] == 'Surface']

# Columns to exclude
columns_to_exclude = ['DATE', 'MONTH', 'YEAR', 'STATION', 'REPLICATE', 'COLLECTION', 'Latitude', 'Longtitude']

# Filter columns for both datasets
tikob_fil = tikob_surface.drop(columns=columns_to_exclude)
gunao_fil = gunao.drop(columns=columns_to_exclude)

# Define feature columns and target column
feature_columns = [
    'pH', 'DO (mg/L)', 'TDS (mg/L)', 'Salinity (ppt)', 'Cond (uS/cm)', 'Temp (°C)', 'TSS (mg/L)',
    'NO2 (ppm)', 'NO3 (ppm)', 'PO4  (ppm)', 'NH4 (ppm)', 'TN (ppm)', 'TP (ppm)', 'BGA-PC (ug/L)',
    'Chlorophyll (ug/L)', 'Turbidity (FNU)', 'Coliform (CFU/100ml)', 'Cu (ppm)', 'Fe (ppm)',
    'Mn(ppm)', 'Zn(ppm)', 'Cr(ppm)', 'Cd(ppm)', 'Hg(ppm)', 'As(ppm)', 'Pb(ppm)'
]
target_column = 'BOD (mg/L)'

# Extract features and target from both datasets
X_tikob = tikob_fil[feature_columns]
y_tikob = tikob_fil[target_column]
X_gunao = gunao_fil[feature_columns]
y_gunao = gunao_fil[target_column]

# Combine the datasets
X_combined = pd.concat([X_tikob, X_gunao], axis=0)
y_combined = pd.concat([y_tikob, y_gunao], axis=0)

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_combined)

# Train-test split for full dataset
X_train_full, X_test_full, y_train_full, y_test_full = train_test_split(X_scaled, y_combined, test_size=0.2, random_state=1)

# Define the ANN model function
def create_model():
    model = Sequential()
    model.add(Dense(64, input_dim=X_train_full.shape[1], activation='relu'))
    model.add(Dense(64, activation='sigmoid'))
    model.add(Dense(1, activation='linear'))
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')
    return model

# Create the model
model = create_model()

# Train the model
model.fit(X_train_full, y_train_full, epochs=100, batch_size=20, verbose=1)

# Evaluate on test set
test_predictions_full = model.predict(X_test_full).flatten()
test_mse_full = mean_squared_error(y_test_full, test_predictions_full)
test_rmse_full = np.sqrt(test_mse_full)
test_mae_full = mean_absolute_error(y_test_full, test_predictions_full)
test_r2_full = r2_score(y_test_full, test_predictions_full)
test_mape_full = np.mean(np.abs((y_test_full - test_predictions_full) / y_test_full)) * 100

print('Test Results on Full Dataset:')
print('MSE:', test_mse_full)
print('RMSE:', test_rmse_full)
print('MAE:', test_mae_full)
print('R^2:', test_r2_full)
print('MAPE:', test_mape_full, '%')

# Save the model
model.save('ANN_model.h5')


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 2.9091
Epoch 2/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1.6839 
Epoch 3/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1.3351
Epoch 4/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1.1957 
Epoch 5/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1.0125 
Epoch 6/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1.0086 
Epoch 7/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.9709 
Epoch 8/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.6957 
Epoch 9/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.8994 
Epoch 10/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss:



Test Results on Full Dataset:
MSE: 0.3200016982884289
RMSE: 0.565686926036327
MAE: 0.4368241270306784
R^2: 0.8414652132824783
MAPE: 39.6147447786252 %


In [None]:
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam, Adamax, Nadam, Ftrl

# Combine the datasets
X_combined = pd.concat([X_tikob, X_gunao], axis=0)
y_combined = pd.concat([y_tikob, y_gunao], axis=0)

# Split the combined dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_combined, y_combined, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build the MLP model
model = Sequential()
model.add(Dense(128, input_dim=X_train_scaled.shape[1], activation='relu'))
model.add(Dense(64, activation='tanh'))
model.add(Dense(64, activation='tanh'))
model.add(Dense(1, activation='linear'))

# Compile the model using mean squared error loss
model.compile(loss='mean_squared_error', optimizer=Adam(learning_rate=0.01, epsilon= 1e-8), metrics=['mean_absolute_error'])

# Train the model
history = model.fit(X_train_scaled, y_train, epochs=500, batch_size=50, validation_split=0.2)

# Evaluate the model
loss, mae = model.evaluate(X_test_scaled, y_test)
print(f"Test Loss (MSE): {loss}")

# Predict on test set
y_pred = model.predict(X_test_scaled)

# Calculate additional metrics
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
mape = np.mean(np.abs((y_test - y_pred.flatten()) / y_test)) * 100

# Print metrics
print(f"MSE : {mse}")
print(f"RMSE: {rmse}")
print(f"MAE : {mae}")
print(f"R2  : {r2}")
print(f"MAPE: {mape}%")

model.save('no_opt_mlp_model.h5')
print("Model saved to trained_mlp_model(ADAM).h5")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 53ms/step - loss: 4.1601 - mean_absolute_error: 1.6247 - val_loss: 0.6691 - val_mean_absolute_error: 0.6336
Epoch 2/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 1.0282 - mean_absolute_error: 0.7886 - val_loss: 0.7463 - val_mean_absolute_error: 0.6629
Epoch 3/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 0.7436 - mean_absolute_error: 0.6312 - val_loss: 0.6881 - val_mean_absolute_error: 0.6370
Epoch 4/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.5341 - mean_absolute_error: 0.5187 - val_loss: 0.6852 - val_mean_absolute_error: 0.6462
Epoch 5/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 0.4661 - mean_absolute_error: 0.5132 - val_loss: 0.5158 - val_mean_absolute_error: 0.5417
Epoch 6/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - loss



[1m1/3[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m0s[0m 65ms/step



[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step




MSE : 0.4318443394201852
RMSE: 0.6571486433221826
MAE : 0.44109487533569336
R2  : 0.8282191412144658
MAPE: 32.94821248805299%
Model saved to trained_mlp_model(ADAM).h5


In [None]:
pip install scikit-optimize

Collecting scikit-optimize
  Downloading scikit_optimize-0.10.2-py2.py3-none-any.whl.metadata (9.7 kB)
Collecting pyaml>=16.9 (from scikit-optimize)
  Downloading pyaml-24.7.0-py3-none-any.whl.metadata (11 kB)
Downloading scikit_optimize-0.10.2-py2.py3-none-any.whl (107 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m107.8/107.8 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyaml-24.7.0-py3-none-any.whl (24 kB)
Installing collected packages: pyaml, scikit-optimize
Successfully installed pyaml-24.7.0 scikit-optimize-0.10.2


In [None]:
pip install scikeras

Collecting scikeras
  Downloading scikeras-0.13.0-py3-none-any.whl.metadata (3.1 kB)
Collecting scikit-learn>=1.4.2 (from scikeras)
  Downloading scikit_learn-1.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Downloading scikeras-0.13.0-py3-none-any.whl (26 kB)
Downloading scikit_learn-1.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.4/13.4 MB[0m [31m19.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: scikit-learn, scikeras
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 1.3.2
    Uninstalling scikit-learn-1.3.2:
      Successfully uninstalled scikit-learn-1.3.2
Successfully installed scikeras-0.13.0 scikit-learn-1.5.1


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np
import pickle

# Combine the datasets
X_combined = pd.concat([X_tikob, X_gunao], axis=0)
y_combined = pd.concat([y_tikob, y_gunao], axis=0)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_combined, y_combined, test_size=0.2, random_state=1)

# Define the hyperparameters
n_estimators = 100
max_depth = None
min_samples_split = 2
min_samples_leaf = 1

# Train the Random Forest model
rf = RandomForestRegressor(
    n_estimators=n_estimators,
    max_depth=max_depth,
    min_samples_split=min_samples_split,
    min_samples_leaf=min_samples_leaf,
    random_state=42
)

rf.fit(X_train, y_train)

# Evaluate the model
predictions = rf.predict(X_test)
mse = mean_squared_error(y_test, predictions)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, predictions)
r2 = r2_score(y_test, predictions)
mape = np.mean(np.abs((y_test - predictions) / y_test)) * 100

print('Test MSE:', mse)
print('Test RMSE:', rmse)
print('Test MAE:', mae)
print('Test R^2:', r2)
print('Test MAPE:', mape)

# Save the trained model as a .pkl file
with open('random_forest_model.pkl', 'wb') as model_file:
    pickle.dump(rf, model_file)


Test MSE: 0.28489952243589745
Test RMSE: 0.5337597984448599
Test MAE: 0.3651115384615385
Test R^2: 0.8588554833712518
Test MAPE: 42.38896439425764


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np
import pickle

# Combine the datasets
X_combined = pd.concat([X_tikob, X_gunao], axis=0)
y_combined = pd.concat([y_tikob, y_gunao], axis=0)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_combined, y_combined, test_size=0.2, random_state=2)

# Define the hyperparameters
C = 1
epsilon = 0.5
kernel = 'linear'

# Train the SVR model
svr = SVR(
    C=C,
    epsilon=epsilon,
    kernel=kernel,
    tol=0.01,
    shrinking=False
)

svr.fit(X_train, y_train)

# Evaluate the model
predictions = svr.predict(X_test)
mse = mean_squared_error(y_test, predictions)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, predictions)
r2 = r2_score(y_test, predictions)
mape = np.mean(np.abs((y_test - predictions) / y_test)) * 100

print('Test MSE:', mse)
print('Test RMSE:', rmse)
print('Test MAE:', mae)
print('Test R^2:', r2)
print('Test MAPE:', mape)

# Save the trained model as a .pkl file
with open('svr_model.pkl', 'wb') as model_file:
    pickle.dump(svr, model_file)


Test MSE: 0.9711557115505729
Test RMSE: 0.9854723291653464
Test MAE: 0.7463335425409465
Test R^2: 0.4338875489577164
Test MAPE: 68.80460193060578


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np
import pickle

# Combine the datasets
X_combined = pd.concat([X_tikob, X_gunao], axis=0)
y_combined = pd.concat([y_tikob, y_gunao], axis=0)

best_mse = float('inf')
best_model = None
best_seed = None

# Iterate over a range of random seeds
for seed in range(5000):
    print(seed)
    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X_combined, y_combined, test_size=0.2, random_state=seed)

    # Train the Linear Regression model
    linear_reg = LinearRegression()
    linear_reg.fit(X_train, y_train)

    # Evaluate the model
    predictions = linear_reg.predict(X_test)
    mse = mean_squared_error(y_test, predictions)

    # Check if this model is better than the previous best
    if mse < best_mse:
        best_mse = mse
        best_model = linear_reg
        best_seed = seed

# Evaluate the best model with all metrics
best_predictions = best_model.predict(X_test)
best_rmse = np.sqrt(best_mse)
best_mae = mean_absolute_error(y_test, best_predictions)
best_r2 = r2_score(y_test, best_predictions)
best_mape = np.mean(np.abs((y_test - best_predictions) / y_test)) * 100

print('Best Seed:', best_seed)
print('Best Test MSE:', best_mse)
print('Best Test RMSE:', best_rmse)
print('Best Test MAE:', best_mae)
print('Best Test R^2:', best_r2)
print('Best Test MAPE:', best_mape)

# Save the best model as a .pkl file
with open('best_linear_regression_model.pkl', 'wb') as model_file:
    pickle.dump(best_model, model_file)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
2

# **TESTING MODELS**

In [None]:
# import pandas as pd
# import joblib

# # Function to load the model and predict BOD from a CSV file
# def predict_bod_from_csv(input_csv):
#     # Load the trained model
#     model = joblib.load('/content/svr_model.pkl')  # Use the path to your saved model

#     # Define the feature columns
#     feature_columns = [
#         'pH', 'DO (mg/L)', 'TDS (mg/L)', 'Salinity (ppt)', 'Cond (uS/cm)', 'Temp (°C)', 'TSS (mg/L)',
#         'NO2 (ppm)', 'NO3 (ppm)', 'PO4  (ppm)', 'NH4 (ppm)', 'TN (ppm)', 'TP (ppm)', 'BGA-PC (ug/L)',
#         'Chlorophyll (ug/L)', 'Turbidity (FNU)', 'Coliform (CFU/100ml)', 'Cu (ppm)', 'Fe (ppm)',
#         'Mn(ppm)', 'Zn(ppm)', 'Cr(ppm)', 'Cd(ppm)', 'Hg(ppm)', 'As(ppm)', 'Pb(ppm)'
#     ]

#     # Read the input data from CSV
#     input_df = pd.read_csv(input_csv)

#     # Ensure the input data has all necessary columns
#     missing_cols = set(feature_columns) - set(input_df.columns)
#     if missing_cols:
#         raise ValueError(f"Missing columns in input data: {missing_cols}")

#     # Predict BOD
#     predictions = model.predict(input_df[feature_columns])

#     # Print only the predictions
#     for prediction in predictions:
#         print(prediction)

# # Example usage
# input_csv = 'Book1.csv'  # Replace with the path to your CSV file
# predict_bod_from_csv(input_csv)


2.718974244498855


In [None]:
# import pandas as pd
# import numpy as np
# from sklearn.preprocessing import StandardScaler
# from tensorflow.keras.models import load_model

# # Load the saved model
# model = load_model('/content/no_opt_mlp_model.h5')

# # Load new input data from a CSV file
# new_input_data = pd.read_csv('Book1.csv')

# # Define the feature columns (should be the same as the ones used in training)
# feature_columns = [
#     'pH', 'DO (mg/L)', 'TDS (mg/L)', 'Salinity (ppt)', 'Cond (uS/cm)', 'Temp (°C)', 'TSS (mg/L)',
#     'NO2 (ppm)', 'NO3 (ppm)', 'PO4  (ppm)', 'NH4 (ppm)', 'TN (ppm)', 'TP (ppm)', 'BGA-PC (ug/L)',
#     'Chlorophyll (ug/L)', 'Turbidity (FNU)', 'Coliform (CFU/100ml)', 'Cu (ppm)', 'Fe (ppm)',
#     'Mn(ppm)', 'Zn(ppm)', 'Cr(ppm)', 'Cd(ppm)', 'Hg(ppm)', 'As(ppm)', 'Pb(ppm)'
# ]

# # Extract features from the new input data
# X_new = new_input_data[feature_columns]

# # Load the scaler used for standardization
# scaler = StandardScaler()

# # Assume the scaler has been previously fitted to the training data
# # Here we fit the scaler on the combined original training data as an example
# # In practice, you should load the already fitted scaler from your training phase
# combined_training_data = pd.concat([tikob_fil[feature_columns], gunao_fil[feature_columns]], axis=0)
# scaler.fit(combined_training_data)

# # Standardize the new input data
# X_new_scaled = scaler.transform(X_new)

# # Make predictions using the loaded model
# predictions = model.predict(X_new_scaled).flatten()

# # Output predictions to the console
# print('Predictions for new input data:')
# print(predictions)




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step
Predictions for new input data:
[1.0552795]


# **PREDICTION TO CSV PICKLE MODELS**

# **Support Vector Regression**

## **Genetics Algorithm**

In [None]:
import pandas as pd
import joblib

# Function to load the model and predict BOD from two CSV files
def predict_bod_from_two_csvs(csv1, csv2, output_csv, sample_size=15):
    # Load the trained model
    model = joblib.load('/content/All_Shit/SVR/SVR_Genetics.pkl')  # Use the path to your saved model

    # Define the feature columns
    feature_columns = [
        'pH', 'DO (mg/L)', 'TDS (mg/L)', 'Salinity (ppt)', 'Cond (uS/cm)', 'Temp (°C)', 'TSS (mg/L)',
        'NO2 (ppm)', 'NO3 (ppm)', 'PO4  (ppm)', 'NH4 (ppm)', 'TN (ppm)', 'TP (ppm)', 'BGA-PC (ug/L)',
        'Chlorophyll (ug/L)', 'Turbidity (FNU)', 'Coliform (CFU/100ml)', 'Cu (ppm)', 'Fe (ppm)',
        'Mn(ppm)', 'Zn(ppm)', 'Cr(ppm)', 'Cd(ppm)', 'Hg(ppm)', 'As(ppm)', 'Pb(ppm)'
    ]

    # Read both input data CSVs
    df1 = pd.read_csv(csv1)
    df2 = pd.read_csv(csv2)

    # Ensure both input data have all necessary columns
    missing_cols_df1 = set(feature_columns) - set(df1.columns)
    missing_cols_df2 = set(feature_columns) - set(df2.columns)
    if missing_cols_df1:
        raise ValueError(f"Missing columns in first input data: {missing_cols_df1}")
    if missing_cols_df2:
        raise ValueError(f"Missing columns in second input data: {missing_cols_df2}")

    # Sample 15 random rows from both dataframes
    sampled_df1 = df1.sample(n=sample_size, random_state=40)
    sampled_df2 = df2.sample(n=sample_size, random_state=40)

    # Concatenate the two sampled dataframes
    combined_df = pd.concat([sampled_df1, sampled_df2], ignore_index=True)

    # Check if the actual BOD column exists
    if 'BOD (mg/L)' not in combined_df.columns:
        raise ValueError("The input data must contain an 'BOD (mg/L)' column for actual values.")

    # Predict BOD for the combined data (only feature columns)
    predictions = model.predict(combined_df[feature_columns])

    # Add predictions to the combined DataFrame
    combined_df['Predicted_BOD'] = predictions

    # Keep the actual BOD values alongside predictions
    output_df = combined_df[['BOD (mg/L)', 'Predicted_BOD']]

    # Save the DataFrame with actual and predicted BOD to a new CSV file
    output_df.to_csv(output_csv, index=False)
    print(f"Predictions saved to {output_csv}")

# Example usage
csv1 = '/content/tikub_surface_bottom.csv'  # First input CSV file
csv2 = '/content/gunao_surface.csv'  # Second input CSV file
output_csv = '/content/All_Shit/Exported_CSV/Combined_Pred_SVR_GA.csv'  # Output CSV file name
predict_bod_from_two_csvs(csv1, csv2, output_csv)


Predictions saved to /content/All_Shit/Exported_CSV/Combined_Pred_SVR_GA.csv


## **Tiered Algorithm**

In [None]:
import pandas as pd
import joblib

# Function to load the model and predict BOD from two CSV files
def predict_bod_from_two_csvs(csv1, csv2, output_csv, sample_size=15):
    # Load the trained model
    model = joblib.load('/content/All_Shit/SVR/SVR_Tiered.pkl')  # Use the path to your saved model

    # Define the feature columns
    feature_columns = [
        'pH', 'DO (mg/L)', 'TDS (mg/L)', 'Salinity (ppt)', 'Cond (uS/cm)', 'Temp (°C)', 'TSS (mg/L)',
        'NO2 (ppm)', 'NO3 (ppm)', 'PO4  (ppm)', 'NH4 (ppm)', 'TN (ppm)', 'TP (ppm)', 'BGA-PC (ug/L)',
        'Chlorophyll (ug/L)', 'Turbidity (FNU)', 'Coliform (CFU/100ml)', 'Cu (ppm)', 'Fe (ppm)',
        'Mn(ppm)', 'Zn(ppm)', 'Cr(ppm)', 'Cd(ppm)', 'Hg(ppm)', 'As(ppm)', 'Pb(ppm)'
    ]

    # Read both input data CSVs
    df1 = pd.read_csv(csv1)
    df2 = pd.read_csv(csv2)

    # Ensure both input data have all necessary columns
    missing_cols_df1 = set(feature_columns) - set(df1.columns)
    missing_cols_df2 = set(feature_columns) - set(df2.columns)
    if missing_cols_df1:
        raise ValueError(f"Missing columns in first input data: {missing_cols_df1}")
    if missing_cols_df2:
        raise ValueError(f"Missing columns in second input data: {missing_cols_df2}")

    # Sample 15 random rows from both dataframes
    sampled_df1 = df1.sample(n=sample_size, random_state=41)
    sampled_df2 = df2.sample(n=sample_size, random_state=41)

    # Print sampled data for debugging
    print("Sampled data from first CSV:")
    print(sampled_df1.head())
    print("Sampled data from second CSV:")
    print(sampled_df2.head())

    # Concatenate the two sampled dataframes
    combined_df = pd.concat([sampled_df1, sampled_df2], ignore_index=True)

    # Predict BOD for the combined data
    predictions = model.predict(combined_df[feature_columns])

    # Print predictions for debugging
    print("Predictions:")
    print(predictions)

    # Add predictions to the combined DataFrame
    combined_df['Predicted_BOD'] = predictions

    # Save the combined DataFrame with predictions to a new CSV file
    combined_df.to_csv(output_csv, index=False)
    print(f"Predictions saved to {output_csv}")

# Example usage
csv1 = '/content/tikub_surface_bottom.csv'  # First input CSV file
csv2 = '/content/gunao_surface.csv'  # Second input CSV file
output_csv = '/content/All_Shit/Exported_CSV/Combined_Pred_SVR_TA.csv'  # Output CSV file name
predict_bod_from_two_csvs(csv1, csv2, output_csv)


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Sampled data from first CSV:
       pH  DO (mg/L)  TDS (mg/L)  Salinity (ppt)  Cond (uS/cm)  Temp (°C)  \
278  7.33       1.26      114.20            0.12        228.00       26.4   
341  7.27       3.10      121.20            0.12        242.00       26.5   
364  7.04       2.11      125.20            0.20        248.00       26.5   
61   7.45       0.42      108.16            0.11        217.07       26.7   
261  7.34       0.52      107.80            0.11        216.00       26.8   

     BOD (mg/L)  TSS (mg/L)  NO2 (ppm)  NO3 (ppm)  ...  As(ppm)  Pb(ppm)  \
278        0.83      0.0018      0.048      0.041  ...      3.8      4.7   
341        1.12      0.0035      0.047      0.063  ...      2.9      4.6   
364        1.13      0.0143      0.034      0.033  ...      3.4      4.8   
61         1.94      0.0388      0.042      0.044  ...      3.7      3.6   
261        2.31      0.0300      0.048      0.046  ...      3.8      3.7   

          DATE     MONTH  YEAR  STATION  REPLICATE 

## **Least Square Algorithm**

In [None]:
import pandas as pd
import joblib

# Function to load the model and predict BOD from two CSV files
def predict_bod_from_two_csvs(csv1, csv2, output_csv, sample_size=15):
    # Load the trained model
    model = joblib.load('/content/All_Shit/SVR/SVR_Least.pkl')  # Use the path to your saved model

    # Define the feature columns
    feature_columns = [
        'pH', 'DO (mg/L)', 'TDS (mg/L)', 'Salinity (ppt)', 'Cond (uS/cm)', 'Temp (°C)', 'TSS (mg/L)',
        'NO2 (ppm)', 'NO3 (ppm)', 'PO4  (ppm)', 'NH4 (ppm)', 'TN (ppm)', 'TP (ppm)', 'BGA-PC (ug/L)',
        'Chlorophyll (ug/L)', 'Turbidity (FNU)', 'Coliform (CFU/100ml)', 'Cu (ppm)', 'Fe (ppm)',
        'Mn(ppm)', 'Zn(ppm)', 'Cr(ppm)', 'Cd(ppm)', 'Hg(ppm)', 'As(ppm)', 'Pb(ppm)'
    ]

    # Read both input data CSVs
    df1 = pd.read_csv(csv1)
    df2 = pd.read_csv(csv2)

    # Ensure both input data have all necessary columns
    missing_cols_df1 = set(feature_columns) - set(df1.columns)
    missing_cols_df2 = set(feature_columns) - set(df2.columns)
    if missing_cols_df1:
        raise ValueError(f"Missing columns in first input data: {missing_cols_df1}")
    if missing_cols_df2:
        raise ValueError(f"Missing columns in second input data: {missing_cols_df2}")

    # Sample 15 random rows from both dataframes
    sampled_df1 = df1.sample(n=sample_size, random_state=42)
    sampled_df2 = df2.sample(n=sample_size, random_state=42)

    # Print sampled data for debugging
    print("Sampled data from first CSV:")
    print(sampled_df1.head())
    print("Sampled data from second CSV:")
    print(sampled_df2.head())

    # Concatenate the two sampled dataframes
    combined_df = pd.concat([sampled_df1, sampled_df2], ignore_index=True)

    # Predict BOD for the combined data
    predictions = model.predict(combined_df[feature_columns])

    # Print predictions for debugging
    print("Predictions:")
    print(predictions)

    # Add predictions to the combined DataFrame
    combined_df['Predicted_BOD'] = predictions

    # Save the combined DataFrame with predictions to a new CSV file
    combined_df.to_csv(output_csv, index=False)
    print(f"Predictions saved to {output_csv}")

# Example usage
csv1 = '/content/tikub_surface_bottom.csv'  # First input CSV file
csv2 = '/content/gunao_surface.csv'  # Second input CSV file
output_csv = '/content/All_Shit/Exported_CSV/Combined_Pred_SVR_LS.csv'  # Output CSV file name
predict_bod_from_two_csvs(csv1, csv2, output_csv)


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Sampled data from first CSV:
       pH  DO (mg/L)  TDS (mg/L)  Salinity (ppt)  Cond (uS/cm)  Temp (°C)  \
9    8.49       6.59      130.68            0.16        259.02       30.8   
42   8.84       9.49       94.15            0.11        189.10       29.7   
33   8.87       9.74       31.71            0.04        157.90       30.1   
311  7.38       1.39      116.10            0.65        233.00       26.5   
272  7.57       1.24      118.90            0.12        222.00       26.4   

     BOD (mg/L)  TSS (mg/L)  NO2 (ppm)  NO3 (ppm)  ...  As(ppm)  Pb(ppm)  \
9          1.26      0.0072      0.032      0.041  ...      3.4      6.2   
42         0.86      0.0443      0.038      0.174  ...      3.3      5.8   
33         0.77      0.0402      0.071      0.189  ...      3.6      4.9   
311        3.40      0.0011      0.039      0.042  ...      3.2      4.6   
272        1.05      0.0248      0.043      0.069  ...      3.1      4.3   

           DATE      MONTH  YEAR  STATION  REPLICAT

# **Random Forest**

## **Genetics Algorithm**

In [None]:
import pandas as pd
import joblib

# Function to load the model and predict BOD from two CSV files
def predict_bod_from_two_csvs(csv1, csv2, output_csv, sample_size=15):
    # Load the trained model
    model = joblib.load('/content/RF/RF_Genetics.pkl')  # Use the path to your saved model

    # Define the feature columns
    feature_columns = [
        'pH', 'DO (mg/L)', 'TDS (mg/L)', 'Salinity (ppt)', 'Cond (uS/cm)', 'Temp (°C)', 'TSS (mg/L)',
        'NO2 (ppm)', 'NO3 (ppm)', 'PO4  (ppm)', 'NH4 (ppm)', 'TN (ppm)', 'TP (ppm)', 'BGA-PC (ug/L)',
        'Chlorophyll (ug/L)', 'Turbidity (FNU)', 'Coliform (CFU/100ml)', 'Cu (ppm)', 'Fe (ppm)',
        'Mn(ppm)', 'Zn(ppm)', 'Cr(ppm)', 'Cd(ppm)', 'Hg(ppm)', 'As(ppm)', 'Pb(ppm)'
    ]

    # Read both input data CSVs
    df1 = pd.read_csv(csv1)
    df2 = pd.read_csv(csv2)

    # Ensure both input data have all necessary columns
    missing_cols_df1 = set(feature_columns) - set(df1.columns)
    missing_cols_df2 = set(feature_columns) - set(df2.columns)
    if missing_cols_df1:
        raise ValueError(f"Missing columns in first input data: {missing_cols_df1}")
    if missing_cols_df2:
        raise ValueError(f"Missing columns in second input data: {missing_cols_df2}")

    # Sample 15 random rows from both dataframes
    sampled_df1 = df1.sample(n=sample_size, random_state=42)
    sampled_df2 = df2.sample(n=sample_size, random_state=42)

    # Print sampled data for debugging
    print("Sampled data from first CSV:")
    print(sampled_df1.head())
    print("Sampled data from second CSV:")
    print(sampled_df2.head())

    # Concatenate the two sampled dataframes
    combined_df = pd.concat([sampled_df1, sampled_df2], ignore_index=True)

    # Predict BOD for the combined data
    predictions = model.predict(combined_df[feature_columns])

    # Print predictions for debugging
    print("Predictions:")
    print(predictions)

    # Add predictions to the combined DataFrame
    combined_df['Predicted_BOD'] = predictions

    # Save the combined DataFrame with predictions to a new CSV file
    combined_df.to_csv(output_csv, index=False)
    print(f"Predictions saved to {output_csv}")

# Example usage
csv1 = '/content/tikub_surface_bottom.csv'  # First input CSV file
csv2 = '/content/gunao_surface.csv'  # Second input CSV file
output_csv = '/content/Exported_CSV/Combined_Pred_RF_GA.csv'  # Output CSV file name
predict_bod_from_two_csvs(csv1, csv2, output_csv)


Sampled data from first CSV:
       pH  DO (mg/L)  TDS (mg/L)  Salinity (ppt)  Cond (uS/cm)  Temp (°C)  \
9    8.49       6.59      130.68            0.16        259.02       30.8   
42   8.84       9.49       94.15            0.11        189.10       29.7   
33   8.87       9.74       31.71            0.04        157.90       30.1   
311  7.38       1.39      116.10            0.65        233.00       26.5   
272  7.57       1.24      118.90            0.12        222.00       26.4   

     BOD (mg/L)  TSS (mg/L)  NO2 (ppm)  NO3 (ppm)  ...  As(ppm)  Pb(ppm)  \
9          1.26      0.0072      0.032      0.041  ...      3.4      6.2   
42         0.86      0.0443      0.038      0.174  ...      3.3      5.8   
33         0.77      0.0402      0.071      0.189  ...      3.6      4.9   
311        3.40      0.0011      0.039      0.042  ...      3.2      4.6   
272        1.05      0.0248      0.043      0.069  ...      3.1      4.3   

           DATE      MONTH  YEAR  STATION  REPLICAT

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


## **Tiered Algorithm**

In [None]:
import pandas as pd
import joblib

# Function to load the model and predict BOD from two CSV files
def predict_bod_from_two_csvs(csv1, csv2, output_csv, sample_size=15):
    # Load the trained model
    model = joblib.load('/content/RF/RF_Tiered.pkl')  # Use the path to your saved model

    # Define the feature columns
    feature_columns = [
        'pH', 'DO (mg/L)', 'TDS (mg/L)', 'Salinity (ppt)', 'Cond (uS/cm)', 'Temp (°C)', 'TSS (mg/L)',
        'NO2 (ppm)', 'NO3 (ppm)', 'PO4  (ppm)', 'NH4 (ppm)', 'TN (ppm)', 'TP (ppm)', 'BGA-PC (ug/L)',
        'Chlorophyll (ug/L)', 'Turbidity (FNU)', 'Coliform (CFU/100ml)', 'Cu (ppm)', 'Fe (ppm)',
        'Mn(ppm)', 'Zn(ppm)', 'Cr(ppm)', 'Cd(ppm)', 'Hg(ppm)', 'As(ppm)', 'Pb(ppm)'
    ]

    # Read both input data CSVs
    df1 = pd.read_csv(csv1)
    df2 = pd.read_csv(csv2)

    # Ensure both input data have all necessary columns
    missing_cols_df1 = set(feature_columns) - set(df1.columns)
    missing_cols_df2 = set(feature_columns) - set(df2.columns)
    if missing_cols_df1:
        raise ValueError(f"Missing columns in first input data: {missing_cols_df1}")
    if missing_cols_df2:
        raise ValueError(f"Missing columns in second input data: {missing_cols_df2}")

    # Sample 15 random rows from both dataframes
    sampled_df1 = df1.sample(n=sample_size, random_state=42)
    sampled_df2 = df2.sample(n=sample_size, random_state=42)

    # Print sampled data for debugging
    print("Sampled data from first CSV:")
    print(sampled_df1.head())
    print("Sampled data from second CSV:")
    print(sampled_df2.head())

    # Concatenate the two sampled dataframes
    combined_df = pd.concat([sampled_df1, sampled_df2], ignore_index=True)

    # Predict BOD for the combined data
    predictions = model.predict(combined_df[feature_columns])

    # Print predictions for debugging
    print("Predictions:")
    print(predictions)

    # Add predictions to the combined DataFrame
    combined_df['Predicted_BOD'] = predictions

    # Save the combined DataFrame with predictions to a new CSV file
    combined_df.to_csv(output_csv, index=False)
    print(f"Predictions saved to {output_csv}")

# Example usage
csv1 = '/content/tikub_surface_bottom.csv'  # First input CSV file
csv2 = '/content/gunao_surface.csv'  # Second input CSV file
output_csv = '/content/Exported_CSV/Combined_Pred_RF_TA.csv'  # Output CSV file name
predict_bod_from_two_csvs(csv1, csv2, output_csv)


Sampled data from first CSV:
       pH  DO (mg/L)  TDS (mg/L)  Salinity (ppt)  Cond (uS/cm)  Temp (°C)  \
9    8.49       6.59      130.68            0.16        259.02       30.8   
42   8.84       9.49       94.15            0.11        189.10       29.7   
33   8.87       9.74       31.71            0.04        157.90       30.1   
311  7.38       1.39      116.10            0.65        233.00       26.5   
272  7.57       1.24      118.90            0.12        222.00       26.4   

     BOD (mg/L)  TSS (mg/L)  NO2 (ppm)  NO3 (ppm)  ...  As(ppm)  Pb(ppm)  \
9          1.26      0.0072      0.032      0.041  ...      3.4      6.2   
42         0.86      0.0443      0.038      0.174  ...      3.3      5.8   
33         0.77      0.0402      0.071      0.189  ...      3.6      4.9   
311        3.40      0.0011      0.039      0.042  ...      3.2      4.6   
272        1.05      0.0248      0.043      0.069  ...      3.1      4.3   

           DATE      MONTH  YEAR  STATION  REPLICAT

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


## **Least Square**

In [None]:
import pandas as pd
import joblib

# Function to load the model and predict BOD from two CSV files
def predict_bod_from_two_csvs(csv1, csv2, output_csv, sample_size=15):
    # Load the trained model
    model = joblib.load('/content/RF/RF_Least.pkl')  # Use the path to your saved model

    # Define the feature columns
    feature_columns = [
        'pH', 'DO (mg/L)', 'TDS (mg/L)', 'Salinity (ppt)', 'Cond (uS/cm)', 'Temp (°C)', 'TSS (mg/L)',
        'NO2 (ppm)', 'NO3 (ppm)', 'PO4  (ppm)', 'NH4 (ppm)', 'TN (ppm)', 'TP (ppm)', 'BGA-PC (ug/L)',
        'Chlorophyll (ug/L)', 'Turbidity (FNU)', 'Coliform (CFU/100ml)', 'Cu (ppm)', 'Fe (ppm)',
        'Mn(ppm)', 'Zn(ppm)', 'Cr(ppm)', 'Cd(ppm)', 'Hg(ppm)', 'As(ppm)', 'Pb(ppm)'
    ]

    # Read both input data CSVs
    df1 = pd.read_csv(csv1)
    df2 = pd.read_csv(csv2)

    # Ensure both input data have all necessary columns
    missing_cols_df1 = set(feature_columns) - set(df1.columns)
    missing_cols_df2 = set(feature_columns) - set(df2.columns)
    if missing_cols_df1:
        raise ValueError(f"Missing columns in first input data: {missing_cols_df1}")
    if missing_cols_df2:
        raise ValueError(f"Missing columns in second input data: {missing_cols_df2}")

    # Sample 15 random rows from both dataframes
    sampled_df1 = df1.sample(n=sample_size, random_state=42)
    sampled_df2 = df2.sample(n=sample_size, random_state=42)

    # Print sampled data for debugging
    print("Sampled data from first CSV:")
    print(sampled_df1.head())
    print("Sampled data from second CSV:")
    print(sampled_df2.head())

    # Concatenate the two sampled dataframes
    combined_df = pd.concat([sampled_df1, sampled_df2], ignore_index=True)

    # Predict BOD for the combined data
    predictions = model.predict(combined_df[feature_columns])

    # Print predictions for debugging
    print("Predictions:")
    print(predictions)

    # Add predictions to the combined DataFrame
    combined_df['Predicted_BOD'] = predictions

    # Save the combined DataFrame with predictions to a new CSV file
    combined_df.to_csv(output_csv, index=False)
    print(f"Predictions saved to {output_csv}")

# Example usage
csv1 = '/content/tikub_surface_bottom.csv'  # First input CSV file
csv2 = '/content/gunao_surface.csv'  # Second input CSV file
output_csv = '/content/Exported_CSV/Combined_Pred_RF_LS.csv'  # Output CSV file name
predict_bod_from_two_csvs(csv1, csv2, output_csv)


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Sampled data from first CSV:
       pH  DO (mg/L)  TDS (mg/L)  Salinity (ppt)  Cond (uS/cm)  Temp (°C)  \
9    8.49       6.59      130.68            0.16        259.02       30.8   
42   8.84       9.49       94.15            0.11        189.10       29.7   
33   8.87       9.74       31.71            0.04        157.90       30.1   
311  7.38       1.39      116.10            0.65        233.00       26.5   
272  7.57       1.24      118.90            0.12        222.00       26.4   

     BOD (mg/L)  TSS (mg/L)  NO2 (ppm)  NO3 (ppm)  ...  As(ppm)  Pb(ppm)  \
9          1.26      0.0072      0.032      0.041  ...      3.4      6.2   
42         0.86      0.0443      0.038      0.174  ...      3.3      5.8   
33         0.77      0.0402      0.071      0.189  ...      3.6      4.9   
311        3.40      0.0011      0.039      0.042  ...      3.2      4.6   
272        1.05      0.0248      0.043      0.069  ...      3.1      4.3   

           DATE      MONTH  YEAR  STATION  REPLICAT

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


# **Multi-Linear Regression**

## **Genetics Algorithm**

In [None]:
import pandas as pd
import joblib

# Function to load the model and predict BOD from a CSV file with 15 random rows
def predict_bod_from_csv(input_csv, sample_size=15):
    # Load the trained model
    model = joblib.load('/content/MLR/MLR_Genetics 2.pkl')  # Use the path to your saved model

    # Define the feature columns
    feature_columns = [
        'pH', 'DO (mg/L)', 'TDS (mg/L)', 'Salinity (ppt)', 'Cond (uS/cm)', 'Temp (°C)', 'TSS (mg/L)',
        'NO2 (ppm)', 'NO3 (ppm)', 'PO4  (ppm)', 'NH4 (ppm)', 'TN (ppm)', 'TP (ppm)', 'BGA-PC (ug/L)',
        'Chlorophyll (ug/L)', 'Turbidity (FNU)', 'Coliform (CFU/100ml)', 'Cu (ppm)', 'Fe (ppm)',
        'Mn(ppm)', 'Zn(ppm)', 'Cr(ppm)', 'Cd(ppm)', 'Hg(ppm)', 'As(ppm)', 'Pb(ppm)'
    ]

    # Read the input data from CSV
    input_df = pd.read_csv(input_csv)

    # Ensure the input data has all necessary columns
    missing_cols = set(feature_columns) - set(input_df.columns)
    if missing_cols:
        raise ValueError(f"Missing columns in input data: {missing_cols}")

    # Sample 15 random rows from the input data
    sampled_df = input_df.sample(n=sample_size, random_state=42)

    # Predict BOD for the sampled data
    predictions = model.predict(sampled_df[feature_columns])

    # Add predictions to the sampled DataFrame
    sampled_df['Predicted_BOD'] = predictions

    return sampled_df

# Example usage for the first dataset
csv1 = '/content/tikub_surface_bottom.csv'  # First input CSV file
df1 = predict_bod_from_csv(csv1)

# Example usage for the second dataset
csv2 = '/content/gunao_surface.csv'  # Second input CSV file
df2 = predict_bod_from_csv(csv2)

# Combine both datasets into a single DataFrame
combined_df = pd.concat([df1, df2], ignore_index=True)

# Save the combined DataFrame with predictions to a new CSV file
combined_output_csv = '/content/Exported_CSV/Combined_Pred_MLR_GA.csv'  # Combined output CSV file
combined_df.to_csv(combined_output_csv, index=False)
print(f"Combined predictions saved to {combined_output_csv}")


Combined predictions saved to /content/Exported_CSV/Combined_Pred_MLR_GA.csv


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


## **Tiered Algorithm**

In [None]:
import pandas as pd
import joblib

# Function to load the model and predict BOD from a CSV file with 15 random rows
def predict_bod_from_csv(input_csv, sample_size=15):
    # Load the trained model
    model = joblib.load('/content/MLR/MLR_Tiered.pkl')  # Use the path to your saved model

    # Define the feature columns
    feature_columns = [
        'pH', 'DO (mg/L)', 'TDS (mg/L)', 'Salinity (ppt)', 'Cond (uS/cm)', 'Temp (°C)', 'TSS (mg/L)',
        'NO2 (ppm)', 'NO3 (ppm)', 'PO4  (ppm)', 'NH4 (ppm)', 'BGA-PC (ug/L)', 'Chlorophyll (ug/L)',
        'Turbidity (FNU)', 'Coliform (CFU/100ml)', 'Cu (ppm)', 'Fe (ppm)', 'Mn(ppm)', 'Zn(ppm)',
        'Cr(ppm)', 'Cd(ppm)', 'Hg(ppm)', 'As(ppm)', 'Pb(ppm)'
    ]

    # Read the input data from CSV
    input_df = pd.read_csv(input_csv)

    # Ensure the input data has all necessary columns
    missing_cols = set(feature_columns) - set(input_df.columns)
    if missing_cols:
        raise ValueError(f"Missing columns in input data: {missing_cols}")

    # Sample 15 random rows from the input data
    sampled_df = input_df.sample(n=sample_size, random_state=42)

    # Predict BOD for the sampled data
    predictions = model.predict(sampled_df[feature_columns])

    # Add predictions to the sampled DataFrame
    sampled_df['Predicted_BOD'] = predictions

    return sampled_df

# Example usage for the first dataset
csv1 = '/content/tikub_surface_bottom.csv'  # First input CSV file
df1 = predict_bod_from_csv(csv1)

# Example usage for the second dataset
csv2 = '/content/gunao_surface.csv'  # Second input CSV file
df2 = predict_bod_from_csv(csv2)

# Combine both datasets into a single DataFrame
combined_df = pd.concat([df1, df2], ignore_index=True)

# Save the combined DataFrame with predictions to a new CSV file
combined_output_csv = '/content/Exported_CSV/Combined_Pred_MLR_TA.csv'  # Combined output CSV file
combined_df.to_csv(combined_output_csv, index=False)
print(f"Combined predictions saved to {combined_output_csv}")


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Combined predictions saved to /content/Exported_CSV/Combined_Pred_MLR_TA.csv


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


## **Least Square**

In [None]:
import pandas as pd
import joblib

# Function to load the model and predict BOD from a CSV file with 15 random rows
def predict_bod_from_csv(input_csv, sample_size=15):
    # Load the trained model
    model = joblib.load('/content/MLR/MLR_Least.pkl')  # Use the path to your saved model

    # Define the feature columns
    feature_columns = [
        'pH', 'DO (mg/L)', 'TDS (mg/L)', 'Salinity (ppt)', 'Cond (uS/cm)', 'Temp (°C)', 'TSS (mg/L)',
        'NO2 (ppm)', 'NO3 (ppm)', 'PO4  (ppm)', 'NH4 (ppm)', 'TN (ppm)', 'TP (ppm)', 'BGA-PC (ug/L)',
        'Chlorophyll (ug/L)', 'Turbidity (FNU)', 'Coliform (CFU/100ml)', 'Cu (ppm)', 'Fe (ppm)',
        'Mn(ppm)', 'Zn(ppm)', 'Cr(ppm)', 'Cd(ppm)', 'Hg(ppm)', 'As(ppm)', 'Pb(ppm)'
    ]

    # Read the input data from CSV
    input_df = pd.read_csv(input_csv)

    # Ensure the input data has all necessary columns
    missing_cols = set(feature_columns) - set(input_df.columns)
    if missing_cols:
        raise ValueError(f"Missing columns in input data: {missing_cols}")

    # Sample 15 random rows from the input data
    sampled_df = input_df.sample(n=sample_size, random_state=42)

    # Predict BOD for the sampled data
    predictions = model.predict(sampled_df[feature_columns])

    # Add predictions to the sampled DataFrame
    sampled_df['Predicted_BOD'] = predictions

    return sampled_df

# Example usage for the first dataset
csv1 = '/content/tikub_surface_bottom.csv'  # First input CSV file
df1 = predict_bod_from_csv(csv1)

# Example usage for the second dataset
csv2 = '/content/gunao_surface.csv'  # Second input CSV file
df2 = predict_bod_from_csv(csv2)

# Combine both datasets into a single DataFrame
combined_df = pd.concat([df1, df2], ignore_index=True)

# Save the combined DataFrame with predictions to a new CSV file
combined_output_csv = '/content/Exported_CSV/Combined_Pred_MLR_LS.csv'  # Combined output CSV file
combined_df.to_csv(combined_output_csv, index=False)
print(f"Combined predictions saved to {combined_output_csv}")


Predictions saved to /content/Exported_CSV/Pred_MLR_GA.csv


configuration generated by an older version of XGBoost, please export the model by calling
`Booster.save_model` from that version first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/stable/tutorials/saving_model.html

for more details about differences between saving model and serializing.

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


# **PREDICTION TO CSV H5 MODELS**


# **Multi-Layered Peceptron**

## **Genetics Algorithm**

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model

# Load the saved model
model = load_model('/content/All_Shit/MLP/MLP_Genetics.h5')

# Define the feature columns (same as used during training)
feature_columns = [
    'pH', 'DO (mg/L)', 'TDS (mg/L)', 'Salinity (ppt)', 'Cond (uS/cm)', 'Temp (°C)', 'TSS (mg/L)',
    'NO2 (ppm)', 'NO3 (ppm)', 'PO4  (ppm)', 'NH4 (ppm)', 'TN (ppm)', 'TP (ppm)', 'BGA-PC (ug/L)',
    'Chlorophyll (ug/L)', 'Turbidity (FNU)', 'Coliform (CFU/100ml)', 'Cu (ppm)', 'Fe (ppm)',
    'Mn(ppm)', 'Zn(ppm)', 'Cr(ppm)', 'Cd(ppm)', 'Hg(ppm)', 'As(ppm)', 'Pb(ppm)'
]

# Load data from both CSV files
df1 = pd.read_csv('/content/tikub_surface_bottom.csv')
df2 = pd.read_csv('/content/gunao_surface.csv')

# Sample 15 random rows from each dataframe
sampled_df1 = df1.sample(n=15, random_state=42)
sampled_df2 = df2.sample(n=15, random_state=42)

# Combine the two sampled dataframes
combined_df = pd.concat([sampled_df1, sampled_df2], ignore_index=True)

# Ensure the dataset has the actual BOD column
if 'BOD (mg/L)' in combined_df.columns:
    actual_bod = combined_df['BOD (mg/L)']  # Extract actual BOD values
else:
    raise ValueError("Actual BOD column 'BOD (mg/L)' not found in the dataset.")

# Extract features from the combined data
X_combined = combined_df[feature_columns]

# Load the scaler used for standardization (this should be the same scaler used during training)
scaler = StandardScaler()

# Fit the scaler on the combined data for now (in practice, use a pre-fitted scaler)
combined_training_data = pd.concat([df1[feature_columns], df2[feature_columns]], axis=0)
scaler.fit(combined_training_data)

# Standardize the input features
X_combined_scaled = scaler.transform(X_combined)

# Make predictions using the loaded model
predictions = model.predict(X_combined_scaled).flatten()

# Add the actual and predicted BOD values to the combined dataframe
combined_df['Actual BOD (mg/L)'] = actual_bod
combined_df['Predicted BOD (mg/L)'] = predictions

# Optionally, include the 'Month' column if available
if 'Month' in combined_df.columns:
    output_data = combined_df[['Month', 'Actual BOD (mg/L)', 'Predicted BOD (mg/L)']]
else:
    combined_df['Month'] = 'January'  # Placeholder, modify as needed
    output_data = combined_df[['Actual BOD (mg/L)', 'Predicted BOD (mg/L)']]

# Save the results to a new CSV file
output_data.to_csv('/content/All_Shit/Exported_CSV/Combined_Pred_MLP_GA.csv', index=False)

# Output predictions to the console
print('Predictions for combined sampled data with month:')
print(output_data)




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
Predictions for combined sampled data with month:
    Actual BOD (mg/L)  Predicted BOD (mg/L)
0                1.26              2.178982
1                0.86              2.656128
2                0.77              1.740568
3                3.40              2.512649
4                1.05              1.706314
5                1.72              1.497130
6                1.10              2.119810
7                1.00              1.138694
8                1.54              0.721108
9                3.19              2.869582
10               1.97              2.294593
11               1.21              3.401082
12               0.93              1.238418
13               2.82              1.539461
14               0.83              2.086167
15               2.72              3.505661
16               0.61              1.111352
17               3.54              4.293103
18               3.77              3.53565

## **Tiered Algo**

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model

# Load the saved model
model = load_model('/content/All_Shit/MLP/MLP_Tiering.h5')

# Define the feature columns (same as used during training)
feature_columns = [
    'pH', 'DO (mg/L)', 'TDS (mg/L)', 'Salinity (ppt)', 'Cond (uS/cm)', 'Temp (°C)', 'TSS (mg/L)',
    'NO2 (ppm)', 'NO3 (ppm)', 'PO4  (ppm)', 'NH4 (ppm)', 'TN (ppm)', 'TP (ppm)', 'BGA-PC (ug/L)',
    'Chlorophyll (ug/L)', 'Turbidity (FNU)', 'Coliform (CFU/100ml)', 'Cu (ppm)', 'Fe (ppm)',
    'Mn(ppm)', 'Zn(ppm)', 'Cr(ppm)', 'Cd(ppm)', 'Hg(ppm)', 'As(ppm)', 'Pb(ppm)'
]

# Load data from both CSV files
df1 = pd.read_csv('/content/tikub_surface_bottom.csv')
df2 = pd.read_csv('/content/gunao_surface.csv')

# Sample 15 random rows from each dataframe
sampled_df1 = df1.sample(n=15, random_state=42)
sampled_df2 = df2.sample(n=15, random_state=42)

# Combine the two sampled dataframes
combined_df = pd.concat([sampled_df1, sampled_df2], ignore_index=True)

# Ensure the dataset has the actual BOD column
if 'BOD (mg/L)' in combined_df.columns:
    actual_bod = combined_df['BOD (mg/L)']  # Extract actual BOD values
else:
    raise ValueError("Actual BOD column 'BOD (mg/L)' not found in the dataset.")

# Extract features from the combined data
X_combined = combined_df[feature_columns]

# Load the scaler used for standardization (this should be the same scaler used during training)
scaler = StandardScaler()

# Fit the scaler on the combined data for now (in practice, use a pre-fitted scaler)
combined_training_data = pd.concat([df1[feature_columns], df2[feature_columns]], axis=0)
scaler.fit(combined_training_data)

# Standardize the input features
X_combined_scaled = scaler.transform(X_combined)

# Make predictions using the loaded model
predictions = model.predict(X_combined_scaled).flatten()

# Add the actual and predicted BOD values to the combined dataframe
combined_df['Actual BOD (mg/L)'] = actual_bod
combined_df['Predicted BOD (mg/L)'] = predictions

# Optionally, include the 'Month' column if available
if 'Month' in combined_df.columns:
    output_data = combined_df[['Month', 'Actual BOD (mg/L)', 'Predicted BOD (mg/L)']]
else:
    combined_df['Month'] = 'January'  # Placeholder, modify as needed
    output_data = combined_df[['Actual BOD (mg/L)', 'Predicted BOD (mg/L)']]


# Save the results to a new CSV file
output_data.to_csv('/content/All_Shit/Exported_CSV/Combined_Pred_MLP_TA.csv', index=False)

# Output predictions to the console
print('Predictions for combined sampled data with month:')
print(output_data)




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 109ms/step
Predictions for combined sampled data with month:
    Actual BOD (mg/L)  Predicted BOD (mg/L)
0                1.26              1.184218
1                0.86              1.141109
2                0.77              1.629049
3                3.40              1.404057
4                1.05              1.633756
5                1.72              0.794916
6                1.10              1.091761
7                1.00              1.341880
8                1.54              0.990906
9                3.19              3.432413
10               1.97              1.788775
11               1.21              1.703771
12               0.93              0.966348
13               2.82              1.117143
14               0.83              2.116500
15               2.72              2.900607
16               0.61              0.971863
17               3.54              4.400919
18               3.77              3.2378

## **Least Square**

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model

# Load the saved model
model = load_model('/content/All_Shit/MLP/MLP_Least.h5')

# Define the feature columns (same as used during training)
feature_columns = [
    'pH', 'DO (mg/L)', 'TDS (mg/L)', 'Salinity (ppt)', 'Cond (uS/cm)', 'Temp (°C)', 'TSS (mg/L)',
    'NO2 (ppm)', 'NO3 (ppm)', 'PO4  (ppm)', 'NH4 (ppm)', 'TN (ppm)', 'TP (ppm)', 'BGA-PC (ug/L)',
    'Chlorophyll (ug/L)', 'Turbidity (FNU)', 'Coliform (CFU/100ml)', 'Cu (ppm)', 'Fe (ppm)',
    'Mn(ppm)', 'Zn(ppm)', 'Cr(ppm)', 'Cd(ppm)', 'Hg(ppm)', 'As(ppm)', 'Pb(ppm)'
]

# Load data from both CSV files
df1 = pd.read_csv('/content/tikub_surface_bottom.csv')
df2 = pd.read_csv('/content/gunao_surface.csv')

# Sample 15 random rows from each dataframe
sampled_df1 = df1.sample(n=15, random_state=42)
sampled_df2 = df2.sample(n=15, random_state=42)

# Combine the two sampled dataframes
combined_df = pd.concat([sampled_df1, sampled_df2], ignore_index=True)

# Ensure the dataset has the actual BOD column
if 'BOD (mg/L)' in combined_df.columns:
    actual_bod = combined_df['BOD (mg/L)']  # Extract actual BOD values
else:
    raise ValueError("Actual BOD column 'BOD (mg/L)' not found in the dataset.")

# Extract features from the combined data
X_combined = combined_df[feature_columns]

# Load the scaler used for standardization (this should be the same scaler used during training)
scaler = StandardScaler()

# Fit the scaler on the combined data for now (in practice, use a pre-fitted scaler)
combined_training_data = pd.concat([df1[feature_columns], df2[feature_columns]], axis=0)
scaler.fit(combined_training_data)

# Standardize the input features
X_combined_scaled = scaler.transform(X_combined)

# Make predictions using the loaded model
predictions = model.predict(X_combined_scaled).flatten()

# Add the actual and predicted BOD values to the combined dataframe
combined_df['Actual BOD (mg/L)'] = actual_bod
combined_df['Predicted BOD (mg/L)'] = predictions

# Optionally, include the 'Month' column if available
if 'Month' in combined_df.columns:
    output_data = combined_df[['Month', 'Actual BOD (mg/L)', 'Predicted BOD (mg/L)']]
else:
    combined_df['Month'] = 'January'  # Placeholder, modify as needed
    output_data = combined_df[['Actual BOD (mg/L)', 'Predicted BOD (mg/L)']]

# Save the results to a new CSV file
output_data.to_csv('/content/All_Shit/Exported_CSV/Combined_Pred_MLP_LS.csv', index=False)

# Output predictions to the console
print('Predictions for combined sampled data with month:')
print(output_data)




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 111ms/step
Predictions for combined sampled data with month:
    Actual BOD (mg/L)  Predicted BOD (mg/L)
0                1.26              2.457682
1                0.86              1.895507
2                0.77              2.827764
3                3.40              1.592305
4                1.05              1.636373
5                1.72              0.972071
6                1.10              2.258449
7                1.00              2.380834
8                1.54              0.854637
9                3.19              3.477560
10               1.97              2.099782
11               1.21              2.735597
12               0.93              1.858386
13               2.82              0.638295
14               0.83              1.493240
15               2.72              3.712967
16               0.61              1.775438
17               3.54              4.522511
18               3.77              3.5608

# **Artificial Neural Network**

## **Genetics Algorithm**

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model

# Load the saved model
model = load_model('/content/All_Shit/ANN/ANN_Genetics.h5')

# Define the feature columns (same as used during training)
feature_columns = [
    'pH', 'DO (mg/L)', 'TDS (mg/L)', 'Salinity (ppt)', 'Cond (uS/cm)', 'Temp (°C)', 'TSS (mg/L)',
    'NO2 (ppm)', 'NO3 (ppm)', 'PO4  (ppm)', 'NH4 (ppm)', 'TN (ppm)', 'TP (ppm)', 'BGA-PC (ug/L)',
    'Chlorophyll (ug/L)', 'Turbidity (FNU)', 'Coliform (CFU/100ml)', 'Cu (ppm)', 'Fe (ppm)',
    'Mn(ppm)', 'Zn(ppm)', 'Cr(ppm)', 'Cd(ppm)', 'Hg(ppm)', 'As(ppm)', 'Pb(ppm)'
]

# Load data from both CSV files
df1 = pd.read_csv('/content/tikub_surface_bottom.csv')
df2 = pd.read_csv('/content/gunao_surface.csv')

# Sample 15 random rows from each dataframe
sampled_df1 = df1.sample(n=15, random_state=42)
sampled_df2 = df2.sample(n=15, random_state=42)

# Combine the two sampled dataframes
combined_df = pd.concat([sampled_df1, sampled_df2], ignore_index=True)

# Ensure the dataset has the actual BOD column
if 'BOD (mg/L)' in combined_df.columns:
    actual_bod = combined_df['BOD (mg/L)']  # Extract actual BOD values
else:
    raise ValueError("Actual BOD column 'BOD (mg/L)' not found in the dataset.")

# Extract features from the combined data
X_combined = combined_df[feature_columns]

# Load the scaler used for standardization (this should be the same scaler used during training)
scaler = StandardScaler()

# Fit the scaler on the combined data for now (in practice, use a pre-fitted scaler)
combined_training_data = pd.concat([df1[feature_columns], df2[feature_columns]], axis=0)
scaler.fit(combined_training_data)

# Standardize the input features
X_combined_scaled = scaler.transform(X_combined)

# Make predictions using the loaded model
predictions = model.predict(X_combined_scaled).flatten()

# Add the actual and predicted BOD values to the combined dataframe
combined_df['Actual BOD (mg/L)'] = actual_bod
combined_df['Predicted BOD (mg/L)'] = predictions

# Optionally, include the 'Month' column if available
if 'Month' in combined_df.columns:
    output_data = combined_df[['Month', 'Actual BOD (mg/L)', 'Predicted BOD (mg/L)']]
else:
    combined_df['Month'] = 'January'  # Placeholder, modify as needed
    output_data = combined_df[['Actual BOD (mg/L)', 'Predicted BOD (mg/L)']]

# Save the results to a new CSV file
output_data.to_csv('/content/All_Shit/Exported_CSV/Combined_Pred_ANN_GA.csv', index=False)

# Output predictions to the console
print('Predictions for combined sampled data with month:')
print(output_data)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step
Predictions for combined sampled data with month:
    Actual BOD (mg/L)  Predicted BOD (mg/L)
0                1.26              1.853683
1                0.86              2.312531
2                0.77              1.994415
3                3.40              0.600375
4                1.05              2.621516
5                1.72              0.684571
6                1.10              1.713614
7                1.00              1.118037
8                1.54              0.609683
9                3.19              3.664783
10               1.97              1.305569
11               1.21              3.496611
12               0.93              1.311317
13               2.82              1.008914
14               0.83              2.852187
15               2.72              2.825196
16               0.61              1.526996
17               3.54              4.335528
18               3.77              3.09087

## **Tiered Algo**

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model

# Load the saved model
model = load_model('/content/All_Shit/ANN/ANN_Tiered.h5')

# Define the feature columns (same as used during training)
feature_columns = [
    'pH', 'DO (mg/L)', 'TDS (mg/L)', 'Salinity (ppt)', 'Cond (uS/cm)', 'Temp (°C)', 'TSS (mg/L)',
    'NO2 (ppm)', 'NO3 (ppm)', 'PO4  (ppm)', 'NH4 (ppm)', 'TN (ppm)', 'TP (ppm)', 'BGA-PC (ug/L)',
    'Chlorophyll (ug/L)', 'Turbidity (FNU)', 'Coliform (CFU/100ml)', 'Cu (ppm)', 'Fe (ppm)',
    'Mn(ppm)', 'Zn(ppm)', 'Cr(ppm)', 'Cd(ppm)', 'Hg(ppm)', 'As(ppm)', 'Pb(ppm)'
]

# Load data from both CSV files
df1 = pd.read_csv('/content/tikub_surface_bottom.csv')
df2 = pd.read_csv('/content/gunao_surface.csv')

# Sample 15 random rows from each dataframe
sampled_df1 = df1.sample(n=15, random_state=42)
sampled_df2 = df2.sample(n=15, random_state=42)

# Combine the two sampled dataframes
combined_df = pd.concat([sampled_df1, sampled_df2], ignore_index=True)

# Ensure the dataset has the actual BOD column
if 'BOD (mg/L)' in combined_df.columns:
    actual_bod = combined_df['BOD (mg/L)']  # Extract actual BOD values
else:
    raise ValueError("Actual BOD column 'BOD (mg/L)' not found in the dataset.")

# Extract features from the combined data
X_combined = combined_df[feature_columns]

# Load the scaler used for standardization (this should be the same scaler used during training)
scaler = StandardScaler()

# Fit the scaler on the combined data for now (in practice, use a pre-fitted scaler)
combined_training_data = pd.concat([df1[feature_columns], df2[feature_columns]], axis=0)
scaler.fit(combined_training_data)

# Standardize the input features
X_combined_scaled = scaler.transform(X_combined)

# Make predictions using the loaded model
predictions = model.predict(X_combined_scaled).flatten()

# Add the actual and predicted BOD values to the combined dataframe
combined_df['Actual BOD (mg/L)'] = actual_bod
combined_df['Predicted BOD (mg/L)'] = predictions

# Optionally, include the 'Month' column if available
if 'Month' in combined_df.columns:
    output_data = combined_df[['Month', 'Actual BOD (mg/L)', 'Predicted BOD (mg/L)']]
else:
    combined_df['Month'] = 'January'  # Placeholder, modify as needed
    output_data = combined_df[['Actual BOD (mg/L)', 'Predicted BOD (mg/L)']]

# Save the results to a new CSV file
output_data.to_csv('/content/All_Shit/Exported_CSV/Combined_Pred_ANN_TA.csv', index=False)

# Output predictions to the console
print('Predictions for combined sampled data with month:')
print(output_data)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 141ms/step
Predictions for combined sampled data with month:
    Actual BOD (mg/L)  Predicted BOD (mg/L)
0                1.26              1.789281
1                0.86              1.933297
2                0.77              1.555702
3                3.40              1.242415
4                1.05              1.481246
5                1.72              1.169735
6                1.10              1.370100
7                1.00              0.878690
8                1.54              1.145615
9                3.19              3.446601
10               1.97              1.341378
11               1.21              2.630800
12               0.93              0.702352
13               2.82              1.441138
14               0.83              1.589655
15               2.72              2.324499
16               0.61              0.566675
17               3.54              4.088719
18               3.77              3.0605

## **Least Square**

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model

# Load the saved model
model = load_model('/content/ANN_Least.h5')

# Define the feature columns (same as used during training)
feature_columns = [
    'pH', 'DO (mg/L)', 'TDS (mg/L)', 'Salinity (ppt)', 'Cond (uS/cm)', 'Temp (°C)', 'TSS (mg/L)',
    'NO2 (ppm)', 'NO3 (ppm)', 'PO4  (ppm)', 'NH4 (ppm)', 'TN (ppm)', 'TP (ppm)', 'BGA-PC (ug/L)',
    'Chlorophyll (ug/L)', 'Turbidity (FNU)', 'Coliform (CFU/100ml)', 'Cu (ppm)', 'Fe (ppm)',
    'Mn(ppm)', 'Zn(ppm)', 'Cr(ppm)', 'Cd(ppm)', 'Hg(ppm)', 'As(ppm)', 'Pb(ppm)'
]

# Load data from both CSV files
df1 = pd.read_csv('/content/tikub_surface_bottom.csv')
df2 = pd.read_csv('/content/gunao_surface.csv')

# Sample 15 random rows from each dataframe
sampled_df1 = df1.sample(n=15, random_state=42)
sampled_df2 = df2.sample(n=15, random_state=42)

# Combine the two sampled dataframes
combined_df = pd.concat([sampled_df1, sampled_df2], ignore_index=True)

# Ensure the dataset has the actual BOD column
if 'BOD (mg/L)' in combined_df.columns:
    actual_bod = combined_df['BOD (mg/L)']  # Extract actual BOD values
else:
    raise ValueError("Actual BOD column 'BOD (mg/L)' not found in the dataset.")

# Extract features from the combined data
X_combined = combined_df[feature_columns]

# Load the scaler used for standardization (this should be the same scaler used during training)
scaler = StandardScaler()

# Fit the scaler on the combined data for now (in practice, use a pre-fitted scaler)
combined_training_data = pd.concat([df1[feature_columns], df2[feature_columns]], axis=0)
scaler.fit(combined_training_data)

# Standardize the input features
X_combined_scaled = scaler.transform(X_combined)

# Make predictions using the loaded model
predictions = model.predict(X_combined_scaled).flatten()

# Add the actual and predicted BOD values to the combined dataframe
combined_df['Actual BOD (mg/L)'] = actual_bod
combined_df['Predicted BOD (mg/L)'] = predictions

# Optionally, include the 'Month' column if available
if 'Month' in combined_df.columns:
    output_data = combined_df[['Month', 'Actual BOD (mg/L)', 'Predicted BOD (mg/L)']]
else:
    combined_df['Month'] = 'January'  # Placeholder, modify as needed
    output_data = combined_df[['Actual BOD (mg/L)', 'Predicted BOD (mg/L)']]

# Save the results to a new CSV file
output_data.to_csv('/content/Combined_Pred_ANN_LS.csv', index=False)

# Output predictions to the console
print('Predictions for combined sampled data with month:')
print(output_data)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 111ms/step
Predictions for combined sampled data with month:
    Actual BOD (mg/L)  Predicted BOD (mg/L)
0                1.26              2.380203
1                0.86              2.370511
2                0.77              3.081838
3                3.40              1.718762
4                1.05              1.228480
5                1.72              0.989553
6                1.10              1.304582
7                1.00              1.126583
8                1.54              1.065019
9                3.19              4.328072
10               1.97              1.696573
11               1.21              1.809787
12               0.93              2.059227
13               2.82              0.717234
14               0.83              1.409939
15               2.72              2.367879
16               0.61              1.629561
17               3.54              4.346528
18               3.77              2.7926

# **Prototype**

## **Test prototype**

In [None]:
import pandas as pd
import joblib
import random

# Function to load the model and predict BOD from a randomly chosen CSV file
def predict_bod_from_random_csv(csv1, csv2, sample_size=1):
    # Load the trained model
    model = joblib.load('/content/All_Shit/RF/RF_Genetics.pkl')  # Use the path to your saved model

    # Define the feature columns
    feature_columns = [
        'pH', 'DO (mg/L)', 'TDS (mg/L)', 'Salinity (ppt)', 'Cond (uS/cm)', 'Temp (°C)', 'TSS (mg/L)',
        'NO2 (ppm)', 'NO3 (ppm)', 'PO4  (ppm)', 'NH4 (ppm)', 'TN (ppm)', 'TP (ppm)', 'BGA-PC (ug/L)',
        'Chlorophyll (ug/L)', 'Turbidity (FNU)', 'Coliform (CFU/100ml)', 'Cu (ppm)', 'Fe (ppm)',
        'Mn(ppm)', 'Zn(ppm)', 'Cr(ppm)', 'Cd(ppm)', 'Hg(ppm)', 'As(ppm)', 'Pb(ppm)'
    ]

    # Randomly choose one of the CSV files
    chosen_csv = random.choice([csv1, csv2])
    print(f"Selected CSV for prediction: {chosen_csv}")

    # Read the input data CSV
    df = pd.read_csv(chosen_csv)

    # Ensure the input data has all necessary columns
    missing_cols = set(feature_columns) - set(df.columns)
    if missing_cols:
        raise ValueError(f"Missing columns in input data: {missing_cols}")

    # Sample 15 random rows from the chosen dataframe
    sampled_df = df.sample(n=sample_size)

    # Print sampled data for debugging
    print("Sampled data for prediction:")
    print(sampled_df.head())

    # Predict BOD for the sampled data
    predictions = model.predict(sampled_df[feature_columns])

    # Add predictions to the sampled DataFrame
    sampled_df['Predicted_BOD'] = predictions

    # Print actual BOD values and predictions
    print("Actual and Predicted BOD values:")
    print(sampled_df[['BOD (mg/L)', 'Predicted_BOD']])

# Example usage
csv1 = '/content/tikub_surface_bottom.csv'  # First input CSV file
csv2 = '/content/gunao_surface.csv'  # Second input CSV file
predict_bod_from_random_csv(csv1, csv2)


Selected CSV for prediction: /content/tikub_surface_bottom.csv
Sampled data for prediction:
       pH  DO (mg/L)  TDS (mg/L)  Salinity (ppt)  Cond (uS/cm)  Temp (°C)  \
180  8.58       8.45       136.6            0.17         372.0       31.2   

     BOD (mg/L)  TSS (mg/L)  NO2 (ppm)  NO3 (ppm)  ...  As(ppm)  Pb(ppm)  \
180        0.36      0.0006      0.038      0.028  ...      3.3      3.8   

          DATE      MONTH  YEAR  STATION  REPLICATE  COLLECTION    Latitude  \
180  9/28/2023  September  2023        1          1     Surface  13°57.873'   

      Longtitude  
180  121°18.294'  

[1 rows x 35 columns]
Actual and Predicted BOD values:
     BOD (mg/L)  Predicted_BOD
180        0.36       1.190295


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


## **Random Forest Least**

In [None]:
import pandas as pd
import joblib

# Function to load the model and predict BOD for one random row
def predict_single_bod(csv1, csv2):
    # Load the trained model
    model = joblib.load('/content/RF_Least.pkl')  # Use the path to your saved model

    # Define the feature columns
    feature_columns = [
        'pH', 'DO (mg/L)', 'TDS (mg/L)', 'Salinity (ppt)', 'Cond (uS/cm)', 'Temp (°C)', 'TSS (mg/L)',
        'NO2 (ppm)', 'NO3 (ppm)', 'PO4  (ppm)', 'NH4 (ppm)', 'TN (ppm)', 'TP (ppm)', 'BGA-PC (ug/L)',
        'Chlorophyll (ug/L)', 'Turbidity (FNU)', 'Coliform (CFU/100ml)', 'Cu (ppm)', 'Fe (ppm)',
        'Mn(ppm)', 'Zn(ppm)', 'Cr(ppm)', 'Cd(ppm)', 'Hg(ppm)', 'As(ppm)', 'Pb(ppm)'
    ]

    df1 = pd.read_csv(csv1)
    df2 = pd.read_csv(csv2)
    missing_cols_df1 = set(feature_columns) - set(df1.columns)
    missing_cols_df2 = set(feature_columns) - set(df2.columns)
    if missing_cols_df1:
        raise ValueError(f"Missing columns in first input data: {missing_cols_df1}")
    if missing_cols_df2:
        raise ValueError(f"Missing columns in second input data: {missing_cols_df2}")
    combined_df = pd.concat([df1, df2], ignore_index=True)
    sample_row = combined_df.sample(n=1)
    actual_bod = sample_row["BOD (mg/L)"].values[0]
    predicted_bod = model.predict(sample_row[feature_columns])[0]
    print(f"Actual BOD: {actual_bod}")
    print(f"Predicted BOD: {predicted_bod}")

csv1 = '/content/tikub_surface_bottom.csv'  # First input CSV file
csv2 = '/content/gunao_surface.csv'  # Second input CSV file
predict_single_bod(csv1, csv2)


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-1-917900a2a25e>", line 2, in <cell line: 2>
    import joblib
  File "/usr/local/lib/python3.10/dist-packages/joblib/__init__.py", line 129, in <module>
    from .parallel import Parallel
  File "/usr/local/lib/python3.10/dist-packages/joblib/parallel.py", line 31, in <module>
    from ._parallel_backends import (FallbackToBackend, MultiprocessingBackend,
  File "/usr/local/lib/python3.10/dist-packages/joblib/_parallel_backends.py", line 12, in <module>
    from ._utils import (
  File "/usr/local/lib/python3.10/dist-packages/joblib/_utils.py", line 11, in <module>
    from .externals.loky.process_executor import _ExceptionWithTraceback
  File "/usr/local/lib/python3.10/dist-packages/joblib/externals/loky/__init__.py", line 18, in <module>
    from .backend.context import

TypeError: object of type 'NoneType' has no len()

In [None]:

import pandas as pd
import joblib
def predict_single_bod():
    model = joblib.load('/content/RF_Least.pkl')  # Use the path to your saved model
    print("type 'back' to correct the previous input,\n 'preview' to see all inputs,\n 'clear' to reset all inputs")
    feature_columns = [
        'pH', 'DO (mg/L)', 'TDS (mg/L)', 'Salinity (ppt)', 'Cond (uS/cm)', 'Temp (°C)', 'TSS (mg/L)',
        'NO2 (ppm)', 'NO3 (ppm)', 'PO4  (ppm)', 'NH4 (ppm)', 'TN (ppm)', 'TP (ppm)', 'BGA-PC (ug/L)',
        'Chlorophyll (ug/L)', 'Turbidity (FNU)', 'Coliform (CFU/100ml)', 'Cu (ppm)', 'Fe (ppm)',
        'Mn(ppm)', 'Zn(ppm)', 'Cr(ppm)', 'Cd(ppm)', 'Hg(ppm)', 'As(ppm)', 'Pb(ppm)']
    user_data = [None] * len(feature_columns)
    i = 0
    while i < len(feature_columns):
        feature = feature_columns[i]
        user_input = input(f"Enter the value for {feature}: ")
        if user_input.lower() == "back":         # Check if the user wants to go back
            if i > 0:
                i -= 1  # Move one step back
                print(f"Going back to {feature_columns[i]}")
            else:
                print("Already at the first input, cannot go back further.")
            continue  # Skip the rest and restart the loop
        elif user_input.lower() == "preview": # Show preview of all inputs so far
            print("Current inputs:")
            for j in range(len(feature_columns)):
                print(f"{feature_columns[j]}: {user_data[j]}")
            continue  # Re-prompt for the same feature
        elif user_input.lower() == "clear":# Clear all inputs and reset
            user_data = [None] * len(feature_columns)  # Reset all values to None
            i = 0  # Start over from the first feature
            print("All inputs have been cleared. Starting over.")
            continue  # Restart input collection from the first feature
        try:
            user_data[i] = float(user_input)
            i += 1  # Move to the next feature
        except ValueError:
            print("Invalid input. Please enter a numeric value.")
    input_df = pd.DataFrame([user_data], columns=feature_columns)
    predicted_bod = model.predict(input_df)[0]
    print(f"Predicted BOD: {predicted_bod}")
predict_single_bod()


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


type 'back' to correct the previous input,
 'preview' to see all inputs,
 'clear' to reset all inputs
Enter the value for pH: 7.57
Enter the value for DO (mg/L): 1.62
Enter the value for TDS (mg/L): 89.2
Enter the value for Salinity (ppt): 0.1
Enter the value for Cond (uS/cm): 178.8
Enter the value for Temp (°C): 28.2
Enter the value for TSS (mg/L): 0.0253
Enter the value for NO2 (ppm): 0.052
Enter the value for NO3 (ppm): 0.024
Enter the value for PO4  (ppm): 0.264
Enter the value for NH4 (ppm): 1.388
Enter the value for TN (ppm): 1.463
Enter the value for TP (ppm): 0.28
Enter the value for BGA-PC (ug/L): 1.73
Enter the value for Chlorophyll (ug/L): 14.93
Enter the value for Turbidity (FNU): back
Going back to Chlorophyll (ug/L)
Enter the value for Chlorophyll (ug/L): 14.94
Enter the value for Turbidity (FNU): 3.29
Enter the value for Coliform (CFU/100ml): 210
Enter the value for Cu (ppm): 9.5
Enter the value for Fe (ppm): 30.4
Enter the value for Mn(ppm): 53.3
Enter the value for Zn(

## **Artificial Neural Network Least**

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model

# Load the saved model
model = load_model('/content/ANN_Least.h5')
feature_columns = [
    'pH', 'DO (mg/L)', 'TDS (mg/L)', 'Salinity (ppt)', 'Cond (uS/cm)', 'Temp (°C)', 'TSS (mg/L)',
    'NO2 (ppm)', 'NO3 (ppm)', 'PO4  (ppm)', 'NH4 (ppm)', 'TN (ppm)', 'TP (ppm)', 'BGA-PC (ug/L)',
    'Chlorophyll (ug/L)', 'Turbidity (FNU)', 'Coliform (CFU/100ml)', 'Cu (ppm)', 'Fe (ppm)',
    'Mn(ppm)', 'Zn(ppm)', 'Cr(ppm)', 'Cd(ppm)', 'Hg(ppm)', 'As(ppm)', 'Pb(ppm)'
]

# Load data from both CSV files
df1 = pd.read_csv('/content/tikub_surface.csv')
df2 = pd.read_csv('/content/gunao_surface.csv')

combined_df = pd.concat([df1, df2], ignore_index=True)
if 'BOD (mg/L)' not in combined_df.columns:
    raise ValueError("Actual BOD column 'BOD (mg/L)' not found in the dataset.")
    sample_row = combined_df.sample(n=1)
    actual_bod = sample_row['BOD (mg/L)'].values[0]
    X_sample = sample_row[feature_columns]
    scaler = StandardScaler()
    combined_training_data = pd.concat([df1[feature_columns], df2[feature_columns]], axis=0)
    scaler.fit(combined_training_data)
    X_sample_scaled = scaler.transform(X_sample)
    predicted_bod = model.predict(X_sample_scaled).flatten()[0]

# Output the actual and predicted BOD values
print(f"Actual BOD (mg/L): {actual_bod}")
print(f"Predicted BOD (mg/L): {predicted_bod}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Actual BOD (mg/L): nan
Predicted BOD (mg/L): nan


# **Random Forest**

In [None]:
0import pandas as pd
import numpy as np
from tensorflow.keras.models import load_model

# Load the saved model
model = load_model('/content/ANN_Least.h5')
print("type 'back' to correct the previous input,\n 'preview' to see all inputs,\n 'clear' to reset all inputs")


# Define feature columns
feature_columns = [
    'pH', 'DO (mg/L)', 'TDS (mg/L)', 'Salinity (ppt)', 'Cond (uS/cm)', 'Temp (°C)', 'TSS (mg/L)',
    'NO2 (ppm)', 'NO3 (ppm)', 'PO4  (ppm)', 'NH4 (ppm)', 'TN (ppm)', 'TP (ppm)', 'BGA-PC (ug/L)',
    'Chlorophyll (ug/L)', 'Turbidity (FNU)', 'Coliform (CFU/100ml)', 'Cu (ppm)', 'Fe (ppm)',
    'Mn(ppm)', 'Zn(ppm)', 'Cr(ppm)', 'Cd(ppm)', 'Hg(ppm)', 'As(ppm)', 'Pb(ppm)'
]

def predict_bod_from_input():
    # Initialize a list to store user inputs
    user_data = [None] * len(feature_columns)

    # Loop through each feature to collect user input
    i = 0
    while i < len(feature_columns):
        feature = feature_columns[i]
        user_input = input(f"Enter the value for {feature}: ")

        # Navigate based on user commands
        if user_input.lower() == "back":
            if i > 0:
                i -= 1
                print(f"Going back to {feature_columns[i]}")
            else:
                print("Already at the first input.")
            continue

        elif user_input.lower() == "preview":
            print("Current inputs:")
            for j in range(len(feature_columns)):
                print(f"{feature_columns[j]}: {user_data[j]}")
            continue

        elif user_input.lower() == "clear":
            user_data = [None] * len(feature_columns)
            i = 0
            print("All inputs cleared. Starting over.")
            continue

        # Attempt to parse the input as a float
        try:
            user_data[i] = float(user_input)
            i += 1
        except ValueError:
            print("Invalid input. Please enter a numeric value.")

    # Convert the user data to a DataFrame
    input_df = pd.DataFrame([user_data], columns=feature_columns)

    # Predict BOD using the model directly without additional scaling
    predicted_bod = model.predict(input_df).flatten()[0]
    print(f"Predicted BOD (mg/L): {predicted_bod}")

# Call the function to start the prediction process
predict_bod_from_input()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


type 'back' to correct the previous input,
 'preview' to see all inputs,
 'clear' to reset all inputs
Enter the value for pH: 7.45
Enter the value for DO (mg/L): 0.42
Enter the value for TDS (mg/L): 108.16
Enter the value for Salinity (ppt): 0.11
Enter the value for Cond (uS/cm): 217.07
Enter the value for Temp (°C): 26.7
Enter the value for TSS (mg/L): 0.0388
Enter the value for NO2 (ppm): 0.042
Enter the value for NO3 (ppm): 0.044
Enter the value for PO4  (ppm): 0.052
Enter the value for NH4 (ppm): 0.518
Enter the value for TN (ppm): 0.604
Enter the value for TP (ppm): 0.072
Enter the value for BGA-PC (ug/L): -0.01
Enter the value for Chlorophyll (ug/L): 0.26
Enter the value for Turbidity (FNU): 0.69
Enter the value for Coliform (CFU/100ml): 1400
Enter the value for Cu (ppm): 11.5
Enter the value for Fe (ppm): 34
Enter the value for Mn(ppm): 54.7
Enter the value for Zn(ppm): 7.1
Enter the value for Cr(ppm): 5.8
Enter the value for Cd(ppm): 10.5
Enter the value for Hg(ppm): 1
Enter th

# **Artificial Neural Network**


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model

# Load the saved model
model = load_model('/content/ANN_Least.h5')

# Define the feature columns (same as used during training)
feature_columns = [
    'pH', 'DO (mg/L)', 'TDS (mg/L)', 'Salinity (ppt)', 'Cond (uS/cm)', 'Temp (°C)', 'TSS (mg/L)',
    'NO2 (ppm)', 'NO3 (ppm)', 'PO4  (ppm)', 'NH4 (ppm)', 'TN (ppm)', 'TP (ppm)', 'BGA-PC (ug/L)',
    'Chlorophyll (ug/L)', 'Turbidity (FNU)', 'Coliform (CFU/100ml)', 'Cu (ppm)', 'Fe (ppm)',
    'Mn(ppm)', 'Zn(ppm)', 'Cr(ppm)', 'Cd(ppm)', 'Hg(ppm)', 'As(ppm)', 'Pb(ppm)'
]

# Function to manually input data with back, clear, and preview options
def get_manual_input():
    user_input = {}
    index = 0

    print("Please enter the values for each feature. Type 'back' to go to the previous feature, 'clear' to clear all inputs, or 'preview' to see entered inputs.")

    while index < len(feature_columns):
        feature = feature_columns[index]

        # Show current inputs if requested
        if feature in user_input:
            print(f"{feature}: {user_input[feature]}")

        value = input(f"Enter value for {feature}: ")

        if value.lower() == 'back':
            # Go back to the previous feature
            if index > 0:
                index -= 1
            continue

        elif value.lower() == 'clear':
            # Clear all inputs
            user_input.clear()
            index = 0
            print("All inputs cleared. Restarting input...")
            continue

        elif value.lower() == 'preview':
            # Preview all inputs so far
            print("\nCurrent Inputs:")
            for feat in feature_columns:
                print(f"{feat}: {user_input.get(feat, 'Not entered')}")
            print("\n")
            continue

        else:
            try:
                # Attempt to parse the input as a float
                user_input[feature] = float(value)
                index += 1  # Move to the next feature
            except ValueError:
                print("Invalid input. Please enter a numeric value.")

    # Convert the user inputs to a DataFrame
    return pd.DataFrame([user_input])

# Collect input from the user
manual_input_df = get_manual_input()

# Load the scaler used for standardization (this should be the same scaler used during training)
scaler = StandardScaler()

# Load your dataset once to fit the scaler on all training data
df1 = pd.read_csv('/content/tikub_surface_bottom.csv')
df2 = pd.read_csv('/content/gunao_surface.csv')
combined_training_data = pd.concat([df1[feature_columns], df2[feature_columns]], axis=0)
scaler.fit(combined_training_data)

# Standardize the manual input features
manual_input_scaled = scaler.transform(manual_input_df)

# Make predictions using the loaded model
predicted_bod = model.predict(manual_input_scaled).flatten()[0]

# Output the prediction
print(f"\nPredicted BOD (mg/L): {predicted_bod}")




Please enter the values for each feature. Type 'back' to go to the previous feature, 'clear' to clear all inputs, or 'preview' to see entered inputs.
Enter value for pH: 13.64
Enter value for DO (mg/L): 16.76
Enter value for TDS (mg/L): 85.1
Enter value for Salinity (ppt): 0.11
Enter value for Cond (uS/cm): 169.4
Enter value for Temp (°C): 31.7
Enter value for TSS (mg/L): 0.0916
Enter value for NO2 (ppm): 0.037
Enter value for NO3 (ppm): 0.016
Enter value for PO4  (ppm): 0.274
Enter value for NH4 (ppm): 0.291
Enter value for TN (ppm): 0.344
Enter value for TP (ppm): 0.294
Enter value for BGA-PC (ug/L): 1.91
Enter value for Chlorophyll (ug/L): 0.68
Enter value for Turbidity (FNU): 1.3
Enter value for Coliform (CFU/100ml): 1100
Enter value for Cu (ppm): 16.9
Enter value for Fe (ppm): 46.9
Enter value for Mn(ppm): 70.5
Enter value for Zn(ppm): 10
Enter value for Cr(ppm): 9.2
Enter value for Cd(ppm): 13.5
Enter value for Hg(ppm): 1.1
Enter value for As(ppm): 3.2
Enter value for Pb(ppm): 4.

# **Zipping**

In [None]:
import shutil

# Path to the folder you want to zip
folder_to_zip = '/content/All_Shit'

# Output path for the zipped file
output_zip = '/content/New_Shit.zip'

# Zipping the folder
shutil.make_archive(output_zip.replace('.zip', ''), 'zip', folder_to_zip)

# Verify if the file was created
!ls -lh /content/


total 24M
drwxr-xr-x 9 root root 4.0K Oct 20 06:51  All_Shit
-rw-r--r-- 1 root root 9.5M Oct 20 06:58 'All_Shit(1).zip'
-rw-r--r-- 1 root root   22 Oct 20 06:57  All_Shit.zip
-rw-r--r-- 1 root root  38K Oct 20 05:32  gunao_surface.csv
-rw-r--r-- 1 root root  14M Oct 20 07:39  New_Shit.zip
drwxr-xr-x 1 root root 4.0K Oct 17 13:21  sample_data
-rw-r--r-- 1 root root  74K Oct 20 05:32  tikub_surface_bottom.csv
