In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

# Load the datasets
gunao = pd.read_csv('gunao_surface.csv')
tikob = pd.read_csv('tikub_surface_bottom.csv')

# Filter for surface data in tikob dataset
tikob_surface = tikob[tikob['COLLECTION'] == 'Surface']

# Columns to exclude
columns_to_exclude = ['DATE', 'MONTH', 'YEAR', 'STATION', 'REPLICATE', 'COLLECTION', 'Latitude', 'Longtitude']

# Filter columns for both datasets
tikob_fil = tikob_surface.drop(columns=columns_to_exclude)
gunao_fil = gunao.drop(columns=columns_to_exclude)

# Define feature columns and target column
feature_columns = [
    'pH', 'DO (mg/L)', 'TDS (mg/L)', 'Salinity (ppt)', 'Cond (uS/cm)', 'Temp (°C)', 'TSS (mg/L)', 
    'NO2 (ppm)', 'NO3 (ppm)', 'PO4  (ppm)', 'NH4 (ppm)', 'TN (ppm)', 'TP (ppm)', 'BGA-PC (ug/L)', 
    'Chlorophyll (ug/L)', 'Turbidity (FNU)', 'Coliform (CFU/100ml)', 'Cu (ppm)', 'Fe (ppm)', 
    'Mn(ppm)', 'Zn(ppm)', 'Cr(ppm)', 'Cd(ppm)', 'Hg(ppm)', 'As(ppm)', 'Pb(ppm)'
]
target_column = 'BOD (mg/L)'

# Extract features and target from both datasets
X_tikob = tikob_fil[feature_columns]
y_tikob = tikob_fil[target_column]
X_gunao = gunao_fil[feature_columns]
y_gunao = gunao_fil[target_column]

# Combine the datasets
X_combined = pd.concat([X_tikob, X_gunao], axis=0)
y_combined = pd.concat([y_tikob, y_gunao], axis=0)

**TIERED ANN**

In [22]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

# Load the datasets
gunao = pd.read_csv('gunao_surface.csv')
tikob = pd.read_csv('tikub_surface_bottom.csv')

# Filter for surface data in tikob dataset
tikob_surface = tikob[tikob['COLLECTION'] == 'Surface']

# Columns to exclude
columns_to_exclude = ['DATE', 'MONTH', 'YEAR', 'STATION', 'REPLICATE', 'COLLECTION', 'Latitude', 'Longtitude']

# Filter columns for both datasets
tikob_fil = tikob_surface.drop(columns=columns_to_exclude)
gunao_fil = gunao.drop(columns=columns_to_exclude)

# Define feature columns and target column
feature_columns = [
    'pH', 'DO (mg/L)', 'TDS (mg/L)', 'Salinity (ppt)', 'Cond (uS/cm)', 'Temp (°C)', 'TSS (mg/L)', 
    'NO2 (ppm)', 'NO3 (ppm)', 'PO4  (ppm)', 'NH4 (ppm)', 'TN (ppm)', 'TP (ppm)', 'BGA-PC (ug/L)', 
    'Chlorophyll (ug/L)', 'Turbidity (FNU)', 'Coliform (CFU/100ml)', 'Cu (ppm)', 'Fe (ppm)', 
    'Mn(ppm)', 'Zn(ppm)', 'Cr(ppm)', 'Cd(ppm)', 'Hg(ppm)', 'As(ppm)', 'Pb(ppm)'
]
target_column = 'BOD (mg/L)'

# Extract features and target from both datasets
X_tikob = tikob_fil[feature_columns]
y_tikob = tikob_fil[target_column]
X_gunao = gunao_fil[feature_columns]
y_gunao = gunao_fil[target_column]

# Combine the datasets
X_combined = pd.concat([X_tikob, X_gunao], axis=0)
y_combined = pd.concat([y_tikob, y_gunao], axis=0)

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_combined)

# First tier: small subset for initial parameter tuning
X_small, X_rest, y_small, y_rest = train_test_split(X_scaled, y_combined, test_size=0.9, random_state=1)

# Train-test split for small subset
X_train_small, X_val_small, y_train_small, y_val_small = train_test_split(X_small, y_small, test_size=0.2, random_state=1)

# Define and train ANN model on small subset
ann_small = Sequential()
ann_small.add(Dense(128, input_dim=X_train_small.shape[1], activation='relu'))
ann_small.add(Dense(64, activation='sigmoid'))
ann_small.add(Dense(32, activation='sigmoid'))
ann_small.add(Dense(32, activation='sigmoid'))
ann_small.add(Dense(32, activation='sigmoid'))
ann_small.add(Dense(32, activation='sigmoid'))
ann_small.add(Dense(1, activation='linear'))

ann_small.compile(optimizer=Adam(learning_rate=0.01), loss='mean_squared_error')

ann_small.fit(X_train_small, y_train_small, epochs=100, batch_size=20, validation_data=(X_val_small, y_val_small), verbose=1)

# Evaluate on validation set
val_predictions_small = ann_small.predict(X_val_small).flatten()
val_mse_small = mean_squared_error(y_val_small, val_predictions_small)
val_rmse_small = np.sqrt(val_mse_small)
val_mae_small = mean_absolute_error(y_val_small, val_predictions_small)
val_r2_small = r2_score(y_val_small, val_predictions_small)
val_mape_small = np.mean(np.abs((y_val_small - val_predictions_small) / y_val_small)) * 100

print('Validation Results on Small Subset:')
print('MSE:', val_mse_small)
print('RMSE:', val_rmse_small)
print('MAE:', val_mae_small)
print('R^2:', val_r2_small)
print('MAPE:', val_mape_small, '%')

# Second tier: larger subset for more refined training
X_large, X_rest, y_large, y_rest = train_test_split(X_scaled, y_combined, test_size=0.5, random_state=1)

# Train-test split for large subset
X_train_large, X_val_large, y_train_large, y_val_large = train_test_split(X_large, y_large, test_size=0.2, random_state=1)

# Define and train ANN model on large subset
ann_large = Sequential()
ann_large.add(Dense(128, input_dim=X_train_large.shape[1], activation='relu'))
ann_large.add(Dense(64, activation='sigmoid'))
ann_large.add(Dense(32, activation='sigmoid'))
ann_large.add(Dense(32, activation='sigmoid'))
ann_large.add(Dense(32, activation='sigmoid'))
ann_large.add(Dense(32, activation='sigmoid'))
ann_large.add(Dense(1, activation='linear'))

ann_large.compile(optimizer=Adam(learning_rate=0.01), loss='mean_squared_error')

ann_large.fit(X_train_large, y_train_large, epochs=100, batch_size=20, validation_data=(X_val_large, y_val_large), verbose=1)

# Evaluate on validation set
val_predictions_large = ann_large.predict(X_val_large).flatten()
val_mse_large = mean_squared_error(y_val_large, val_predictions_large)
val_rmse_large = np.sqrt(val_mse_large)
val_mae_large = mean_absolute_error(y_val_large, val_predictions_large)
val_r2_large = r2_score(y_val_large, val_predictions_large)
val_mape_large = np.mean(np.abs((y_val_large - val_predictions_large) / y_val_large)) * 100

print('Validation Results on Large Subset:')
print('MSE:', val_mse_large)
print('RMSE:', val_rmse_large)
print('MAE:', val_mae_large)
print('R^2:', val_r2_large)
print('MAPE:', val_mape_large, '%')

# Third tier: full dataset for final training and testing
# Train-test split for full dataset
X_train_full, X_test_full, y_train_full, y_test_full = train_test_split(X_scaled, y_combined, test_size=0.2, random_state=1)

# Define and train ANN model on full dataset
ann_full = Sequential()
ann_full.add(Dense(128, input_dim=X_train_full.shape[1], activation='relu'))
ann_full.add(Dense(64, activation='sigmoid'))
ann_full.add(Dense(32, activation='sigmoid'))
ann_full.add(Dense(32, activation='sigmoid'))
ann_full.add(Dense(32, activation='sigmoid'))
ann_full.add(Dense(32, activation='sigmoid'))
ann_full.add(Dense(1, activation='linear'))

ann_full.compile(optimizer=Adam(learning_rate=0.01), loss='mean_squared_error')

ann_full.fit(X_train_full, y_train_full, epochs=100, batch_size=20, validation_split=0.1, verbose=1)

# Evaluate on test set
test_predictions_full = ann_full.predict(X_test_full).flatten()
test_mse_full = mean_squared_error(y_test_full, test_predictions_full)
test_rmse_full = np.sqrt(test_mse_full)
test_mae_full = mean_absolute_error(y_test_full, test_predictions_full)
test_r2_full = r2_score(y_test_full, test_predictions_full)
test_mape_full = np.mean(np.abs((y_test_full - test_predictions_full) / y_test_full)) * 100

print('Test Results on Full Dataset:')
print('MSE:', test_mse_full)
print('RMSE:', test_rmse_full)
print('MAE:', test_mae_full)
print('R^2:', test_r2_full)
print('MAPE:', test_mape_full, '%')

ann_full.save('ANN_TIERED.h5')

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

  return t[start:end]


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

In [5]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model

# Load the saved model
model = load_model('ANN_BayesSearch1.h5')

# Load new input data from a CSV file
new_input_data = pd.read_csv('Book1.csv')

# Define the feature columns (should be the same as the ones used in training)
feature_columns = [
    'pH', 'DO (mg/L)', 'TDS (mg/L)', 'Salinity (ppt)', 'Cond (uS/cm)', 'Temp (°C)', 'TSS (mg/L)', 
    'NO2 (ppm)', 'NO3 (ppm)', 'PO4  (ppm)', 'NH4 (ppm)', 'TN (ppm)', 'TP (ppm)', 'BGA-PC (ug/L)', 
    'Chlorophyll (ug/L)', 'Turbidity (FNU)', 'Coliform (CFU/100ml)', 'Cu (ppm)', 'Fe (ppm)', 
    'Mn(ppm)', 'Zn(ppm)', 'Cr(ppm)', 'Cd(ppm)', 'Hg(ppm)', 'As(ppm)', 'Pb(ppm)'
]

# Extract features from the new input data
X_new = new_input_data[feature_columns]

# Load the scaler used for standardization
scaler = StandardScaler()

# Assume the scaler has been previously fitted to the training data
# Here we fit the scaler on the combined original training data as an example
# In practice, you should load the already fitted scaler from your training phase
combined_training_data = pd.concat([tikob_fil[feature_columns], gunao_fil[feature_columns]], axis=0)
scaler.fit(combined_training_data)

# Standardize the new input data
X_new_scaled = scaler.transform(X_new)

# Make predictions using the loaded model
predictions = model.predict(X_new_scaled).flatten()

# Output predictions to the console
print('Predictions for new input data:')
print(predictions)


Predictions for new input data:
[1.0504938]


**LEAST ANN**

In [20]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.pipeline import Pipeline
from keras.wrappers.scikit_learn import KerasRegressor
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from skopt import BayesSearchCV
from skopt.space import Real, Integer, Categorical

# Load the datasets
gunao = pd.read_csv('gunao_surface.csv')
tikob = pd.read_csv('tikub_surface_bottom.csv')

# Filter for surface data in tikob dataset
tikob_surface = tikob[tikob['COLLECTION'] == 'Surface']

# Columns to exclude
columns_to_exclude = ['DATE', 'MONTH', 'YEAR', 'STATION', 'REPLICATE', 'COLLECTION', 'Latitude', 'Longtitude']

# Filter columns for both datasets
tikob_fil = tikob_surface.drop(columns=columns_to_exclude)
gunao_fil = gunao.drop(columns=columns_to_exclude)

# Define feature columns and target column
feature_columns = [
    'pH', 'DO (mg/L)', 'TDS (mg/L)', 'Salinity (ppt)', 'Cond (uS/cm)', 'Temp (°C)', 'TSS (mg/L)', 
    'NO2 (ppm)', 'NO3 (ppm)', 'PO4  (ppm)', 'NH4 (ppm)', 'TN (ppm)', 'TP (ppm)', 'BGA-PC (ug/L)', 
    'Chlorophyll (ug/L)', 'Turbidity (FNU)', 'Coliform (CFU/100ml)', 'Cu (ppm)', 'Fe (ppm)', 
    'Mn(ppm)', 'Zn(ppm)', 'Cr(ppm)', 'Cd(ppm)', 'Hg(ppm)', 'As(ppm)', 'Pb(ppm)'
]
target_column = 'BOD (mg/L)'

# Extract features and target from both datasets
X_tikob = tikob_fil[feature_columns]
y_tikob = tikob_fil[target_column]
X_gunao = gunao_fil[feature_columns]
y_gunao = gunao_fil[target_column]

# Combine the datasets
X_combined = pd.concat([X_tikob, X_gunao], axis=0)
y_combined = pd.concat([y_tikob, y_gunao], axis=0)

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_combined)

# Train-test split for full dataset
X_train_full, X_test_full, y_train_full, y_test_full = train_test_split(X_scaled, y_combined, test_size=0.2, random_state=1)

# Define the ANN model function
def create_model(n_layers=1, n_neurons=32, activation='relu', learning_rate=0.01):
    model = Sequential()
    model.add(Dense(n_neurons, input_dim=X_train_full.shape[1], activation=activation))
    for _ in range(n_layers - 1):
        model.add(Dense(n_neurons, activation=activation))
    model.add(Dense(1, activation='linear'))
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mean_squared_error')
    return model

# Create the KerasRegressor
model = KerasRegressor(build_fn=create_model, epochs=100, batch_size=20, verbose=1)

# Define the parameter grid
param_grid = {
    'n_layers': Integer(1, 10),
    'n_neurons': Integer(32, 256),
    'activation': Categorical(['relu', 'sigmoid']),
    'learning_rate': Real(1e-4, 1e-1, prior='log-uniform')
}

# Create the BayesSearchCV object
opt = BayesSearchCV(estimator=model, search_spaces=param_grid, n_iter=50, cv=3, n_jobs=-1, verbose=1)

# Perform the search
opt.fit(X_train_full, y_train_full)

# Print the best parameters
print('Best parameters found: ', opt.best_params_)

# Evaluate on test set
test_predictions_full = opt.predict(X_test_full).flatten()
test_mse_full = mean_squared_error(y_test_full, test_predictions_full)
test_rmse_full = np.sqrt(test_mse_full)
test_mae_full = mean_absolute_error(y_test_full, test_predictions_full)
test_r2_full = r2_score(y_test_full, test_predictions_full)
test_mape_full = np.mean(np.abs((y_test_full - test_predictions_full) / y_test_full)) * 100

print('Test Results on Full Dataset:')
print('MSE:', test_mse_full)
print('RMSE:', test_rmse_full)
print('MAE:', test_mae_full)
print('R^2:', test_r2_full)
print('MAPE:', test_mape_full, '%')

# Save the best model
opt.best_estimator_.model.save('ANN_BayesSearch1.h5')


  model = KerasRegressor(build_fn=create_model, epochs=100, batch_size=20, verbose=1)


Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fi

**GENETICS ANN**

In [15]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
import kerastuner as kt

# Load the datasets
gunao = pd.read_csv('gunao_surface.csv')
tikob = pd.read_csv('tikub_surface_bottom.csv')

# Filter for surface data in tikob dataset
tikob_surface = tikob[tikob['COLLECTION'] == 'Surface']

# Columns to exclude
columns_to_exclude = ['DATE', 'MONTH', 'YEAR', 'STATION', 'REPLICATE', 'COLLECTION', 'Latitude', 'Longtitude']

# Filter columns for both datasets
tikob_fil = tikob_surface.drop(columns=columns_to_exclude)
gunao_fil = gunao.drop(columns=columns_to_exclude)

# Define feature columns and target column
feature_columns = [
    'pH', 'DO (mg/L)', 'TDS (mg/L)', 'Salinity (ppt)', 'Cond (uS/cm)', 'Temp (°C)', 'TSS (mg/L)', 
    'NO2 (ppm)', 'NO3 (ppm)', 'PO4  (ppm)', 'NH4 (ppm)', 'TN (ppm)', 'TP (ppm)', 'BGA-PC (ug/L)', 
    'Chlorophyll (ug/L)', 'Turbidity (FNU)', 'Coliform (CFU/100ml)', 'Cu (ppm)', 'Fe (ppm)', 
    'Mn(ppm)', 'Zn(ppm)', 'Cr(ppm)', 'Cd(ppm)', 'Hg(ppm)', 'As(ppm)', 'Pb(ppm)'
]
target_column = 'BOD (mg/L)'

# Extract features and target from both datasets
X_tikob = tikob_fil[feature_columns]
y_tikob = tikob_fil[target_column]
X_gunao = gunao_fil[feature_columns]
y_gunao = gunao_fil[target_column]

# Combine the datasets
X_combined = pd.concat([X_tikob, X_gunao], axis=0)
y_combined = pd.concat([y_tikob, y_gunao], axis=0)

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_combined)

# Train-test split for full dataset
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_combined, test_size=0.2, random_state=1)

# Define the model-building function for keras-tuner
def build_model(hp):
    model = Sequential()
    model.add(Dense(units=hp.Int('units1', min_value=32, max_value=512, step=32), input_dim=X_train.shape[1], activation='relu'))
    for i in range(hp.Int('num_layers', 1, 10)):
        model.add(Dense(units=hp.Int(f'units_{i}', min_value=32, max_value=512, step=32), activation='sigmoid'))
    model.add(Dense(1, activation='linear'))
    
    model.compile(optimizer=Adam(learning_rate=hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])), 
                  loss='mean_squared_error')
    return model

# Instantiate the tuner
tuner = kt.Hyperband(
    build_model,
    objective='val_loss',
    max_epochs=100,
    factor=3,
    directory='my_dir',
    project_name='ann_opt1'
)

# Perform the hyperparameter search
tuner.search(X_train, y_train, epochs=100, validation_split=0.2, verbose=1)

# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# Build the model with the optimal hyperparameters
model = tuner.hypermodel.build(best_hps)

# Train the model
history = model.fit(X_train, y_train, epochs=100, validation_split=0.2, verbose=1)

# Evaluate on the test set
test_predictions = model.predict(X_test).flatten()
test_mse = mean_squared_error(y_test, test_predictions)
test_rmse = np.sqrt(test_mse)
test_mae = mean_absolute_error(y_test, test_predictions)
test_r2 = r2_score(y_test, test_predictions)
test_mape = np.mean(np.abs((y_test - test_predictions) / y_test)) * 100

print('Test Results:')
print('MSE:', test_mse)
print('RMSE:', test_rmse)
print('MAE:', test_mae)
print('R^2:', test_r2)
print('MAPE:', test_mape, '%')

model.save('ANN_TUNED.h5')


Reloading Tuner from my_dir\ann_opt1\tuner0.json
Epoch 1/100


  return t[start:end]


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

In [11]:
pip install keras-tuner

Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
     -------------------------------------- 129.1/129.1 kB 2.5 MB/s eta 0:00:00
Collecting kt-legacy
  Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3 -> 24.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [10]:
import tensorflow as tf

# List all physical devices
physical_devices = tf.config.list_physical_devices()

# Print out the physical devices
print("All Physical Devices:", physical_devices)

# Check if there are any GPUs
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    print("GPUs are available:")
    for gpu in gpus:
        print(gpu)
else:
    print("No GPUs available.")


All Physical Devices: [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]
No GPUs available.


Collecting tensorflow-gpu==2.12.0
  Using cached tensorflow-gpu-2.12.0.tar.gz (2.6 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting python_version>"3.7"
  Using cached python_version-0.0.2-py2.py3-none-any.whl (3.4 kB)
Building wheels for collected packages: tensorflow-gpu
  Building wheel for tensorflow-gpu (setup.py): started
  Building wheel for tensorflow-gpu (setup.py): finished with status 'error'
  Running setup.py clean for tensorflow-gpu
Failed to build tensorflow-gpu
Installing collected packages: python_version, tensorflow-gpu
  Running setup.py install for tensorflow-gpu: started
  Running setup.py install for tensorflow-gpu: finished with status 'error'
Note: you may need to restart the kernel to use updated packages.


  error: subprocess-exited-with-error
  
  python setup.py bdist_wheel did not run successfully.
  exit code: 1
  
  [18 lines of output]
  Traceback (most recent call last):
    File "<string>", line 2, in <module>
    File "<pip-setuptools-caller>", line 34, in <module>
    File "C:\Users\Admin\AppData\Local\Temp\pip-install-wyomwncf\tensorflow-gpu_f11440cb216e4814b267ec79312f039e\setup.py", line 37, in <module>
  Exception:
  
  The "tensorflow-gpu" package has been removed!
  
  Please install "tensorflow" instead.
  
  Other than the name, the two packages have been identical
  since TensorFlow 2.1, or roughly since Sep 2019. For more
  information, see: pypi.org/project/tensorflow-gpu
  
  
  [end of output]
  
  note: This error originates from a subprocess, and is likely not a problem with pip.
  ERROR: Failed building wheel for tensorflow-gpu
  error: subprocess-exited-with-error
  
  Running setup.py install for tensorflow-gpu did not run successfully.
  exit code: 1
  
  [18

In [11]:
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))


Num GPUs Available:  0


In [14]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
import keras_tuner as kt
import tensorflow as tf

# Load the datasets
gunao = pd.read_csv('gunao_surface.csv')
tikob = pd.read_csv('tikub_surface_bottom.csv')

# Filter for surface data in tikob dataset
tikob_surface = tikob[tikob['COLLECTION'] == 'Surface']

# Columns to exclude
columns_to_exclude = ['DATE', 'MONTH', 'YEAR', 'STATION', 'REPLICATE', 'COLLECTION', 'Latitude', 'Longtitude']

# Filter columns for both datasets
tikob_fil = tikob_surface.drop(columns=columns_to_exclude)
gunao_fil = gunao.drop(columns=columns_to_exclude)

# Define feature columns and target column
feature_columns = [
    'pH', 'DO (mg/L)', 'TDS (mg/L)', 'Salinity (ppt)', 'Cond (uS/cm)', 'Temp (°C)', 'TSS (mg/L)', 
    'NO2 (ppm)', 'NO3 (ppm)', 'PO4  (ppm)', 'NH4 (ppm)', 'TN (ppm)', 'TP (ppm)', 'BGA-PC (ug/L)', 
    'Chlorophyll (ug/L)', 'Turbidity (FNU)', 'Coliform (CFU/100ml)', 'Cu (ppm)', 'Fe (ppm)', 
    'Mn(ppm)', 'Zn(ppm)', 'Cr(ppm)', 'Cd(ppm)', 'Hg(ppm)', 'As(ppm)', 'Pb(ppm)'
]
target_column = 'BOD (mg/L)'

# Extract features and target from both datasets
X_tikob = tikob_fil[feature_columns]
y_tikob = tikob_fil[target_column]
X_gunao = gunao_fil[feature_columns]
y_gunao = gunao_fil[target_column]

# Combine the datasets
X_combined = pd.concat([X_tikob, X_gunao], axis=0)
y_combined = pd.concat([y_tikob, y_gunao], axis=0)

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_combined)

# Train-test split for full dataset
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_combined, test_size=0.2, random_state=1)

# Define the model-building function for keras-tuner
def build_model(hp):
    model = Sequential()
    model.add(Dense(units=hp.Int('units1', min_value=32, max_value=512, step=32), input_dim=X_train.shape[1], activation='relu'))
    for i in range(hp.Int('num_layers', 1, 4)):
        model.add(Dense(units=hp.Int(f'units_{i}', min_value=32, max_value=512, step=32), activation='sigmoid'))
    model.add(Dense(1, activation='linear'))
    
    model.compile(optimizer=Adam(learning_rate=hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])), 
                  loss='mean_squared_error')
    return model

# Use a distribution strategy for parallelism
strategy = tf.distribute.MirroredStrategy()

with strategy.scope():
    # Instantiate the tuner
    tuner = kt.Hyperband(
        build_model,
        objective='val_loss',
        max_epochs=100,
        factor=3,
        directory='my_dir',
        project_name='ann_opt2',
        executions_per_trial=2  # Run multiple trials at the same time
    )

    # Perform the hyperparameter search
    tuner.search(X_train, y_train, epochs=100, validation_split=0.2, verbose=1)

# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# Build the model with the optimal hyperparameters
model = tuner.hypermodel.build(best_hps)

# Train the model
history = model.fit(X_train, y_train, epochs=100, validation_split=0.2, verbose=1)

# Evaluate on the test set
test_predictions = model.predict(X_test).flatten()
test_mse = mean_squared_error(y_test, test_predictions)
test_rmse = np.sqrt(test_mse)
test_mae = mean_absolute_error(y_test, test_predictions)
test_r2 = r2_score(y_test, test_predictions)
test_mape = np.mean(np.abs((y_test - test_predictions) / y_test)) * 100

print('Test Results:')
print('MSE:', test_mse)
print('RMSE:', test_rmse)
print('MAE:', test_mae)
print('R^2:', test_r2)
print('MAPE:', test_mape, '%')

model.save('ANN_TUNED1.h5')


Trial 254 Complete [00h 00m 21s]
val_loss: 0.34654878079891205

Best val_loss So Far: 0.15153104811906815
Total elapsed time: 00h 21m 31s


IndexError: pop from empty list