Testing Different Models

In [16]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.ensemble import RandomForestRegressor, StackingRegressor
from sklearn.linear_model import ElasticNet, Lasso, Ridge, LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from xgboost import XGBRegressor

# Load datasets with specified encoding
merged_df = pd.read_csv('merged_dataset.csv')

# Select relevant features and target
features = ['pH (units)', 'Nitrate (mg/L)', 'Dissolved Oxygen (mg/l)']
target = 'Phytoplankton (cells/ml)'

merged_df = merged_df.dropna()

# Split data into training and testing sets
X = merged_df[features]
y = merged_df[target]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define and compile the ANN model
ann_model = Sequential()
ann_model.add(Dense(64, input_dim=X_train_scaled.shape[1], activation='relu'))  # Input layer
ann_model.add(Dense(32, activation='relu'))  # Hidden layer
ann_model.add(Dense(1))  # Output layer

ann_model.compile(optimizer='adam', loss='mean_squared_error')

# Train the ANN model
history = ann_model.fit(X_train_scaled, y_train, epochs=100, batch_size=32, validation_split=0.2, verbose=1)

# Make predictions and evaluate ANN
y_pred_ann = ann_model.predict(X_test_scaled).flatten()  # Flatten to convert from 2D to 1D array
mse_ann = mean_squared_error(y_test, y_pred_ann)
mae_ann = mean_absolute_error(y_test, y_pred_ann)
r2_ann = r2_score(y_test, y_pred_ann)

print(f'ANN - Mean Squared Error: {mse_ann}')
print(f'ANN - Mean Absolute Error: {mae_ann}')
print(f'ANN - R^2 Score: {r2_ann}')

# Gradient Boosting Machine (XGBoost)
xgb_model = XGBRegressor()
xgb_model.fit(X_train_scaled, y_train)
y_pred_xgb = xgb_model.predict(X_test_scaled)
mse_xgb = mean_squared_error(y_test, y_pred_xgb)
mae_xgb = mean_absolute_error(y_test, y_pred_xgb)
r2_xgb = r2_score(y_test, y_pred_xgb)

print(f'XGBoost - Mean Squared Error: {mse_xgb}')
print(f'XGBoost - Mean Absolute Error: {mae_xgb}')
print(f'XGBoost - R^2 Score: {r2_xgb}')

# Random Forest
rf_model = RandomForestRegressor()
rf_model.fit(X_train_scaled, y_train)
y_pred_rf = rf_model.predict(X_test_scaled)
mse_rf = mean_squared_error(y_test, y_pred_rf)
mae_rf = mean_absolute_error(y_test, y_pred_rf)
r2_rf = r2_score(y_test, y_pred_rf)

print(f'Random Forest - Mean Squared Error: {mse_rf}')
print(f'Random Forest - Mean Absolute Error: {mae_rf}')
print(f'Random Forest - R^2 Score: {r2_rf}')

# Elastic Net
en_model = ElasticNet()
en_model.fit(X_train_scaled, y_train)
y_pred_en = en_model.predict(X_test_scaled)
mse_en = mean_squared_error(y_test, y_pred_en)
mae_en = mean_absolute_error(y_test, y_pred_en)
r2_en = r2_score(y_test, y_pred_en)

print(f'Elastic Net - Mean Squared Error: {mse_en}')
print(f'Elastic Net - Mean Absolute Error: {mae_en}')
print(f'Elastic Net - R^2 Score: {r2_en}')

# K-Nearest Neighbors (KNN)
knn_model = KNeighborsRegressor()
knn_model.fit(X_train_scaled, y_train)
y_pred_knn = knn_model.predict(X_test_scaled)
mse_knn = mean_squared_error(y_test, y_pred_knn)
mae_knn = mean_absolute_error(y_test, y_pred_knn)
r2_knn = r2_score(y_test, y_pred_knn)

print(f'KNN - Mean Squared Error: {mse_knn}')
print(f'KNN - Mean Absolute Error: {mae_knn}')
print(f'KNN - R^2 Score: {r2_knn}')

# Polynomial Regression
poly_features = PolynomialFeatures(degree=2)  # Adjust the degree as needed
poly_pipeline = Pipeline([
    ('poly_features', poly_features),
    ('lin_reg', LinearRegression())
])
poly_pipeline.fit(X_train_scaled, y_train)
y_pred_poly = poly_pipeline.predict(X_test_scaled)
mse_poly = mean_squared_error(y_test, y_pred_poly)
mae_poly = mean_absolute_error(y_test, y_pred_poly)
r2_poly = r2_score(y_test, y_pred_poly)

print(f'Polynomial Regression - Mean Squared Error: {mse_poly}')
print(f'Polynomial Regression - Mean Absolute Error: {mae_poly}')
print(f'Polynomial Regression - R^2 Score: {r2_poly}')

# Lasso Regression
lasso_model = Lasso(alpha=0.1)  # Adjust alpha as needed
lasso_model.fit(X_train_scaled, y_train)
y_pred_lasso = lasso_model.predict(X_test_scaled)
mse_lasso = mean_squared_error(y_test, y_pred_lasso)
mae_lasso = mean_absolute_error(y_test, y_pred_lasso)
r2_lasso = r2_score(y_test, y_pred_lasso)

print(f'Lasso Regression - Mean Squared Error: {mse_lasso}')
print(f'Lasso Regression - Mean Absolute Error: {mae_lasso}')
print(f'Lasso Regression - R^2 Score: {r2_lasso}')

# Ridge Regression
ridge_model = Ridge(alpha=0.1)  # Adjust alpha as needed
ridge_model.fit(X_train_scaled, y_train)
y_pred_ridge = ridge_model.predict(X_test_scaled)
mse_ridge = mean_squared_error(y_test, y_pred_ridge)
mae_ridge = mean_absolute_error(y_test, y_pred_ridge)
r2_ridge = r2_score(y_test, y_pred_ridge)

print(f'Ridge Regression - Mean Squared Error: {mse_ridge}')
print(f'Ridge Regression - Mean Absolute Error: {mae_ridge}')
print(f'Ridge Regression - R^2 Score: {r2_ridge}')

# Stacking Regressor
base_models = [
    ('rf', RandomForestRegressor()),
    ('xgb', XGBRegressor())
]
stacked_model = StackingRegressor(estimators=base_models, final_estimator=LinearRegression())
stacked_model.fit(X_train_scaled, y_train)
y_pred_stacked = stacked_model.predict(X_test_scaled)
mse_stacked = mean_squared_error(y_test, y_pred_stacked)
mae_stacked = mean_absolute_error(y_test, y_pred_stacked)
r2_stacked = r2_score(y_test, y_pred_stacked)

print(f'Stacking Regressor - Mean Squared Error: {mse_stacked}')
print(f'Stacking Regressor - Mean Absolute Error: {mae_stacked}')
print(f'Stacking Regressor - R^2 Score: {r2_stacked}')


Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - loss: 14635640832.0000 - val_loss: 10775567360.0000
Epoch 2/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 10809259008.0000 - val_loss: 10775544832.0000
Epoch 3/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 17688870912.0000 - val_loss: 10775519232.0000
Epoch 4/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 20033884160.0000 - val_loss: 10775491584.0000
Epoch 5/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 10468716544.0000 - val_loss: 10775459840.0000
Epoch 6/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 15074183168.0000 - val_loss: 10775416832.0000
Epoch 7/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 13356825600.0000 - val_loss: 10775365632.0000
Epoch 8/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0