In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.ensemble import RandomForestRegressor, StackingRegressor, VotingRegressor
from sklearn.linear_model import ElasticNet, Lasso, Ridge, LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.svm import SVR
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from xgboost import XGBRegressor
import pickle 

# Load the merged dataset and drop incomplete rows in a single, non-mutating
# step. No explicit `encoding=` is passed, so pandas' default applies.
# NOTE(review): dropna() here removes rows with a missing value in *any*
# column of the CSV, not only the modelling columns listed below - confirm
# that this is intended.
merged_df = pd.read_csv('New_Merged.csv').dropna()

# Select relevant features and target
features = [
    'Temperature',
    'Humidity',
    'Wind',
    'Wind Speed',
    'Condition',
    'pH (units)',
    'Ammonia (mg/L)',
    'Nitrate (mg/L)',
    'Inorganic Phosphate (mg/L)',
    'BOD (mg/l)',
    'Dissolved Oxygen (mg/l)',
    'Total coliforms (MPN/100ml)',
]
target = 'Phytoplankton (cells/ml)'

# Split data into training and testing sets (80/20, fixed seed so the split
# is reproducible across runs)
X = merged_df[features]
y = merged_df[target]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features: the scaler is fitted on the training split only and
# then applied unchanged to the test split, so no test statistics leak into
# training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


# Gradient Boosting Machine (XGBoost) with the library's default hyperparameters
xgb_model = XGBRegressor()
xgb_model.fit(X_train_scaled, y_train)
y_pred_xgb = xgb_model.predict(X_test_scaled)

# Hold-out metrics on the scaled test split
mse_xgb = mean_squared_error(y_test, y_pred_xgb)
mae_xgb = mean_absolute_error(y_test, y_pred_xgb)
r2_xgb = r2_score(y_test, y_pred_xgb)

print(f'XGBoost - Mean Squared Error: {mse_xgb}')
print(f'XGBoost - Mean Absolute Error: {mae_xgb}')
print(f'XGBoost - R^2 Score: {r2_xgb}')

XGBoost - Mean Squared Error: 3458734844.3228154
XGBoost - Mean Absolute Error: 34150.73689038826
XGBoost - R^2 Score: 0.5542632298490805


In [13]:

# Save the trained model to a file using pickle
with open('xgb_model.pkl', 'wb') as model_file:
    pickle.dump(xgb_model, model_file)

print("Model saved to 'xgb_model.pkl'")

# Example: Loading and using the saved model
# Load the model from the file.
# NOTE: pickle.load can execute arbitrary code embedded in the file; this is
# safe here only because the file was written by this notebook just above.
with open('xgb_model.pkl', 'rb') as model_file:
    loaded_xgb_model = pickle.load(model_file)

# The reloaded object is the XGBoost regressor, not an SVR; the old name is
# kept as an alias so any later cell that still refers to it keeps working.
loaded_svr_model = loaded_xgb_model

# Predict using the loaded model
y_pred_loaded_model = loaded_xgb_model.predict(X_test_scaled)

# Evaluate predictions of the reloaded model on the same scaled test split
mse_loaded_model = mean_squared_error(y_test, y_pred_loaded_model)
mae_loaded_model = mean_absolute_error(y_test, y_pred_loaded_model)
r2_loaded_model = r2_score(y_test, y_pred_loaded_model)

print(f'Loaded Model - Mean Squared Error: {mse_loaded_model}')
print(f'Loaded Model - Mean Absolute Error: {mae_loaded_model}')
print(f'Loaded Model - R^2 Score: {r2_loaded_model}')

Model saved to 'xgb_model.pkl'
Loaded Model - Mean Squared Error: 3458734844.3228154
Loaded Model - Mean Absolute Error: 34150.73689038826
Loaded Model - R^2 Score: 0.5542632298490805


In [17]:
# Persist the fitted StandardScaler alongside the model so that new inputs can
# be transformed with the same scaling that was applied to the training data.
with open('xgb_scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)