In [37]:
import numpy as np
import pandas as pd
from xgboost import XGBRegressor
from sklearn.preprocessing import StandardScaler
import pyswarms as ps
from pyswarms.single.global_best import GlobalBestPSO

In [23]:
# Load dataset
data = pd.read_csv("dataset_1.csv")

In [25]:
# Feature selection
X = data[["Depth", "WOB", "SURF_RPM", "PHIF", "VSH"]]  # Input features
y = data["ROP_AVG"]  # Target feature

In [27]:
# Normalize/Standardize data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [29]:
# Train an XGBoost surrogate model
surrogate_model = XGBRegressor(n_estimators=100, random_state=42)
surrogate_model.fit(X_scaled, y)

In [45]:
def objective_function(x):
    X_new = scaler.transform(x)  
    rop = surrogate_model.predict(X_new)
    return -rop 

In [47]:
# Define parameter bounds
lb = [data["Depth"].min(), data["WOB"].min(), data["SURF_RPM"].min(), data["PHIF"].min(), data["VSH"].min()]  # Lower bounds
ub = [data["Depth"].max(), data["WOB"].max(), data["SURF_RPM"].max(), data["PHIF"].max(), data["VSH"].max()]  # Upper bounds

In [49]:
# Set up PSO
options = {'c1': 0.5, 'c2': 0.3, 'w': 0.9}  # PSO hyperparameters
n_particles = 30  # Number of particles
n_dimensions = 5  # Number of parameters (Depth, WOB, SURF_RPM, PHIF, VSH)

In [51]:
# Initialize PSO
optimizer = GlobalBestPSO(n_particles=n_particles, dimensions=n_dimensions, options=options, bounds=(lb, ub))

In [92]:
# Run PSO
best_rop, best_params = optimizer.optimize(objective_function, iters=100)

2025-02-12 00:03:28,957 - pyswarms.single.global_best - INFO - Optimize for 100 iters with {'c1': 0.5, 'c2': 0.3, 'w': 0.9}
pyswarms.single.global_best: 100%|█████████████████████████████████████████████████████████|100/100, best_cost=-0.00857
2025-02-12 00:03:29,861 - pyswarms.single.global_best - INFO - Optimization finished | best cost: -0.008572020567953587, best pos: [3.91332961e+03 1.73497942e+04 2.36139079e+00 1.20551758e-01
 3.49183784e-01]


In [104]:
# Convert the best ROP back to positive (since we minimized the negative)
best_rop = -best_rop

In [112]:
# Print results
print("Best Parameters (Depth, WOB, SURF_RPM, PHIF, VSH):", best_params)
print("Optimized ROP_AVG:", -best_rop)

Best Parameters (Depth, WOB, SURF_RPM, PHIF, VSH): [3.91332961e+03 1.73497942e+04 2.36139079e+00 1.20551758e-01
 3.49183784e-01]
Optimized ROP_AVG: 0.008572020567953587


In [121]:
Depth = 3.91332049e+03 
WOB = 1.73498444e+04 
SURF_RPM = 2.60404087e+00  
PHIF = 1.26000152e-01  
VSH= 7.05599683e-01

print(f" Best Parameters (Depth, WOB, SURF_RPM, PHIF, VSH): {Depth}, {WOB}, {SURF_RPM}, {PHIF}, {VSH}")

 Best Parameters (Depth, WOB, SURF_RPM, PHIF, VSH): 3913.32049, 17349.8444, 2.60404087, 0.126000152, 0.705599683


In [69]:
# Evaluate baseline performance on the test set
from sklearn.model_selection import train_test_split

In [71]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [73]:
# Predict ROP_AVG for the test dataset
y_pred_test = surrogate_model.predict(X_test)

In [75]:
# Calculate Baseline ROP_AVG (mean of predictions on the test set)
baseline_rop_avg = np.mean(y_pred_test)

In [77]:
# Print baseline performance
print("Baseline ROP_AVG (Test Set):", baseline_rop_avg)

Baseline ROP_AVG (Test Set): 0.008342685
