In [12]:
import random
import numpy as np
from deap import base, creator, tools, algorithms
from xgboost import XGBRegressor
from sklearn.preprocessing import StandardScaler
import pandas as pd

In [14]:
# Load dataset
data = pd.read_csv("dataset_1.csv")  # Replace with your file path

In [16]:
# Feature selection
X = data[["Depth", "WOB", "SURF_RPM", "PHIF", "VSH"]]  # Input features
y = data["ROP_AVG"]  # Target feature

In [18]:
# Normalize/Standardize data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [20]:
# Train an XGBoost surrogate model
surrogate_model = XGBRegressor(n_estimators=100, random_state=42)
surrogate_model.fit(X_scaled, y)

In [22]:
# Define the objective function for Genetic Algorithm
def objective_function(individual):
    # Scale the input
    X_new = scaler.transform([individual])
    # Predict ROP_AVG using the XGBoost model
    rop = surrogate_model.predict(X_new)
    return rop[0],  # Return the predicted ROP_AVG (as a tuple for DEAP)


In [24]:
# Define parameter bounds
param_bounds = {
    "Depth": (data["Depth"].min(), data["Depth"].max()),
    "WOB": (data["WOB"].min(), data["WOB"].max()),
    "SURF_RPM": (data["SURF_RPM"].min(), data["SURF_RPM"].max()),
    "PHIF": (data["PHIF"].min(), data["PHIF"].max()),
    "VSH": (data["VSH"].min(), data["VSH"].max())
}

In [26]:
# Set up Genetic Algorithm
creator.create("FitnessMax", base.Fitness, weights=(1.0,))  # Maximize ROP_AVG
creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()

In [28]:
# Define genes (parameters) with their bounds
for i, (key, (low, high)) in enumerate(param_bounds.items()):
    toolbox.register(f"attr_{i}", random.uniform, low, high)

In [30]:
# Create an individual with 5 genes (Depth, WOB, SURF_RPM, PHIF, VSH)
toolbox.register("individual", tools.initCycle, creator.Individual,
                 (toolbox.attr_0, toolbox.attr_1, toolbox.attr_2, toolbox.attr_3, toolbox.attr_4), n=1)

In [32]:
# Create a population
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

In [34]:
# Register the objective function
toolbox.register("evaluate", objective_function)

In [36]:
# Register genetic operators
toolbox.register("mate", tools.cxBlend, alpha=0.5)  # Blend crossover
toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=1, indpb=0.2)  # Gaussian mutation
toolbox.register("select", tools.selTournament, tournsize=3)  # Tournament selection

In [38]:
# Create initial population
population = toolbox.population(n=50)  # Population size = 50

In [40]:
# Run the Genetic Algorithm
ngen = 40  # Number of generations
cxpb = 0.7  # Crossover probability
mutpb = 0.2  # Mutation probability

In [42]:
# Run the algorithm
final_pop = algorithms.eaSimple(population, toolbox, cxpb, mutpb, ngen, verbose=True)



gen	nevals
0  	50    
1  	32    
2  	42    
3  	35    
4  	38    




5  	42    
6  	41    
7  	43    
8  	36    




9  	39    
10 	36    
11 	41    
12 	39    
13 	36    




14 	43    
15 	36    
16 	42    
17 	35    




18 	32    
19 	38    
20 	34    
21 	33    




22 	40    
23 	43    
24 	35    
25 	41    




26 	37    
27 	40    
28 	39    




29 	43    
30 	34    
31 	35    
32 	43    




33 	33    
34 	38    
35 	35    
36 	35    
37 	33    




38 	39    
39 	40    
40 	40    




In [44]:
# Get the best individual
best_individual = tools.selBest(population, k=1)[0]
best_rop = objective_function(best_individual)[0]



In [46]:
print("Best Individual (Optimized Parameters):", best_individual)
print("Optimized ROP_AVG:", best_rop)

Best Individual (Optimized Parameters): [3786.2959738161917, 19016.657530064545, 2.565001782686284, 5.506333622414232, 4.309113568916649]
Optimized ROP_AVG: 0.008572021


---

The **magnitude of improvement** on ROP_AVG using **Genetic Algorithm** is calculated as follows:

- **Improvement** = Optimized ROP_AVG (GA) - Baseline ROP_AVG  
  \( 0.008572021 - 0.00779683 = 0.000775191 \)

- **Percentage Improvement** = \( \frac{{\text{Improvement}}}{{\text{Baseline ROP_AVG}}} \times 100 \)  
  \( \frac{{0.000775191}}{{0.00779683}} \times 100 \approx 9.95\% \)

A **9.95% improvement** using the **Genetic Algorithm** is also significant, offering potential **cost savings** and **efficiency improvements** in industrial drilling operations.

Again, to ensure the best results, we will compare this with the performance of **other Optimization techniques**.

---