In [68]:
from sklearn.ensemble import RandomForestRegressor  #Regression Model
import pandas as pd # For data wrangling
from sklearn.model_selection import train_test_split # For data splitting
from sklearn.metrics import mean_squared_error,r2_score # For model evaluation
import numpy as np # for mathematical manipulation
import warnings
from sklearn.exceptions import DataConversionWarning
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=DataConversionWarning)

In [119]:
data=pd.read_excel(r"C:\Users\USER\Desktop\Biodiesel Yield\data 1.xlsx")
data.head()

Unnamed: 0,X1,X2,X3,X4,Y
0,80,5,105,10.5,51.68
1,60,3,105,10.5,59.89
2,60,3,105,10.5,61.09
3,40,3,60,10.5,49.56
4,40,3,150,10.5,58.02


In [120]:
X=data.drop(columns="Y")
y=data["Y"]

In [121]:
X1=X.values
y1=y.values.reshape(-1,1)
X_train1, X_test1, y_train1, y_test1= train_test_split(X1, y1, test_size=0.2, random_state=8)

print(X_train1.shape)
print(y_train1.shape)

(23, 4)
(23, 1)


In [122]:
# Define the RF fitness function
def ran_forest_fitness(solution):
    n_estimators = int(solution[0])
    max_features = int(solution[1])
    min_samples_leaf = int(solution[2])
    max_depth = int(solution[3])
        
    rf = RandomForestRegressor(n_estimators=n_estimators, max_features=max_features, 
                               min_samples_leaf=min_samples_leaf, max_depth=max_depth)
    rf.fit(X_train1, y_train1.ravel())
    y_predict = rf.predict(X_test1)
    fitness = -r2_score(y_test1, y_predict)  # Minimize negative R^2 score
    return fitness

In [123]:
# Define the hyperparameter bounds
param_bounds = {
    "n_estimators": list(range(100,501,100)),  
    "max_features": [1, 2, 3, 4],
    "min_samples_leaf": [1,2,3],
    "max_depth":  list(range(1, 6))
}
param_bounds

{'n_estimators': [100, 200, 300, 400, 500],
 'max_features': [1, 2, 3, 4],
 'min_samples_leaf': [1, 2, 3],
 'max_depth': [1, 2, 3, 4, 5]}

In [124]:
lb = [min(param_bounds["n_estimators"]), min(param_bounds["max_features"]), 
      min(param_bounds["min_samples_leaf"]), min(param_bounds["max_depth"])]

ub = [max(param_bounds["n_estimators"]), max(param_bounds["max_features"]), 
      max(param_bounds["min_samples_leaf"]), max(param_bounds["max_depth"])]

In [125]:
def genetic_algorithm(pop_size, num_generations, mutation_rate):
    # Initialize population
    population = np.random.uniform(low=lb, high=ub, size=(pop_size, len(lb)))
    
    # Main loop
    for generation in range(num_generations):
        # Evaluate fitness
        fitness = np.array([ran_forest_fitness(solution) for solution in population])
        
        # Find best solution in this generation
        best_index = np.argmax(fitness)
        best_fitness = fitness[best_index]
        
        print(f"Generation {generation + 1}: Best Fitness = {best_fitness:.4f}")
        
        # Select indices for parents based on fitness
        parent_indices = np.argsort(fitness)[:pop_size // 2]
        
        # Get parents from the population array
        parents = population[parent_indices]

        # Create offspring through crossover and mutation
        offspring = []
        for _ in range(pop_size // 2):
            parent1, parent2 = np.random.choice(parent_indices, size=2, replace=False)
            parent1 = population[parent1]
            parent2 = population[parent2]
            crossover_point = np.random.randint(1, len(parent1) - 1)
            child = np.concatenate((parent1[:crossover_point], parent2[crossover_point:]))
            offspring.append(child)

        offspring = np.array(offspring)

        # Apply mutation
        mask = np.random.random(size=offspring.shape) < mutation_rate
        mutation = np.random.uniform(low=lb, high=ub, size=offspring.shape)
        offspring[mask] = mutation[mask]

        # Combine parents and offspring to form next generation
        population = np.vstack((parents, offspring))
    
    # Find best solution overall
    best_index = np.argmax(fitness)
    best_solution = population[best_index]
    
    return best_solution

In [126]:
# Hyperparameters for the genetic algorithm
population_size = 100
generations = 100
mutation_rate = 0.05

In [127]:
# Run the genetic algorithm
best_solution = genetic_algorithm(population_size, generations, mutation_rate)
best_fitness = ran_forest_fitness(best_solution)
print("Best Fitness:", best_fitness)
print("Optimal Solution:", best_solution)

Generation 1: Best Fitness = -0.4567
Generation 2: Best Fitness = -0.7183
Generation 3: Best Fitness = -0.8177
Generation 4: Best Fitness = -0.6849
Generation 5: Best Fitness = -0.8236
Generation 6: Best Fitness = -0.7604
Generation 7: Best Fitness = -0.6349
Generation 8: Best Fitness = -0.7836
Generation 9: Best Fitness = -0.7847
Generation 10: Best Fitness = -0.7622
Generation 11: Best Fitness = -0.6739
Generation 12: Best Fitness = -0.8510
Generation 13: Best Fitness = -0.6649
Generation 14: Best Fitness = -0.6335
Generation 15: Best Fitness = -0.6717
Generation 16: Best Fitness = -0.8346
Generation 17: Best Fitness = -0.8196
Generation 18: Best Fitness = -0.6698
Generation 19: Best Fitness = -0.6808
Generation 20: Best Fitness = -0.7197
Generation 21: Best Fitness = -0.6651
Generation 22: Best Fitness = -0.8321
Generation 23: Best Fitness = -0.7270
Generation 24: Best Fitness = -0.7790
Generation 25: Best Fitness = -0.7489
Generation 26: Best Fitness = -0.7132
Generation 27: Best F

In [12]:
best_num_estimator = int(best_solution[0])
best_max_features = int(best_solution[1])
best_max_sample_leaf = int(best_solution[2])
best_max_depth = int(best_solution[3])

In [14]:
print("Best Hyperparameters using Genetic Algorithm:")
print("n_estimators:", best_num_estimator)
print("max_features:", best_max_features)
print("max_samples_leaf:", best_max_sample_leaf)
print("max_depth:", best_max_depth)

Best Hyperparameters using Genetic Algorithm:
n_estimators: 418
max_features: 3
max_samples_leaf: 1
max_depth: 3


In [27]:
rf_optimized = RandomForestRegressor(n_estimators=best_num_estimator, 
                                     max_features=best_max_features, 
                                     min_samples_leaf=best_max_sample_leaf,
                                     max_depth=best_max_depth)

In [32]:
rf_optimized.fit(X_train1, y_train1.ravel())

In [33]:
pred_test = rf_optimized.predict(X_test1)

In [116]:
from sklearn.metrics import r2_score
print("R^2 score of Test set prediction using GA:", r2_score(y_test1,pred_test))

R^2 score of Test set prediction using GA: 0.8955155506798935


In [117]:
pred_train = rf_optimized.predict(X_train1)
print("R^2 score of Train set prediction using GA:", r2_score(y_train1,pred_train))

R^2 score of Train set prediction using GA: 0.822749219941723


In [105]:
model = rf_optimized.fit(X, y)

In [106]:
# Create a function to be optimized by the algorithms
def obj_fun(X):
    results = model.predict(X.reshape(1, -1))  # Reshape X to match the number of features
    return -results  # Negative to maximize the prediction (Yield)

In [107]:
def genetic_algorithm(pop_size, num_generations, mutation_rate):
    # Initialize population
    population = np.random.uniform(low=lb, high=ub, size=(pop_size, len(lb)))
    
    # Main loop
    for generation in range(num_generations):
        # Evaluate fitness
        fitness = np.array([obj_fun(solution) for solution in population])
        
        # Find best solution in this generation
        best_index = np.argmax(fitness)
        best_fitness = fitness[best_index]
        
        print(f"Generation {generation + 1}: Best Fitness = {best_fitness[0]:.4f}")
        
        # Select indices for parents based on fitness
        parent_indices = np.argsort(fitness)[:pop_size // 2].ravel()
        
        # Get parents from the population array
        parents = population[parent_indices]
        
        # Create offspring through crossover and mutation
        offspring = []
        for _ in range(pop_size // 2):
            parent1 = np.random.choice(parent_indices, size=1, replace=False)[0]
            parent2 = np.random.choice(parent_indices, size=1, replace=False)[0]
            parent1 = population[parent1]
            parent2 = population[parent2]
            crossover_point = np.random.randint(1, len(parent1) - 1)
            child = np.concatenate((parent1[:crossover_point], parent2[crossover_point:]))
            offspring.append(child)

        offspring = np.array(offspring)

        # Apply mutation
        mask = np.random.random(size=offspring.shape) < mutation_rate
        mutation = np.random.uniform(low=lb, high=ub, size=offspring.shape)
        offspring[mask] = mutation[mask]

        # Combine parents and offspring to form next generation
        population = np.vstack((parents, offspring))
    
    # Find best solution overall
    best_index = np.argmax(fitness)
    best_solution = population[best_index]
    
    return best_solution

In [108]:
lb = [data['X1'].min(), data['X2'].min(),data['X3'].min(), data['X4'].min()]
ub = [data['X1'].max(), data['X2'].max(),data['X3'].max(), data['X4'].max()]
lb,ub

([40, 1, 60, 6.0], [80, 5, 150, 15.0])

In [112]:
# Hyperparameters for the genetic algorithm
population_size = 100
generations = 100
mutation_rate = 0.05

In [113]:
# Sort the DataFrame by the specified column in descending order
sorted_data = data.sort_values(by='Y', ascending=False)
# Select the first row (which will have the maximum value)
max_row = sorted_data.iloc[0]
max_row

X1     60.00
X2      3.00
X3    150.00
X4      6.00
Y      73.14
Name: 10, dtype: float64

In [114]:
# Run the genetic algorithm
op_best_solution = genetic_algorithm(population_size, generations, mutation_rate)
op_best_fitness = obj_fun(op_best_solution)
print("Best Fitness:", op_best_fitness)
print("Optimal Solution:", op_best_solution)

Generation 1: Best Fitness = -43.7289
Generation 2: Best Fitness = -43.7753
Generation 3: Best Fitness = -45.7482
Generation 4: Best Fitness = -44.8461
Generation 5: Best Fitness = -45.7482
Generation 6: Best Fitness = -43.7753
Generation 7: Best Fitness = -43.7753
Generation 8: Best Fitness = -45.7482
Generation 9: Best Fitness = -45.7482
Generation 10: Best Fitness = -45.6869
Generation 11: Best Fitness = -45.6869
Generation 12: Best Fitness = -45.7482
Generation 13: Best Fitness = -45.7482
Generation 14: Best Fitness = -45.7482
Generation 15: Best Fitness = -44.8461
Generation 16: Best Fitness = -44.8461
Generation 17: Best Fitness = -45.7482
Generation 18: Best Fitness = -43.7753
Generation 19: Best Fitness = -43.7753
Generation 20: Best Fitness = -43.7753
Generation 21: Best Fitness = -43.7753
Generation 22: Best Fitness = -45.6869
Generation 23: Best Fitness = -44.8461
Generation 24: Best Fitness = -45.6869
Generation 25: Best Fitness = -45.7482
Generation 26: Best Fitness = -45.

In [104]:
print("Best Solution:")
print("X1:", op_best_solution[0])
print("X2:", op_best_solution[1])
print("X3:", op_best_solution[2])
print("X4:", op_best_solution[3])
print("Best Fitness(Y):", -op_best_fitness)  # Convert back to positive as it was negated for maximization

Best Solution:
X1: 65.99529341672861
X2: 2.0358318246588905
X3: 130.71732522072298
X4: 12.279217713594019
Best Fitness(Y): [60.32652172]
