In [138]:
import numpy as np

In [155]:
# Define the fitness function to be optimized
def fitness_function(x):
    return x ** 2 

In [156]:
# Genetic Algorithm
def genetic_algorithm(pop_size, num_generations, mutation_rate):
    # Initialize a random population of solutions
    population = np.random.uniform(-1, 1, size=(pop_size,))
    
    for generation in range(num_generations):
        # Evaluate fitness for each solution in the population
        fitness = np.array([fitness_function(x) for x in population])
        
        # Select parents for reproduction based on fitness
        parents = np.random.choice(population, size=pop_size // 2, p=fitness / np.sum(fitness))
        
        # Perform crossover to create offspring
        offspring = []
        for _ in range(pop_size // 2):
            parent1, parent2 = np.random.choice(parents, size=2, replace=False)
            crossover_point = np.random.randint(1, 2)  # Use a fixed crossover point since parents are scalars
            child = (parent1 + parent2) / 2  # Simple average crossover
            offspring.append(child)
        
        # Perform mutation on offspring
        for i in range(pop_size // 3):
            if np.random.rand() < mutation_rate:
                offspring[i] = np.random.uniform(-10, 10)
        
        # Replace old population with new offspring
        population[:pop_size // 2] = parents
        population[pop_size // 2:] = offspring
        
        # Display the best solution in this generation
        best_fitness = fitness.min()
        best_solution = population[np.argmax(fitness)]
        print(f"Generation {generation+1}: Best Fitness = {best_fitness:.4f}, Best Solution = {best_solution:.4f}")
    
    return best_solution

In [159]:
# Parameters
population_size = 200
generations = 200
mutation_rate = 0.9

In [160]:
# Run the genetic algorithm
best_solution = genetic_algorithm(population_size, generations, mutation_rate)
print("Optimal Solution:", best_solution)

Generation 1: Best Fitness = 0.0000, Best Solution = -4.5353
Generation 2: Best Fitness = 0.0002, Best Solution = 6.9609
Generation 3: Best Fitness = 0.0000, Best Solution = 9.3599
Generation 4: Best Fitness = 0.0000, Best Solution = -9.7452
Generation 5: Best Fitness = 0.0000, Best Solution = -3.4433
Generation 6: Best Fitness = 0.0000, Best Solution = 9.4158
Generation 7: Best Fitness = 0.0014, Best Solution = -8.6009
Generation 8: Best Fitness = 0.0004, Best Solution = 9.6375
Generation 9: Best Fitness = 0.0046, Best Solution = -1.9771
Generation 10: Best Fitness = 0.0029, Best Solution = -8.6419
Generation 11: Best Fitness = 0.0000, Best Solution = -4.6316
Generation 12: Best Fitness = 0.0000, Best Solution = -8.8688
Generation 13: Best Fitness = 0.0041, Best Solution = -9.4647
Generation 14: Best Fitness = 0.0001, Best Solution = 8.7044
Generation 15: Best Fitness = 0.0004, Best Solution = -5.4551
Generation 16: Best Fitness = 0.0001, Best Solution = 9.4447
Generation 17: Best Fit

Generation 166: Best Fitness = 0.0000, Best Solution = -8.1629
Generation 167: Best Fitness = 0.0013, Best Solution = 9.8655
Generation 168: Best Fitness = 0.0001, Best Solution = -8.5600
Generation 169: Best Fitness = 0.0001, Best Solution = 7.6836
Generation 170: Best Fitness = 0.0030, Best Solution = -9.3295
Generation 171: Best Fitness = 0.0003, Best Solution = -7.9239
Generation 172: Best Fitness = 0.0023, Best Solution = -9.4175
Generation 173: Best Fitness = 0.0042, Best Solution = -9.3748
Generation 174: Best Fitness = 0.0020, Best Solution = 2.3426
Generation 175: Best Fitness = 0.0038, Best Solution = -9.3748
Generation 176: Best Fitness = 0.0009, Best Solution = 0.2201
Generation 177: Best Fitness = 0.0008, Best Solution = -9.0453
Generation 178: Best Fitness = 0.0002, Best Solution = -8.5437
Generation 179: Best Fitness = 0.0007, Best Solution = 8.8162
Generation 180: Best Fitness = 0.0034, Best Solution = -5.1306
Generation 181: Best Fitness = 0.0004, Best Solution = -6.57

In [44]:
from sklearn.svm import SVR #Regression Model
import pandas as pd # For data wrangling
from sklearn.model_selection import train_test_split # For data splitting
from sklearn.preprocessing import StandardScaler # For normal distribution of the data 
from sklearn.metrics import mean_squared_error,r2_score # For model evaluation
import numpy as np # for mathematical manipulation

In [133]:
data=pd.read_excel(r"C:\Users\USER\Desktop\Biodiesel Yield\data 1.xlsx")
data.head()

Unnamed: 0,X1,X2,X3,X4,Y
0,80,5,105,10.5,51.68
1,60,3,105,10.5,59.89
2,60,3,105,10.5,61.09
3,40,3,60,10.5,49.56
4,40,3,150,10.5,58.02


In [85]:
X=data.drop(columns="Y")
y=data["Y"]

In [213]:
X1=X
y1=y.values.reshape(-1,1)
X_train1, X_test1, y_train1, y_test1= train_test_split(X1, y1, test_size=0.2, random_state=8)

sc=StandardScaler() # For normal distribution of data
X_train1=sc.fit_transform(X_train1)
X_test1=sc.transform(X_test1)
x_df=sc.transform(X1)

sc_y=StandardScaler()
y_train1=sc_y.fit_transform(y_train1)
y_test1=sc_y.transform(y_test1)
y_df=sc_y.transform(y1)

print(X_train1.shape)
print(y_train1.shape)

(23, 4)
(23, 1)


In [196]:
# Define the SVR fitness function
def svr_fitness(solution):
    kernel_encoded = int(solution[0])
    c = solution[1]
    gamma = solution[2]
    epsilon = solution[3]
    
    kernel_decoded = ['linear', 'poly', 'rbf', 'precomputed'][kernel_encoded]
    
    svr = SVR(C=c, kernel=kernel_decoded, gamma=gamma, epsilon=epsilon)
    svr.fit(X_train1, y_train1.ravel())
    y_predict = svr.predict(X_test1)
    fitness = -r2_score(y_test1, y_predict)  # Minimize negative R^2 score
    return fitness

In [197]:
# Define the hyperparameter bounds
param_bounds = {
    "kernel": [0, 1, 2, 3],  # 0: linear, 1: poly, 2: rbf, 3: precomputed
    "C": list(range(100, 1001, 100)),
    "gamma": list(np.logspace(-10, 0, 10)),
    "epsilon": list(np.logspace(-3, 0, 10))
}

In [198]:
lb = [min(param_bounds["kernel"]), min(param_bounds["C"]), min(param_bounds["gamma"]), min(param_bounds["epsilon"])]
ub = [max(param_bounds["kernel"]), max(param_bounds["C"]), max(param_bounds["gamma"]), max(param_bounds["epsilon"])]

In [199]:
def genetic_algorithm(pop_size, num_generations, mutation_rate):
    # Initialize population
    population = np.random.uniform(low=lb, high=ub, size=(pop_size, len(lb)))

    # Main loop
    for generation in range(num_generations):
        # Evaluate fitness
        fitness = np.array([svr_fitness(solution) for solution in population])
        
        # Find best solution in this generation
        best_index = np.argmax(fitness)
        best_fitness = fitness[best_index]
        
        print(f"Generation {generation + 1}: Best Fitness = {best_fitness:.4f}")
        
        # Select indices for parents based on fitness
        parent_indices = np.argsort(fitness)[:pop_size // 2]
        
        # Get parents from the population array
        parents = population[parent_indices]

        # Create offspring through crossover and mutation
        offspring = []
        for _ in range(pop_size // 2):
            parent1, parent2 = np.random.choice(parent_indices, size=2, replace=False)
            parent1 = population[parent1]
            parent2 = population[parent2]
            crossover_point = np.random.randint(1, len(parent1) - 1)
            child = np.concatenate((parent1[:crossover_point], parent2[crossover_point:]))
            offspring.append(child)

        offspring = np.array(offspring)

        # Apply mutation
        mask = np.random.random(size=offspring.shape) < mutation_rate
        mutation = np.random.uniform(low=lb, high=ub, size=offspring.shape)
        offspring[mask] = mutation[mask]

        # Combine parents and offspring to form next generation
        population = np.vstack((parents, offspring))
    
    # Find best solution overall
    best_index = np.argmax(fitness)
    best_solution = population[best_index]
    
    return best_solution

In [200]:
# Hyperparameters for the genetic algorithm
population_size = 100
generations = 100
mutation_rate = 0.05

In [201]:
# Run the genetic algorithm
best_solution = genetic_algorithm(population_size, generations, mutation_rate)
best_fitness = svr_fitness(best_solution)
print("Best Fitness:", best_fitness)
print("Optimal Solution:", best_solution)

Generation 1: Best Fitness = 2.7615
Generation 2: Best Fitness = 0.0338
Generation 3: Best Fitness = 0.0013
Generation 4: Best Fitness = 0.0038
Generation 5: Best Fitness = -0.0669
Generation 6: Best Fitness = -0.0272
Generation 7: Best Fitness = -0.3003
Generation 8: Best Fitness = -0.3489
Generation 9: Best Fitness = -0.2821
Generation 10: Best Fitness = 2.3147
Generation 11: Best Fitness = -0.0183
Generation 12: Best Fitness = 0.2640
Generation 13: Best Fitness = 0.0148
Generation 14: Best Fitness = 0.2640
Generation 15: Best Fitness = -0.7715
Generation 16: Best Fitness = 2.1985
Generation 17: Best Fitness = -0.0400
Generation 18: Best Fitness = -0.0971
Generation 19: Best Fitness = 2.1985
Generation 20: Best Fitness = -0.0862
Generation 21: Best Fitness = -0.5968
Generation 22: Best Fitness = 2.3147
Generation 23: Best Fitness = 1.9647
Generation 24: Best Fitness = -0.0665
Generation 25: Best Fitness = 1.9647
Generation 26: Best Fitness = 1.9647
Generation 27: Best Fitness = 1.964

In [202]:
best_kernel_encoded = int(best_solution[0])
best_c = best_solution[1]
best_gamma = best_solution[2]
best_epsilon = best_solution[3]
best_kernel_decoded = ['linear', 'poly', 'rbf', 'precomputed'][best_kernel_encoded]

In [203]:
print("Best Hyperparameters using Genetic Algorithm:")
print("Kernel:", best_kernel_decoded)
print("C:", best_c)
print("Gamma:", best_gamma)
print("Epsilon:", best_epsilon)

Best Hyperparameters using Genetic Algorithm:
Kernel: rbf
C: 170.37683617147707
Gamma: 0.017923420520929097
Epsilon: 0.03995924011626218


In [214]:
svr_optimized = SVR(kernel=best_kernel_decoded,C=best_c, gamma =best_gamma,epsilon = best_epsilon)

In [215]:
svr_optimized.fit(X_train1, y_train1.ravel())

In [216]:
pred_test = svr_optimized.predict(X_test1)

In [217]:
from sklearn.metrics import r2_score
print("R^2 score of Test set prediction using GA:", r2_score(y_test1,pred_test))

R^2 score of Test set prediction using GA: 0.9787141115745573


In [232]:
model = svr_optimized.fit(X, y)

In [233]:
# Create a function to be optimized by the algorithms
def obj_fun(X):
    results = model.predict(X.reshape(1, -1))  # Reshape X to match the number of features
    return -results  # Negative to maximize the prediction (Yield)

In [5]:
def genetic_algorithm(pop_size, num_generations, mutation_rate):
    # Initialize population
    population = np.random.uniform(low=lb, high=ub, size=(pop_size, len(lb)))

    # Main loop
    for generation in range(num_generations):
        # Evaluate fitness
        fitness = np.array([obj_fun(solution) for solution in population])
        
        # Find best solution in this generation
        best_index = np.argmax(fitness)
        best_fitness = fitness[best_index]
        
        print(f"Generation {generation + 1}: Best Fitness = {best_fitness:.4f}")
        
        # Select indices for parents based on fitness
        parent_indices = np.argsort(fitness)[:pop_size // 2].ravel()
        
        # Get parents from the population array
        parents = population[parent_indices]
        print("parent_indices shape:", parent_indices.shape)

        # Create offspring through crossover and mutation
        offspring = []
        for _ in range(pop_size // 2):
            parent1, parent2 = np.random.choice(parent_indices, size=2, replace=False)
            parent1 = population[parent1]
            parent2 = population[parent2]
            crossover_point = np.random.randint(1, len(parent1) - 1)
            child = np.concatenate((parent1[:crossover_point], parent2[crossover_point:]))
            offspring.append(child)

        offspring = np.array(offspring)

        # Apply mutation
        mask = np.random.random(size=offspring.shape) < mutation_rate
        mutation = np.random.uniform(low=lb, high=ub, size=offspring.shape)
        offspring[mask] = mutation[mask]

        # Combine parents and offspring to form next generation
        population = np.vstack((parents, offspring))
    
    # Find best solution overall
    best_index = np.argmax(fitness)
    best_solution = population[best_index]
    
    return best_solution


In [6]:
lb = [data['X1'].min(), data['X2'].min(),data['X3'].min(), data['X4'].min()]
ub = [data['X1'].max(), data['X2'].max(),data['X3'].max(), data['X4'].max()]


NameError: name 'data' is not defined

In [3]:
# Hyperparameters for the genetic algorithm
population_size = 200
generations = 100
mutation_rate = 0.05

In [4]:
# Run the genetic algorithm
op_best_solution = genetic_algorithm(population_size, generations, mutation_rate)
op_best_fitness = obj_fun(op_best_solution)
print("Best Fitness:", op_best_fitness)
print("Optimal Solution:", op_best_solution)

NameError: name 'np' is not defined

In [226]:
print("Best Solution:")
print("X1:", op_best_solution[0])
print("X2:", op_best_solution[1])
print("X3:", op_best_solution[2])
print("X4:", op_best_solution[3])
print("Best Fitness:", -op_best_fitness)  # Convert back to positive as it was negated for maximization

Best Solution:
X1: 2.0012563124190677
X2: 894.0842180036801
X3: 0.016766258983957918
X4: 0.04230525251466458
Best Fitness: [58.91492847]
