In [6]:
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor
import random
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score


random.seed(0)
np.random.seed(0)

# Load dataset
X, y = load_diabetes(return_X_y=True)
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
# Train Random Forest
r = RandomForestRegressor()
r.fit(x_train, y_train)
print(r2_score(y_test, r.predict(x_test)))

0.26865181564422547


In [11]:
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import numpy as np
import random
from deap import base, creator, tools, gp
from sympy import sympify

X, y = load_diabetes(return_X_y=True)
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

def protected_div(a, b):
    return a / b if b != 0 else 1

pset = gp.PrimitiveSet("MAIN", X.shape[1])
pset.addPrimitive(np.add, 2)
pset.addPrimitive(np.subtract, 2)
pset.addPrimitive(np.multiply, 2)
pset.addPrimitive(protected_div, 2)
pset.addPrimitive(np.sin, 1)
pset.addPrimitive(np.cos, 1)
pset.addPrimitive(np.tan, 1)
pset.addEphemeralConstant("rand", lambda: random.uniform(-1, 1))

creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMax)

toolbox = base.Toolbox()
toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=3)
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

toolbox.register("compile", gp.compile, pset=pset)

def eval_individual(individual):
    func = toolbox.compile(expr=individual)
    
    try:
        new_feature_train = np.array([func(*row) for row in x_train])
        x_train_augmented = np.hstack((x_train, new_feature_train.reshape(-1, 1)))

        new_feature_test = np.array([func(*row) for row in x_test])
        x_test_augmented = np.hstack((x_test, new_feature_test.reshape(-1, 1)))

        model = RandomForestRegressor(random_state=0)
        model.fit(x_train_augmented, y_train)
        predictions = model.predict(x_test_augmented)
        return r2_score(y_test, predictions),
    except Exception as e:
        return -np.inf,  

toolbox.register("evaluate", eval_individual)
toolbox.register("mate", gp.cxOnePoint)
toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr, pset=pset)
toolbox.register("select", tools.selTournament, tournsize=3)

population = toolbox.population(n=50)
hof = tools.HallOfFame(1)

stats = tools.Statistics(lambda ind: ind.fitness.values)
stats.register("avg", np.mean)
stats.register("std", np.std)
stats.register("min", np.min)
stats.register("max", np.max)

from deap import algorithms
population, log = algorithms.eaSimple(
    population, toolbox, cxpb=0.5, mutpb=0.2, ngen=20, stats=stats, halloffame=hof, verbose=True
)

best_individual = hof[0]
print("Best individual (raw):", best_individual)
best_func = toolbox.compile(expr=best_individual)

best_expression = str(best_individual)
print("Best individual (mathematical expression):", sympify(best_expression))

new_feature_train = np.array([best_func(*row) for row in x_train])
x_train_augmented = np.hstack((x_train, new_feature_train.reshape(-1, 1)))

new_feature_test = np.array([best_func(*row) for row in x_test])
x_test_augmented = np.hstack((x_test, new_feature_test.reshape(-1, 1)))

model = RandomForestRegressor(random_state=0)
model.fit(x_train_augmented, y_train)
predictions = model.predict(x_test_augmented)
final_r2 = r2_score(y_test, predictions)

print("Final R^2 with best feature:", final_r2)



gen	nevals	avg     	std       	min    	max     
0  	50    	0.256816	0.00911257	0.23719	0.286963
1  	24    	0.260597	0.00947549	0.226444	0.286963
2  	31    	0.260225	0.0108163 	0.225278	0.280593
3  	30    	0.261585	0.0137027 	0.205938	0.2855  
4  	34    	0.262875	0.0134    	0.227855	0.287499
5  	30    	0.263246	0.0144062 	0.218877	0.287499
6  	30    	0.261349	0.0174305 	0.224526	0.287499
7  	36    	0.265161	0.0176257 	0.218607	0.293688
8  	31    	0.270102	0.0150256 	0.23616 	0.301103
9  	38    	0.26651 	0.0176929 	0.220502	0.300258
10 	29    	0.270609	0.017064  	0.224079	0.300258
11 	33    	0.271385	0.0169711 	0.217566	0.300258
12 	23    	0.276369	0.0155945 	0.231021	0.295937
13 	38    	0.27605 	0.0184236 	0.213377	0.297502
14 	29    	0.277992	0.0172498 	0.22579 	0.305733
15 	27    	0.27804 	0.0173448 	0.235827	0.295937
16 	31    	0.27686 	0.019193  	0.234147	0.297911
17 	36    	0.268344	0.0233197 	0.223012	0.298557
18 	23    	0.280448	0.018659  	0.232718	0.308114
19 	24    	0.282027	0.