# Prueba con GPLearn

In [1]:
import pandas as pd
import matplotlib 
from gplearn.genetic import SymbolicRegressor
from gplearn.functions import make_function
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.utils.random import check_random_state
from sklearn.metrics import mean_squared_error, r2_score
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import numpy as np
#import graphviz
import pandas as pd

### Coeficiente de arrastre

In [2]:
# Load the drag-coefficient datasets.

# Directory that holds the df_cdrag_<label>.txt files.
# NOTE(review): absolute, machine-specific path kept from the original notebook;
# consider making it configurable so the notebook can run on other machines.
CDRAG_DATA_DIR = 'D:\\CODES\\LIB_SR\\Dataset_coeficientes\\'


def load_cdrag(label, data_dir=CDRAG_DATA_DIR):
    """Read one drag-coefficient dataset into a DataFrame.

    Parameters
    ----------
    label : int or str
        Suffix of the file name; the file read is ``df_cdrag_<label>.txt``.
    data_dir : str, optional
        Directory prefix (including the trailing separator) that is
        prepended to the file name.

    Returns
    -------
    pandas.DataFrame
        The parsed comma-separated file.
    """
    return pd.read_csv(f'{data_dir}df_cdrag_{label}.txt', delimiter=',')


# One frame per dataset label, loaded through the shared helper instead of
# four copy-pasted read_csv stanzas.
df_cdrag_25 = load_cdrag(25)
df_cdrag_53 = load_cdrag(53)
df_cdrag_74 = load_cdrag(74)
df_cdrag_102 = load_cdrag(102)

In [3]:
# Build the train and test sets.

# Columns removed from a dataset to obtain the target frame (y) and the
# feature frame (X), respectively. Whatever columns are not listed remain.
# NOTE(review): presumably this leaves 'cdrag' as the only target column and
# 'K' / 'Rem' as the features - confirm against the dataset schema.
TARGET_DROP_COLS = ['Current', 'K', 'Flujo', 't_viento', 'Diametro', 'col_fluido',
                    'col_celda', 'n_fluido', 'n_celda', 'Rem', 'colIndex']
FEATURE_DROP_COLS = ['Current', 'Flujo', 't_viento', 'Diametro', 'col_fluido',
                     'col_celda', 'n_fluido', 'n_celda', 'colIndex', 'cdrag']

# Training set: datasets 25 and 74 stacked with a fresh index.
df_cdrag_train = pd.concat([df_cdrag_25, df_cdrag_74], ignore_index=True)
X_train = df_cdrag_train.drop(columns=FEATURE_DROP_COLS)
y_train = df_cdrag_train.drop(columns=TARGET_DROP_COLS)

# Test set: dataset 53, split with the same column lists.
X_test = df_cdrag_53.drop(columns=FEATURE_DROP_COLS)
y_test = df_cdrag_53.drop(columns=TARGET_DROP_COLS)

In [4]:
# creando funciones auxiliares

# Protected power primitive: S ^ (-0.6)
def pot1(x1):
    """Return ``x1 ** -0.6`` where ``x1 > 0`` and ``0`` everywhere else.

    Parameters
    ----------
    x1 : array-like or scalar
        Input values; anything that supports ``> 0`` and ``np.power``.

    Returns
    -------
    numpy.ndarray
        Result of ``np.where``; entries for non-positive inputs are 0.
    """
    exponent = -0.6
    with np.errstate(divide='ignore', invalid='ignore'):
        # np.power is evaluated for every element; errstate silences the
        # divide/invalid warnings raised by the non-positive ones, which
        # np.where then replaces with 0.
        is_positive = x1 > 0
        powered = np.power(x1, exponent)
        return np.where(is_positive, powered, 0)

# Wrap pot1 with gplearn's make_function as a one-argument primitive named
# 'pot1' so it can be listed in a function_set.
pot1_fn = make_function(function=pot1, name='pot1', arity=1)

# Protected power primitive: 5*Re ^ (-0.23)
def pot2(x1):
    """Return ``5 * x1 ** -0.23`` where ``x1 > 0`` and ``0`` everywhere else.

    Parameters
    ----------
    x1 : array-like or scalar
        Input values; anything that supports ``> 0`` and ``np.power``.

    Returns
    -------
    numpy.ndarray
        Result of ``np.where``; entries for non-positive inputs are 0.
    """
    exponent = -0.23
    with np.errstate(divide='ignore', invalid='ignore'):
        # The scaled power is computed for every element; warnings from the
        # non-positive ones are silenced and np.where swaps them for 0.
        is_positive = x1 > 0
        scaled_power = 5*np.power(x1, exponent)
        return np.where(is_positive, scaled_power, 0)

# Wrap pot2 with gplearn's make_function as a one-argument primitive named
# 'pot2' so it can be listed in a function_set.
pot2_fn = make_function(function=pot2, name='pot2', arity=1)



In [5]:
# Train the symbolic regressor.

# Genetic-programming configuration. The function set combines gplearn's
# built-in operators with the custom pot1_fn / pot2_fn primitives, and
# random_state is fixed so the run is repeatable.
model = SymbolicRegressor(population_size=5000,
                           generations=30, 
                           stopping_criteria=0.01,
                           p_crossover=0.7, 
                           p_subtree_mutation=0.1,
                           p_hoist_mutation=0.05, 
                           p_point_mutation=0.1,
                           max_samples=0.9, 
                           verbose=1,
                           parsimony_coefficient=0.01, 
                           random_state=123,
                           function_set=['add', 'sub', 'mul', 'div', 'sqrt', 'log',pot1_fn,pot2_fn])

# Pass the target as a 1-D Series. y_train is a single-column DataFrame, and
# fitting on it directly makes scikit-learn emit a DataConversionWarning
# (column_or_1d) before flattening it; selecting the column avoids the
# warning without changing the fitted values.
model.fit(X_train, y_train['cdrag'])

# Retrieve and display the best evolved program.
expression = model._program
print("Expresión:", expression)


  y = column_or_1d(y, warn=True)


    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0    13.80      4.44235e+07        5        0.0727931        0.0695565     17.80m
   1     9.11      3.59451e+07        5        0.0723137        0.0738714     11.47m
   2     6.64          25901.6        5        0.0720671        0.0760905     17.76m
   3     6.11          12859.9        9        0.0698535        0.0740827     14.66m
   4     6.26          37314.4        5        0.0718866        0.0777156     14.45m
   5     5.32          22445.9        8        0.0717459        0.0728612     11.61m
   6     5.20          23145.1        5        0.0717727        0.0787408      8.53m
   7     5.17      1.83698e+07        5         0.071877         0.077802     10.05m
   8     5.18          22152.1        5        0.0718211        0.0783049  

In [6]:
# Predict on the test features with the fitted model.
y_pred = model.predict(X_test)

# Score the predictions against the test target: MSE first, then R^2.
mse, r2 = (metric(y_test, y_pred) for metric in (mean_squared_error, r2_score))

print("MSE:", mse)
print("R^2:", r2)

MSE: 0.011588230963563853
R^2: 0.8904926780136748


In [7]:
# Compare against Rafael's model

def cdrag(S, Re):
    """Evaluate the reference closed-form expression ``S**-0.6 + 5*Re**-0.23``.

    Parameters
    ----------
    S, Re : scalar or array-like supporting ``**``, ``*`` and ``+``
        Inputs of the expression. Unlike pot1/pot2 there is no guard for
        non-positive values here.

    Returns
    -------
    Same kind as the inputs: the sum of the two power terms.
    """
    s_term = S**(-0.6)
    re_term = 5*Re**(-0.23)
    return s_term + re_term

def cdrag_gp(S, Re):
    """Evaluate ``S**-0.6 + 5*Re**-0.23``.

    NOTE(review): presumably a hand transcription of the expression found by
    the symbolic regressor - confirm. The body is currently identical to
    ``cdrag``.

    Parameters
    ----------
    S, Re : scalar or array-like supporting ``**``, ``*`` and ``+``
        Inputs of the expression; non-positive values are not guarded.

    Returns
    -------
    Same kind as the inputs: the sum of the two power terms.
    """
    s_term = S**(-0.6)
    re_term = 5*Re**(-0.23)
    return s_term + re_term

In [8]:
# Evaluate both closed-form expressions on the test features.
# The predictions live in their own frame instead of being written into
# X_test: adding columns to X_test changes the feature matrix, so re-running
# the model.predict(X_test) cell afterwards would see extra columns.
# The functions are applied to whole columns rather than row by row.
cdrag_preds = pd.DataFrame({
    'cdrag_pred': cdrag(X_test['K'], X_test['Rem']),
    'cdrag_gp_pred': cdrag_gp(X_test['K'], X_test['Rem']),
})

# R^2 and MSE for cdrag
r2_cdrag = r2_score(y_test['cdrag'], cdrag_preds['cdrag_pred'])
mse_cdrag = mean_squared_error(y_test['cdrag'], cdrag_preds['cdrag_pred'])

# R^2 and MSE for cdrag_gp
r2_cdrag_gp = r2_score(y_test['cdrag'], cdrag_preds['cdrag_gp_pred'])
mse_cdrag_gp = mean_squared_error(y_test['cdrag'], cdrag_preds['cdrag_gp_pred'])

print(f"R^2 cdrag: {r2_cdrag}, MSE cdrag: {mse_cdrag}")
print(f"R^2 cdrag_gp: {r2_cdrag_gp}, MSE cdrag_gp: {mse_cdrag_gp}")

R^2 cdrag: 0.8904926780136748, MSE cdrag: 0.011588230963563853
R^2 cdrag_gp: 0.8904926780136748, MSE cdrag_gp: 0.011588230963563853
