# Coeficiente de Arrastre con PySR 

In [22]:
## Importando librerías
from pathlib import Path

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pysr import PySRRegressor
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error, r2_score

### Cargando datos

In [4]:
# Load the drag-coefficient datasets.

# Single place to configure where the dataset files live; edit this to run the
# notebook on another machine instead of touching every read call.
DATA_DIR = Path('D:\\CODES\\LIB_SR\\Dataset_coeficientes')


def load_cdrag(tag):
    """Read one drag-coefficient dataset.

    Parameters
    ----------
    tag : int or str
        Suffix of the file name; the file read is ``DATA_DIR / 'df_cdrag_<tag>.txt'``.

    Returns
    -------
    pandas.DataFrame
        The comma-delimited file parsed by ``pd.read_csv``.
    """
    return pd.read_csv(DATA_DIR / f'df_cdrag_{tag}.txt', delimiter=',')


df_cdrag_25 = load_cdrag(25)
df_cdrag_53 = load_cdrag(53)
df_cdrag_74 = load_cdrag(74)
df_cdrag_102 = load_cdrag(102)

In [5]:
# Build the train and test sets.

# Columns removed from a raw frame to obtain the target frame and the feature
# frame respectively. Kept in one place so train and test are always split the
# same way.
TARGET_DROP_COLS = ['Current', 'K', 'Flujo', 't_viento', 'Diametro', 'col_fluido',
                    'col_celda', 'n_fluido', 'n_celda', 'Rem', 'colIndex']
FEATURE_DROP_COLS = ['Current', 'Flujo', 't_viento', 'Diametro', 'col_fluido',
                     'col_celda', 'n_fluido', 'n_celda', 'colIndex', 'cdrag']


def split_features_target(df):
    """Split a raw dataset frame into features and target.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw frame containing every column named in ``FEATURE_DROP_COLS`` and
        ``TARGET_DROP_COLS``.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``X``: ``df`` without ``FEATURE_DROP_COLS``, with column ``'Rem'``
        renamed to ``'Re'``; ``y``: ``df`` without ``TARGET_DROP_COLS``.
        Both are new frames; ``df`` is not modified.
    """
    X = df.drop(columns=FEATURE_DROP_COLS).rename(columns={'Rem': 'Re'})
    y = df.drop(columns=TARGET_DROP_COLS)
    return X, y


# Training data: datasets 25 and 74 stacked with a fresh index.
df_cdrag_train = pd.concat([df_cdrag_25, df_cdrag_74], ignore_index=True)
X_train, y_train = split_features_target(df_cdrag_train)

# Test data: dataset 53, held out from training.
X_test, y_test = split_features_target(df_cdrag_53)

### Entrenando Modelo

In [16]:
# Configure the symbolic-regression search and fit it on the training set.
# NOTE(review): no random seed is passed, so a re-run may return different
# equations than the ones transcribed by hand further down — confirm whether
# reproducibility is required.
# NOTE(review): the fit log reports more than 10,000 datapoints and suggests
# enabling batching — consider it if training time becomes a problem.
model = PySRRegressor(
    # operator set available to candidate expressions (no unary operators)
    binary_operators=["+", "-", "*", "/", "pow"],
    unary_operators=[],
    # search budget
    niterations=20,
    population_size=200,
    # expression-size limit and complexity penalty
    maxsize=12,
    parsimony=1e-3,
    # console reporting
    verbosity=1,
    progress=True,
)

model.fit(X_train, y_train)



[ Info: Note: you are running with more than 10,000 datapoints. You should consider turning on batching (`options.batching`), and also if you need that many datapoints. Unless you have a large amount of noise (in which case you should smooth your dataset first), generally < 10,000 datapoints is enough to find a functional form.
[ Info: Started!



Expressions evaluated per second: 0.000e+00
Head worker occupation: 0.0%
Progress: 0 / 300 total iterations (0.000%)
Hall of Fame:
---------------------------------------------------------------------------------------------------
Complexity  Loss       Score     Equation
---------------------------------------------------------------------------------------------------
Press 'q' and then <enter> to stop execution early.

Expressions evaluated per second: 5.220e+01
Head worker occupation: 0.0%
Progress: 1 / 300 total iterations (0.333%)
Hall of Fame:
---------------------------------------------------------------------------------------------------
Complexity  Loss       Score     Equation
3           2.420e-01  5.314e+00  y = 1.4701 / K
5           2.853e-02  1.069e+00  y = (K - -0.73896) / K
7           1.974e-02  1.841e-01  y = ((K - -0.44692) / K) * 1.2488
9           1.974e-02  5.126e-05  y = ((K - (K * -0.24826)) - -0.55744) / K
--------------------------------------------------

In [17]:
# Show the fitted regressor; its text representation lists the equations table
# (pick marker, score, equation, loss, complexity).
print(model)

PySRRegressor.equations_ = [
	   pick         score                                           equation  \
	0        0.000000e+00                                          1.9019274   
	1        7.119589e-01                                      2.8730948 - K   
	2        8.915279e-02                      1.8230312 * (K ^ -0.37245628)   
	3        1.033141e-01               (3.00863 - K) + (-4.0316354e-5 * Re)   
	4  >>>>  2.259772e-01  ((-4.4643166e-5 * Re) - -1.9767536) * (K ^ -0....   
	5        7.392998e-07  (((Re - K) * -4.464311e-5) - -1.9767032) * (K ...   
	
	       loss  complexity  
	0  0.090810           1  
	1  0.021864           3  
	2  0.018293           5  
	3  0.014878           7  
	4  0.009468           9  
	5  0.009468          11  
]


In [30]:
# Take the equation the regressor itself selected (the row flagged ">>>>" in
# the equations table) instead of a hard-coded row position, which would point
# at a different expression whenever a re-run produces a different table.
best_equation = model.get_best()['equation']
print(best_equation)

((-4.4643166e-5 * Re) - -1.9767536) * (K ^ -0.34891888)


In [20]:
# Predict on the test features with the fitted model.
# NOTE(review): y_pred is not referenced by any of the cells visible here; the
# metrics are computed from the hand-transcribed cdrag_gp formula instead —
# confirm whether y_pred was meant to be used.
y_pred = model.predict(X_test)

In [21]:
# comparar con modelo de Rafael


def cdrag(S, Re):
    """Reference drag-coefficient correlation: ``S**-0.6 + 5 * Re**-0.23``.

    Works element-wise on anything that supports ``**`` with a float exponent
    (Python floats, NumPy arrays, pandas Series).

    Parameters
    ----------
    S : first input of the correlation (the notebook passes column ``'K'``).
    Re : second input of the correlation (the notebook passes column ``'Re'``).

    Returns
    -------
    The sum of the two power-law terms, same shape as the broadcast inputs.
    """
    s_term = pow(S, -0.6)
    re_term = 5 * pow(Re, -0.23)
    return s_term + re_term

def cdrag_gp(S, Re):
    """Drag-coefficient expression transcribed from the symbolic-regression result.

    Evaluates ``(1.9767536 - 4.4643166e-5 * Re) * S**-0.34891888`` element-wise.

    Parameters
    ----------
    S : input raised to the fitted exponent (the notebook passes column ``'K'``).
    Re : input of the linear factor (the notebook passes column ``'Re'``).

    Returns
    -------
    The product of the linear factor in ``Re`` and the power-law factor in ``S``.
    """
    # Constants copied from the printed best equation.
    slope = -4.4643166e-5
    intercept = 1.9767536
    exponent = -0.34891888

    linear_factor = intercept + slope * Re
    return linear_factor * pow(S, exponent)



In [25]:
# Calcular cdrag_pred y cdrag_gp_pred en vectores separados
cdrag_pred = X_test.apply(lambda row: cdrag(row['K'], row['Re']), axis=1).values
cdrag_gp_pred = X_test.apply(lambda row: cdrag_gp(row['K'], row['Re']), axis=1).values

# Calcular R^2 y MSE para cdrag
r2_cdrag = r2_score(y_test['cdrag'], cdrag_pred)
mse_cdrag = mean_squared_error(y_test['cdrag'], cdrag_pred)

# Calcular MAE y MAPE para cdrag
mae_cdrag = mean_absolute_error(y_test['cdrag'], cdrag_pred)
mape_cdrag = mean_absolute_percentage_error(y_test['cdrag'], cdrag_pred)

# Calcular R^2 y MSE para cdrag_gp
r2_cdrag_gp = r2_score(y_test['cdrag'], cdrag_gp_pred)
mse_cdrag_gp = mean_squared_error(y_test['cdrag'], cdrag_gp_pred)

# Calcular MAE y MAPE para cdrag_gp
mae_cdrag_gp = mean_absolute_error(y_test['cdrag'], cdrag_gp_pred)
mape_cdrag_gp = mean_absolute_percentage_error(y_test['cdrag'], cdrag_gp_pred)

#  Mostrar Metricas 

print(f"R^2 cdrag original: {r2_cdrag}, R^2 cdrag GPLearn: {r2_cdrag_gp} ")
print(f"MSE cdrag original: {mse_cdrag}, MSE cdrag GPLearn: {mse_cdrag_gp} ")
print(f"MAE cdrag original: {mae_cdrag}, MAE cdrag GPLearn: {mae_cdrag_gp} ")
print(f"MAPE cdrag original: {mape_cdrag*100}*100, MAPE cdrag GPLearn: {mape_cdrag_gp*100}*100 ")

R^2 cdrag original: 0.8904926780136748, R^2 cdrag GPLearn: 0.900584838387332 
MSE cdrag original: 0.011588230963563853, MSE cdrag GPLearn: 0.010520263240402195 
MAE cdrag original: 0.07580331737181549, MAE cdrag GPLearn: 0.07454400166450104 
MAPE cdrag original: 3.7448292462961463*100, MAPE cdrag GPLearn: 3.723570512405093*100 
