In [34]:
import numpy as np
import pandas as pd
from keras.layers import Dense
from keras.models import Sequential
from pandas import DataFrame
from pandas.api.types import is_string_dtype
from scikeras.wrappers import KerasRegressor
from sklearn.base import BaseEstimator, RegressorMixin, clone
from sklearn.metrics import r2_score
from sklearn.preprocessing import OrdinalEncoder
#from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor

In [None]:
# Only the categorical (text) variables of the CSVs must be converted to numeric, using whichever method is deemed most suitable.
# Seed NumPy's global random generator so the random draws in this notebook are repeatable.
np.random.seed(357823)
arbol = DecisionTreeRegressor(max_depth=3)  # Avoid overfitting by using small trees

In [None]:
def crear_red(input_shape):
    """Build and compile a small dense network for regression.

    Args:
        input_shape: Number of input features; the first layer expects
            vectors of shape ``(input_shape,)``.

    Returns:
        A compiled ``Sequential`` model: one hidden ``Dense`` layer with 16
        ReLU units followed by a single-unit ``Dense`` output layer with no
        activation (linear output), trained with the Adam optimizer on
        mean-squared-error loss.
    """
    red = Sequential()
    # Hidden layer; it also declares the expected input width.
    red.add(Dense(16, activation='relu', input_shape=(input_shape,)))
    # Linear output for regression.
    red.add(Dense(1))
    red.compile(optimizer='adam', loss='mse')
    return red

# Wrap the network in a scikit-learn compatible estimator.
# `model` replaces the deprecated `build_fn` argument. The input width is read
# from the `meta` dict that SciKeras hands to the builder when `fit` is called,
# so the wrapper adapts to whatever data it is fitted on instead of relying on
# a global `X_train` being defined somewhere in the notebook.
red_neuronal = KerasRegressor(
    model=lambda meta: crear_red(meta["n_features_in_"]),
    epochs=50,
    batch_size=32,
    verbose=0
)

In [None]:
'''1. Inicializar la primera predicción con pred0
 2. predactual = pred0
 3. Por cada i en n_estimators:
     1. residuoi = y - predactual
     2. entrenar estimadori usando residuoi como variable objetivo
     3. obtener las predicciones, predi, de estimadori
     4. predactual = predactual + predi*lr
 4. Devolver el conjunto de modelos entrenados
 '''

In [None]:

def transfrom_csv(csv:str) -> "tuple[DataFrame, pd.Series]":
    """Load a CSV and ordinal-encode the text columns of its features.

    Every column except the last one is treated as a feature; the last column
    is returned unchanged as the target.

    Args:
        csv: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        A ``(features, target)`` tuple. ``features`` is a DataFrame holding
        all columns but the last, where each string-typed column has been
        replaced by the codes produced by ``OrdinalEncoder``. ``target`` is
        the last column as a Series.
    """
    data = pd.read_csv(csv)
    # Copy the feature slice so the assignment below writes into an
    # independent frame rather than into a view of `data`.
    atributos = data.iloc[:, :-1].copy()
    objetivo = data.iloc[:, -1]

    # Inspect only the feature columns: a text-typed target must not end up in
    # this list, because it is not a column of `atributos`.
    atributos_discretos = [
        nombre_columna
        for nombre_columna in atributos.columns
        if is_string_dtype(atributos[nombre_columna])
    ]

    # Skip the encoder entirely when there is nothing to encode.
    if atributos_discretos:
        codificador_atributos_discretos = OrdinalEncoder()
        atributos[atributos_discretos] = codificador_atributos_discretos.fit_transform(
            atributos[atributos_discretos]
        )
    return (atributos, objetivo)
    


class EnsembleSecuencial(BaseEstimator, RegressorMixin):
    """Sequential ensemble of regressors, each fitted on the current residuals.

    The ensemble starts from a constant prediction (the mean of the target).
    On every round a fresh copy of ``trainingModel`` is fitted on a random
    subsample of the rows, using the residuals of the running prediction as
    the target. Its output, scaled by ``lr``, is then added to the running
    prediction.

    Args:
        trainingModel: Unfitted scikit-learn compatible regressor used as the
            template for every round (it is cloned, never fitted itself).
        csv: Location of the CSV file; loaded with ``transfrom_csv`` so that
            all columns but the last are features and the last is the target.
        n_estimators: Number of rounds, i.e. of fitted base models.
        lr: Learning rate multiplying every base model's prediction.
        sample_size: Fraction of rows drawn (without replacement) per round.

    NOTE(review): the CSV is read inside ``__init__``, so cloning this
    estimator re-reads the file; kept because callers rely on ``fit()`` taking
    no arguments.
    """

    def __init__(self, trainingModel, csv:str, n_estimators:int = 15, lr:float = 0.01, sample_size:float = 0.75) -> None:
        self.modelos = []
        self.n_estimators = n_estimators
        self.trainingModel = trainingModel
        self.lr = lr
        self.sample_size = sample_size
        # BaseEstimator.get_params() reads every constructor argument back from
        # an attribute of the same name, so `csv` has to be stored as well.
        self.csv = csv
        # Parse the file once and keep both halves.
        atributos, objetivo = transfrom_csv(csv)
        self.originalData = atributos
        self.trainingData = []
        self.evaluationData = []
        self.individualObjective = objetivo
        # Constant starting prediction; 0.0 until fit() computes the target mean.
        self.pred_inicial = 0.0

    def fit(self):
        """Fit ``n_estimators`` base models on successive residuals.

        Returns:
            The fitted estimator itself.

        Raises:
            ValueError: If ``sample_size`` selects less than one row.
        """
        X = self.originalData
        y = np.asarray(self.individualObjective, dtype=float)
        n_filas = len(X)
        n_muestra = int(n_filas * self.sample_size)
        if n_muestra < 1:
            raise ValueError(
                "sample_size selects no rows "
                f"({n_filas} rows available, sample_size={self.sample_size})"
            )

        # Start from scratch so that calling fit() twice does not pile up models.
        self.modelos = []
        self.pred_inicial = float(np.mean(y))
        pred_actual = np.full(n_filas, self.pred_inicial, dtype=float)

        for _ in range(self.n_estimators):
            # 1. Residuals of the running prediction over every row.
            residuos = y - pred_actual
            # 2. One random subsample of row positions, shared by the features
            #    and the residuals so that both stay aligned.
            idx = np.random.choice(n_filas, n_muestra, replace=False)
            self.trainingData = X.iloc[idx]
            # 3. Fit an independent copy of the template model; reusing a single
            #    instance would make every stored entry the last fitted model.
            modelo = clone(self.trainingModel)
            modelo.fit(self.trainingData, residuos[idx])
            # 4. Update the running prediction on the full data set
            #    (ravel in case the model returns a column vector).
            pred_actual += self.lr * np.ravel(modelo.predict(X))
            # 5. Keep the fitted model.
            self.modelos.append(modelo)
        return self

    def predict(self, X):
        """Predict by adding the scaled base-model outputs to the initial value.

        Args:
            X: Feature rows in the layout used for fitting.

        Returns:
            A 1-D float array with one prediction per row of ``X``.
        """
        # Begin at the same constant fit() started from; leaving it out shifts
        # every prediction by the mean of the training target.
        pred = np.full(len(X), self.pred_inicial, dtype=float)
        for modelo in self.modelos:
            pred += self.lr * np.ravel(modelo.predict(X))
        return pred

In [75]:
# Preview the encoded feature frame (first element of the tuple returned by transfrom_csv).
transfrom_csv('csv/house_prices.csv')[0]

Unnamed: 0,GarageCars,Condition2,YearBuilt,GarageYrBlt,LandContour,LowQualFinSF,HouseStyle,GarageType,MSSubClass,WoodDeckSF,FireplaceQu,BsmtFinSF2,Alley,MSZoning,OverallCond,EnclosedPorch,CentralAir,LotFrontage,GarageCond,Exterior2nd,GarageArea,BsmtFinType2,GarageQual,ExterQual,PavedDrive,LotShape,KitchenQual,SaleType,MiscVal,BsmtExposure,OpenPorchSF,ExterCond,Fireplaces,FullBath,BsmtQual,MiscFeature,PoolQC
0,2,2.0,1962,1977.0,3.0,0,2.0,5.0,20,0,5.0,0,2.0,3.0,5,0,1.0,70.0,4.0,6.0,576,5.0,3.0,3.0,2.0,3.0,3.0,7.0,0,3.0,0,3.0,0,1,3.0,4.0,2.0
1,0,2.0,1914,0.0,3.0,0,4.0,6.0,75,0,2.0,0,0.0,4.0,6,134,1.0,35.0,5.0,12.0,0,5.0,4.0,3.0,2.0,3.0,3.0,7.0,0,3.0,291,3.0,1,2,3.0,4.0,2.0
2,2,2.0,1999,1999.0,3.0,0,2.0,1.0,20,0,5.0,0,2.0,3.0,5,0,1.0,68.0,4.0,10.0,666,5.0,3.0,2.0,2.0,0.0,2.0,7.0,0,0.0,35,3.0,0,2,2.0,4.0,2.0
3,1,2.0,1948,1948.0,0.0,0,5.0,1.0,20,103,5.0,0,2.0,3.0,8,0,1.0,78.0,4.0,6.0,230,5.0,3.0,3.0,2.0,3.0,0.0,7.0,0,3.0,0,1.0,0,3,3.0,4.0,2.0
4,2,2.0,1950,1950.0,3.0,0,2.0,5.0,20,0,5.0,0,2.0,3.0,5,0,1.0,60.0,4.0,11.0,420,6.0,3.0,3.0,2.0,3.0,3.0,7.0,0,3.0,29,3.0,0,1,4.0,4.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
555,1,2.0,1959,1959.0,3.0,0,7.0,1.0,80,86,4.0,294,2.0,3.0,6,0,1.0,70.0,4.0,4.0,796,1.0,3.0,3.0,2.0,3.0,3.0,7.0,0,0.0,0,1.0,1,1,3.0,4.0,2.0
556,1,2.0,1934,1939.0,1.0,0,5.0,5.0,70,0,2.0,0,2.0,3.0,7,0,1.0,74.0,4.0,9.0,240,5.0,3.0,3.0,2.0,0.0,3.0,7.0,0,3.0,0,3.0,1,1,3.0,4.0,2.0
557,2,2.0,1882,1925.0,3.0,0,5.0,1.0,70,0,2.0,0,0.0,4.0,9,0,1.0,121.0,4.0,11.0,424,5.0,3.0,2.0,1.0,3.0,2.0,7.0,0,3.0,169,1.0,1,1,3.0,4.0,2.0
558,1,2.0,1953,1953.0,3.0,0,2.0,1.0,20,0,5.0,0,2.0,3.0,7,0,1.0,66.0,4.0,11.0,240,5.0,3.0,3.0,2.0,3.0,2.0,7.0,0,3.0,18,3.0,0,1,3.0,4.0,2.0


In [None]:
# Option 1: access the first row as a Series
#primera_fila = data.iloc[0]
#print(primera_fila)

# Option 2: show the first row as a DataFrame
#primera_fila_df = data.head(1)
#print(primera_fila_df)
# Load the raw CSV with no encoding applied.
originalData = pd.read_csv('csv/house_prices.csv')
#data.iloc[0]
#data.iloc[0]['GarageCars']
# NOTE(review): this bare expression is not displayed; a cell only shows its last expression.
originalData.columns
#data.iloc[0,1]
#key = data.iloc[0,:1]
#key
#is_string_dtype(data[data.columns[4]])
#is_string_dtype(data[data.columns[4]])
#data.columns[0]
# Show the raw values of the PoolQC column.
originalData["PoolQC"]

0      none
1      none
2      none
3      none
4      none
       ... 
555    none
556    none
557    none
558    none
559    none
Name: PoolQC, Length: 560, dtype: object