In [1]:
import pandas as pd

In [2]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

def cart_feature_selection(df, target_column, n_features=5, test_size=0.2, random_state=42):
    """Rank features with a CART decision tree and return the top-ranked names.

    The frame is split into a training and a held-out portion; a
    ``DecisionTreeClassifier`` is fitted on the training portion only and its
    ``feature_importances_`` are used to order the feature columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the feature columns and the target column.
    target_column : str
        Name of the column in ``df`` to predict; every other column is
        treated as a feature.
    n_features : int, default 5
        How many of the highest-importance feature names to return.
    test_size : float or int, default 0.2
        Forwarded to ``train_test_split``; the rows placed in the held-out
        portion are not used when fitting the tree.
    random_state : int or None, default 42
        Seed used both for the train/held-out split and for the tree.

    Returns
    -------
    list
        Feature names ordered by decreasing importance, truncated to the
        first ``n_features`` entries.
    """
    X = df.drop(target_column, axis=1)
    y = df[target_column]

    # Only the training portion is needed here; the held-out rows are discarded.
    X_train, _, y_train, _ = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    cart = DecisionTreeClassifier(random_state=random_state)
    cart.fit(X_train, y_train)

    ranking = pd.DataFrame({
        'feature': X.columns,
        'importance': cart.feature_importances_
    }).sort_values('importance', ascending=False)

    # .iloc makes the positional intent explicit: after sorting, the integer
    # index labels no longer match row positions.
    return ranking['feature'].iloc[:n_features].tolist()



In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
from deap import creator, base, tools, algorithms

def genetic_feature_selection(df, target_column, n_generations=50, population_size=50,
                              random_state=None):
    """Search for a feature subset with a genetic algorithm (DEAP ``eaSimple``).

    Each individual is a list of 0/1 genes, one per feature column; a gene of
    1 means the column is kept. Fitness is the mean 5-fold cross-validated
    score of a 100-tree ``RandomForestClassifier`` trained on the kept
    columns (0 when no column is kept).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the feature columns and the target column.
    target_column : str
        Name of the column in ``df`` to predict; every other column is a
        candidate feature.
    n_generations : int, default 50
        Number of generations passed to ``eaSimple``.
    population_size : int, default 50
        Number of individuals in the population.
    random_state : int or None, default None
        When given, seeds both the stdlib ``random`` module (used by DEAP's
        selection, crossover and mutation operators) and ``np.random`` (used
        to draw the initial genes) so a run can be repeated. This reseeds the
        global generators. ``None`` leaves them untouched.

    Returns
    -------
    list
        Names of the columns selected by the best individual seen in any
        generation.
    """
    import random  # local import: DEAP's operators draw from the stdlib RNG

    X = df.drop(target_column, axis=1)
    y = df[target_column]

    # Two-point crossover needs at least two genes; with fewer candidate
    # columns there is nothing to search, so return what is available.
    if len(X.columns) < 2:
        return X.columns.tolist()

    if random_state is not None:
        random.seed(random_state)
        np.random.seed(random_state)

    # creator.create registers classes on the global deap.creator module.
    # Re-creating them on every call makes DEAP warn and overwrite the
    # previous definitions, so only create them when they are missing.
    if not hasattr(creator, "FitnessMax"):
        creator.create("FitnessMax", base.Fitness, weights=(1.0,))
    if not hasattr(creator, "Individual"):
        creator.create("Individual", list, fitness=creator.FitnessMax)

    toolbox = base.Toolbox()
    toolbox.register("attr_bool", np.random.randint, 0, 2)
    toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_bool, n=len(X.columns))
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)

    # The fitness of a mask does not change between calls (fixed forest seed,
    # integer cv), so identical masks are scored once and then looked up.
    fitness_cache = {}

    def evaluate(individual):
        key = tuple(bool(gene) for gene in individual)
        if key in fitness_cache:
            return fitness_cache[key]

        selected_features = X.columns[np.array(key, dtype=bool)]
        if len(selected_features) == 0:
            fitness = (0,)
        else:
            clf = RandomForestClassifier(n_estimators=100, random_state=42)
            scores = cross_val_score(clf, X[selected_features], y, cv=5)
            fitness = (np.mean(scores),)

        fitness_cache[key] = fitness
        return fitness

    toolbox.register("evaluate", evaluate)
    toolbox.register("mate", tools.cxTwoPoint)
    toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
    toolbox.register("select", tools.selTournament, tournsize=3)

    population = toolbox.population(n=population_size)

    # eaSimple has no elitism, so the best individual can drop out of the
    # population before the last generation; the hall of fame keeps it.
    hall_of_fame = tools.HallOfFame(1)
    algorithms.eaSimple(population, toolbox, cxpb=0.5, mutpb=0.2, ngen=n_generations,
                        halloffame=hall_of_fame, verbose=False)

    best_individual = hall_of_fame[0]
    selected_features = X.columns[np.array(best_individual, dtype=bool)].tolist()

    return selected_features

In [9]:
# Read each sample set, drop its 'Unnamed: …' columns (presumably index
# columns written out with the CSV — confirm) and label every row through
# the 'Valor' column: 1 for test_data_1, 0 for test_data_0_2.
df_1 = (
    pd.read_csv('test_data_1')
    .drop(columns=['Unnamed: 0.1', 'Unnamed: 0'])
    .assign(Valor=1)
)
df_0 = (
    pd.read_csv('test_data_0_2')
    .drop(columns=['Unnamed: 0'])
    .assign(Valor=0)
)

# Missing values are replaced by the column means of the same frame.
df_1 = df_1.fillna(df_1.mean())
df_0 = df_0.fillna(df_0.mean())

df_1.shape, df_0.shape

((602, 136), (602, 136))

In [15]:
# Stack both labelled frames and leave out the two coordinate columns.
df = pd.concat([df_0, df_1]).drop(columns=['Longitud', 'Latitud'])

# Separate the target ('Valor') from the remaining feature columns.
df_y = df['Valor']
df_x = df.drop(columns=['Valor'])

df_x.shape, df_y.shape

((1204, 133), (1204,))

In [16]:
# Select features with CART
cart_features = cart_feature_selection(df, target_column='Valor', n_features=5)
print("Características seleccionadas por CART:", cart_features)

# Select features with the genetic algorithm
genetic_features = genetic_feature_selection(df, target_column='Valor')
print("Características seleccionadas por el algoritmo genético:", genetic_features)

Características seleccionadas por CART: ['PIRange_Bulkd.5-15cm.tif', 'PIRange_Clay.0-5cm.tif', 'ksat_30-60cm.tif', 'ksat_5-15cm.tif', 'PIRange_Sand.5-15cm.tif']
Características seleccionadas por el algoritmo genético: ['PIRange_Bulkd.0-5cm.tif', 'PIRange_Bulkd.100-200cm.tif', 'PIRange_Bulkd.15-30cm.tif', 'PIRange_Bulkd.30-60cm.tif', 'PIRange_Bulkd.5-15cm.tif', 'PIRange_Bulkd.60-100cm.tif', 'PIRange_Clay.100-200cm.tif', 'PIRange_Clay.15-30cm.tif', 'PIRange_Clay.30-60cm.tif', 'PIRange_Clay.5-15cm.tif', 'alpha_0-5cm.tif', 'alpha_5-15cm.tif', 'AvMoist.100-200cm.tif', 'AvMoist.15-30cm.tif', 'AvMoist.5-15cm.tif', 'AvMoist.60-100cm.tif', 'AWC_0-5cm.tif', 'AWC_100-200cm.tif', 'AWC_30-60cm.tif', 'AWC_5-15cm.tif', 'AWC_60-100cm.tif', 'FC.60-100cm.tif', 'ksat_5-15cm.tif', 'n_0-5cm.tif', 'n_100-200cm.tif', 'n_5-15cm.tif', 'PWP.0-5cm.tif', 'PWP.100-200cm.tif', 'PWP.15-30cm.tif', 'PWP.30-60cm.tif', 'PWP.60-100cm.tif', 'theta_r_0-5cm.tif', 'theta_r_100-200cm.tif', 'theta_r_15-30cm.tif', 'theta_r_30-6

In [22]:
# Repeat the CART selection, this time asking for ten features.
cart_features = cart_feature_selection(df, target_column='Valor', n_features=10)
print("Características seleccionadas por CART:", cart_features)

Características seleccionadas por CART: ['PIRange_Bulkd.5-15cm.tif', 'PIRange_Clay.0-5cm.tif', 'ksat_30-60cm.tif', 'ksat_5-15cm.tif', 'PIRange_Sand.5-15cm.tif', 'PWP.0-5cm.tif', 'n_15-30cm.tif', 'PIRange_Bulkd.30-60cm.tif', 'PIRange_Clay.15-30cm.tif', 'n_30-60cm.tif']


In [24]:
# Show how many feature names the CART selection returned.
len(cart_features)

10