In [3]:
import pandas as pd
import os
path = r"/data/nisla/copernicus2/vale/raw/"

os.listdir(path)

['train_data_1125.csv', 'test_data_1125.csv']

In [4]:
train = pd.read_csv(path + "train_data_1125.csv")
test = pd.read_csv(path + "test_data_1125.csv")

## Vale ClasificaciÃ³n -> RegresiÃ³n

### ClasificaciÃ³n

In [11]:
# train.columns
# eliminar columna ID_unico
train = train.drop(columns=["ID_unico"])
test = test.drop(columns=["ID_unico"])

In [15]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, f1_score, accuracy_score
from sklearn.ensemble import RandomForestClassifier

# ============================
# 1. Preparar los datos
# ============================

# Copias limpias
train_df = train.copy()
test_df = test.copy()

# Codificar la clase (Name)
le = LabelEncoder()
train_df["label"] = le.fit_transform(train_df["Name"])
test_df["label"] = le.transform(test_df["Name"])

feature_cols = [c for c in train_df.columns if c not in ["Name", "label"]]

X_train = train_df[feature_cols]
y_train = train_df["label"]

X_test = test_df[feature_cols]
y_test = test_df["label"]

# ============================
# 2. Entrenar Random Forest
# ============================

rf = RandomForestClassifier(
    n_estimators=1600,
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
    max_features="sqrt",
    bootstrap=True,
    class_weight="balanced_subsample",
    n_jobs=-1,
    random_state=42
)

rf.fit(X_train, y_train)

# ============================
# 3. Predicciones
# ============================

y_pred = rf.predict(X_test)

# ============================
# 4. MÃ©tricas
# ============================

acc = accuracy_score(y_test, y_pred)
f1_macro = f1_score(y_test, y_pred, average="macro")
f1_weighted = f1_score(y_test, y_pred, average="weighted")

print(f"\nðŸ“Œ Accuracy: {acc:.4f}")
print(f"ðŸ“Œ Macro F1: {f1_macro:.4f}")
print(f"ðŸ“Œ Weighted F1: {f1_weighted:.4f}\n")

# Reporte detallado
print("\n===== Classification Report =====\n")
print(classification_report(y_test, y_pred, target_names=le.classes_))

# Matriz de confusiÃ³n
cm = confusion_matrix(y_test, y_pred)
print("\n===== Confusion Matrix =====\n")
print(cm)

# ============================
# 5. Importancia de features
# ============================

importances = pd.DataFrame({
    "feature": feature_cols,
    "importance": rf.feature_importances_
}).sort_values("importance", ascending=False)

print("\n===== Feature Importances =====")
print(importances)


ðŸ“Œ Accuracy: 0.4833
ðŸ“Œ Macro F1: 0.4781
ðŸ“Œ Weighted F1: 0.4711


===== Classification Report =====

              precision    recall  f1-score   support

        BN01       0.56      0.76      0.64        66
        BN02       0.93      1.00      0.96        76
        BN03       0.44      0.28      0.34       120
        BN04       0.36      0.60      0.45       209
        BN05       0.38      0.35      0.37       141
        DX01       0.61      0.53      0.57        64
        DX02       0.92      0.99      0.95        69
        MT01       0.29      0.16      0.20        64
        MT02       0.24      0.26      0.25        78
        MT03       0.37      0.34      0.36        67
        MT04       0.47      0.38      0.42        66
        MT05       0.74      0.91      0.82        46
        MT06       0.59      0.77      0.67        65
        MT07       0.79      0.68      0.73        68
        MT08       0.61      0.66      0.64        71
        PCH1       0.45     

### Mapeo

In [16]:
class_to_reg = {
    "PCH1": (0.684, 3928, 0.018880),
    "PCH2": (0.527, 3928, 0.016027),
    "PCH3": (0.918, 3928, 0.010235),
    "PCH4": (0.617, 3928, 0.008690),
    "PCH5": (0.649, 3800, 0.001009),

    "MT01": (2.923, 4693, 0.007603),
    "MT02": (1.910, 4693, 0.008147),
    "MT03": (3.308, 4572, 0.001672),
    "MT04": (1.383, 4572, 0.004886),
    "MT05": (3.029, 5000, 0.010321),
    "MT06": (3.529, 5087, 0.009234),
    "MT07": (3.189, 4500, 0.001787),
    "MT08": (1.903, 4500, 0.004342),

    "BN01": (2.624, 4600, 0.002249),
    "BN02": (2.310, 4550, 0.001441),
    "BN03": (3.544, 4452, 0.000979),
    "BN04": (2.164, 4452, 0.001556),
    "BN05": (1.954, 4452, 0.002365),

    "PL01": (0.838, 4399, 0.013174),
    "PL02": (3.019, 4870, 0.005973),
    "PL03": (3.333, 4870, 0.002481),
    "PL04": (3.249, 4870, 0.002712),
    "PL05": (4.087, 4870, 0.006516),
    "PL06": (3.714, 4870, 0.003255),
    "PL07": (4.063, 4870, 0.002596),
    "PL08": (0.905, 4372, 0.009777),
    "PL09": (3.164, 4816, 0.005429),
    "PL10": (2.742, 4816, 0.003799),
    "PL11": (2.464, 4684, 0.001325),

    "DX01": (8.250, 4746, 0.002134),
    "DX02": (7.125, 4652, 0.001903),

    "SV01": (0.0, 0.0, 0.0),
    "SV02": (0.0, 0.0, 0.0),
    "SV03": (0.0, 0.0, 0.0),
}

In [18]:
import numpy as np
import pandas as pd

def map_to_reg_tuple(name):
    return class_to_reg.get(name, (np.nan, np.nan, np.nan))

# aplicar mapeo
mapped = test["Name"].apply(map_to_reg_tuple)
mapped_df = mapped.apply(pd.Series)
mapped_df.columns = ["w", "H", "r"]

# aÃ±adir columnas
test = pd.concat([test, mapped_df], axis=1)

print(test.head())

   Name  blue_oto  green_oto   nir_oto   red_oto  swir1_oto  swir2_oto  \
0  BN03  0.014486   0.027810  0.302164  0.021210   0.105291   0.042756   
1  BN03  0.015517   0.026187  0.258865  0.024757   0.120182   0.043513   
2  BN03  0.013991   0.026504  0.157087  0.020192   0.067479   0.030147   
3  BN03  0.032512   0.041752  0.306605  0.036967   0.082315   0.033282   
4  BN03  0.017291   0.036899  0.320396  0.031385   0.125875   0.052574   

   NDMI_oto  NDVI_oto  SAVI_oto  ...  B5_diss_prim  B5_ent_prim  B5_idm_prim  \
0  0.505879  0.845815  0.502046  ...      0.000000     0.000000     1.000000   
1  0.367889  0.826161  0.449747  ...      0.000000     0.000000     1.000000   
2  0.399027  0.771704  0.302986  ...     20.426587     0.906099     0.814318   
3  0.570575  0.786836  0.459589  ...      0.000000     0.000000     1.000000   
4  0.427661  0.823504  0.513412  ...      0.000000     0.000000     1.000000   

   bioma   fuel    heat    spread      w       H         r  
0     60  3.5

In [29]:
y_pred_int = rf.predict(X_test)
y_pred_class = le.inverse_transform(y_pred_int)

In [31]:
y_pred_class

array(['BN03', 'BN04', 'BN04', ..., 'SV03', 'SV02', 'PL08'],
      shape=(2659,), dtype=object)

In [21]:
import numpy as np

def get_reg_values(cl):
    return class_to_reg.get(cl, (np.nan, np.nan, np.nan))

In [42]:
# REALES
true_reg = np.array([get_reg_values(c) for c in test["Name"]])
test["W_true"] = true_reg[:, 0]
test["H_true"] = true_reg[:, 1]  # OJO: cambia si tu orden es W, H, R
test["R_true"] = true_reg[:, 2]

# PREDICHOS
pred_reg = np.array([get_reg_values(c) for c in y_pred_class])
test["W_pred"] = pred_reg[:, 0]
test["H_pred"] = pred_reg[:, 1]
test["R_pred"] = pred_reg[:, 2]

In [43]:
test

Unnamed: 0,Name,blue_oto,green_oto,nir_oto,red_oto,swir1_oto,swir2_oto,NDMI_oto,NDVI_oto,SAVI_oto,...,bioma,fuel,heat,spread,W_true,H_true,R_true,W_pred,H_pred,R_pred
0,BN03,0.014486,0.027810,0.302164,0.021210,0.105291,0.042756,0.505879,0.845815,0.502046,...,60,3.544,4452.0,0.000979,3.544,4452.0,0.000979,3.544,4452.0,0.000979
1,BN03,0.015517,0.026187,0.258865,0.024757,0.120182,0.043513,0.367889,0.826161,0.449747,...,60,3.544,4452.0,0.000979,3.544,4452.0,0.000979,2.164,4452.0,0.001556
2,BN03,0.013991,0.026504,0.157087,0.020192,0.067479,0.030147,0.399027,0.771704,0.302986,...,60,3.544,4452.0,0.000979,3.544,4452.0,0.000979,2.164,4452.0,0.001556
3,BN03,0.032512,0.041752,0.306605,0.036967,0.082315,0.033282,0.570575,0.786836,0.459589,...,60,3.544,4452.0,0.000979,3.544,4452.0,0.000979,2.164,4452.0,0.001556
4,BN03,0.017291,0.036899,0.320396,0.031385,0.125875,0.052574,0.427661,0.823504,0.513412,...,59,3.544,4452.0,0.000979,3.544,4452.0,0.000979,1.954,4452.0,0.002365
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2654,SV03,0.039236,0.052725,0.185908,0.062831,0.176131,0.119110,-0.051994,0.442407,0.218776,...,18,0.000,0.0,0.000000,0.000,0.0,0.000000,0.000,0.0,0.000000
2655,SV03,0.048297,0.051158,0.089630,0.064000,0.057372,0.055310,0.219435,0.166829,0.058818,...,23,0.000,0.0,0.000000,0.000,0.0,0.000000,0.000,0.0,0.000000
2656,SV03,0.044777,0.056740,0.177685,0.095350,0.191353,0.120017,-0.100597,0.325057,0.177423,...,9,0.000,0.0,0.000000,0.000,0.0,0.000000,0.000,0.0,0.000000
2657,SV03,0.068757,0.108605,0.221603,0.136462,0.231750,0.175870,-0.022383,0.237778,0.146401,...,24,0.000,0.0,0.000000,0.000,0.0,0.000000,0.000,0.0,0.000000


In [44]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

def regression_metrics(y_true, y_pred, name):
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    r2 = r2_score(y_true, y_pred)

    print(f"\n=== MÃ‰TRICAS PARA {name} ===")
    print(f"MAE : {mae:.4f}")
    print(f"RMSE: {rmse:.4f}")
    print(f"RÂ²  : {r2:.4f}")

In [45]:
regression_metrics(test["W_true"], test["W_pred"], "W (combustible)")
regression_metrics(test["H_true"], test["H_pred"], "H (poder calorÃ­fico)")
regression_metrics(test["R_true"], test["R_pred"], "R (propagaciÃ³n)")


=== MÃ‰TRICAS PARA W (combustible) ===
MAE : 0.6149
RMSE: 1.2697
RÂ²  : 0.3903

=== MÃ‰TRICAS PARA H (poder calorÃ­fico) ===
MAE : 254.7307
RMSE: 840.2075
RÂ²  : 0.3363

=== MÃ‰TRICAS PARA R (propagaciÃ³n) ===
MAE : 0.0021
RMSE: 0.0041
RÂ²  : 0.1252


## Vale RegresiÃ³n 1 modelo

In [None]:
import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# ============================================
# 1) Mapeo clase -> (W, H, R)
# ============================================
class_to_reg = {
    "PCH1": (0.684, 3928, 0.018880),
    "PCH2": (0.527, 3928, 0.016027),
    "PCH3": (0.918, 3928, 0.010235),
    "PCH4": (0.617, 3928, 0.008690),
    "PCH5": (0.649, 3800, 0.001009),

    "MT01": (2.923, 4693, 0.007603),
    "MT02": (1.910, 4693, 0.008147),
    "MT03": (3.308, 4572, 0.001672),
    "MT04": (1.383, 4572, 0.004886),
    "MT05": (3.029, 5000, 0.010321),
    "MT06": (3.529, 5087, 0.009234),
    "MT07": (3.189, 4500, 0.001787),
    "MT08": (1.903, 4500, 0.004342),

    "BN01": (2.624, 4600, 0.002249),
    "BN02": (2.310, 4550, 0.001441),
    "BN03": (3.544, 4452, 0.000979),
    "BN04": (2.164, 4452, 0.001556),
    "BN05": (1.954, 4452, 0.002365),

    "PL01": (0.838, 4399, 0.013174),
    "PL02": (3.019, 4870, 0.005973),
    "PL03": (3.333, 4870, 0.002481),
    "PL04": (3.249, 4870, 0.002712),
    "PL05": (4.087, 4870, 0.006516),
    "PL06": (3.714, 4870, 0.003255),
    "PL07": (4.063, 4870, 0.002596),
    "PL08": (0.905, 4372, 0.009777),
    "PL09": (3.164, 4816, 0.005429),
    "PL10": (2.742, 4816, 0.003799),
    "PL11": (2.464, 4684, 0.001325),

    "DX01": (8.250, 4746, 0.002134),
    "DX02": (7.125, 4652, 0.001903),

    "SV01": (0.0, 0.0, 0.0),
    "SV02": (0.0, 0.0, 0.0),
    "SV03": (0.0, 0.0, 0.0),
}

# Lo pasamos a DataFrame para mergear fÃ¡cil
map_df = (
    pd.DataFrame.from_dict(class_to_reg, orient="index", columns=["W", "H", "R"])
    .reset_index()
    .rename(columns={"index": "Name"})
)

# ============================================
# 2) Unir mapeo con train y test
# ============================================
# Asumo que train_df y test_df ya existen
train_merged = train_df.merge(map_df, on="Name", how="inner")
test_merged  = test_df.merge(map_df, on="Name", how="inner")

print("Train merged shape:", train_merged.shape)
print("Test merged shape :", test_merged.shape)

# Si quieres ver si hay clases sin mapeo:
if len(train_df) != len(train_merged):
    print("âš  Ojo: algunas filas de train no tienen mapeo en class_to_reg")
if len(test_df) != len(test_merged):
    print("âš  Ojo: algunas filas de test no tienen mapeo en class_to_reg")

# ============================================
# 3) Preparar X (features) e y (targets W,H,R)
# ============================================
# Columnas a excluir explÃ­citamente como no-features
exclude_cols = ["Name", "label", "W", "H", "R"]

feature_cols = [
    c for c in train_merged.columns
    if c not in exclude_cols
]

# Nos quedamos solo con columnas numÃ©ricas por seguridad
X_train = train_merged[feature_cols].select_dtypes(include=["number"])
X_test  = test_merged[feature_cols].select_dtypes(include=["number"])

y_train = train_merged[["W", "H", "R"]].values
y_test  = test_merged[["W", "H", "R"]].values

print("N features:", X_train.shape[1])

# ============================================
# 4) RandomForestRegressor multi-output
# ============================================
rf_reg = RandomForestRegressor(
    n_estimators=1600,
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
    max_features="sqrt",
    bootstrap=True,
    n_jobs=-1,
    random_state=42
)


rf_reg.fit(X_train, y_train)

y_pred = rf_reg.predict(X_test)  # shape: (n_samples, 3)

# Lo pasamos a DataFrame por legibilidad
y_test_df = pd.DataFrame(y_test, columns=["W", "H", "R"])
y_pred_df = pd.DataFrame(y_pred, columns=["W", "H", "R"])

# ============================================
# 5) FunciÃ³n helper para mÃ©tricas
# ============================================
def print_reg_metrics(y_true, y_pred, name):
    mae  = mean_absolute_error(y_true, y_pred)
    rmse = mean_squared_error(y_true, y_pred)
    r2   = r2_score(y_true, y_pred)
    print(f"\n=== MÃ‰TRICAS PARA {name} ===")
    print(f"MAE : {mae:.4f}")
    print(f"RMSE: {rmse:.4f}")
    print(f"RÂ²  : {r2:.4f}")

# ============================================
# 6) MÃ©tricas por variable (W, H, R)
# ============================================
print_reg_metrics(y_test_df["W"], y_pred_df["W"], "W (combustible)")
print_reg_metrics(y_test_df["H"], y_pred_df["H"], "H (poder calorÃ­fico)")
print_reg_metrics(y_test_df["R"], y_pred_df["R"], "R (propagaciÃ³n)")

# ============================================
# 7) Importancia de features (global para las 3 salidas)
# ============================================
importances = pd.DataFrame({
    "feature": X_train.columns,
    "importance": rf_reg.feature_importances_
}).sort_values("importance", ascending=False)

print("\nTop 20 features importantes (regresiÃ³n multi-output):")
print(importances.head(20))

Train merged shape: (6260, 49)
Test merged shape : (2659, 49)
N features: 44

=== MÃ‰TRICAS PARA W (combustible) ===
MAE : 0.7717
RMSE: 1.4153
RÂ²  : 0.4647

=== MÃ‰TRICAS PARA H (poder calorÃ­fico) ===
MAE : 363.9265
RMSE: 602333.1140
RÂ²  : 0.4337

=== MÃ‰TRICAS PARA R (propagaciÃ³n) ===
MAE : 0.0025
RMSE: 0.0000
RÂ²  : 0.3196

Top 20 features importantes (regresiÃ³n multi-output):
            feature  importance
9          blue_pri    0.054263
10        green_pri    0.051041
22              dem    0.040110
43            bioma    0.035683
19   shortvegheight    0.035384
6          NDMI_oto    0.034587
12          red_pri    0.033546
28  brightness_prim    0.030148
13        swir1_pri    0.029020
16         NDVI_pri    0.028141
23            slope    0.028120
17         SAVI_pri    0.027766
11          nir_pri    0.026370
15         NDMI_pri    0.026273
30     wetness_prim    0.025411
14        swir2_pri    0.025312
29   greenness_prim    0.024745
1         green_oto    0.023155
7    

## Vale RegresiÃ³n 3 modelos

In [52]:
train_df

Unnamed: 0,Name,blue_oto,green_oto,nir_oto,red_oto,swir1_oto,swir2_oto,NDMI_oto,NDVI_oto,SAVI_oto,...,B5_ent_oton,B5_idm_oton,B5_asm_prim,B5_contrast_prim,B5_corr_prim,B5_diss_prim,B5_ent_prim,B5_idm_prim,bioma,label
0,BN03,0.028415,0.036802,0.308227,0.028690,0.089080,0.035153,0.547320,0.829691,0.501013,...,0.577879,0.891034,1.000000,0.000000,1.000000,0.000000,0.000000,1.000000,60,2
1,BN03,0.023492,0.035537,0.186320,0.027782,0.078355,0.036115,0.433231,0.740475,0.333014,...,3.013063,0.288068,0.431067,8181.468254,0.266461,46.912698,1.501572,0.664889,60,2
2,BN03,0.011227,0.016755,0.101950,0.008285,0.022998,0.015380,0.631885,0.863389,0.218838,...,4.091020,0.145391,0.025831,304617.470238,0.382639,368.517857,4.078550,0.136110,59,2
3,BN03,0.060672,0.082191,0.342465,0.079029,0.192054,0.134703,0.281346,0.624756,0.428739,...,0.000000,1.000000,1.000000,0.000000,1.000000,0.000000,0.000000,1.000000,59,2
4,BN03,0.010994,0.021375,0.172859,0.014899,0.058610,0.023273,0.493018,0.827588,0.319291,...,3.505576,0.301951,0.963016,506.968254,0.240455,3.091270,0.101056,0.981151,60,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6255,SV03,0.060783,0.084776,0.172611,0.113871,0.297461,0.198434,-0.239322,0.206924,0.112013,...,0.000000,1.000000,1.000000,0.000000,1.000000,0.000000,0.000000,1.000000,24,33
6256,SV03,0.016067,0.015655,0.140477,0.021182,0.054870,0.023987,0.433463,0.737938,0.270445,...,3.286752,0.291356,0.208534,138277.713294,0.436204,211.610119,2.549641,0.484611,15,33
6257,SV03,0.029927,0.043788,0.181370,0.054595,0.172872,0.089025,0.100940,0.534044,0.274006,...,2.856348,0.423189,0.233877,26029.238095,0.352496,96.988095,2.541638,0.491430,60,33
6258,SV03,0.065856,0.095584,0.194653,0.114848,0.267514,0.200441,-0.147076,0.241201,0.128085,...,0.000000,1.000000,1.000000,0.000000,1.000000,0.000000,0.000000,1.000000,18,33


In [57]:
# Lo pasamos a DataFrame para mergear fÃ¡cil
map_df = (
    pd.DataFrame.from_dict(class_to_reg, orient="index",  columns=["W", "H", "R"])  # columns W,H,R
    .reset_index()
    .rename(columns={"index": "Name"})
)

# =====================================================
# 1) Unir mapeo con train y test
# =====================================================
# train_df y test_df ya existen y tienen columna "Name"
train_merged = train_df.merge(map_df, on="Name", how="inner")
test_merged  = test_df.merge(map_df, on="Name", how="inner")

print("Train merged shape:", train_merged.shape)
print("Test merged shape :", test_merged.shape)

# =====================================================
# 2) Definir targets (las columnas mapeadas)
# =====================================================
target_cols = ["W", "H", "R"]

# =====================================================
# 3) Seleccionar features numÃ©ricos
# =====================================================
exclude_cols = ["Name"] + target_cols  # no usamos Name ni los targets como features

feature_cols = [c for c in train_merged.columns if c not in exclude_cols]

train_X = train_merged[feature_cols].select_dtypes(
    include=["float32", "float64", "int32", "int64"]
).copy()
test_X = test_merged[feature_cols].select_dtypes(
    include=["float32", "float64", "int32", "int64"]
).copy()

print("Features usados:", train_X.columns.tolist())
print("Shape X_train:", train_X.shape)
print("Shape X_test :", test_X.shape)

# =====================================================
# 4) FunciÃ³n de mÃ©tricas
# =====================================================
def print_reg_metrics(y_true, y_pred, name):
    mae  = mean_absolute_error(y_true, y_pred)
    rmse = mean_squared_error(y_true, y_pred)
    r2   = r2_score(y_true, y_pred)
    print(f"\n=== MÃ‰TRICAS PARA {name} ===")
    print(f"MAE : {mae:.4f}")
    print(f"RMSE: {rmse:.4f}")
    print(f"RÂ²  : {r2:.4f}")

# =====================================================
# 5) ConfiguraciÃ³n comÃºn del Random Forest
# =====================================================
rf_params = dict(
    n_estimators=1600,
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
    max_features="sqrt",
    bootstrap=True,
    n_jobs=-1,
    random_state=42,
)

# =====================================================
# 6) Entrenar 3 modelos (uno por W, H, R)
# =====================================================
models = {}
feature_importances = {}

for target in target_cols:
    print("\n==============================")
    print(f" Entrenando modelo para {target}")
    print("==============================")

    y_train = train_merged[target].astype(float).values
    y_test  = test_merged[target].astype(float).values

    rf = RandomForestRegressor(**rf_params)
    rf.fit(train_X, y_train)

    models[target] = rf

    y_pred = rf.predict(test_X)
    print_reg_metrics(y_test, y_pred, target)

    importances = pd.DataFrame({
        "feature": train_X.columns,
        "importance": rf.feature_importances_
    }).sort_values("importance", ascending=False)

    feature_importances[target] = importances

    print(f"\nTop 20 features para {target}:")
    print(importances.head(20))

Train merged shape: (6260, 49)
Test merged shape : (2659, 49)
Features usados: ['blue_oto', 'green_oto', 'nir_oto', 'red_oto', 'swir1_oto', 'swir2_oto', 'NDMI_oto', 'NDVI_oto', 'SAVI_oto', 'blue_pri', 'green_pri', 'nir_pri', 'red_pri', 'swir1_pri', 'swir2_pri', 'NDMI_pri', 'NDVI_pri', 'SAVI_pri', 'aspect', 'shortvegheight', 'chili', 'landforms', 'dem', 'slope', 'mtpi', 'brightness_oton', 'greenness_oton', 'wetness_oton', 'brightness_prim', 'greenness_prim', 'wetness_prim', 'B5_asm_oton', 'B5_contrast_oton', 'B5_corr_oton', 'B5_diss_oton', 'B5_ent_oton', 'B5_idm_oton', 'B5_asm_prim', 'B5_contrast_prim', 'B5_corr_prim', 'B5_diss_prim', 'B5_ent_prim', 'B5_idm_prim', 'bioma', 'label']
Shape X_train: (6260, 45)
Shape X_test : (2659, 45)

 Entrenando modelo para W

=== MÃ‰TRICAS PARA W ===
MAE : 0.5108
RMSE: 0.6163
RÂ²  : 0.7669

Top 20 features para W:
            feature  importance
44            label    0.270448
22              dem    0.039545
11          nir_pri    0.032702
29   greenne

In [63]:
# Lo pasamos a DataFrame para mergear fÃ¡cil
map_df = (
    pd.DataFrame.from_dict(class_to_reg, orient="index",  columns=["W", "H", "R"])  # columns W,H,R
    .reset_index()
    .rename(columns={"index": "Name"})
)

# =====================================================
# 1) Unir mapeo con train y test
# =====================================================
# train_df y test_df ya existen y tienen columna "Name"
train_merged = train_df.merge(map_df, on="Name", how="inner")
test_merged  = test_df.merge(map_df, on="Name", how="inner")

print("Train merged shape:", train_merged.shape)
print("Test merged shape :", test_merged.shape)

# =====================================================
# 2) Definir targets (las columnas mapeadas)
# =====================================================
target_cols = ["W", "H", "R"]

# =====================================================
# 3) Seleccionar features numÃ©ricos
# =====================================================
exclude_cols = ["Name"] + target_cols  # no usamos Name ni los targets como features

feature_cols = [c for c in train_merged.columns if c not in exclude_cols]

train_X = train_merged[feature_cols].select_dtypes(
    include=["float32", "float64", "int32", "int64"]
).copy()
test_X = test_merged[feature_cols].select_dtypes(
    include=["float32", "float64", "int32", "int64"]
).copy()

print("Features usados:", train_X.columns.tolist())
print("Shape X_train:", train_X.shape)
print("Shape X_test :", test_X.shape)

# =====================================================
# 4) FunciÃ³n de mÃ©tricas
# =====================================================
def print_reg_metrics(y_true, y_pred, name):
    mae  = mean_absolute_error(y_true, y_pred)
    rmse = mean_squared_error(y_true, y_pred)
    r2   = r2_score(y_true, y_pred)
    print(f"\n=== MÃ‰TRICAS PARA {name} ===")
    print(f"MAE : {mae:.4f}")
    print(f"RMSE: {rmse:.4f}")
    print(f"RÂ²  : {r2:.4f}")

# =====================================================
# 5) ConfiguraciÃ³n comÃºn del Random Forest
# =====================================================
rf_params = dict(
    n_estimators=1600,
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
    max_features="sqrt",
    bootstrap=True,
    n_jobs=-1,
    random_state=42,
)

# =====================================================
# 6) Entrenar 3 modelos (uno por W, H, R)
# =====================================================
models = {}
feature_importances = {}

for target in target_cols:
    print("\n==============================")
    print(f" Entrenando modelo para {target}")
    print("==============================")

    y_train = train_merged[target].astype(float).values
    y_test  = test_merged[target].astype(float).values

    rf = RandomForestRegressor(**rf_params)
    rf.fit(train_X, y_train)

    models[target] = rf

    y_pred = rf.predict(test_X)
    print_reg_metrics(y_test, y_pred, target)

    importances = pd.DataFrame({
        "feature": train_X.columns,
        "importance": rf.feature_importances_
    }).sort_values("importance", ascending=False)

    feature_importances[target] = importances

    print(f"\nTop 20 features para {target}:")
    print(importances.head(20))

Train merged shape: (6260, 49)
Test merged shape : (2659, 58)
Features usados: ['blue_oto', 'green_oto', 'nir_oto', 'red_oto', 'swir1_oto', 'swir2_oto', 'NDMI_oto', 'NDVI_oto', 'SAVI_oto', 'blue_pri', 'green_pri', 'nir_pri', 'red_pri', 'swir1_pri', 'swir2_pri', 'NDMI_pri', 'NDVI_pri', 'SAVI_pri', 'aspect', 'shortvegheight', 'chili', 'landforms', 'dem', 'slope', 'mtpi', 'brightness_oton', 'greenness_oton', 'wetness_oton', 'brightness_prim', 'greenness_prim', 'wetness_prim', 'B5_asm_oton', 'B5_contrast_oton', 'B5_corr_oton', 'B5_diss_oton', 'B5_ent_oton', 'B5_idm_oton', 'B5_asm_prim', 'B5_contrast_prim', 'B5_corr_prim', 'B5_diss_prim', 'B5_ent_prim', 'B5_idm_prim', 'bioma', 'label']
Shape X_train: (6260, 45)
Shape X_test : (2659, 45)

 Entrenando modelo para W

=== MÃ‰TRICAS PARA W ===
MAE : 0.5108
RMSE: 0.6163
RÂ²  : 0.7669

Top 20 features para W:
            feature  importance
44            label    0.270448
22              dem    0.039545
11          nir_pri    0.032702
29   greenne

In [65]:
# quitar label de dataframe train
train_df = train_df.drop(columns=["label"])


In [66]:
# Lo pasamos a DataFrame para mergear fÃ¡cil
map_df = (
    pd.DataFrame.from_dict(class_to_reg, orient="index",  columns=["W", "H", "R"])  # columns W,H,R
    .reset_index()
    .rename(columns={"index": "Name"})
)

# =====================================================
# 1) Unir mapeo con train y test
# =====================================================
# train_df y test_df ya existen y tienen columna "Name"
train_merged = train_df.merge(map_df, on="Name", how="inner")
test_merged  = test_df.merge(map_df, on="Name", how="inner")

print("Train merged shape:", train_merged.shape)
print("Test merged shape :", test_merged.shape)

# =====================================================
# 2) Definir targets (las columnas mapeadas)
# =====================================================
target_cols = ["W", "H", "R"]

# =====================================================
# 3) Seleccionar features numÃ©ricos
# =====================================================
exclude_cols = ["Name"] + target_cols  # no usamos Name ni los targets como features

feature_cols = [c for c in train_merged.columns if c not in exclude_cols]

train_X = train_merged[feature_cols].select_dtypes(
    include=["float32", "float64", "int32", "int64"]
).copy()
test_X = test_merged[feature_cols].select_dtypes(
    include=["float32", "float64", "int32", "int64"]
).copy()

print("Features usados:", train_X.columns.tolist())
print("Shape X_train:", train_X.shape)
print("Shape X_test :", test_X.shape)

# =====================================================
# 4) FunciÃ³n de mÃ©tricas
# =====================================================
def print_reg_metrics(y_true, y_pred, name):
    mae  = mean_absolute_error(y_true, y_pred)
    rmse = mean_squared_error(y_true, y_pred)
    r2   = r2_score(y_true, y_pred)
    print(f"\n=== MÃ‰TRICAS PARA {name} ===")
    print(f"MAE : {mae:.4f}")
    print(f"RMSE: {rmse:.4f}")
    print(f"RÂ²  : {r2:.4f}")

# =====================================================
# 5) ConfiguraciÃ³n comÃºn del Random Forest
# =====================================================
rf_params = dict(
    n_estimators=1600,
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
    max_features="sqrt",
    bootstrap=True,
    n_jobs=-1,
    random_state=42,
)

# =====================================================
# 6) Entrenar 3 modelos (uno por W, H, R)
# =====================================================
models = {}
feature_importances = {}

for target in target_cols:
    print("\n==============================")
    print(f" Entrenando modelo para {target}")
    print("==============================")

    y_train = train_merged[target].astype(float).values
    y_test  = test_merged[target].astype(float).values

    rf = RandomForestRegressor(**rf_params)
    rf.fit(train_X, y_train)

    models[target] = rf

    y_pred = rf.predict(test_X)
    print_reg_metrics(y_test, y_pred, target)

    importances = pd.DataFrame({
        "feature": train_X.columns,
        "importance": rf.feature_importances_
    }).sort_values("importance", ascending=False)

    feature_importances[target] = importances

    print(f"\nTop 20 features para {target}:")
    print(importances.head(20))

Train merged shape: (6260, 48)
Test merged shape : (2659, 58)
Features usados: ['blue_oto', 'green_oto', 'nir_oto', 'red_oto', 'swir1_oto', 'swir2_oto', 'NDMI_oto', 'NDVI_oto', 'SAVI_oto', 'blue_pri', 'green_pri', 'nir_pri', 'red_pri', 'swir1_pri', 'swir2_pri', 'NDMI_pri', 'NDVI_pri', 'SAVI_pri', 'aspect', 'shortvegheight', 'chili', 'landforms', 'dem', 'slope', 'mtpi', 'brightness_oton', 'greenness_oton', 'wetness_oton', 'brightness_prim', 'greenness_prim', 'wetness_prim', 'B5_asm_oton', 'B5_contrast_oton', 'B5_corr_oton', 'B5_diss_oton', 'B5_ent_oton', 'B5_idm_oton', 'B5_asm_prim', 'B5_contrast_prim', 'B5_corr_prim', 'B5_diss_prim', 'B5_ent_prim', 'B5_idm_prim', 'bioma']
Shape X_train: (6260, 44)
Shape X_test : (2659, 44)

 Entrenando modelo para W

=== MÃ‰TRICAS PARA W ===
MAE : 0.7651
RMSE: 1.3700
RÂ²  : 0.4819

Top 20 features para W:
            feature  importance
22              dem    0.061077
11          nir_pri    0.040864
29   greenness_prim    0.038453
10        green_pri  