In [1]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# üîπ Ruta base LOCAL (no Kaggle)
BASE_DIR = "data/csiro-biomass"
TRAIN_CSV = os.path.join(BASE_DIR, "train.csv")
TEST_CSV  = os.path.join(BASE_DIR, "test.csv")

train_df = pd.read_csv(TRAIN_CSV)
test_df  = pd.read_csv(TEST_CSV)

train_df.head()


Unnamed: 0,sample_id,image_path,Sampling_Date,State,Species,Pre_GSHH_NDVI,Height_Ave_cm,target_name,target
0,ID1011485656__Dry_Clover_g,train/ID1011485656.jpg,2015/9/4,Tas,Ryegrass_Clover,0.62,4.6667,Dry_Clover_g,0.0
1,ID1011485656__Dry_Dead_g,train/ID1011485656.jpg,2015/9/4,Tas,Ryegrass_Clover,0.62,4.6667,Dry_Dead_g,31.9984
2,ID1011485656__Dry_Green_g,train/ID1011485656.jpg,2015/9/4,Tas,Ryegrass_Clover,0.62,4.6667,Dry_Green_g,16.2751
3,ID1011485656__Dry_Total_g,train/ID1011485656.jpg,2015/9/4,Tas,Ryegrass_Clover,0.62,4.6667,Dry_Total_g,48.2735
4,ID1011485656__GDM_g,train/ID1011485656.jpg,2015/9/4,Tas,Ryegrass_Clover,0.62,4.6667,GDM_g,16.275


In [2]:
IMG_SIZE = 224
BATCH_SIZE = 32
AUTOTUNE = tf.data.AUTOTUNE

def build_full_path(path_series, base_dir=BASE_DIR):
    """
    Convierte image_path relativo (train/xxx.jpg) en path absoluto.
    """
    return path_series.apply(lambda p: os.path.join(base_dir, p))

def load_image(path, label=None):
    """
    Lee una imagen desde disco, la redimensiona y la normaliza.
    Si label es None, devuelve solo la imagen (para test).
    """
    img = tf.io.read_file(path)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, (IMG_SIZE, IMG_SIZE))
    img = img / 255.0
    if label is None:
        return img
    return img, label


In [3]:
from sklearn.model_selection import train_test_split

def make_datasets_for_target(train_df, target_name, base_dir=BASE_DIR):
    """
    Filtra train_df por un target_name y arma ds_train / ds_val.
    """
    df_t = train_df[train_df["target_name"] == target_name].copy()
    
    # Paths completos a las im√°genes
    paths = build_full_path(df_t["image_path"], base_dir=base_dir)
    paths = paths.values  # array de strings
    y = df_t["target"].values.astype("float32")
    
    X_train, X_val, y_train, y_val = train_test_split(
        paths, y, test_size=0.2, random_state=42
    )
    
    ds_train = tf.data.Dataset.from_tensor_slices((X_train, y_train))
    ds_train = ds_train.map(
        lambda p, t: load_image(p, t),
        num_parallel_calls=AUTOTUNE
    ).shuffle(512).batch(BATCH_SIZE).prefetch(AUTOTUNE)
    
    ds_val = tf.data.Dataset.from_tensor_slices((X_val, y_val))
    ds_val = ds_val.map(
        lambda p, t: load_image(p, t),
        num_parallel_calls=AUTOTUNE
    ).batch(BATCH_SIZE).prefetch(AUTOTUNE)
    
    return ds_train, ds_val, df_t


In [4]:
ds_train_ex, ds_val_ex, df_ex = make_datasets_for_target(train_df, "Dry_Total_g")
len(df_ex), ds_train_ex


(357,
 <_PrefetchDataset element_spec=(TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.float32, name=None))>)

In [5]:
def build_cnn_model(img_size=IMG_SIZE):
    base_model = keras.applications.EfficientNetB0(
        include_top=False,
        weights="imagenet",
        input_shape=(img_size, img_size, 3),
        pooling="avg",
    )
    base_model.trainable = False  # primero congelado

    inputs = keras.Input(shape=(img_size, img_size, 3))
    x = base_model(inputs, training=False)
    x = layers.Dense(256, activation="relu")(x)
    x = layers.Dropout(0.3)(x)
    outputs = layers.Dense(1, dtype="float32")(x)  # regresi√≥n

    model = keras.Model(inputs, outputs)
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=1e-3),
        loss="mse",
        metrics=[keras.metrics.RootMeanSquaredError(name="rmse")]
    )
    return model


In [6]:
target_example = "Dry_Total_g"

ds_train, ds_val, df_total = make_datasets_for_target(train_df, target_example)

model_example = build_cnn_model()

history = model_example.fit(
    ds_train,
    validation_data=ds_val,
    epochs=5,  # empieza con pocos epochs para probar
    callbacks=[
        keras.callbacks.EarlyStopping(
            monitor="val_rmse",
            patience=2,
            restore_best_weights=True
        )
    ],
    verbose=1
)


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m2s[0m 0us/step
Epoch 1/5
[1m9/9[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m23s[0m 1s/step - loss: 2625.8274 - rmse: 51.2037 - val_loss: 1526.7179 - val_rmse: 39.0732
Epoch 2/5
[1m9/9[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m8s[0m 825ms/step - loss: 1739.7795 - rmse: 41.5815 - val_loss: 861.9598 - val_rmse: 29.3592
Epoch 3/5
[1m9/9[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m9s[0m 835ms/step - loss: 1004.5005 - rmse: 31.6664 - val_loss: 587.2973 - val_rmse: 24.2342
Epoch 4/5
[1m9/9[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m8s[0m 837ms/step - loss: 781.8272 - rmse: 27.8231 - val_loss: 640.0671 - val_rms

## Entrenar modelo por cada componente de biomasa

In [7]:
TARGETS = ["Dry_Clover_g", "Dry_Dead_g", "Dry_Green_g", "Dry_Total_g", "GDM_g"]

models = {}

for tn in TARGETS:
    print(f"\n===== Entrenando modelo para {tn} =====")
    ds_train, ds_val, df_t = make_datasets_for_target(train_df, tn)

    model = build_cnn_model()

    history = model.fit(
        ds_train,
        validation_data=ds_val,
        epochs=5,  # si ves que va bien y tienes tiempo, puedes subir a 8‚Äì10
        callbacks=[
            keras.callbacks.EarlyStopping(
                monitor="val_rmse",
                patience=2,
                restore_best_weights=True
            )
        ],
        verbose=1
    )

    # Guardamos el modelo en memoria
    models[tn] = model

    # Opcional: guardar a disco por si quieres reusar luego
    model_path = f"model_{tn}.keras"
    model.save(model_path)
    print(f"Modelo para {tn} guardado en {model_path}")



===== Entrenando modelo para Dry_Clover_g =====
Epoch 1/5
[1m9/9[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m23s[0m 1s/step - loss: 156.9745 - rmse: 12.3663 - val_loss: 177.9947 - val_rmse: 13.3415
Epoch 2/5
[1m9/9[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m9s[0m 848ms/step - loss: 152.5043 - rmse: 12.3090 - val_loss: 181.8421 - val_rmse: 13.4849
Epoch 3/5
[1m9/9[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m9s[0m 873ms/step - loss: 138.8971 - rmse: 11.7774 - val_loss: 184.0837 - val_rmse: 13.5677
Modelo para Dry_Clover_g guardado en model_Dry_Clover_g.keras

===== Entrenando modelo para Dry_Dead_g =====
Epoch 1/5
[1m9/9[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m22s[0m 1s/step - loss: 258.2769 - rmse: 16.0060 - val_loss: 146.6577 - val_rmse: 12.1102
Epoch 2/5
[1m9/9[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚

In [8]:
preds = []

for tn in TARGETS:
    print(f"\n>>> Prediciendo para {tn}")
    df_tst = test_df[test_df["target_name"] == tn].copy()
    if df_tst.empty:
        print(f"  No hay filas en test para {tn}, se omite.")
        continue

    paths_tst = build_full_path(df_tst["image_path"], base_dir=BASE_DIR)
    paths_tst = paths_tst.values

    ds_tst = tf.data.Dataset.from_tensor_slices(paths_tst)
    ds_tst = ds_tst.map(load_image, num_parallel_calls=AUTOTUNE)
    ds_tst = ds_tst.batch(BATCH_SIZE).prefetch(AUTOTUNE)

    y_pred_tn = models[tn].predict(ds_tst).reshape(-1)
    df_tst["target"] = y_pred_tn

    preds.append(df_tst[["sample_id", "target"]])

submission_cnn = pd.concat(preds, axis=0).sort_values("sample_id")
submission_cnn.head()



>>> Prediciendo para Dry_Clover_g
[1m1/1[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m3s[0m 3s/step

>>> Prediciendo para Dry_Dead_g
[1m1/1[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m2s[0m 2s/step

>>> Prediciendo para Dry_Green_g
[1m1/1[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m2s[0m 2s/step

>>> Prediciendo para Dry_Total_g
[1m1/1[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m2s[0m 2s/step

>>> Prediciendo para GDM_g
[1m1/1[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m2s[0m 2s/step


Unnamed: 0,sample_id,target
0,ID1001187975__Dry_Clover_g,7.893603
1,ID1001187975__Dry_Dead_g,12.304406
2,ID1001187975__Dry_Green_g,22.664375
3,ID1001187975__Dry_Total_g,44.478161
4,ID1001187975__GDM_g,26.902075


In [9]:
submission_cnn.to_csv("submission_cnn_per_target.csv", index=False)
print("‚úÖ submission_cnn_per_target.csv generado.")


‚úÖ submission_cnn_per_target.csv generado.
