In [3]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import load_model, Model
from tensorflow.keras import layers, models, regularizers, Input
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow.keras.backend as K
import tensorflow as tf

IMG_SIZE = (96, 96)
IMG_SHAPE = (*IMG_SIZE, 3)
TARGETS = ['X4_mean', 'X11_mean', 'X18_mean', 'X26_mean', 'X50_mean', 'X3112_mean']
AUTOENCODER_PATH = 'autoencoder_planttraits.keras'
TRAIN_CSV = 'train.csv'
TRAIN_IMG_DIR = 'train_images/'

df = pd.read_csv(TRAIN_CSV)

features = df.drop(columns=["id"]).iloc[:, :-6]
targets = df[TARGETS]
ids = df['id']

lower = features.quantile(0.05)
upper = features.quantile(0.95)
mask = ((features >= lower) & (features <= upper)).all(axis=1)

features = features[mask]
targets = targets[mask]
ids = ids[mask]

scaler = StandardScaler()
X_scaled = scaler.fit_transform(features)

pca = PCA(n_components=0.70)  # 70% varianza explicada
X_pca = pca.fit_transform(X_scaled)
print(f"Varianza explicada total por PCA: {np.sum(pca.explained_variance_ratio_):.2f}")

autoencoder = load_model(AUTOENCODER_PATH)
encoder_layer_output = autoencoder.layers[3].output  # Ajusta si la capa encoder es diferente
encoder = Model(inputs=autoencoder.input, outputs=encoder_layer_output)
encoder.trainable = False
X_encoded = encoder.predict(X_scaled)

def load_and_process_image(image_id, folder, target_size=IMG_SIZE):
    ext = ".jpg" if os.path.exists(os.path.join(folder, f"{image_id}.jpg")) else ".png"
    path = os.path.join(folder, f"{image_id}{ext}")
    img = load_img(path, target_size=target_size)
    return img_to_array(img) / 255.0

X_images = np.array([load_and_process_image(i, TRAIN_IMG_DIR) for i in ids])

X_img_train, X_img_val, X_tab_train, X_tab_val, y_train, y_val = train_test_split(
    X_images, X_encoded, targets.values, test_size=0.2, random_state=42
)

def r2_score(y_true, y_pred):
    ss_res = K.sum(K.square(y_true - y_pred))
    ss_tot = K.sum(K.square(y_true - K.mean(y_true)))
    return 1 - ss_res / (ss_tot + K.epsilon())

def build_model(img_shape, tab_shape, output_shape):
    # Imagen (CNN)
    base_cnn = MobileNetV2(include_top=False, weights='imagenet', input_shape=img_shape)
    base_cnn.trainable = True
    cnn_model = models.Sequential([
        base_cnn,
        layers.GlobalAveragePooling2D(),
        layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(1e-4))
    ])
    
    # Tabular (codificado)
    tab_input = Input(shape=tab_shape)
    tab_branch = models.Sequential([
        layers.Dense(64, activation='relu', kernel_regularizer=regularizers.l2(1e-4)),
        layers.Dense(32, activation='relu')
    ])
    
    img_input = Input(shape=img_shape)
    cnn_out = cnn_model(img_input)
    tab_out = tab_branch(tab_input)
    
    fusion = layers.concatenate([cnn_out, tab_out])
    fusion = layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(1e-4))(fusion)
    output = layers.Dense(output_shape)(fusion)
    
    model = models.Model(inputs=[img_input, tab_input], outputs=output)
    model.compile(optimizer='adam', loss='mse', metrics=[
        tf.keras.metrics.RootMeanSquaredError(), r2_score
    ])
    return model

model = build_model(IMG_SHAPE, X_encoded.shape[1:], len(TARGETS))
early_stop = EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True)

history = model.fit(
    [X_img_train, X_tab_train], y_train,
    validation_data=([X_img_val, X_tab_val], y_val),
    epochs=15,
    batch_size=32,
    callbacks=[early_stop]
)


Varianza explicada total por PCA: 0.74


ValueError: Input 0 of layer "functional" is incompatible with the layer: expected shape=(None, 512, 512, 3), found shape=(32, 169)