<a href="https://colab.research.google.com/github/Matte920/DeepLearning_PracticaFinal/blob/main/Hito1_Modelo1D_Tabular.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from tensorflow.keras import layers


# Load the HAM10000 metadata table (expected in the working directory).
df = pd.read_csv("HAM10000_metadata.csv")

# Tabular predictors and the diagnosis label.
X = df[["sex", "age", "localization"]].copy()
y = df["dx"].copy()

from sklearn.preprocessing import LabelEncoder

# Encode the string labels as integers, then one-hot them so they match the
# softmax / categorical cross-entropy setup used by the model.
le = LabelEncoder()
y_int = le.fit_transform(y)
y_oh = keras.utils.to_categorical(y_int, num_classes=len(le.classes_))

# Missing categorical values become their own explicit "unknown" category.
# This uses no dataset statistics, so it is safe to do before the split.
X["sex"] = X["sex"].fillna("unknown")
X["localization"] = X["localization"].fillna("unknown")

# One-hot encode the categoricals; "age" remains the only numeric column.
X = pd.get_dummies(X, columns=["sex", "localization"])
X = X.astype("float32")

# Split BEFORE imputing age: computing the median on the full table would
# leak test-set information into the training features. The row count,
# random_state and stratify labels are unchanged, so the partition itself is
# the same as when splitting X.values.
X_train_df, X_test_df, y_train, y_test = train_test_split(
    X, y_oh, test_size=0.2, random_state=42, stratify=y_int
)

# Impute missing ages with the median learned from the training rows only.
age_median = X_train_df["age"].median()
X_train_df = X_train_df.fillna({"age": age_median})
X_test_df = X_test_df.fillna({"age": age_median})
# Keep the full table NaN-free as well, in case it is reused further down.
X["age"] = X["age"].fillna(age_median)

# Locate the age column by name instead of assuming it sits at index 0.
age_col = X.columns.get_loc("age")

# Explicit writable copies: the scaled values are written back in place.
X_train = X_train_df.to_numpy(dtype="float32", copy=True)
X_test = X_test_df.to_numpy(dtype="float32", copy=True)

# Standardise age only; the scaler is fitted on the training split and then
# applied unchanged to the test split.
scaler = StandardScaler()
X_train[:, age_col:age_col + 1] = scaler.fit_transform(
    X_train[:, age_col:age_col + 1]
)
X_test[:, age_col:age_col + 1] = scaler.transform(
    X_test[:, age_col:age_col + 1]
)


# Seed Python, NumPy and the Keras backend so weight initialisation, dropout
# masks and batch shuffling are repeatable, matching the seeded data split.
keras.utils.set_random_seed(42)

# Small fully connected classifier over the tabular features.
input_dim = X_train.shape[1]
model_tab = keras.Sequential([
    layers.Input(shape=(input_dim,)),
    layers.Dense(64, activation="relu"),
    layers.Dropout(0.3),
    layers.Dense(32, activation="relu"),
    layers.Dropout(0.3),
    # One output unit per diagnosis class found by the label encoder.
    layers.Dense(len(le.classes_), activation="softmax"),
])

# Targets are one-hot encoded, hence categorical (not sparse) cross-entropy.
model_tab.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# validation_split holds out the last 20% of the training arrays for
# per-epoch validation; those rows are not used for weight updates.
history = model_tab.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=20,
    batch_size=64,
    verbose=1,
)

# Final check on the held-out test split, which is never seen during fitting.
test_loss, test_acc = model_tab.evaluate(X_test, y_test, verbose=0)
print("Test accuracy (tabular):", test_acc)


Epoch 1/20
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.5139 - loss: 1.6075 - val_accuracy: 0.6856 - val_loss: 0.9781
Epoch 2/20
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6667 - loss: 1.0809 - val_accuracy: 0.6974 - val_loss: 0.9220
Epoch 3/20
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.6801 - loss: 1.0103 - val_accuracy: 0.6981 - val_loss: 0.9035
Epoch 4/20
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6919 - loss: 0.9855 - val_accuracy: 0.6968 - val_loss: 0.8930
Epoch 5/20
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6825 - loss: 0.9944 - val_accuracy: 0.7006 - val_loss: 0.8901
Epoch 6/20
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6971 - loss: 0.9688 - val_accuracy: 0.6981 - val_loss: 0.8874
Epoch 7/20
[1m101/101[0m 

El modelo 1D basado únicamente en los datos tabulares (sexo, edad, localización) obtiene una accuracy de 0.70 en el conjunto de test.
Cumple el objetivo del Hito 1 y servirá como punto de comparación para los modelos basados en imágenes.



