In [2]:
import pandas as pd
import matplotlib
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.sequence import pad_sequences


In [3]:
# Accumulators filled while parsing the CSV; each gets one entry per data row.
corpus=[]      # per-row list of the remaining (non age/sex/GRD) fields
target=[]      # per-row GRD field (text before the first "-")
edad=[]        # per-row age, parsed as int
sexo=[]        # per-row sex flag: 1 when the field equals "Mujer", otherwise 0
severidad=[]   # declared here but never filled by this cell

# Zero-based positions of the non-code fields inside each ";"-separated row.
EDAD_IDX=65
SEXO_IDX=66
GRD_IDX=67

features=[]
# Set this path to your dataset, relative to the directory that contains this notebook.
# "utf-8-sig" reads plain UTF-8 unchanged but also drops a leading BOM, which would
# otherwise stick to the first header name and hide it from the "Diag"/"Proc" checks.
# The `with` block guarantees the file is closed even if parsing raises.
with open("datasets/dataset_elpino.csv",encoding="utf-8-sig") as archivo:
    header=archivo.readline().strip().split(";")
    for col in header:
        # Keep only the text before the first "-" of each header name.
        col=col.split("-")[0].strip()
        if col.startswith("Diag") or col.startswith("Proc"):
            # Join the first two space-separated words (e.g. "Diag 01 ..." -> "Diag01").
            col=col.split(" ")
            col=col[0]+col[1]
        features.append(col)
    for linea in archivo:
        linea=linea.strip()
        if not linea:
            # A blank (e.g. trailing) line would add a row to `corpus` with no
            # matching entry in `target`, breaking the DataFrame built later.
            continue
        row=[]
        for i,campo in enumerate(linea.split(";")):
            # Every field keeps only the text before its first "-".
            col=campo.split("-")[0].strip()
            if i==GRD_IDX:
                target.append(col)
            elif i==SEXO_IDX:
                sexo.append(1 if col=="Mujer" else 0)
            elif i==EDAD_IDX:
                edad.append(int(col))
            else:
                row.append(col)
        corpus.append(row)

In [4]:
# Assemble the table in one expression: the parsed rows become the columns named
# by the header minus its last three entries, and the GRD values are attached as
# an extra "GRD" column.
df=pd.DataFrame(data=corpus,columns=features[:-3]).assign(GRD=target)

In [5]:
# Names of every column whose label starts with "Diag" or "Proc".
code_columns = [name for name in df.columns if name.startswith(("Diag", "Proc"))]

# Flatten all cells of those columns into one 1-D array, then keep each distinct
# non-null value once; the resulting list is used as the vocabulary.
all_codes = df[code_columns].to_numpy().flatten()
unique_codes = pd.Series(all_codes).dropna().unique().tolist()

In [6]:
#Build the vocabulary lookup from the list of unique codes; "[UNK]" is the out-of-vocabulary token
lookup_layer = tf.keras.layers.StringLookup(vocabulary=unique_codes, oov_token="[UNK]")

#Cast every code cell to str, wrap the table in a string tensor and apply the StringLookup
#NOTE(review): astype(str) would turn a missing cell into a literal string rather than a blank - confirm whether missing cells can occur
X_codes_str = tf.constant(df[code_columns].astype(str).values)
X_codes_idx = lookup_layer(X_codes_str)

#Padding
#NOTE(review): the input comes from a rectangular table, so all rows already share one length and this call presumably adds no padding - confirm it is still needed
X_padded = pad_sequences(X_codes_idx.numpy(), padding='post')

#Encode the GRD labels as integers; this second lookup learns its vocabulary from the column through adapt()
grd_lookup = tf.keras.layers.StringLookup(oov_token="[UNK]")
grd_lookup.adapt(df["GRD"])
y = grd_lookup(df["GRD"])

In [7]:
# One seed for everything stochastic in this notebook. set_random_seed seeds the
# Python, NumPy and TensorFlow generators, so weight initialisation and batch
# shuffling repeat across Restart & Run All (previously only the split was seeded).
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Split the data: 30% of the samples are held out for testing.
X_train, X_test, y_train, y_test = train_test_split(X_padded, y.numpy(), test_size=0.3, random_state=SEED)


# Define the Keras model.
# Both sizes come straight from the lookup layers, so the embedding table and the
# output layer always match the index ranges those layers produce.
vocab_size = lookup_layer.vocabulary_size()
num_classes = grd_lookup.vocabulary_size()

model = tf.keras.Sequential([
    # Code index -> 64-dimensional vector.
    tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=64),
    # Reads the sequence of code vectors and returns a single 64-unit summary.
    tf.keras.layers.LSTM(64),
    tf.keras.layers.Dense(64, activation='relu'),
    # One probability per GRD class.
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

# Sparse categorical cross-entropy because the labels are integer class indices, not one-hot vectors.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [8]:
# Train for 120 epochs in mini-batches of 64; the held-out split is passed as
# validation data.
history = model.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=120,
    validation_data=(X_test, y_test),
)

# Final score on the held-out split, unpacked as loss and accuracy.
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Accuracy en test: {accuracy:.2%}")

Epoch 1/120
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.0453 - loss: 5.6137 - val_accuracy: 0.0529 - val_loss: 5.1565
Epoch 2/120
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 17ms/step - accuracy: 0.0670 - loss: 5.0094 - val_accuracy: 0.0932 - val_loss: 4.6855
Epoch 3/120
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 17ms/step - accuracy: 0.1078 - loss: 4.4652 - val_accuracy: 0.1291 - val_loss: 4.2605
Epoch 4/120
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 18ms/step - accuracy: 0.1341 - loss: 4.0516 - val_accuracy: 0.1215 - val_loss: 4.0778
Epoch 5/120
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 18ms/step - accuracy: 0.1342 - loss: 3.9293 - val_accuracy: 0.1552 - val_loss: 3.9271
Epoch 6/120
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 17ms/step - accuracy: 0.1585 - loss: 3.6825 - val_accuracy: 0.1598 - val_loss: 3.7736
Epoch 7/120
[1m

In [10]:
#Save the model to "modelo_general.keras" (a relative path)
model.save("modelo_general.keras")

In [None]:
from sklearn.metrics import classification_report
import numpy as np

# Model output for the held-out samples: one row of class probabilities per sample.
y_pred = model.predict(X_test)
# Convert the predictions to discrete class labels: the index of the most probable class.
y_pred_classes = np.argmax(y_pred, axis=1)

# Classification report.
# zero_division=0 reports 0 for classes that were never predicted (or never occur
# in y_test) without emitting an UndefinedMetricWarning for each of them; the
# reported values are the same as with the default setting.
print(f"Reporte clasificacion:\n{classification_report(y_test, y_pred_classes, zero_division=0)}")