# CREACIÓN DE RED NEURONAL PARA GENERAR EMBEDDINGS CON TENSORFLOW

In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
# Toy corpus: three short English sentences used to build the vocabulary
# and to train the embedding layer further down.
sentences = [
    "I love deep learning",
    "I love machine learning",
    "deep learning is fun",
]

# Tokenización de frases

In [3]:
# Build a tokenizer with default settings and fit it on the corpus so that
# every distinct word is assigned an integer index.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts=sentences)

# Creamos el vocabulario

In [4]:
# Word indices start at 1; index 0 is reserved for padding, so the
# vocabulary needs one extra slot beyond the number of known words.
vocab_size = 1 + len(tokenizer.word_index)
print("Vocabulary size:", vocab_size)

# Show the learned word -> index mapping as the cell output.
tokenizer.word_index

Vocabulary size: 8


{'learning': 1, 'i': 2, 'love': 3, 'deep': 4, 'machine': 5, 'is': 6, 'fun': 7}

# Convertir frases a secuencias de índices

In [5]:
# Replace each word of every sentence by its integer index from the tokenizer.
sequences = tokenizer.texts_to_sequences(texts=sentences)

# Pad sequences para asegurar que todas las secuencias tengan la misma longitud

In [6]:
# Length of the longest encoded sentence; all sequences are padded to it.
max_length = max(map(len, sequences))

# padding='post' appends the filler values at the end of shorter sequences.
padded_sequences = pad_sequences(
    sequences,
    maxlen=max_length,
    padding='post',
)

print("Padded sequences:", padded_sequences)

Padded sequences: [[2 3 4 1]
 [2 3 5 1]
 [4 1 6 7]]


# CREAMOS EL MODELO DE RED NEURONAL PARA EMBEDDING

In [8]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, GlobalAveragePooling1D

In [9]:
# Define the model.
# Embedding dimensionality: each vocabulary index is represented by a
# vector of 8 numbers.
embedding_dim = 8

# Pipeline: token ids -> embedding vectors -> mean over the sequence axis
# -> single sigmoid unit (binary classification, not regression).
model = Sequential([
    # An explicit Input replaces the `input_length` argument of Embedding,
    # which Keras 3 deprecates (it is ignored with a warning). Declaring the
    # input shape also builds the model immediately, so model.summary() can
    # report output shapes and parameter counts before training.
    tf.keras.Input(shape=(max_length,), dtype="int32"),
    # Trainable lookup table of shape (vocab_size, embedding_dim).
    Embedding(input_dim=vocab_size, output_dim=embedding_dim),
    # Collapse the sequence axis by averaging the word vectors.
    GlobalAveragePooling1D(),
    # One sigmoid output: probability of the positive class.
    Dense(1, activation='sigmoid'),
])



# COMPILACIÓN DEL MODELO

In [10]:
# Configure training: Adam optimizer, binary cross-entropy loss, and
# accuracy reported as the metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Print the layer-by-layer summary of the model.
model.summary()

# ENTRENAMOS EL MODELO

In [11]:
# Example binary targets (0 or 1), one per sentence in the corpus.
labels = [1, 0, 1]

# Turn the Python list of labels into a TensorFlow tensor.
labels_tensor = tf.convert_to_tensor(labels)

# Train for 5 epochs; the returned History object is the cell output.
model.fit(x=padded_sequences, y=labels_tensor, epochs=5)

Epoch 1/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - accuracy: 0.6667 - loss: 0.6871
Epoch 2/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step - accuracy: 0.6667 - loss: 0.6859
Epoch 3/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step - accuracy: 0.6667 - loss: 0.6847
Epoch 4/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - accuracy: 0.6667 - loss: 0.6835
Epoch 5/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step - accuracy: 0.6667 - loss: 0.6822


<keras.src.callbacks.history.History at 0x79fcc5154590>

In [12]:
# The first layer of the model is the Embedding layer; its first weight
# array is the embedding matrix learned during training.
embedding_layer = model.layers[0]
embeddings = embedding_layer.get_weights()[0]

# Print the caption and the matrix on separate lines.
print("Embeddings learned by the model:", embeddings, sep="\n")

Embeddings learned by the model:
[[ 0.02561725 -0.0476102  -0.03989649 -0.00169854  0.01034827 -0.02656494
   0.04809595 -0.04646671]
 [-0.01324978 -0.01181327  0.03928816 -0.01263829 -0.0093311  -0.03737538
   0.03459082 -0.04174419]
 [ 0.01256683 -0.03375946  0.02537136 -0.00834531 -0.00810079 -0.01732033
  -0.01514548 -0.04509872]
 [-0.03274475  0.00202931 -0.00540555 -0.04935608  0.0175754   0.01789708
  -0.0359697  -0.04419999]
 [ 0.05091565 -0.01345094 -0.0204044   0.01809205  0.04503427 -0.0295029
   0.05437132  0.00977688]
 [-0.0295023   0.00936521 -0.03221748 -0.02606449 -0.02413855 -0.05492676
  -0.04995645 -0.01647404]
 [ 0.04642065 -0.03591892 -0.04301032  0.01511547 -0.04381833  0.02638231
  -0.03489088  0.02451028]
 [ 0.03276312  0.01402203 -0.04695943  0.03606012  0.01020911  0.01180781
  -0.00050765 -0.02107422]]
