# CREAR UNA RED NEURONAL DE EMBEDDINGS CON PYTORCH

# PASO 1 - IMPORTAMOS LIBRERÍAS

In [1]:
import torch
import torch.nn as nn

# PASO 2 - CREAMOS DATASET DE EJEMPLO

In [2]:
# Toy corpus: three sentences, already split into four word tokens each.
sentences = [
    list(tokens)
    for tokens in (
        ("I", "love", "deep", "learning"),
        ("I", "love", "machine", "learning"),
        ("deep", "learning", "is", "fun"),
    )
]

In [3]:
# Simple word -> index vocabulary. Indices follow the order of the tuple
# below, so "<PAD>" maps to 0 and the real words start at 1.
vocab = {
    token: index
    for index, token in enumerate(
        ("<PAD>", "I", "love", "deep", "learning", "machine", "is", "fun")
    )
}

In [4]:
# Replace every word by its vocabulary index, sentence by sentence.
# A word missing from the vocabulary raises KeyError.
indexed_sentences = [list(map(vocab.__getitem__, sentence)) for sentence in sentences]

In [5]:
# Show the integer-encoded sentences.
print("Indexed sentences:", indexed_sentences)

Indexed sentences: [[1, 2, 3, 4], [1, 2, 5, 4], [3, 4, 6, 7]]


In [6]:
# Hyperparameters of the embedding layer.
vocab_size = len(vocab)  # number of entries in the vocabulary
embedding_dim = 8  # length of each embedding vector

# CREAMOS UNA CAPA PARA LOS EMBEDDINGS

In [7]:
# Embedding lookup table sized from the vocabulary and the embedding width.
embedding_layer = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dim)
# Report the size of the weight matrix held by the layer.
print("Embeddings matrix shape:", embedding_layer.weight.size())

Embeddings matrix shape: torch.Size([8, 8])


In [8]:
# Convert the indexed sentences into a single PyTorch tensor.
# NOTE: this needs every sentence to have the same length; the three example
# sentences all contain four tokens.
input_tensor = torch.tensor(indexed_sentences)

In [9]:
# Look up the embedding vector of every word index in the batch.
embedded_sentences = embedding_layer(input_tensor)

In [10]:
# Print the shape of the embedded batch, then the embedded tensor itself.
print("Embedded sentences shape:", embedded_sentences.shape)
print("Embedded sentences:", embedded_sentences)

Embedded sentences shape: torch.Size([3, 4, 8])
Embedded sentences: tensor([[[ 1.9598e+00, -8.3978e-01, -6.2272e-01, -1.8037e-01,  5.8631e-01,
          -4.8347e-01,  4.9962e-01,  2.1633e-01],
         [-2.6413e-01, -1.2024e-01,  1.9054e+00, -4.6415e-02,  8.0756e-01,
          -1.2444e+00,  1.4288e+00, -1.4756e+00],
         [ 2.0102e+00,  2.5519e+00, -1.4236e+00, -8.2239e-01,  4.1805e-01,
          -2.5368e+00,  3.8962e-01, -3.0630e-01],
         [-1.2283e+00,  3.1134e-01, -8.0621e-01,  2.0322e-03, -1.8690e-01,
          -2.3735e+00, -2.1261e-01,  3.0772e-01]],

        [[ 1.9598e+00, -8.3978e-01, -6.2272e-01, -1.8037e-01,  5.8631e-01,
          -4.8347e-01,  4.9962e-01,  2.1633e-01],
         [-2.6413e-01, -1.2024e-01,  1.9054e+00, -4.6415e-02,  8.0756e-01,
          -1.2444e+00,  1.4288e+00, -1.4756e+00],
         [-2.1859e+00, -6.9722e-01,  1.2839e+00, -7.7897e-01,  1.5431e+00,
           8.4018e-01,  8.3933e-01, -2.1179e+00],
         [-1.2283e+00,  3.1134e-01, -8.0621e-01,  2.032

# CREAMOS LA RED NEURONAL DE EMBEDDINGS

In [11]:
class SimpleModel(nn.Module):
    """Minimal sentence scorer: embedding lookup, mean pooling, linear layer.

    Each token index is mapped to an embedding vector, the vectors of a
    sentence are averaged along the token axis (dim 1), and the pooled vector
    is projected to a single value by a linear layer.

    Args:
        vocab_size: Number of rows in the embedding table.
        embedding_dim: Length of each embedding vector.
        padding_idx: Optional index of the padding token. When given, the
            embedding of that index is kept at zero by ``nn.Embedding`` and
            padded positions are excluded from the sentence average. When
            ``None`` (default) every position contributes to the average,
            exactly as a plain ``mean(dim=1)``.
    """

    def __init__(self, vocab_size, embedding_dim, padding_idx=None):
        super().__init__()
        self.embeddings = nn.Embedding(
            vocab_size, embedding_dim, padding_idx=padding_idx
        )
        self.fc = nn.Linear(embedding_dim, 1)  # basic classifier head

    def forward(self, x):
        """Return one score per sentence.

        Args:
            x: Integer tensor of token indices, indexed as (sentence, token).

        Returns:
            Tensor with one row per sentence and a single column.
        """
        embedded = self.embeddings(x)
        # nn.Embedding stores padding_idx normalised to a non-negative index.
        pad = self.embeddings.padding_idx
        if pad is None:
            pooled = embedded.mean(dim=1)
        else:
            # Average only over real tokens so padding does not dilute the
            # sentence vector; clamp avoids 0/0 for an all-padding sentence.
            mask = (x != pad).unsqueeze(-1).to(embedded.dtype)
            token_counts = mask.sum(dim=1).clamp(min=1.0)
            pooled = (embedded * mask).sum(dim=1) / token_counts
        return self.fc(pooled)

# CONFIGURAMOS EL MODELO DE RED NEURONAL

In [13]:
# Instantiate the network with the vocabulary size and embedding width.
model = SimpleModel(vocab_size=vocab_size, embedding_dim=embedding_dim)
# Run the whole batch of indexed sentences through the model.
output = model(input_tensor)
print("Output model:", output)
print("Output shape :", output.size())

Output model: tensor([[ 0.3041],
        [ 0.3237],
        [-0.0665]], grad_fn=<AddmmBackward0>)
Output shape : torch.Size([3, 1])
