In [2]:
from numpy import array
from keras.callbacks import LambdaCallback
from keras.layers import Dense
from keras.layers import Flatten, Embedding, Input
from keras.models import Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import text
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [3]:
# Toy sentiment corpus: five positive phrases followed by five negative ones.
positive_docs = [
    'Well done!',
    'Good work',
    'Great effort',
    'nice work',
    'Excellent!',
]
negative_docs = [
    'Weak',
    'Poor effort!',
    'not good',
    'poor work',
    'Could have done better.',
]
docs = positive_docs + negative_docs

# One binary label per document, in document order: 1 = positive, 0 = negative.
labels = array([1] * len(positive_docs) + [0] * len(negative_docs))

In [4]:
# tokenize documents
tokenizer = text.Tokenizer()
tokenizer.fit_on_texts(docs)
word2idx = tokenizer.word_index  # word -> integer id; ids start at 1

# integer encode the documents with the fitted tokenizer, so the encoding
# always uses the same filtering/lower-casing rules that built the vocabulary
encoded_docs = tokenizer.texts_to_sequences(docs)
print(encoded_docs)

# pad documents to a max length of 4 words
max_length = 4
# Word ids run from 1 to len(word2idx) and 0 is the padding value added below,
# so the embedding table needs one extra row. Deriving the size from the
# tokenizer keeps it correct if `docs` changes.
vocab_size = len(word2idx) + 1
padded_docs = pad_sequences(encoded_docs, maxlen=max_length, padding='post')
print(padded_docs)

[[6, 2], [3, 1], [7, 4], [8, 1], [9], [10], [5, 4], [11, 3], [5, 1], [12, 13, 2, 14]]
[[ 6  2  0  0]
 [ 3  1  0  0]
 [ 7  4  0  0]
 [ 8  1  0  0]
 [ 9  0  0  0]
 [10  0  0  0]
 [ 5  4  0  0]
 [11  3  0  0]
 [ 5  1  0  0]
 [12 13  2 14]]


In [5]:
# define the model
model = Sequential()
# Declare the input shape explicitly instead of passing the deprecated
# `input_length` argument to Embedding. This also builds the model up front,
# so `model.summary()` can report output shapes and parameter counts.
model.add(Input(shape=(max_length,)))
model.add(Embedding(vocab_size, 2))         # one 2-d vector per vocabulary id
model.add(Flatten())                        # (max_length, 2) -> max_length * 2 features
model.add(Dense(1, activation='sigmoid'))   # single sigmoid unit for the binary label

# compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])



In [6]:
# summarize the model: print its layers, output shapes and parameter counts
model.summary()

In [9]:
# Train for 10 epochs on the padded documents, logging progress per epoch.
model.fit(padded_docs, labels, epochs=10, verbose=1)

# Score the model on the same documents it was trained on
# (this notebook has no held-out set).
loss, accuracy = model.evaluate(padded_docs, labels, verbose=0)
print(f'Accuracy: {accuracy * 100:f}')

Epoch 1/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.6000 - loss: 0.6825
Epoch 2/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.6000 - loss: 0.6817
Epoch 3/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.6000 - loss: 0.6809
Epoch 4/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.7000 - loss: 0.6801
Epoch 5/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.7000 - loss: 0.6793
Epoch 6/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.8000 - loss: 0.6785
Epoch 7/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.9000 - loss: 0.6777
Epoch 8/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.9000 - loss: 0.6769
Epoch 9/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

In [10]:
# Keep a handle on the trained embedding layer (the first layer of `model`)
# and wrap it in its own model, so that predicting with `model2` returns the
# embedding vectors themselves.
embedding_layer = model.layers[0]
model2 = Sequential([embedding_layer])

In [13]:
# Show the embedding vectors, the padded ids and the raw text of one document.
# NOTE(review): the saved output shows [7, 4, 0, 0] / 'Great effort', which is
# row 2 of padded_docs rather than row 4 — re-run this cell to refresh it.
i = 4
row = slice(i, i + 1)  # one-element slice keeps the batch axis for predict()
model2.predict(padded_docs[row]), padded_docs[row], docs[row]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step


(array([[[ 0.00501636,  0.0392581 ],
         [-0.02427742, -0.03882178],
         [-0.00132937,  0.02699922],
         [-0.00132937,  0.02699922]]], dtype=float32),
 array([[7, 4, 0, 0]]),
 ['Great effort'])

In [15]:
# Shape of the embedding weight matrix: one row per vocabulary id,
# one column per embedding dimension.
embedding_layer.weights[0].shape

TensorShape([15, 2])

# Positional Encoding

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Embedding, Input, Add
from tensorflow.keras.models import Model

In [None]:
def positional_encoding(max_length, d_model):
    """Build the fixed sinusoidal position table.

    Even feature columns hold the sine, and odd feature columns the cosine,
    of ``position / 10000 ** (2 * (column // 2) / d_model)``.

    Args:
        max_length: Number of positions (rows) to generate.
        d_model: Number of features (columns) per position.

    Returns:
        A ``tf.float32`` tensor of shape ``(1, max_length, d_model)``.
    """
    positions = np.arange(max_length)[:, np.newaxis]  # column vector of positions
    dims = np.arange(d_model)[np.newaxis, :]          # row vector of feature indices
    exponents = (2 * (dims // 2)) / np.float32(d_model)
    angles = positions * (1 / np.power(10000, exponents))

    # Fill a fresh table: sine on even columns, cosine on odd columns.
    table = np.empty_like(angles)
    table[:, 0::2] = np.sin(angles[:, 0::2])
    table[:, 1::2] = np.cos(angles[:, 1::2])

    # Prepend a leading axis of size 1 so the table broadcasts over a batch.
    return tf.cast(table[np.newaxis, ...], dtype=tf.float32)


In [None]:
# Example parameters
vocab_size = 100  # Example vocab size
max_length = 10   # Example maximum sequence length
embedding_dim = 16  # Example embedding dimension

In [None]:
# Define input: a batch of integer token-id sequences of length max_length
inputs = Input(shape=(max_length,))

# Embedding layer. The sequence length already comes from `inputs`, so the
# deprecated `input_length` argument is not passed.
embedding_layer = Embedding(input_dim=vocab_size, output_dim=embedding_dim)
embeddings = embedding_layer(inputs)

# Generate positional encodings: a constant (1, max_length, embedding_dim) tensor
pos_encoding = positional_encoding(max_length, embedding_dim)

# Add positional encodings to embeddings. The table was generated for exactly
# max_length positions, so no further slicing is needed; its leading axis of
# size 1 lets it pair with every sequence in the batch.
positional_embeddings = Add()([embeddings, pos_encoding])

# Define the model that outputs positional embeddings
embedding_model = Model(inputs=inputs, outputs=positional_embeddings)
embedding_model.summary()


# Positional Embedding

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Embedding, Add, Input
from tensorflow.keras.models import Model

In [None]:
# Example sizes for the learned positional-embedding demo:
# vocab size, maximum sequence length, embedding dimension.
vocab_size, max_length, embedding_dim = 100, 10, 16

In [None]:
class PositionEmbedding(tf.keras.layers.Layer):
    """Adds a learned embedding of each position index to its input.

    Args:
        max_length: Number of positions; one trainable vector is kept per position.
        embedding_dim: Width of each position vector; must match the last
            axis of the tensor passed to ``call``.
    """

    def __init__(self, max_length, embedding_dim, **kwargs):
        super().__init__(**kwargs)
        self.max_length = max_length
        self.position_embedding = Embedding(input_dim=max_length, output_dim=embedding_dim)

    def call(self, token_embeddings):
        # The lookup happens inside `call`, so it is part of the model graph
        # and the position table is trained together with the rest of the
        # model. Calling an Embedding eagerly on a constant index tensor
        # would instead bake its output into the graph as a fixed constant.
        position_indices = tf.range(start=0, limit=self.max_length, delta=1)
        position_embeddings = self.position_embedding(position_indices)
        # (max_length, embedding_dim) broadcasts across the batch axis.
        return token_embeddings + position_embeddings


inputs = Input(shape=(max_length,))

# Token embeddings
token_embedding_layer = Embedding(input_dim=vocab_size, output_dim=embedding_dim)
token_embeddings = token_embedding_layer(inputs)

# Add learned positional embeddings to the token embeddings
position_embedding_layer = PositionEmbedding(max_length, embedding_dim)
embeddings = position_embedding_layer(token_embeddings)

# Define the model that outputs combined embeddings
embedding_model = Model(inputs=inputs, outputs=embeddings)
embedding_model.summary()


# Summary
## Positional Encoding:
* Deterministic, fixed patterns using sine and cosine functions.
* Not learned, and the same for any given position across different runs.
* Introduced in the original transformer architecture.
## Positional Embedding:
* Learned embeddings for positions, similar to how token embeddings are learned.
* Trainable, can adapt and improve during training based on the data.
* Potentially more flexible and powerful, as they can capture more nuanced positional relationships.
# When to Use Each
## Positional Encoding: 
Suitable when you want a simple, deterministic method to inject positional information, ensuring that the model has a clear and periodic positional signal.

## Positional Embedding:
Useful when the positional relationships are complex and might benefit from being learned from data. They can adapt to specific tasks and datasets, potentially leading to better performance.

# What GPT-3 is using
* https://chatgpt.com/share/e0209da0-1049-4632-a52c-31c592cc0bc7