In [1]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, Lambda
from tensorflow.keras.preprocessing.text import Tokenizer
import numpy as np
import re

In [2]:
# Toy corpus for the CBOW demo, kept as a single string.
# Adjacent literals are joined at compile time; note the leading space and
# the explicit "\n" at each line break.
data = (
    " We are about to study the idea of a computational process.\n"
    "Computational processes are abstract beings that inhabit computers.\n"
    "As they evolve, processes manipulate other abstract things called data.\n"
    "The evolution of a process is directed by a pattern of rules\n"
    "called a program. People create programs to direct processes. In effect,\n"
    "we conjure the spirits of the computer with our spells."
)

In [3]:
# Naive sentence segmentation: cut the corpus at every full stop.
# Because the text ends with a period, the last element of the list is an empty string.
sentences = str.split(data, ".")
sentences


[' We are about to study the idea of a computational process',
 '\nComputational processes are abstract beings that inhabit computers',
 '\nAs they evolve, processes manipulate other abstract things called data',
 '\nThe evolution of a process is directed by a pattern of rules\ncalled a program',
 ' People create programs to direct processes',
 ' In effect,\nwe conjure the spirits of the computer with our spells',
 '']

In [7]:
def _clean(sentence):
    """Normalize one raw sentence to lowercase words separated by single spaces.

    Every run of ASCII letters/digits is treated as a word; anything else
    (punctuation, newlines, repeated spaces) acts as a separator.
    Single-character words such as "a" are dropped.

    Filtering on token length avoids the pitfall of removing short words with
    a space-anchored regex substitution: such a pattern consumes the space that
    the next match would need, so adjacent one-letter words are not all removed.
    """
    words = re.findall(r"[A-Za-z0-9]+", sentence)
    return " ".join(word.lower() for word in words if len(word) > 1)


clean_sentence = []

for sentence in sentences:
    cleaned = _clean(sentence)
    # Skip fragments with nothing left after cleaning (e.g. the empty piece
    # after the final period, or whitespace-only fragments).
    if cleaned:
        clean_sentence.append(cleaned)

clean_sentence


['we are about to study the idea of computational process',
 'computational processes are abstract beings that inhabit computers',
 'as they evolve processes manipulate other abstract things called data',
 'the evolution of process is directed by pattern of rules called program',
 'people create programs to direct processes',
 'in effect we conjure the spirits of the computer with our spells']

In [8]:
# Fit a Keras Tokenizer on the cleaned sentences, then encode every sentence
# as a list of integer word ids (one list per sentence).
tokenizer=Tokenizer()
tokenizer.fit_on_texts(clean_sentence)
sequences=tokenizer.texts_to_sequences(clean_sentence)
# Display the encoded sentences; in the output shown for this cell the ids start at 1.
sequences

[[4, 5, 11, 6, 12, 1, 13, 2, 7, 8],
 [7, 3, 5, 9, 14, 15, 16, 17],
 [18, 19, 20, 3, 21, 22, 9, 23, 10, 24],
 [1, 25, 2, 8, 26, 27, 28, 29, 2, 30, 10, 31],
 [32, 33, 34, 6, 35, 3],
 [36, 37, 4, 38, 1, 39, 2, 1, 40, 41, 42, 43]]

In [9]:
# Build lookup tables in both directions between words and their tokenizer ids.
index_to_word_map = {}
word_to_index_map = {}

for sentence_text, sequence in zip(clean_sentence, sequences):
    words_in_sentence = sentence_text.split()

    # The pairing below is purely positional, so it is only valid when the
    # tokenizer produced exactly one id per whitespace-separated word.
    # Fail loudly instead of silently mapping words to the wrong ids.
    if len(words_in_sentence) != len(sequence):
        raise ValueError(
            "word/id count mismatch for sentence: %r" % (sentence_text,)
        )

    for word, value in zip(words_in_sentence, sequence):
        index_to_word_map[value] = word
        word_to_index_map[word] = value

# Only the last expression of a cell is displayed, so show a single map here.
word_to_index_map

{'we': 4,
 'are': 5,
 'about': 11,
 'to': 6,
 'study': 12,
 'the': 1,
 'idea': 13,
 'of': 2,
 'computational': 7,
 'process': 8,
 'processes': 3,
 'abstract': 9,
 'beings': 14,
 'that': 15,
 'inhabit': 16,
 'computers': 17,
 'as': 18,
 'they': 19,
 'evolve': 20,
 'manipulate': 21,
 'other': 22,
 'things': 23,
 'called': 10,
 'data': 24,
 'evolution': 25,
 'is': 26,
 'directed': 27,
 'by': 28,
 'pattern': 29,
 'rules': 30,
 'program': 31,
 'people': 32,
 'create': 33,
 'programs': 34,
 'direct': 35,
 'in': 36,
 'effect': 37,
 'conjure': 38,
 'spirits': 39,
 'computer': 40,
 'with': 41,
 'our': 42,
 'spells': 43}

In [10]:
# Model hyper-parameters.
# vocab_size: one slot per tokenizer word plus one extra, because the word ids
# displayed earlier start at 1, so the largest id equals len(word_index).
vocab_size=len(tokenizer.word_index)+1
# emb_size: dimensionality of each word vector (used as the Embedding output_dim).
emb_size=10
# context_size: number of words taken on each side of the target word.
context_size=2

In [11]:
# Build CBOW training pairs: for every position that has `context_size` words
# on both sides, the surrounding word ids are the input and the centre id is the label.
contexts = []
targets = []
for sequence in sequences:
    for i in range(context_size, len(sequence) - context_size):
        # Slice the window so it follows `context_size` rather than fixed
        # +/-1 and +/-2 offsets; the centre word itself is excluded.
        before = sequence[i - context_size:i]
        after = sequence[i + 1:i + context_size + 1]
        contexts.append(tuple(before) + tuple(after))
        targets.append(sequence[i])

# Preview up to the first five pairs as words; slicing keeps this safe when
# the corpus yields fewer than five samples.
for context, target_id in zip(contexts[:5], targets[:5]):
    words = [index_to_word_map.get(word_id) for word_id in context]
    print(words, "=>", index_to_word_map.get(target_id))


['we', 'are', 'to', 'study'] => about
['are', 'about', 'study', 'the'] => to
['about', 'to', 'the', 'idea'] => study
['to', 'study', 'idea', 'of'] => the
['study', 'the', 'of', 'computational'] => idea


In [12]:
# Convert the Python lists to NumPy arrays for training:
# X holds one row of context word ids per sample, Y the matching target word id.
X=np.array(contexts)
Y=np.array(targets)

In [13]:
# CBOW-style classifier: embed the context word ids, average the embeddings,
# then predict the centre word with a softmax over the vocabulary.
model=Sequential([
    # Declare the input explicitly (2*context_size word ids per sample) rather
    # than through Embedding's `input_length` argument, which Keras 3 deprecates.
    tf.keras.Input(shape=(2*context_size,), dtype="int32"),
    Embedding(input_dim=vocab_size, output_dim=emb_size),
    # Mean over the context axis (axis 1). A built-in layer is used because a
    # Lambda wrapping a Python lambda cannot be safely serialized with the model.
    tf.keras.layers.GlobalAveragePooling1D(),
    Dense(256, activation='relu'),
    Dense(512, activation='relu'),
    Dense(units=vocab_size,activation='softmax')
])



In [14]:
# Targets are integer word ids rather than one-hot vectors, hence the sparse
# variant of categorical cross-entropy; accuracy is tracked as a metric.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam',metrics=['accuracy'])

In [15]:
# Train on all (context, target) pairs for 200 epochs. No validation data is
# passed, so the accuracy printed per epoch is measured on the training data.
# NOTE(review): no random seed is set in the visible cells, so results may vary between runs.
model.fit(X,Y,epochs=200)

Epoch 1/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 25ms/step - accuracy: 0.0300 - loss: 3.7841
Epoch 2/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.1501 - loss: 3.7757
Epoch 3/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.1097 - loss: 3.7695
Epoch 4/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.1097 - loss: 3.7623
Epoch 5/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.1201 - loss: 3.7529
Epoch 6/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.1201 - loss: 3.7419 
Epoch 7/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.1201 - loss: 3.7303
Epoch 8/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.1201 - loss: 3.7162 
Epoch 9/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

<keras.src.callbacks.history.History at 0x78469bfd07f0>

In [18]:
# Hand-written contexts to query the trained model with. Each entry holds four
# space-separated words, i.e. 2*context_size, matching the training window.
test_sentenses = [
    "we are to study",
    "create programs direct processes",
    "spirits process study program",
    "idea study people create"
]

In [20]:
# Predict the centre word for each hand-written context and print it.
for test_sentense in test_sentenses:
    # split() without arguments tolerates repeated or surrounding whitespace,
    # which split(" ") would turn into empty tokens.
    test_words = test_sentense.split()

    # Words outside the training vocabulary have no id. Report them and move on
    # instead of feeding None into the model, which fails with an unclear error.
    unknown = [word for word in test_words if word not in word_to_index_map]
    if not test_words or unknown:
        print("Skipping ", test_words, " => words not in vocabulary: ", unknown)
        continue

    # Shape (1, number_of_words): a batch containing a single context.
    x_test = np.array([[word_to_index_map[word] for word in test_words]])

    test_predictions = model.predict(x_test)
    # Cast to a plain int so the dictionary lookup uses the same key type as the map.
    y_pred = int(np.argmax(test_predictions[0]))
    print("Predictons: ", test_words, " => ", index_to_word_map.get(y_pred))
    print("\n")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 132ms/step
Predictons:  ['we', 'are', 'to', 'study']  =>  about


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
Predictons:  ['create', 'programs', 'direct', 'processes']  =>  to


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
Predictons:  ['spirits', 'process', 'study', 'program']  =>  about


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
Predictons:  ['idea', 'study', 'people', 'create']  =>  to


