In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, Lambda
from tensorflow.keras.preprocessing.text import Tokenizer
import numpy as np
import re

In [None]:
# Toy corpus: a short passage held in a single multi-line string. It is the
# only text the rest of the notebook tokenizes and trains on.
data=""" We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells."""

In [None]:
# Cut the corpus at every period. The pieces keep their leading whitespace /
# newlines, and the text after the final period becomes a trailing "".
sentences = data.split(sep=".")
sentences

[' We are about to study the idea of a computational process',
 '\nComputational processes are abstract beings that inhabit computers',
 '\nAs they evolve, processes manipulate other abstract things called data',
 '\nThe evolution of a process is directed by a pattern of rules\ncalled a program',
 ' People create programs to direct processes',
 ' In effect,\nwe conjure the spirits of the computer with our spells',
 '']

In [None]:
# Normalise each raw sentence fragment into lowercase words separated by
# single spaces, dropping fragments that contain no words at all.
clean_sentence = []

for sentence in sentences:
    # Every run of characters other than ASCII letters/digits (punctuation,
    # newlines, repeated spaces) collapses to one space; then trim and lowercase.
    #
    # A second substitution with the pattern "r[?:^| ]w/[?:$| ]" used to follow.
    # Its raw-string prefix was typed inside the quotes and the pattern needs
    # a literal "/", which cannot exist after the substitution below, so it
    # never matched anything and has been removed.
    sentence = re.sub(r"[^A-Za-z0-9]+", " ", sentence).strip().lower()

    # Check emptiness *after* cleaning so whitespace-only or punctuation-only
    # fragments are skipped too, not just the exact empty string.
    if not sentence:
        continue

    clean_sentence.append(sentence)

clean_sentence


['we are about to study the idea of a computational process',
 'computational processes are abstract beings that inhabit computers',
 'as they evolve processes manipulate other abstract things called data',
 'the evolution of a process is directed by a pattern of rules called a program',
 'people create programs to direct processes',
 'in effect we conjure the spirits of the computer with our spells']

In [None]:
# Learn the vocabulary from the cleaned sentences, then encode each sentence
# as a list of integer word indices.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(clean_sentence)

sequences = tokenizer.texts_to_sequences(clean_sentence)
sequences

[[5, 6, 12, 7, 13, 1, 14, 2, 3, 8, 9],
 [8, 4, 6, 10, 15, 16, 17, 18],
 [19, 20, 21, 4, 22, 23, 10, 24, 11, 25],
 [1, 26, 2, 3, 9, 27, 28, 29, 3, 30, 2, 31, 11, 3, 32],
 [33, 34, 35, 7, 36, 4],
 [37, 38, 5, 39, 1, 40, 2, 1, 41, 42, 43, 44]]

In [None]:
# Word <-> index lookup tables.
# Take them straight from the fitted tokenizer instead of re-deriving them by
# pairing each encoded sequence with `sentence.split()`: that pairing silently
# mis-labels words whenever the tokenizer drops or merges a token.
# Copies are made so later edits to these dicts cannot corrupt the tokenizer.
word_to_index_map = dict(tokenizer.word_index)   # word  -> integer index
index_to_word_map = dict(tokenizer.index_word)   # index -> word

# Only the last expression of a cell is displayed.
word_to_index_map

{'we': 5,
 'are': 6,
 'about': 12,
 'to': 7,
 'study': 13,
 'the': 1,
 'idea': 14,
 'of': 2,
 'a': 3,
 'computational': 8,
 'process': 9,
 'processes': 4,
 'abstract': 10,
 'beings': 15,
 'that': 16,
 'inhabit': 17,
 'computers': 18,
 'as': 19,
 'they': 20,
 'evolve': 21,
 'manipulate': 22,
 'other': 23,
 'things': 24,
 'called': 11,
 'data': 25,
 'evolution': 26,
 'is': 27,
 'directed': 28,
 'by': 29,
 'pattern': 30,
 'rules': 31,
 'program': 32,
 'people': 33,
 'create': 34,
 'programs': 35,
 'direct': 36,
 'in': 37,
 'effect': 38,
 'conjure': 39,
 'spirits': 40,
 'computer': 41,
 'with': 42,
 'our': 43,
 'spells': 44}

In [None]:
# Hyper-parameters shared by the data preparation and the model.
context_size = 2   # number of words taken on each side of the target word
emb_size = 10      # length of each word-embedding vector

# One more than the number of known words: tokenizer indices start at 1.
vocab_size = len(tokenizer.word_index) + 1

In [None]:
# Build CBOW training pairs: for every word that has `context_size`
# neighbours on both sides, the neighbours form the context and the word
# itself is the target.
contexts = []
targets = []
for sequence in sequences:
    # Positions too close to either end lack a full window and are skipped.
    for i in range(context_size, len(sequence) - context_size):
        # The window is derived from `context_size` (not hard-coded +/-1, +/-2)
        # so it stays consistent with the range above if the size changes.
        left = sequence[i - context_size:i]
        right = sequence[i + 1:i + context_size + 1]
        contexts.append(tuple(left + right))
        targets.append(sequence[i])

# Preview up to five pairs as words; slicing avoids an IndexError when the
# corpus produces fewer than five samples.
for context, target in zip(contexts[:5], targets[:5]):
    words = [index_to_word_map.get(j) for j in context]
    print(words, "=>", index_to_word_map.get(target))


['we', 'are', 'to', 'study'] => about
['are', 'about', 'study', 'the'] => to
['about', 'to', 'the', 'idea'] => study
['to', 'study', 'idea', 'of'] => the
['study', 'the', 'of', 'a'] => idea


In [None]:
# Pack the training pairs into NumPy arrays: one row of context word indices
# per sample in X, and the matching target word index in Y.
X, Y = np.array(contexts), np.array(targets)

In [None]:
# CBOW-style classifier: embed each context word, average the embeddings,
# then predict the target word with a softmax over the vocabulary.
model = Sequential([
    # Explicit input spec (2 * context_size word indices per sample). This
    # replaces Embedding's `input_length` argument, which Keras 3 deprecates.
    tf.keras.Input(shape=(2 * context_size,), dtype="int32"),
    Embedding(input_dim=vocab_size, output_dim=emb_size),
    # Mean over the context axis. Same computation as the former
    # Lambda(tf.reduce_mean(x, axis=1)), but a built-in layer that can be
    # saved and reloaded without deserialising an anonymous Python function.
    tf.keras.layers.GlobalAveragePooling1D(),
    Dense(256, activation='relu'),
    Dense(512, activation='relu'),
    # One output unit per vocabulary index.
    Dense(units=vocab_size, activation='softmax'),
])



In [None]:
# Targets are integer word indices (not one-hot vectors), hence the sparse
# variant of categorical cross-entropy.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train on all (context, target) pairs for 200 epochs; the returned History
# object is the cell's displayed value.
model.fit(x=X, y=Y, epochs=200)

Epoch 1/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step - accuracy: 0.0000e+00 - loss: 3.8063
Epoch 2/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.1118 - loss: 3.7971 
Epoch 3/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.1014 - loss: 3.7884 
Epoch 4/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.1118 - loss: 3.7780 
Epoch 5/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.1118 - loss: 3.7657 
Epoch 6/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.1014 - loss: 3.7515 
Epoch 7/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.1118 - loss: 3.7341 
Epoch 8/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.0806 - loss: 3.7141 
Epoch 9/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0

<keras.src.callbacks.history.History at 0x7f56b81e35b0>

In [None]:
# Hand-written probe contexts: four words each, and every word occurs in the
# training corpus. Note the variable name is spelled `test_sentenses`.
test_sentenses = [
    "we are to study",
    "create programs direct processes",
    "spirits process study program",
    "idea study people create"
]

In [None]:
# Predict the centre word for each probe context.
# `test_sentenses` (sic) is the name the probe list is bound to in this
# notebook, so that is the name iterated here.
for probe in test_sentenses:
    # split() without arguments ignores repeated/leading/trailing spaces, so
    # no empty tokens are produced.
    test_words = probe.split()

    # Encode the words; unknown words fall back to index 0.
    # The extra list level gives the batch dimension Keras expects.
    x_test = np.array([[word_to_index_map.get(word, 0) for word in test_words]])

    # verbose=0 keeps the per-call progress bar out of the cell output.
    test_predictions = model.predict(x_test, verbose=0)
    y_pred = np.argmax(test_predictions[0])
    print("Predictons: ", test_words, " => ", index_to_word_map.get(y_pred))
    print("\n")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
Predictons:  ['known', 'as', 'structured', 'learning']  =>  of


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
Predictons:  ['transformers', 'have', 'applied', 'to']  =>  about


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
Predictons:  ['where', 'they', 'produced', 'results']  =>  of


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
Predictons:  ['cases', 'surpassing', 'expert', 'performance']  =>  idea


