# In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, Lambda
from tensorflow.keras.preprocessing.text import Tokenizer
import numpy as np
import re


# Tiny training corpus used for the word-embedding demo below.
data = """ We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells."""


def normalize_sentence(text):
    """Return *text* lowercased, without punctuation or one-letter words.

    Every run of characters other than ASCII letters and digits acts as a
    word separator, words consisting of a single character are dropped, and
    the remaining words are joined with single spaces.
    """
    words = re.findall(r"[A-Za-z0-9]+", text)
    # Filter word by word rather than with a regex anchored on the
    # surrounding spaces: such a regex consumes the space it matched, so it
    # misses the second of two adjacent one-letter words ("a b cat" -> "b cat").
    return " ".join(word for word in words if len(word) > 1).lower()


# Split the corpus into rough sentences on full stops.
sentences = data.split(".")
sentences  # notebook display


clean_sentence = []

for sentence in sentences:
    cleaned = normalize_sentence(sentence)
    # Skip fragments that contain no usable words (e.g. the empty string
    # after the final full stop) so no empty sentence reaches the tokenizer.
    if cleaned:
        clean_sentence.append(cleaned)

clean_sentence  # notebook display


# Learn the vocabulary from the cleaned sentences and encode each sentence
# as a list of integer word ids.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(clean_sentence)
sequences = tokenizer.texts_to_sequences(clean_sentence)
sequences  # notebook display

# Take both lookup tables straight from the fitted tokenizer instead of
# re-deriving them by lining up str.split() output with the encoded
# sequences: the two stay consistent even if the tokenizer's own filtering
# or splitting ever differs from a plain whitespace split.
word_to_index_map = dict(tokenizer.word_index)   # word -> integer id
index_to_word_map = dict(tokenizer.index_word)   # integer id -> word

index_to_word_map  # notebook display
word_to_index_map  # notebook display


# Hyperparameters.
# One more than the number of known words, so that every id found in
# tokenizer.word_index is a valid row of the embedding matrix.
vocab_size = len(tokenizer.word_index) + 1
emb_size = 10       # dimensionality of each word vector
context_size = 2    # number of context words taken on each side of the target


# Build (context, target) training pairs: for every position that has
# context_size neighbours on both sides, the target is the word at that
# position and the context is the neighbours before and after it.
contexts = []
targets = []
for sequence in sequences:
    for i in range(context_size, len(sequence) - context_size):
        # Slice the window from context_size rather than hard-coding +/-1
        # and +/-2, so changing context_size keeps the window in sync with
        # the loop bounds above.
        before = sequence[i - context_size:i]
        after = sequence[i + 1:i + context_size + 1]
        contexts.append(tuple(before + after))
        targets.append(sequence[i])


# Show the first few training pairs as words: [context words] => target word.
# Slicing (instead of indexing 0..4) keeps this working when fewer than five
# pairs were generated.
for context, target_id in zip(contexts[:5], targets[:5]):
    words = [index_to_word_map.get(word_id) for word_id in context]
    print(words, "=>", index_to_word_map.get(target_id))



# Training arrays: each row of X holds the context word ids for the target
# word id at the same position in Y.
X = np.array(contexts)
Y = np.array(targets)

# Network: embed every context word, average the embeddings over the context
# axis, then classify the averaged vector into one of vocab_size word ids
# (a CBOW-style setup).
window_length = 2 * context_size

model = Sequential()
model.add(
    Embedding(
        input_dim=vocab_size,
        output_dim=emb_size,
        input_length=window_length,
    )
)
model.add(Lambda(lambda embedded: tf.reduce_mean(embedded, axis=1)))
model.add(Dense(256, activation='relu'))
model.add(Dense(512, activation='relu'))
model.add(Dense(units=vocab_size, activation='softmax'))

# Targets are plain integer ids, hence the sparse categorical loss.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

model.fit(X, Y, epochs=200)

# Each test sentence lists the context words for one prediction; the model
# is asked for the word those context words surround.
test_sentenses = [
    "we are to study",
    "create programs direct processes",
    "spirits process study program",
    "idea study people create"
]


for test_sentense in test_sentenses:
    test_words = test_sentense.split(" ")

    # A word that was not in the training corpus has no id; looking it up
    # with .get() would put None into the input array. Report it and move
    # on to the next sentence instead.
    unknown_words = [word for word in test_words if word not in word_to_index_map]
    if unknown_words:
        print("Skipping", test_words, "- words not in vocabulary:", unknown_words)
        print("\n")
        continue

    # Batch of one sample: shape (1, number of context words).
    x_test = np.array([[word_to_index_map[word] for word in test_words]])

    test_predictions = model.predict(x_test)
    # The most probable class index is the predicted word id.
    y_pred = np.argmax(test_predictions[0])
    print("Predictons: ", test_words, " => ", index_to_word_map.get(y_pred))
    print("\n")


