In [1]:
faqs = """The major portion of the brain is the cerebrum, which divides the left and right cerebral hemispheres, both of which have numerous folds and convolutions present on their surface. 
Between these convolutions are ridges known as gyri. 
Small grooves that are present between the gyri are known as the plural of sulcus or sulci, whereas larger grooves are referred to as fissures.
The right and left cerebral hemispheres, both of which are covered in the cerebral cortex that is otherwise known as gray matter, are joined together by the corpus callosum. 
Whereas the left hemisphere controls speech and abstract thinking, the right hemisphere controls spatial thinking.
The frontal, parietal, temporal, and occipital lobes are the four lobes that make up the cerebrum. 
The frontal lobes, which are present directly behind the forehead, are the largest lobes of the human brain. 
The frontal lobes are primarily responsible for controlling language, motor function, and various cognitive processes including self-awareness, mood, affect, memory, attention, as well as both social and moral reasoning.
Within the frontal lobe is Broca’s area, which is responsible for speech production. 
The parietal lobes, who can be found near the center of the brain between the frontal and occipital lobes, are responsible for interpreting different sensory and memory functions.
The temporal lobes, which is commonly referred to as the neocortex, is located close to the base of the skull. 
Within the temporal lobe is the Wernicke area, which allows individuals to understand both spoken and written language. 
In addition to processing speech, the temporal lobe also processes sensory information that contributes to the retention of memories, languages, and emotions.
The fourth and final lobe of the cerebrum is the occipital lobe, which is the smallest lobe of the cerebrum and forms the caudal part of the brain. 
The primary function of the occipital lobe is the interpretation of visual information.The cerebellum and spinal cord are connected to the cerebral hemispheres by the brainstem. 
The brain stem can be classified into four distinct sections that include the diencephalon, midbrain, pons, and medulla oblongata. 
The diencephalon, which is the most superior portion of the brainstem, is further subdivided into four portions that include the epithalamus, subthalamus, hypothalamus, and thalamus.
The thalamus, which is the largest portion of the diencephalon, serves as a relay point for all sensory information that enters the cortex and eventually gets transmitted to the cerebrum for processing. 
The hypothalamus also processes incoming sensory information; however, all of the information processed by the hypothalamus is derived from the autonomic nervous system (ANS).
As a result, the hypothalamus maintains eating habits, sexual behavior, and sleep patterns in addition to maintaining an individual’s body temperature. 
Additionally, the secretions of the pituitary gland, which develops from a downward extension of the hypothalamus, is controlled by the hypothalamus.
The midbrain, which connects the diencephalon to the pons, controls ocular motion, whereas the pons is involved in the regulation of eye and facial movements, hearing and balance, as well as all sensory information processed by the facial nerves.
The medulla oblongata, which is located between the pons and the spinal cord and is therefore the most inferior portion of the brainstem, controls autonomic functions such as breathing, blood pressure, cardiac rhythms, and swallowing. 
Notably, brain death of patients in a clinical setting is declared when there is significant destruction of the medulla oblongata.

"""

In [2]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer

In [3]:
tokenizer = Tokenizer()

In [4]:
tokenizer.fit_on_texts([faqs])

In [5]:
len(tokenizer.word_index)

232

In [6]:
input_sequences = []
for sentence in faqs.split('\n'):
  tokenized_sentence = tokenizer.texts_to_sequences([sentence])[0]

  for i in range(1,len(tokenized_sentence)):
    input_sequences.append(tokenized_sentence[:i+1])

In [7]:
input_sequences

[[1, 78],
 [1, 78, 21],
 [1, 78, 21, 3],
 [1, 78, 21, 3, 1],
 [1, 78, 21, 3, 1, 12],
 [1, 78, 21, 3, 1, 12, 4],
 [1, 78, 21, 3, 1, 12, 4, 1],
 [1, 78, 21, 3, 1, 12, 4, 1, 15],
 [1, 78, 21, 3, 1, 12, 4, 1, 15, 5],
 [1, 78, 21, 3, 1, 12, 4, 1, 15, 5, 79],
 [1, 78, 21, 3, 1, 12, 4, 1, 15, 5, 79, 1],
 [1, 78, 21, 3, 1, 12, 4, 1, 15, 5, 79, 1, 31],
 [1, 78, 21, 3, 1, 12, 4, 1, 15, 5, 79, 1, 31, 2],
 [1, 78, 21, 3, 1, 12, 4, 1, 15, 5, 79, 1, 31, 2, 32],
 [1, 78, 21, 3, 1, 12, 4, 1, 15, 5, 79, 1, 31, 2, 32, 22],
 [1, 78, 21, 3, 1, 12, 4, 1, 15, 5, 79, 1, 31, 2, 32, 22, 33],
 [1, 78, 21, 3, 1, 12, 4, 1, 15, 5, 79, 1, 31, 2, 32, 22, 33, 23],
 [1, 78, 21, 3, 1, 12, 4, 1, 15, 5, 79, 1, 31, 2, 32, 22, 33, 23, 3],
 [1, 78, 21, 3, 1, 12, 4, 1, 15, 5, 79, 1, 31, 2, 32, 22, 33, 23, 3, 5],
 [1, 78, 21, 3, 1, 12, 4, 1, 15, 5, 79, 1, 31, 2, 32, 22, 33, 23, 3, 5, 80],
 [1,
  78,
  21,
  3,
  1,
  12,
  4,
  1,
  15,
  5,
  79,
  1,
  31,
  2,
  32,
  22,
  33,
  23,
  3,
  5,
  80,
  81],
 [1,
  78,
  21,

In [8]:
max_len = max([len(x) for x in input_sequences])

In [9]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
padded_input_sequences = pad_sequences(input_sequences, maxlen = max_len, padding='pre')

In [10]:
padded_input_sequences

array([[  0,   0,   0, ...,   0,   1,  78],
       [  0,   0,   0, ...,   1,  78,  21],
       [  0,   0,   0, ...,  78,  21,   3],
       ...,
       [  0,   0,   0, ..., 232,   3,   1],
       [  0,   0,   0, ...,   3,   1,  42],
       [  0,   0,   0, ...,   1,  42,  43]])

In [11]:
X = padded_input_sequences[:,:-1]

In [12]:
y = padded_input_sequences[:,-1]

In [13]:
X.shape

(538, 38)

In [14]:
y.shape

(538,)

In [15]:
from tensorflow.keras.utils import to_categorical
y = to_categorical(y,num_classes=233)

In [16]:
y.shape

(538, 233)

In [17]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

In [18]:
model = Sequential()
model.add(Embedding(233, 100, input_length=38))
model.add(LSTM(150))
model.add(Dense(233, activation='softmax'))



In [19]:
model.compile(loss='categorical_crossentropy', optimizer='adam',metrics=['accuracy'])

In [20]:
model.summary()

In [21]:
model.fit(X,y,epochs=100)

Epoch 1/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 32ms/step - accuracy: 0.0606 - loss: 5.4262
Epoch 2/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - accuracy: 0.1083 - loss: 4.9170
Epoch 3/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - accuracy: 0.1265 - loss: 4.7100
Epoch 4/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - accuracy: 0.1190 - loss: 4.7605
Epoch 5/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - accuracy: 0.1248 - loss: 4.7261
Epoch 6/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step - accuracy: 0.1004 - loss: 4.6860
Epoch 7/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - accuracy: 0.1037 - loss: 4.6551
Epoch 8/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step - accuracy: 0.1191 - loss: 4.5297
Epoch 9/100
[1m17/17[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x228bdf5a790>

In [22]:
import time
import numpy as np
text = "the brainstem "

for i in range(100):
  # tokenize
  token_text = tokenizer.texts_to_sequences([text])[0]
  # padding
  padded_token_text = pad_sequences([token_text], maxlen=56, padding='pre')
  # predict
  pos = np.argmax(model.predict(padded_token_text))

  for word,index in tokenizer.word_index.items():
    if index == pos:
      text = text + " " + word
      print(text)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 358ms/step
the brainstem  also
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
the brainstem  also processes
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
the brainstem  also processes incoming
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
the brainstem  also processes incoming sensory
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
the brainstem  also processes incoming sensory information
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
the brainstem  also processes incoming sensory information however
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
the brainstem  also processes incoming sensory information however all
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
the brainstem  also processes incoming sensory information however all of
[1m1/1[0m [32

KeyboardInterrupt: 