In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# Read the entire training corpus into memory as a single string.
with open("math.txt", mode='r', encoding='utf-8') as corpus_file:
    mytext = corpus_file.read()

In [3]:
# Build the vocabulary from the whole corpus, passed in as one document.
mytokenizer = Tokenizer()
mytokenizer.fit_on_texts([mytext])

# Word ids start at 1 (0 is the padding value), so the number of distinct
# class ids the model must cover is the vocabulary size plus one.
total_words = 1 + len(mytokenizer.word_index)

In [4]:
# Inspect the word -> integer id mapping learned by fit_on_texts.
mytokenizer.word_index

{'the': 1,
 'a': 2,
 'of': 3,
 'in': 4,
 'and': 5,
 'for': 6,
 'find': 7,
 'solve': 8,
 'area': 9,
 'calculate': 10,
 'an': 11,
 'given': 12,
 'determine': 13,
 'equation': 14,
 'involving': 15,
 'unknown': 16,
 'its': 17,
 'with': 18,
 'radius': 19,
 'using': 20,
 'volume': 21,
 'you': 22,
 'height': 23,
 'function': 24,
 'how': 25,
 'do': 26,
 'surface': 27,
 'terms': 28,
 'length': 29,
 'to': 30,
 'trigonometric': 31,
 'system': 32,
 'equations': 33,
 'square': 34,
 'is': 35,
 'what': 36,
 'two': 37,
 'known': 38,
 'circle': 39,
 'base': 40,
 'cone': 41,
 'roots': 42,
 'side': 43,
 'sum': 44,
 'both': 45,
 'logarithmic': 46,
 'triangle': 47,
 'perimeter': 48,
 'derivative': 49,
 'functions': 50,
 'formula': 51,
 'exponential': 52,
 'value': 53,
 'sphere': 54,
 'probability': 55,
 'line': 56,
 'quadratic': 57,
 'rational': 58,
 'between': 59,
 'variable': 60,
 'angle': 61,
 'by': 62,
 'compute': 63,
 '𝑛': 64,
 'n': 65,
 'linear': 66,
 'when': 67,
 'prism': 68,
 'rectangular': 69,
 'f

In [5]:
# For every line of the corpus, emit each prefix of its token ids that is at
# least two tokens long. The last id of a prefix later becomes the prediction
# target and the ids before it become the context.
my_input_sequences = []
for line in mytext.split('\n'):
    token_list = mytokenizer.texts_to_sequences([line])[0]
    my_input_sequences.extend(
        token_list[:prefix_end] for prefix_end in range(2, len(token_list) + 1)
    )

In [6]:
# The longest n-gram fixes the common row width; shorter sequences are
# left-padded with zeros so that the target token always sits in the last column.
max_sequence_len = max(map(len, my_input_sequences))
input_sequences = np.array(
    pad_sequences(my_input_sequences, padding='pre', maxlen=max_sequence_len)
)

In [7]:
# Peek at the second padded n-gram sequence (row index 1).
input_sequences[1]

array([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  7,
        1, 71], dtype=int32)

In [8]:
# Split each padded row into context (every column but the last) and
# target (the last column).
X, y = input_sequences[:, :-1], input_sequences[:, -1]

In [9]:
# Context for the second sample: that row of input_sequences without its last token.
X[1]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 1], dtype=int32)

In [10]:
# Target token ids, i.e. the last column of input_sequences (one id per sample).
y

array([  1,  71,   3, ...,   2, 198, 205], dtype=int32)

In [11]:
# One-hot encode the target ids so they match the softmax output trained with
# categorical_crossentropy.
# The ids are taken from input_sequences rather than from y itself: the
# original `y = f(y)` form was not idempotent, so re-running this cell would
# one-hot encode an already one-hot array. to_categorical already returns a
# NumPy array, so no extra np.array copy is needed.
y = tf.keras.utils.to_categorical(input_sequences[:, -1], num_classes=total_words)

In [12]:
# One-hot encoded target vector for the second sample.
y[1]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0.

In [13]:
# Next-word model: token ids -> 100-dim embeddings -> 150-unit LSTM ->
# softmax over all total_words class ids.
model = Sequential([
    # Input rows are max_sequence_len - 1 wide because the last token of each
    # n-gram is held out as the target.
    Embedding(total_words, 100, input_length=max_sequence_len-1),
    LSTM(150),
    Dense(total_words, activation='softmax'),
])

# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
model.summary()

2024-12-01 17:31:07.351341: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Pro
2024-12-01 17:31:07.351374: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2024-12-01 17:31:07.351381: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2024-12-01 17:31:07.351628: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-12-01 17:31:07.352052: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 18, 100)           80100     
                                                                 
 lstm (LSTM)                 (None, 150)               150600    
                                                                 
 dense (Dense)               (None, 801)               120951    
                                                                 
Total params: 351651 (1.34 MB)
Trainable params: 351651 (1.34 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None


In [14]:
# Targets are one-hot vectors, hence categorical_crossentropy.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Keep the History object so the per-epoch loss/accuracy can be inspected or
# plotted afterwards; binding it also stops the bare object repr from being
# echoed as the cell's output.
history = model.fit(X, y, epochs=100, verbose=1)

Epoch 1/100


2024-12-01 17:31:08.388320: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

<keras.src.callbacks.History at 0x312f9b4c0>

In [29]:
def generate_text(seed_text, num_words, show_tokens=True):
    """Greedily extend ``seed_text`` with up to ``num_words`` predicted words.

    At each step the current text is tokenized with ``mytokenizer``, left-padded
    to the model's input width (``max_sequence_len - 1``), and the id with the
    highest predicted probability is appended as the next word.

    Args:
        seed_text: Starting text. Trailing whitespace is stripped so that
            appended words are separated by exactly one space.
        num_words: Maximum number of words to append.
        show_tokens: When True, print the token ids fed to the model at each
            step.

    Returns:
        The seed text followed by the generated words. Generation stops early
        if the model predicts an id with no vocabulary entry (e.g. the padding
        id 0), since appending nothing would repeat the same prediction.
    """
    text = seed_text.rstrip()
    for _ in range(num_words):
        token_ids = mytokenizer.texts_to_sequences([text])[0]
        if show_tokens:
            print(token_ids)
        padded = pad_sequences([token_ids], maxlen=max_sequence_len-1, padding='pre')
        # predict() returns one probability row for the single input; take the
        # arg-max as a plain int rather than comparing ints to a 1-element array.
        next_id = int(np.argmax(model.predict(padded, verbose=0), axis=-1)[0])
        # index_word is the tokenizer's id -> word dict: a direct lookup
        # replaces the linear scan over word_index.
        next_word = mytokenizer.index_word.get(next_id)
        if next_word is None:
            break
        text += " " + next_word
    return text


input_text = "Find the volume of a cone with "
predict_next_words = 6

input_text = generate_text(input_text, predict_next_words)
print(input_text)

[7, 1, 21, 3, 2, 41, 18]
[7, 1, 21, 3, 2, 41, 18, 2]
[7, 1, 21, 3, 2, 41, 18, 2, 12]
[7, 1, 21, 3, 2, 41, 18, 2, 12, 19]
[7, 1, 21, 3, 2, 41, 18, 2, 12, 19, 5]
[7, 1, 21, 3, 2, 41, 18, 2, 12, 19, 5, 86]
Find the volume of a cone with  a given radius and slant height


In [39]:
input_text = "Find the volume of a cone with "
predict_next_words = 10

# Drop the seed's trailing space so each appended word is separated by exactly
# one space (previously the output contained "with  a").
input_text = input_text.rstrip()

for _ in range(predict_next_words):
    token_list = mytokenizer.texts_to_sequences([input_text])[0]
    print(token_list)
    token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
    # Take the arg-max of the single prediction row as a plain int instead of
    # comparing ints against a 1-element array.
    predicted = int(np.argmax(model.predict(token_list, verbose=0), axis=-1)[0])
    # index_word is the tokenizer's id -> word dict: a direct lookup replaces
    # the linear scan over word_index on every step.
    output_word = mytokenizer.index_word.get(predicted)
    if output_word is None:
        # No vocabulary entry (e.g. padding id 0): appending nothing would
        # repeat the same prediction on every remaining step, so stop here.
        break
    input_text += " " + output_word

print(input_text)

[7, 1, 21, 3, 2, 41, 18]
[7, 1, 21, 3, 2, 41, 18, 2]
[7, 1, 21, 3, 2, 41, 18, 2, 12]
[7, 1, 21, 3, 2, 41, 18, 2, 12, 19]
[7, 1, 21, 3, 2, 41, 18, 2, 12, 19, 5]
[7, 1, 21, 3, 2, 41, 18, 2, 12, 19, 5, 86]
[7, 1, 21, 3, 2, 41, 18, 2, 12, 19, 5, 86, 23]
[7, 1, 21, 3, 2, 41, 18, 2, 12, 19, 5, 86, 23, 5]
[7, 1, 21, 3, 2, 41, 18, 2, 12, 19, 5, 86, 23, 5, 40]
[7, 1, 21, 3, 2, 41, 18, 2, 12, 19, 5, 86, 23, 5, 40, 19]
Find the volume of a cone with  a given radius and slant height and base radius and
