In [3]:
!pip install tensorflow



In [5]:
from numpy import array
import tensorflow
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Embedding

In [8]:
text = ''' I specialize in leveraging the power of artificial intelligence to solve complex problems and create innovative solutions.
My expertise lies in building intelligent systems that generate content, automate tasks, and optimize workflows across industries.
From designing state-of-the-art machine learning models to developing advanced NLP systems, my work integrates the latest AI technologies with practical applications.
'''

**Prepare Data**

In [9]:
tokens = text.split()
raw_text = ' '.join(tokens)
print(raw_text)

I specialize in leveraging the power of artificial intelligence to solve complex problems and create innovative solutions. My expertise lies in building intelligent systems that generate content, automate tasks, and optimize workflows across industries. From designing state-of-the-art machine learning models to developing advanced NLP systems, my work integrates the latest AI technologies with practical applications.


**Prepare the Vocabulary**

In [10]:
chars= sorted(list(set(text)))
print(chars)

['\n', ' ', ',', '-', '.', 'A', 'F', 'I', 'L', 'M', 'N', 'P', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'k', 'l', 'm', 'n', 'o', 'p', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']


In [13]:
# c is char, i is index
mapping = dict((c, i) for i, c in enumerate(chars))
print(mapping)
vocab = len(mapping)
print(vocab)

{'\n': 0, ' ': 1, ',': 2, '-': 3, '.': 4, 'A': 5, 'F': 6, 'I': 7, 'L': 8, 'M': 9, 'N': 10, 'P': 11, 'a': 12, 'b': 13, 'c': 14, 'd': 15, 'e': 16, 'f': 17, 'g': 18, 'h': 19, 'i': 20, 'k': 21, 'l': 22, 'm': 23, 'n': 24, 'o': 25, 'p': 26, 'r': 27, 's': 28, 't': 29, 'u': 30, 'v': 31, 'w': 32, 'x': 33, 'y': 34, 'z': 35}
36


In [15]:
length = 10
sequences = list()
for i in range(length, len(raw_text)):
  seq = raw_text[i-length:i+1]
  sequences.append(seq)
  print(sequences)
  print('Total Sequences: %d' % len(sequences))

['I specializ']
Total Sequences: 1
['I specializ', ' specialize']
Total Sequences: 2
['I specializ', ' specialize', 'specialize ']
Total Sequences: 3
['I specializ', ' specialize', 'specialize ', 'pecialize i']
Total Sequences: 4
['I specializ', ' specialize', 'specialize ', 'pecialize i', 'ecialize in']
Total Sequences: 5
['I specializ', ' specialize', 'specialize ', 'pecialize i', 'ecialize in', 'cialize in ']
Total Sequences: 6
['I specializ', ' specialize', 'specialize ', 'pecialize i', 'ecialize in', 'cialize in ', 'ialize in l']
Total Sequences: 7
['I specializ', ' specialize', 'specialize ', 'pecialize i', 'ecialize in', 'cialize in ', 'ialize in l', 'alize in le']
Total Sequences: 8
['I specializ', ' specialize', 'specialize ', 'pecialize i', 'ecialize in', 'cialize in ', 'ialize in l', 'alize in le', 'lize in lev']
Total Sequences: 9
['I specializ', ' specialize', 'specialize ', 'pecialize i', 'ecialize in', 'cialize in ', 'ialize in l', 'alize in le', 'lize in lev', 'ize in l

**Encode the Text as Integer**

In [16]:
new_sequence = list()
for seq in sequences:
  new_sequence.append([mapping[char] for char in seq])
print(new_sequence)

[[7, 1, 28, 26, 16, 14, 20, 12, 22, 20, 35], [1, 28, 26, 16, 14, 20, 12, 22, 20, 35, 16], [28, 26, 16, 14, 20, 12, 22, 20, 35, 16, 1], [26, 16, 14, 20, 12, 22, 20, 35, 16, 1, 20], [16, 14, 20, 12, 22, 20, 35, 16, 1, 20, 24], [14, 20, 12, 22, 20, 35, 16, 1, 20, 24, 1], [20, 12, 22, 20, 35, 16, 1, 20, 24, 1, 22], [12, 22, 20, 35, 16, 1, 20, 24, 1, 22, 16], [22, 20, 35, 16, 1, 20, 24, 1, 22, 16, 31], [20, 35, 16, 1, 20, 24, 1, 22, 16, 31, 16], [35, 16, 1, 20, 24, 1, 22, 16, 31, 16, 27], [16, 1, 20, 24, 1, 22, 16, 31, 16, 27, 12], [1, 20, 24, 1, 22, 16, 31, 16, 27, 12, 18], [20, 24, 1, 22, 16, 31, 16, 27, 12, 18, 20], [24, 1, 22, 16, 31, 16, 27, 12, 18, 20, 24], [1, 22, 16, 31, 16, 27, 12, 18, 20, 24, 18], [22, 16, 31, 16, 27, 12, 18, 20, 24, 18, 1], [16, 31, 16, 27, 12, 18, 20, 24, 18, 1, 29], [31, 16, 27, 12, 18, 20, 24, 18, 1, 29, 19], [16, 27, 12, 18, 20, 24, 18, 1, 29, 19, 16], [27, 12, 18, 20, 24, 18, 1, 29, 19, 16, 1], [12, 18, 20, 24, 18, 1, 29, 19, 16, 1, 26], [18, 20, 24, 18, 1, 

In [23]:
import numpy
sequences = numpy.array(new_sequence)
x,y = sequences[:,:-1], sequences[:,-1] #we are not taking the last one in x which we will take in y

In [24]:
x


array([[ 7,  1, 28, ..., 12, 22, 20],
       [ 1, 28, 26, ..., 22, 20, 35],
       [28, 26, 16, ..., 20, 35, 16],
       ...,
       [12, 26, 26, ..., 29, 20, 25],
       [26, 26, 22, ..., 20, 25, 24],
       [26, 22, 20, ..., 25, 24, 28]])

In [19]:
y

array([35, 16,  1, 20, 24,  1, 22, 16, 31, 16, 27, 12, 18, 20, 24, 18,  1,
       29, 19, 16,  1, 26, 25, 32, 16, 27,  1, 25, 17,  1, 12, 27, 29, 20,
       17, 20, 14, 20, 12, 22,  1, 20, 24, 29, 16, 22, 22, 20, 18, 16, 24,
       14, 16,  1, 29, 25,  1, 28, 25, 22, 31, 16,  1, 14, 25, 23, 26, 22,
       16, 33,  1, 26, 27, 25, 13, 22, 16, 23, 28,  1, 12, 24, 15,  1, 14,
       27, 16, 12, 29, 16,  1, 20, 24, 24, 25, 31, 12, 29, 20, 31, 16,  1,
       28, 25, 22, 30, 29, 20, 25, 24, 28,  4,  1,  9, 34,  1, 16, 33, 26,
       16, 27, 29, 20, 28, 16,  1, 22, 20, 16, 28,  1, 20, 24,  1, 13, 30,
       20, 22, 15, 20, 24, 18,  1, 20, 24, 29, 16, 22, 22, 20, 18, 16, 24,
       29,  1, 28, 34, 28, 29, 16, 23, 28,  1, 29, 19, 12, 29,  1, 18, 16,
       24, 16, 27, 12, 29, 16,  1, 14, 25, 24, 29, 16, 24, 29,  2,  1, 12,
       30, 29, 25, 23, 12, 29, 16,  1, 29, 12, 28, 21, 28,  2,  1, 12, 24,
       15,  1, 25, 26, 29, 20, 23, 20, 35, 16,  1, 32, 25, 27, 21, 17, 22,
       25, 32, 28,  1, 12

**One hot encoding**

In [26]:
from keras.utils import to_categorical
# sequences = [to_categorical(x, num_classes=vocab) for x in X] check the error
sequences = [to_categorical(x_val, num_classes=vocab) for x_val in x]
x = numpy.array(sequences)
y = to_categorical(y, num_classes=vocab)

In [27]:
x

array([[[0., 0., 0., ..., 0., 0., 0.],
        [0., 1., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 1., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 1.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 1.],
        [0., 0., 0., ..., 0., 0., 0.]],

       ...,

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0.

**Define the model**

In [28]:
x.shape

(410, 10, 36)

In [29]:
import keras
import tensorflow as tf
from tensorflow.keras.layers import Bidirectional, LSTM, Dense
from tensorflow.keras.models import Sequential
def create_model(x):
  model = Sequential()
  model.add(Bidirectional(LSTM(70, input_shape=(x.shape[1], x.shape[2]))))
  model.add(Dense(vocab, activation='softmax'))
  model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
  model.summary()
  return model

In [30]:
model= create_model(x)
model.fit(x,y,epochs=200)

  super().__init__(**kwargs)


Epoch 1/200
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - accuracy: 0.0407 - loss: 3.5597
Epoch 2/200
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.1380 - loss: 3.4378
Epoch 3/200
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.1189 - loss: 3.1353
Epoch 4/200
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.1394 - loss: 3.0129
Epoch 5/200
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.1739 - loss: 2.9913
Epoch 6/200
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.1595 - loss: 2.8941
Epoch 7/200
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.1966 - loss: 2.9012
Epoch 8/200
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.1784 - loss: 2.8942
Epoch 9/200
[1m13/13[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x79e98c0da8c0>

In [35]:
from tensorflow.keras.preprocessing.sequence import pad_sequences # Import pad_sequences from the correct location

def generate_sequence(model, mapping, max_length, text, n_chars):
  text_t = text
  for _ in range(n_chars):
    encoded = [mapping[char] for char in text_t]
    encoded = pad_sequences([encoded], maxlen=max_length, truncating='pre')
    encoded = to_categorical(encoded, num_classes=vocab)
    yhat = model.predict(encoded)
    yhat = numpy.argmax(yhat, axis=-1)
    output = ''
    for char, index in mapping.items():
      if index == yhat:
        output = char
        break
    text_t += output
  return text_t

In [36]:
generate_sequence(model, mapping,10, 'I specializ', 30)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 294ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3

'I specialize in leveraging the power of a'

In [38]:
generate_sequence(model, mapping,10, 'intelligent systems', 30)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22

'intelligent systems that generate content, automa'

In [37]:
generate_sequence(model, mapping,10, 'data is an', 30) #giving unknown output due to the input not trained for this text

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23

'data is andte innoaiinnevolveling odguti'