In [24]:
# Training corpus: a short first-person FAQ written as a question line followed by
# an answer line, with a blank line between entries. This is the text the
# tokenizer is fit on and the text the training sequences are cut from.
faqs = """Who am I?
I am an AI Engineer with a strong focus on building intelligent systems that solve real-world problems using modern machine learning and software engineering practices.

What are my primary areas of interest?
My key interests lie in Artificial Intelligence, Machine Learning systems, and Game Development. I am particularly passionate about designing systems that combine automation, reasoning, and real-time decision making.

What kind of work do I enjoy doing the most?
I enjoy working on backend AI infrastructure such as Retrieval-Augmented Generation (RAG) pipelines, embedding systems, vector databases, and production-ready ML deployments. I like solving problems related to model reliability, hallucination mitigation, retrieval optimization, and system scalability.

Do I focus more on research or engineering?
My focus is primarily on applied engineering — building production-grade systems that actually work in real environments rather than staying limited to theoretical implementations.

What technologies do I typically work with?
I frequently work with FastAPI, PostgreSQL, vector databases such as pgvector, and modern LLM-based architectures. I am also involved in system design for AI-driven platforms.

Do I have an interest outside traditional AI applications?
Yes. I am passionate about Game Development and enjoy exploring how AI can be integrated into interactive environments, simulations, and intelligent gameplay systems.

What kind of problems motivate me?
I am motivated by problems related to system performance, information retrieval, automation, and intelligent decision-making systems — especially where model outputs must be accurate, explainable, and grounded in real data.

What is my approach to learning new technologies?
I prefer hands-on experimentation and building real projects. I focus on understanding how systems behave under production constraints rather than limiting myself to tutorials or theoretical study.

Do I work on scalable systems?
Yes. I am interested in optimizing performance for AI pipelines, improving retrieval latency, handling embedding mismatches, and deploying scalable backend architectures.

Am I interested in future-oriented technologies?
Absolutely. I am keen on exploring advanced AI applications including autonomous agents, intelligent game environments, and real-time AI-assisted platforms.
"""

In [25]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer

In [26]:
# Create a Keras Tokenizer with its default settings (no arguments are passed).
tokenizer = Tokenizer()

In [27]:
# Fit the tokenizer on the whole FAQ text, passed as a single document, so that
# tokenizer.word_index holds a word -> integer id entry for every word in it.
tokenizer.fit_on_texts([faqs])

In [28]:
# Number of distinct words the tokenizer learned from the corpus.
len(tokenizer.word_index)

177

In [29]:
# Turn every line of the corpus into next-word training samples: for a line with
# token ids [t1, t2, ..., tn] emit the growing prefixes [t1, t2], [t1, t2, t3],
# ..., [t1, ..., tn]. Lines with fewer than two tokens (including the blank
# separator lines) contribute nothing.
line_token_ids = tokenizer.texts_to_sequences(faqs.split('\n'))

input_sequences = [
  token_ids[:end]
  for token_ids in line_token_ids
  for end in range(2, len(token_ids) + 1)
]

In [30]:
# Inspect the generated prefix sequences (this displays the entire list).
input_sequences

[[68, 3],
 [68, 3, 1],
 [1, 3],
 [1, 3, 30],
 [1, 3, 30, 5],
 [1, 3, 30, 5, 69],
 [1, 3, 30, 5, 69, 18],
 [1, 3, 30, 5, 69, 18, 70],
 [1, 3, 30, 5, 69, 18, 70, 71],
 [1, 3, 30, 5, 69, 18, 70, 71, 13],
 [1, 3, 30, 5, 69, 18, 70, 71, 13, 6],
 [1, 3, 30, 5, 69, 18, 70, 71, 13, 6, 19],
 [1, 3, 30, 5, 69, 18, 70, 71, 13, 6, 19, 14],
 [1, 3, 30, 5, 69, 18, 70, 71, 13, 6, 19, 14, 4],
 [1, 3, 30, 5, 69, 18, 70, 71, 13, 6, 19, 14, 4, 20],
 [1, 3, 30, 5, 69, 18, 70, 71, 13, 6, 19, 14, 4, 20, 72],
 [1, 3, 30, 5, 69, 18, 70, 71, 13, 6, 19, 14, 4, 20, 72, 7],
 [1, 3, 30, 5, 69, 18, 70, 71, 13, 6, 19, 14, 4, 20, 72, 7, 73],
 [1, 3, 30, 5, 69, 18, 70, 71, 13, 6, 19, 14, 4, 20, 72, 7, 73, 15],
 [1, 3, 30, 5, 69, 18, 70, 71, 13, 6, 19, 14, 4, 20, 72, 7, 73, 15, 74],
 [1, 3, 30, 5, 69, 18, 70, 71, 13, 6, 19, 14, 4, 20, 72, 7, 73, 15, 74, 31],
 [1,
  3,
  30,
  5,
  69,
  18,
  70,
  71,
  13,
  6,
  19,
  14,
  4,
  20,
  72,
  7,
  73,
  15,
  74,
  31,
  32],
 [1,
  3,
  30,
  5,
  69,
  18,
  70,
  7

In [31]:
# Length of the longest prefix sequence; every sample is padded to this width.
max_len = max(map(len, input_sequences))

In [32]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
# Left-pad ('pre') every prefix with zeros up to max_len so all rows have the same
# width and the word to predict always sits in the last column.
padded_input_sequences = pad_sequences(input_sequences, maxlen = max_len, padding='pre')

In [33]:
# Inspect the padded sequences.
padded_input_sequences

array([[  0,   0,   0, ...,   0,  68,   3],
       [  0,   0,   0, ...,  68,   3,   1],
       [  0,   0,   0, ...,   0,   1,   3],
       ...,
       [  0,   0,   0, ...,   7,  38,   5],
       [  0,   0,   0, ...,  38,   5, 177],
       [  0,   0,   0, ...,   5, 177,  59]], dtype=int32)

In [34]:
# Model inputs: every column except the last one of each padded row.
X = padded_input_sequences[:,:-1]

In [35]:
# Targets: the last token id of each padded row, i.e. the word that follows X.
y = padded_input_sequences[:,-1]

In [36]:
# (number of samples, tokens per input window)
X.shape

(320, 37)

In [37]:
# One integer target per sample at this point.
y.shape

(320,)

In [38]:
from tensorflow.keras.utils import to_categorical
# One-hot encode the next-word ids for the softmax / categorical_crossentropy model.
# The targets are taken from padded_input_sequences rather than from `y` itself so
# that re-running this cell cannot one-hot encode an already one-hot array.
# NOTE(review): num_classes=283 is larger than the 178 ids the tokenizer can emit
# (177 words in word_index plus padding id 0). It is kept because the model's
# layers are sized to the same value; both would have to change together.
y = to_categorical(padded_input_sequences[:, -1], num_classes=283)

In [39]:
# After one-hot encoding: (number of samples, number of classes).
y.shape

(320, 283)

In [40]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

In [41]:
# Define model
# Layer sizes are read from the training arrays instead of being repeated as
# literals, so the network always matches the data it is trained on.
seq_len = X.shape[1]      # tokens per input window
num_classes = y.shape[1]  # width of the one-hot targets (y must already be one-hot encoded)

model = Sequential()
# Embedding layer first: maps each token id to a 100-dimensional vector
model.add(Embedding(input_dim=num_classes, output_dim=100, input_length=seq_len))
# Stacked LSTM layers; the first two return the full sequence so the next LSTM
# receives one vector per time step, the last returns only its final state
model.add(LSTM(150, return_sequences=True))
model.add(LSTM(150, return_sequences=True))
model.add(LSTM(150))
# Output layer: softmax probability for each class/token id
model.add(Dense(num_classes, activation='softmax'))



In [42]:
# Compile the model
# categorical_crossentropy pairs with the one-hot encoded targets in y; the
# accuracy metric reports how often the top-scoring token is the true next word.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


In [43]:
# Print the layer-by-layer summary of the model.
model.summary()

In [44]:
# Train the model
# 200 passes over the full (X, y) set. No validation data is passed, so the
# accuracy and loss printed per epoch are measured on the training samples only.
model.fit(X, y, epochs=200)


Epoch 1/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 17ms/step - accuracy: 0.0219 - loss: 5.6306 
Epoch 2/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.0424 - loss: 5.2839
Epoch 3/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.0427 - loss: 5.0274  
Epoch 4/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.0560 - loss: 4.9956
Epoch 5/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.0439 - loss: 4.9093
Epoch 6/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.0586 - loss: 4.8147
Epoch 7/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.0855 - loss: 4.7002
Epoch 8/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.0712 - loss: 4.6442
Epoch 9/200
[1m10/10[0m [32m━━━━━━

<keras.src.callbacks.history.History at 0x7873ac4ffbc0>

In [46]:
import time
import numpy as np

# Greedy next-word generation: starting from a seed prompt, repeatedly ask the
# model for the most likely next token, append its word and feed the longer text
# back in.
text = "Yes. I am passionate about Game Development"

# Pad prompts to the window width the model was trained on (the number of columns
# in X). pad_sequences also truncates from the front by default, so once the text
# grows past this width only the most recent tokens are kept.
context_len = X.shape[1]

for i in range(10):
  # tokenize
  token_text = tokenizer.texts_to_sequences([text])[0]
  # padding
  padded_token_text = pad_sequences([token_text], maxlen=context_len, padding='pre')
  # predict
  pos = int(np.argmax(model.predict(padded_token_text)))

  # Map the predicted id back to its word with a direct lookup.
  word = tokenizer.index_word.get(pos)
  if word is None:
    # Id 0 (padding) and ids beyond the vocabulary have no word. The text would
    # not change, so every remaining step would predict the same id again.
    break

  text = text + " " + word
  print(text)
  time.sleep(2)  # pause so the sentence visibly grows word by word

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
Yes. I am passionate about Game Development and
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
Yes. I am passionate about Game Development and enjoy
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
Yes. I am passionate about Game Development and enjoy exploring
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
Yes. I am passionate about Game Development and enjoy exploring how
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
Yes. I am passionate about Game Development and enjoy exploring how ai
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
Yes. I am passionate about Game Development and enjoy exploring how ai can
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
Yes. I am passionate about Game Development and enjoy exploring how ai can can
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━

In [47]:
# Inspect the learned word -> id mapping.
tokenizer.word_index

{'i': 1,
 'and': 2,
 'am': 3,
 'systems': 4,
 'ai': 5,
 'on': 6,
 'real': 7,
 'in': 8,
 'what': 9,
 'work': 10,
 'do': 11,
 'to': 12,
 'focus': 13,
 'intelligent': 14,
 'problems': 15,
 'my': 16,
 'retrieval': 17,
 'with': 18,
 'building': 19,
 'that': 20,
 'learning': 21,
 'engineering': 22,
 'of': 23,
 'game': 24,
 'enjoy': 25,
 'production': 26,
 'system': 27,
 'environments': 28,
 'technologies': 29,
 'an': 30,
 'modern': 31,
 'machine': 32,
 'interest': 33,
 'development': 34,
 'passionate': 35,
 'about': 36,
 'automation': 37,
 'time': 38,
 'decision': 39,
 'making': 40,
 'kind': 41,
 'backend': 42,
 'such': 43,
 'as': 44,
 'pipelines': 45,
 'embedding': 46,
 'vector': 47,
 'databases': 48,
 'related': 49,
 'model': 50,
 'or': 51,
 'is': 52,
 '—': 53,
 'rather': 54,
 'than': 55,
 'theoretical': 56,
 'architectures': 57,
 'for': 58,
 'platforms': 59,
 'applications': 60,
 'yes': 61,
 'exploring': 62,
 'how': 63,
 'be': 64,
 'performance': 65,
 'scalable': 66,
 'interested': 67,
 '