#### Importing and installing needed libraries

In [None]:
import os

!pip install -U torch
!pip install PyMuPDF # for reading PDFs with Python
!pip install tqdm # for progress bars
!pip install sentence-transformers # for embedding models
!pip install accelerate
!pip install bitsandbytes
!pip install flash-attn --no-build-isolation
!pip install langchain #might not be used but to create pipelines
!pip install openai #same case as langchain
!pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib
!pip install --upgrade --quiet  google-api-python-client google-auth-httplib2 google-auth-oauthlib


Collecting torch
  Downloading torch-2.3.0-cp310-cp310-manylinux1_x86_64.whl (779.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m779.1/779.1 MB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cuf

In [None]:
!pip install -q torch transformers transformers accelerate bitsandbytes langchain sentence-transformers faiss-gpu openpyxl pacmap

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m647.5/647.5 kB[0m [31m47.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for annoy (setup.py) ... [?25l[?25hdone


#### Get and prepare our data
Get our various documents and put them together

In [None]:
import os
import requests
import langchain
import openai

os.environ["OPENAI_API_KEY"] = ""

In [None]:
#connect google drive
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
#test if the google drive connection has been made
!ls "/content/drive/MyDrive/DME/Build/Train"

cre_pp1.gdoc  CRE.txt	     pp1_pdf.pdf      pp1_pdf_pg.pdf	   text_embeddings.gsheet
cre_pp1.txt   final_123.pdf  pp1_pdf_pg1.pdf  text_embeddings.csv


In [None]:
#set file path for content
path_main_doc = "/content/drive/MyDrive/DME/Build/Train/CRE.txt"
path_text_file1 = "/content/drive/MyDrive/DME/CRE Input text/test.txt"
path_text_file2 = "/content/drive/MyDrive/DME/CRE Input text/test2.txt"

### Preprocessing the data


In [None]:
#function to read, clean, fix up and chunk the first text
def read_and_chunk_text1(path: str)-> list[dict]:
  """Read the first supplementary text file, chunk it, patch known-bad chunks and return per-chunk stats.

  The file is split with `RecursiveCharacterTextSplitter` (chunk_size 1200, no
  overlap). Chunks 32 and 33 are then overwritten with hand-corrected text and
  two near-empty chunks are removed. These fixes are tied to the exact content
  of the file: the function asserts that 115 chunks remain afterwards.

  Args:
    path: location of the text file to read (decoded as UTF-8).

  Returns:
    One dict per chunk with the keys `chunk_char_count`, `chunk_word_count`,
    `chunk_token_count` (character count / 4) and `text`.

  Raises:
    AssertionError: if the number of chunks after patching is not 115.
  """
  #create an instance of the splitter
  text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1200,
    chunk_overlap = 0,
    length_function = len)

  #read the data (explicit encoding: the text contains non-ASCII characters such as '\uf076')
  with open(path, encoding="utf-8") as sample:
    contents = sample.read()

  #chunk the data
  contents_split = text_splitter.split_text(contents)

  #fix chunks with overlap
  contents_split[33] = '''
  b)	Effects of sin on Adam and Eve
  i.	They started dying yet they were to live forever
  ii.	They became afraid of God/they hide when He called them.
  iii.	They lost authority over the other creation
  iv.	Man was to rule over the woman/inequality between man and woman set in.
  v.	The woman was to be in pain when giving birth
  vi.	There developed enemity between the human beings and the serpent
  vii.	They became embarrassed because of their nakedness
  viii.	They were expelled out of the Garden of Eden /separated with God
  ix.	They developed mistrust between man and woman. (4x2=8 mks)
  c)	How the church helps to bring back members who have fallen from the faith.
  i.	By visiting the/inviting them to their homes
  ii.	By being patient/forgiving them
  iii.	By evangelizing to them/preach/teaching
  iv.	By guiding and counseling them/referring them to experts according to their needs.
  v.	Praying for them
  vi.	By inviting them back to church
  vii.	By encouraging them to repent/confess
  viii.	By offering material needs/aids	(5x1= 5 mks)
  '''

  contents_split[32] = '''
  1.	The teaching about human beings from the biblical creation accounts
  i.	Human beings are created in the image/likeliness of God
  ii.	They have been given authority /domination over God creation.
  iii.	They communicate /fellowships with God.
  iv.	They are special/the greatest creation of God
  v.	They have the ability to think /reason/make choices/decision sin their lives
  vi.	They are blessed by God
  vii.	They have give a special place to stay/Garden of Eden
  viii.	Human beings are to use other creation/plant for their benefits
  ix.	They are to take care of the creation till the land work
  x.	Human beings are to procreate/multiply through marriage.
  xi.	Man and woman era to compliment/provide companionship for each other.
  xii.	Human beings are Gods creation/male and female.
  xiii.	The woman is created out of hetmans rib (7x1= 7 mks)
  '''

  #delete chunks with little to no data
  #NOTE: the second index (115) refers to the list *after* the first deletion
  del contents_split[98]
  del contents_split[115]

  #guard: the hard-coded indices above are only valid for the expected file
  assert len(contents_split) == 115, (
      f"expected 115 chunks after patching, got {len(contents_split)}")

  text_stat = []
  for chunk in contents_split:
    #replace the private-use bullet glyph and tabs with plain spaces
    chunk = chunk.replace('\uf076', ' ').replace('\t', ' ')
    text_stat.append(
              {
                  "chunk_char_count": len(chunk),
                  "chunk_word_count": len(chunk.split(" ")),
                  "chunk_token_count": len(chunk) / 4, # rough estimate: 1 token = ~4 characters
                  "text": chunk
              })

  return text_stat

In [None]:
#function to read, clean, fix up and chunk the second text
def read_and_chunk_text2(path: str)-> list[dict]:
  """Read the second supplementary text file, chunk it, patch known-bad chunks and return per-chunk stats.

  The file is split with `RecursiveCharacterTextSplitter` (chunk_size 1200, no
  overlap). Chunks 119 and 120 are overwritten with hand-corrected text and one
  near-empty chunk (index 87) is removed. These fixes are tied to the exact
  content of the file: the function asserts that 126 chunks remain afterwards.

  Args:
    path: location of the text file to read (decoded as UTF-8).

  Returns:
    One dict per chunk with the keys `chunk_char_count`, `chunk_word_count`,
    `chunk_token_count` (character count / 4) and `text`.

  Raises:
    AssertionError: if the number of chunks after patching is not 126.
  """
  #create an instance of the splitter
  text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1200,
    chunk_overlap = 0,
    length_function = len)

  #read the data (explicit encoding: the text contains non-ASCII characters such as '\uf076')
  with open(path, encoding="utf-8") as sample:
    contents = sample.read()

  #chunk the data
  contents_split = text_splitter.split_text(contents)

  #fix chunks with overlap
  contents_split[119] = '''
b)	Outline the events that took place from the time Jesus was arrested up to the time he was
sentenced to die
i.	Jesus was taken to the house of the high priest
ii.	Peter denied Jesus three times
iii.	Jesus was mocked/ beaten /blindfolded
iv.	Jesus was taken to the Sanhendrin /they made religious accusations against him
v.	Jesus was taken to Pilate/they made political accusations against him
vi.	He was sent to Herod who questioned Jesus/ridiculed/dressed him in a royal robe
vii.	Pilate said that he had not found Jesus guilty
viii.	Pilate chose to have Jesus flogged/chastised
ix.	The crowd shouted that Jesus should be crucified/demanded release of Barabas
x.	Pilate surrendered Jesus to be crucified (7x1=7mks)
  '''

  contents_split[120] = '''
 c)	State seven lessons Christians learn from the suffering and death of Jesus
i.	Christians should have faith in God
ii.	They should endure suffering /be ready to be rejected
iii.	They should forgive their enemies
iv.	Christians should repent /confess their sins
v.	They should be obedient /loyal to God
vi.	They should stand for the truth at all times
vii.	Christians should sacrifice for the service of others
viii.	They should be prayerful/pray for others
ix.	Christians should witness /surrender to the Lordship of Christ
x.	Christians should be courageous /brave (7x1=7mks)
  '''

  #delete chunks with little to no data
  del contents_split[87]

  #guard: the hard-coded indices above are only valid for the expected file
  assert len(contents_split) == 126, (
      f"expected 126 chunks after patching, got {len(contents_split)}")

  text_stat = []
  for chunk in contents_split:
    #replace the private-use bullet glyph and tabs with plain spaces
    chunk = chunk.replace('\uf076', ' ').replace('\t', ' ')
    text_stat.append(
              {
                  "chunk_char_count": len(chunk),
                  "chunk_word_count": len(chunk.split(" ")),
                  "chunk_token_count": len(chunk) / 4, # rough estimate: 1 token = ~4 characters
                  "text": chunk
              })

  return text_stat

In [None]:
#a function to read our needed content into a single variable
def read_and_chunk_doc(path: str) -> list[dict]:
  """Read the main document, split it into chunks and return per-chunk stats.

  The text is split with `RecursiveCharacterTextSplitter` (chunk_size 1100, no
  overlap). Unlike the two supplementary readers, no chunk is patched, removed
  or cleaned here.

  Args:
    path: location of the text file to read (decoded as UTF-8).

  Returns:
    One dict per chunk with the keys `chunk_char_count`, `chunk_word_count`,
    `chunk_token_count` (character count / 4) and `text`.
  """
  #create an instance of the splitter function
  text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1100,
    chunk_overlap = 0,
    length_function = len
  )

  #read the data (explicit encoding so the result does not depend on the platform default)
  with open(path, encoding="utf-8") as train:
    content = train.read()

  content_split = text_splitter.split_text(content)

  #describe every chunk with a small stats dictionary
  chunks_text = [
      {
          "chunk_char_count": len(chunk),
          "chunk_word_count": len(chunk.split(" ")),
          "chunk_token_count": len(chunk) / 4, # rough estimate: 1 token = ~4 characters
          "text": chunk
      }
      for chunk in content_split
  ]

  return chunks_text

In [None]:
from tqdm.auto import tqdm
from langchain.text_splitter import RecursiveCharacterTextSplitter

#read data from the various documents and put them all together
chunks_text = read_and_chunk_doc(path_main_doc)
text1 = read_and_chunk_text1(path_text_file1)
text2 = read_and_chunk_text2(path_text_file2)

chunks_text = chunks_text + text1 + text2

In [None]:
print(f"Number of chunks we have: {len(chunks_text)}")

Number of chunks we have: 1165


In [None]:
chunks_text[1164]

{'chunk_char_count': 996,
 'chunk_word_count': 143,
 'chunk_token_count': 249.0,
 'text': 'b) State seven rights of a Kenyan citizen\ni. Right to employment/ a just wage\nii. Right to own/use personal property\niii. Right to freedom of worship/personal freedom\niv. Right to marry/raise a family\nv. Right to a fair trial\nvi. Right of assembly\nvii. Right to life/protection\nviii. Right to health\nix. Right to education\nx. Right to association\nxi. Right to vote\nxii. Right of movement (7x1=7mks)\n\nc) Ways in which the Church in Kenya helps to reform errant members in the society.\ni. The Church prays for /with them\nii. By teaching /preaching the word of God to them\niii. Through guiding and counseling them\niv. By contributing towards their basic needs\nv. The Church trains them in various skills in their vocational centres\nvi. The Church offers employment opportunities to them\nvii. By forgiving /accepting /involving them in Church activities\nviii. The Church establishes rehabili

### Some rough EDA on our text

In [None]:
!pip install pandas
!pip install numpy



In [None]:
import pandas as pd

df = pd.DataFrame(chunks_text)
df.head()

Unnamed: 0,chunk_char_count,chunk_word_count,chunk_token_count,text
0,329,48,82.25,CRE Notes Form 1 - 4 \n\nForm One - CRE Notes\...
1,1009,155,252.25,Why do schools’ study CRE?\nThere are many rea...
2,838,142,209.5,The Bible\n The Bible is the sacred book conta...
3,534,85,133.5,The Bible as a library\nThe Bible is referred ...
4,1025,163,256.25,Why the Bible is a library\nSeveral studies s...


In [None]:
#get some stats about the data
df.describe().round(2)

Unnamed: 0,chunk_char_count,chunk_word_count,chunk_token_count
count,1165.0,1165.0,1165.0
mean,785.86,124.46,196.47
std,264.94,44.04,66.23
min,46.0,8.0,11.5
25%,626.0,97.0,156.5
50%,840.0,131.0,210.0
75%,1010.0,158.0,252.5
max,1198.0,239.0,299.5


From the description we've seen so far, we have an average of 196 tokens and a max token count of 299 (estimated as characters / 4). The model used for embedding, "all-mpnet-base-v2", has a max token limit of 384, which leaves us a lot of room for embedding and padding.

### The code below can be ignored.
After experimenting and refinement, the chunking method chosen for use is Langchain's `RecursiveCharacterTextSplitter`, which splits the text on `["\n\n", "\n", ". "]`. The method below employs sentence chunking, splitting only on `[". "]`, followed by chunking the discovered sentences together.

#### Some more preprocessing

Sentence chunking

In [None]:
!pip install spacy

In [None]:
from spacy.lang.en import English
nlp = English()

config = {"punct_chars": ["\n","/"]}
nlp.add_pipe("sentencizer")

doc = nlp((pages_text[11]).get("text"))
#assert len(list(doc.sents)) == 1

list(doc.sents)

In [None]:
from spacy.lang.en import English
nlp = English()
nlp.add_pipe("sentencizer")
for item in tqdm(pages_text):
  item["sentences"] = list(nlp(item["text"]).sents)
  item["sentences"] = [str(sentence) for sentence in item["sentences"]]
  item["page_sentence_count_spacy"] = len(item["sentences"])

In [None]:
#check a random sample
#random.sample(pages_text, k=1)
pages_text[222]

In [None]:
#check how the data looks after spacy
df = pd.DataFrame(pages_text)
df.describe().round(2)

In [None]:
#max number of sentences in a chunk
sent_chunk = 15

#function ot recursively split the list
def split_list(input_list: list, slice_size: int)-> list[list[str]]:
  """Cut `input_list` into consecutive slices of at most `slice_size` items.

  The last slice holds the remainder and may be shorter than `slice_size`.
  """
  slices = []
  for start in range(0, len(input_list), slice_size):
    slices.append(input_list[start:start + slice_size])
  return slices

for item in tqdm(pages_text):
  item["sentence_chunks"] = split_list(input_list=item["sentences"],
                                       slice_size = sent_chunk)
  item["num_chunks"] = len(item["sentence_chunks"])

In [None]:
#sample an example
random.sample(pages_text, k=1)

In [None]:
#get some stats about our new data
df = pd.DataFrame(pages_text)
df.describe().round(2)

#### Splitting each chunk into its own item

In [None]:
import re

#split each chunk into its own item
pages_chunks = []
for item in tqdm(pages_text):
  for sen_chunk in item["sentence_chunks"]:
    chunk_dict = {}
    chunk_dict["page_number"] = item["page_number"]

    #join the sentences together to form a single string
    joined_sent_chunk = "".join(sen_chunk).replace("  ", " ").strip()
    #sentences are joined without a separator, so "end.Start" occurs at the seams;
    #re-insert the missing space after the full stop (".A" -> ". A") instead of dropping the full stop
    joined_sent_chunk = re.sub(r'\.([A-Z])', r'. \1', joined_sent_chunk)
    chunk_dict["sen_chunk"] = joined_sent_chunk

    #some stats about the chunk
    chunk_dict["chunk_char_count"] = len(joined_sent_chunk)
    chunk_dict["chunk_word_count"] = len(joined_sent_chunk.split(" "))
    chunk_dict["chunk_token_count"] = len(joined_sent_chunk) / 4 # 1 token = ~4 characters

    pages_chunks.append(chunk_dict)

len(pages_chunks)

In [None]:
random.sample(pages_chunks, k=1)

In [None]:
#some stats about our chunks
df = pd.DataFrame(pages_chunks)
df.describe().round(2)

In [None]:
# Show random chunks with at most `min_token_length` tokens
min_token_length = 20
short_chunks = df[df["chunk_token_count"] <= min_token_length]
# sample at most 5 rows so the cell does not raise when fewer short chunks exist
for _, row in short_chunks.sample(min(5, len(short_chunks))).iterrows():
    print(f'Chunk token count: {row["chunk_token_count"]} | Text: {row["sen_chunk"]}')

## Embedding our text chunks

In [None]:
from sentence_transformers import SentenceTransformer
embedding_model = SentenceTransformer(
    model_name_or_path = "all-mpnet-base-v2",
    device = "cpu")

#test sentences to see how the embeddings look like
sentences = [
    "The Sentences Transformers library provides an easy and open-source way to create embeddings.",
    "Sentences can be embedded one by one or as a list of strings.",
    "Embeddings are one of the most powerful concepts in machine learning!",
    "Learn to use embeddings well and you'll be well on your way to being an AI engineer."
]
#initialise an instance of the embedding model
embeddings = embedding_model.encode(sentences)
embeddings_dict = dict(zip(sentences, embeddings))

# See the embeddings
for sentence, embedding in embeddings_dict.items():
    print("Sentence:", sentence)
    print("Embedding:", embedding)
    print("")



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Sentence: The Sentences Transformers library provides an easy and open-source way to create embeddings.
Embedding: [-2.07982697e-02  3.03164832e-02 -2.01217849e-02  6.86484650e-02
 -2.55256221e-02 -8.47686455e-03 -2.07225574e-04 -6.32377118e-02
  2.81606894e-02 -3.33353989e-02  3.02633960e-02  5.30721806e-02
 -5.03527038e-02  2.62288321e-02  3.33313718e-02 -4.51577231e-02
  3.63044813e-02 -1.37122418e-03 -1.20171458e-02  1.14947259e-02
  5.04510924e-02  4.70856987e-02  2.11913940e-02  5.14606535e-02
 -2.03746483e-02 -3.58889401e-02 -6.67763175e-04 -2.94393823e-02
  4.95859198e-02 -1.05639677e-02 -1.52014112e-02 -1.31758570e-03
  4.48197424e-02  1.56023465e-02  8.60379430e-07 -1.21392624e-03
 -2.37978697e-02 -9.09368275e-04  7.34484056e-03 -2.53933994e-03
  5.23370504e-02 -4.68043424e-02  1.66214760e-02  4.71579395e-02
 -4.15599644e-02  9.01976076e-04  3.60277519e-02  3.42214219e-02
  9.68227163e-02  5.94829023e-02 -1.64984372e-02 -3.51249315e-02
  5.92516130e-03 -7.07903586e-04 -2.4103

In [None]:
%time
#send model to the GPU
embedding_model.to("cuda")

#perform a batched operation to embed our chunks
text_chunks = [item["text"] for item in chunks_text]

CPU times: user 3 µs, sys: 1 µs, total: 4 µs
Wall time: 6.91 µs


In [None]:
%time
embedding_model.to("cuda")

#create a column in our dataframe for the embeddings
for item in tqdm(chunks_text):
  item["embedding"] = embedding_model.encode(item["text"])

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 6.68 µs


  0%|          | 0/1165 [00:00<?, ?it/s]

In [None]:
text_embedding = pd.DataFrame(chunks_text)
text_embedding.head()

Unnamed: 0,chunk_char_count,chunk_word_count,chunk_token_count,text,embedding
0,329,48,82.25,CRE Notes Form 1 - 4 \n\nForm One - CRE Notes\...,"[-0.019121116, -0.00020399032, 0.014480151, 0...."
1,1009,155,252.25,Why do schools’ study CRE?\nThere are many rea...,"[0.033478357, -0.00044604312, -0.0023351368, 0..."
2,838,142,209.5,The Bible\n The Bible is the sacred book conta...,"[0.04458893, -0.0074575553, 0.027849834, -0.01..."
3,534,85,133.5,The Bible as a library\nThe Bible is referred ...,"[0.01278649, -0.019161435, -0.00067532813, 0.0..."
4,1025,163,256.25,Why the Bible is a library\nSeveral studies s...,"[0.026911512, -0.019623242, 0.00017200968, 0.0..."


### Save the embeddings to file
Easier than having to embed text all over again

In [None]:
text_embeddings_df = pd.DataFrame(chunks_text)
embeddings_df = "/content/drive/MyDrive/DME/Build/Train/text_embeddings.csv"
text_embeddings_df.to_csv(embeddings_df, index=False)

In [None]:
!ls "/content/drive/MyDrive/DME/Build/Train"

cre_pp1.gdoc  CRE.txt	     pp1_pdf.pdf      pp1_pdf_pg.pdf	   text_embeddings.gsheet
cre_pp1.txt   final_123.pdf  pp1_pdf_pg1.pdf  text_embeddings.csv


In [None]:
text_embeddings_df["embedding"]

0       [-0.019121116, -0.00020399032, 0.014480151, 0....
1       [0.033478357, -0.00044604312, -0.0023351368, 0...
2       [0.04458893, -0.0074575553, 0.027849834, -0.01...
3       [0.01278649, -0.019161435, -0.00067532813, 0.0...
4       [0.026911512, -0.019623242, 0.00017200968, 0.0...
                              ...                        
1160    [0.0020182794, 0.014634834, 0.00344758, 0.0192...
1161    [0.048364837, 0.030426404, -0.022581587, 0.024...
1162    [0.010999964, -0.03211778, -0.016392093, 0.032...
1163    [0.018476944, -0.012908273, -0.01594961, -0.00...
1164    [0.015156806, 0.019250376, -0.013837779, 0.014...
Name: embedding, Length: 1165, dtype: object

In [None]:
import pandas as pd
#import csv file to see if it has saved properly
embeddings_df = "/content/drive/MyDrive/DME/Build/Train/text_embeddings.csv"
text_chunks_test = pd.read_csv("/content/drive/MyDrive/DME/Build/Train/text_embeddings.csv")
text_chunks_test.head()

Unnamed: 0,chunk_char_count,chunk_word_count,chunk_token_count,text,embedding
0,329,48,82.25,CRE Notes Form 1 - 4 \n\nForm One - CRE Notes\...,[-1.91211160e-02 -2.03990319e-04 1.44801512e-...
1,1009,155,252.25,Why do schools’ study CRE?\nThere are many rea...,[ 3.34783569e-02 -4.46043123e-04 -2.33513676e-...
2,838,142,209.5,The Bible\n The Bible is the sacred book conta...,[ 4.45889309e-02 -7.45755527e-03 2.78498344e-...
3,534,85,133.5,The Bible as a library\nThe Bible is referred ...,[ 1.27864899e-02 -1.91614348e-02 -6.75328134e-...
4,1025,163,256.25,Why the Bible is a library\nSeveral studies s...,[ 2.69115120e-02 -1.96232423e-02 1.72009677e-...


In [None]:
text_chunks_test.tail()

Unnamed: 0,chunk_char_count,chunk_word_count,chunk_token_count,text,embedding
1160,902,128,225.5,c) The role of the Holy Spirit in the Church t...,[ 2.01827940e-03 1.46348337e-02 3.44757992e-...
1161,409,66,102.25,5. a) Similarities between the Traditional Afr...,[ 4.83648367e-02 3.04264035e-02 -2.25815866e-...
1162,1052,137,263.0,b) Explain five factors that have led to the i...,[ 1.09999636e-02 -3.21177803e-02 -1.63920932e-...
1163,989,135,247.25,c) Challenges that children from single parent...,[ 1.84769444e-02 -1.29082734e-02 -1.59496106e-...
1164,996,143,249.0,b) State seven rights of a Kenyan citizen\ni. ...,[ 1.51568064e-02 1.92503762e-02 -1.38377789e-...


# Semantic search
Matching a user question with docs with similar meaning. Use the embedding created in the section above to identify similarity

In [None]:
import random
import torch
import numpy as np
import pandas as pd
import ast

device = "cuda" if torch.cuda.is_available() else "cpu"

#path with our embeddings (defined here so this section also runs on a fresh kernel)
embeddings_df = "/content/drive/MyDrive/DME/Build/Train/text_embeddings.csv"

#import texts and embedding df
text_embeddings_df = pd.read_csv(embeddings_df)

#convert embedding column to np array
#(the CSV stores each vector as its printed form "[v1 v2 ...]", so strip the brackets and parse on whitespace)
text_embeddings_df["embedding"] = text_embeddings_df["embedding"].apply(lambda x: np.fromstring(x.strip("[]"), sep = " "))

#convert texts and embedding df to list of dicts
chunks_texts = text_embeddings_df.to_dict(orient="records")

#convert the numpy array to pytorch tensor
embeddings = torch.tensor(np.array(text_embeddings_df["embedding"].to_list()), dtype = torch.float32).to(device)
embeddings.shape


torch.Size([1165, 768])

In [None]:
text_embeddings_df.head()

Unnamed: 0,chunk_char_count,chunk_word_count,chunk_token_count,text,embedding
0,329,48,82.25,CRE Notes Form 1 - 4 \n\nForm One - CRE Notes\...,"[-0.019121116, -0.000203990319, 0.0144801512, ..."
1,1009,155,252.25,Why do schools’ study CRE?\nThere are many rea...,"[0.0334783569, -0.000446043123, -0.00233513676..."
2,838,142,209.5,The Bible\n The Bible is the sacred book conta...,"[0.0445889309, -0.00745755527, 0.0278498344, -..."
3,534,85,133.5,The Bible as a library\nThe Bible is referred ...,"[0.0127864899, -0.0191614348, -0.000675328134,..."
4,1025,163,256.25,Why the Bible is a library\nSeveral studies s...,"[0.026911512, -0.0196232423, 0.000172009677, 0..."


In [None]:
import random
import torch
import numpy as np
import pandas as pd

#path with our embeddings
embeddings_df = "/content/drive/MyDrive/DME/Build/Train/text_embeddings.csv"

device = "cuda" if torch.cuda.is_available() else "cpu"

text_embeddings_df = pd.read_csv(embeddings_df)
text_embeddings_df["embedding"].head()

0    [-1.91211160e-02 -2.03990319e-04  1.44801512e-...
1    [ 3.34783569e-02 -4.46043123e-04 -2.33513676e-...
2    [ 4.45889309e-02 -7.45755527e-03  2.78498344e-...
3    [ 1.27864899e-02 -1.91614348e-02 -6.75328134e-...
4    [ 2.69115120e-02 -1.96232423e-02  1.72009677e-...
Name: embedding, dtype: object

In [None]:
text_embeddings_df.tail()

Unnamed: 0,chunk_char_count,chunk_word_count,chunk_token_count,text,embedding
1160,902,128,225.5,c) The role of the Holy Spirit in the Church t...,[ 2.01827940e-03 1.46348337e-02 3.44757992e-...
1161,409,66,102.25,5. a) Similarities between the Traditional Afr...,[ 4.83648367e-02 3.04264035e-02 -2.25815866e-...
1162,1052,137,263.0,b) Explain five factors that have led to the i...,[ 1.09999636e-02 -3.21177803e-02 -1.63920932e-...
1163,989,135,247.25,c) Challenges that children from single parent...,[ 1.84769444e-02 -1.29082734e-02 -1.59496106e-...
1164,996,143,249.0,b) State seven rights of a Kenyan citizen\ni. ...,[ 1.51568064e-02 1.92503762e-02 -1.38377789e-...


In [None]:
#creating an extra instance of the embedding model to avoid rerun
from sentence_transformers import util, SentenceTransformer

embedding_model = SentenceTransformer(
    model_name_or_path = "all-mpnet-base-v2",
    device = device
)

In [None]:
#perform a semantic search --example
#define a query string
query = "role of medicine men"
print(f"Query: {query}")

#turn the query string to an embedding with the same model
query_embedding = embedding_model.encode(
    query, convert_to_tensor = True
)

#get similarity scores (dot product)
dot_scores = util.dot_score(a = query_embedding, b = embeddings)[0]

print(f"Length of embeddings: {len(embeddings)}")
top_results_dp = torch.topk(dot_scores, k=5)
top_results_dp

Query: role of medicine men
Length of embeddings: 1165


torch.return_types.topk(
values=tensor([0.7207, 0.6512, 0.5150, 0.4658, 0.4094], device='cuda:0'),
indices=tensor([ 179,  249,  178,  180, 1054], device='cuda:0'))

In [None]:
#a helper function to print wrapped text
import textwrap

def print_wrapped(text, wrap_length=80):
  """Print `text` re-flowed so that no line is longer than `wrap_length` characters."""
  print("\n".join(textwrap.wrap(text, wrap_length)))

In [None]:
print(f"Query: {query}\n")
print("Results:")

#loop through zipped together scores and indices from torch.topk
for score, idx in zip(top_results_dp[0], top_results_dp[1]):
  print(f"Score: {score:.7f}")
  print(f"Text: {chunks_text[idx]['text']}")
  print("\n")

Query: role of medicine men

Results:
Score: 0.7206972
Text: Roles of the medicine women/men in the Community.
Medicine women/men are healers who were and are respected by the community. This is because they were and are able to:
1) Treat and heal the sick
2) Solve serious and complicated chronic illnesses
3) Give medicine in form of powder, herbs, minerals or liquid form and observed patients swallowing, drinking, sniffing, and applying on the skin.
4) Offer prayers and sacrifices to God.
5) Give charms to protect individual persons from evil spirits.
6) Perform specialized medical roles in some communities in spite of the fact that we have modern hospitals, counselors and psychologists.
Elders were and still are community leaders.
They were not religious specialists but the community gave elders duties, which made them close to religious leaders.


Score: 0.6511767
Text: Qd. Explain the role of medicine-men in the African Communities and their relevant today
1) Medicine men
• They ar

### Functionizing our semantic search pipeline
Make the semantic search into a function that can be called. In this project, as we are working with NLU techniques, text and words, we will be using cosine similarity, as it measures only the direction of the vectors, unlike the dot product, which is also affected by their magnitude.

In [None]:
!pip install torchmetrics

Collecting torchmetrics
  Downloading torchmetrics-1.4.0.post0-py3-none-any.whl (868 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/868.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━[0m [32m839.7/868.8 kB[0m [31m25.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m868.8/868.8 kB[0m [31m19.7 MB/s[0m eta [36m0:00:00[0m
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.11.2-py3-none-any.whl (26 kB)
Installing collected packages: lightning-utilities, torchmetrics
Successfully installed lightning-utilities-0.11.2 torchmetrics-1.4.0.post0


In [None]:
import time
import torch.nn.functional as F #import pytorch function apis libraries

#a function to retrieve resources
def retrieve_resources(
    query: str, embeddings: torch.Tensor,
    model: SentenceTransformer = embedding_model,
    n_resources_to_return: int=3,
    print_time: bool=False):
  """Embed `query` and return the best cosine-similarity matches among `embeddings`.

  Args:
    query: the search string.
    embeddings: tensor holding one stored embedding per row (compared along dim=1).
    model: embedding model used to encode the query; defaults to the
      notebook-level `embedding_model` as it exists when this cell is run.
    n_resources_to_return: how many results to return; capped at the number
      of stored embeddings.
    print_time: when True, print how long the similarity computation took.

  Returns:
    The `(scores, indices)` pair produced by `torch.topk` on the similarity scores.
  """
  #embed the query and keep it on the same device as the stored embeddings
  query_embedding = model.encode(query, convert_to_tensor=True).to(embeddings.device)

  #get the cosine similarity of the embeddings
  #unsqueeze(0) adds the batch dimension explicitly (the former `[None: ]` was a plain slice)
  start_time = time.time()
  cos_scores = F.cosine_similarity(query_embedding.unsqueeze(0), embeddings, dim=1)
  end_time = time.time()

  if print_time:
    print(f"Time taken to get scores on {len(embeddings)} embeddings: {end_time-start_time:.5f} seconds")

  #topk raises if k exceeds the number of scores, so cap the request
  scores, indices = torch.topk(
      input = cos_scores,
      k = min(n_resources_to_return, len(embeddings))
  )
  return scores, indices

def print_top_results(
    query: str, embeddings: torch.tensor,
    chunks_text: list[dict] = chunks_text,
    n_resources_to_return: int = 5):
  """Run `retrieve_resources` for `query` and print each hit's score and chunk text.

  `chunks_text` is indexed with the positions returned by the search and each
  entry's "text" value is printed.
  """
  top_scores, top_indices = retrieve_resources(
      query = query,
      embeddings = embeddings,
      n_resources_to_return = n_resources_to_return)

  print(f"Query: {query}")
  print("Results:")

  #walk the scores and indices in parallel
  for rank in range(len(top_scores)):
    score = top_scores[rank]
    index = top_indices[rank]
    print(f"Score: {score:.4f}")
    #print relevant sentence chunks
    print(f"{chunks_text[index]['text']}")
    print("\n")

In [None]:
#test our functions
query = "Isaiah's prophecy"

scores, indices = retrieve_resources( query=query,
                                     embeddings = embeddings)
scores, indices

(tensor([0.7450, 0.7179, 0.6670], device='cuda:0'),
 tensor([1152,  255,  257], device='cuda:0'))

In [None]:
#print out the text with top scores
print_top_results(
    query=query,
    embeddings = embeddings
)

Query: Isaiah's prophecy
Results:
Score: 0.7450
1. a) Isaiah’s prophecies concerning the Messiah
i. The Messiah would be born of a virgin/young women
ii. He would be called Immanuel
iii. He would be a wonderful counselor/might God/everlasting father/prince of peace
iv. The Messiah would have an everlasting kingdom
v. He would be a ruler from the house of David
vi. He would rule with justice/righteousness/peace
vii. He would be humble/simple/ordinary/without beauty (nothing attractive)
viii. The Messiah would be rejected/despised/sentenced to die
ix. He would suffer for the forgiveness of the sins of many people
x. It would be the will of his father for him to suffer
xi. He would succeed in his work/be highly honoured
xii. He would be filled with God’s spirit/anointed
xiii. He would preach goods news/set the captives free
(8x1 = 8mks)


Score: 0.7179
(ii) Isaiah’s Prophecy – Isaiah 7: 10 – 16; 9: 1- 7; 61:1 –2; and 63.
In these readings, Isaiah prophesied that the Messiah would be:
• Bo

### Get access to the LLM model (gemma)

In [None]:
!pip install huggingface_hub



In [None]:
from huggingface_hub import login
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

#### Getting an LLM for local generation
Time for the G part of RAG

In [None]:
#get GPU available memory (if any)
import torch
gpu_mem = torch.cuda.get_device_properties(0).total_memory
gpu_memory_gb = round(gpu_mem/(2**30))
print(f"Available mem: {gpu_memory_gb} GB")

Available mem: 15 GB


In [None]:
# Pick the Gemma variant and precision that fit the detected GPU memory.
# Every branch assigns both `use_quantization_config` and `model_id`, so the
# summary prints below (and later cells that read these names) cannot raise NameError.
if gpu_memory_gb < 5.1:
    print(f"Your available GPU memory is {gpu_memory_gb}GB, you may not have enough memory to run a Gemma LLM locally without quantization.")
    # Fall back to the smallest model in 4-bit instead of leaving the settings undefined
    use_quantization_config = True
    model_id = "google/gemma-2b-it"
elif gpu_memory_gb < 8.1:
    print(f"GPU memory: {gpu_memory_gb} | Recommended model: Gemma 2B in 4-bit precision.")
    use_quantization_config = True
    model_id = "google/gemma-2b-it"
elif gpu_memory_gb < 19.0:
    print(f"GPU memory: {gpu_memory_gb} | Recommended model: Gemma 2B in float16 or Gemma 7B in 4-bit precision.")
    use_quantization_config = False
    model_id = "google/gemma-2b-it"
else:
    # `gpu_memory_gb` is a rounded integer, so a value of exactly 19 must land here too
    print(f"GPU memory: {gpu_memory_gb} | Recommend model: Gemma 7B in 4-bit or float16 precision.")
    use_quantization_config = False
    model_id = "google/gemma-7b-it"

print(f"use_quantization_config set to: {use_quantization_config}")
print(f"model_id set to: {model_id}")

GPU memory: 15 | Recommended model: Gemma 2B in float16 or Gemma 7B in 4-bit precision.
use_quantization_config set to: False
model_id set to: google/gemma-2b-it


In [None]:
#get the LLM
#the model used for generation is whatever `model_id` the memory check selected
#(gemma-2b-it, instruction tuned, on a 15 GB GPU)

#import required modules
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from transformers.utils import is_flash_attn_2_available

#use flash attention 2 only when it is installed and GPU 0 reports a compute
#capability major version of at least 8; otherwise fall back to sdpa
if is_flash_attn_2_available() and torch.cuda.get_device_capability(0)[0] >= 8:
    attn_implementation = "flash_attention_2"
else:
    attn_implementation = "sdpa" #scaled dot product attention

print(f"[INFO] Using attention implementation: {attn_implementation}")
print(f"[INFO] Using model id: {model_id}")

#honour the decision made in the memory-check cell: build a 4-bit config only
#when quantization was requested, otherwise load plain float16 weights
if use_quantization_config:
    quantization_config = BitsAndBytesConfig(load_in_4bit=True,
                                             bnb_4bit_compute_dtype=torch.float16)
else:
    quantization_config = None

#instantiate tokenizer
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path = model_id)

#instantiate the model
llm_model = AutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path = model_id,
    torch_dtype = torch.float16,
    quantization_config = quantization_config,
    low_cpu_mem_usage = False,
    attn_implementation = attn_implementation)

#bitsandbytes-quantized models are placed on the GPU during loading and do not
#support a later .to() call, so only move the model when it is not quantized
if not use_quantization_config:
    llm_model.to("cuda")

#display the model architecture as the cell output
llm_model


[INFO] Using attention implementation: sdpa
[INFO] Using model id: google/gemma-2b-it


tokenizer_config.json:   0%|          | 0.00/34.2k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/627 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/13.5k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/67.1M [00:00<?, ?B/s]

Gemma's activation function should be approximate GeLU and not exact GeLU.
Changing the activation function to `gelu_pytorch_tanh`.if you want to use the legacy `gelu`, edit the `model.config` to set `hidden_activation=gelu`   instead of `hidden_act`. See https://github.com/huggingface/transformers/pull/29402 for more details.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

GemmaForCausalLM(
  (model): GemmaModel(
    (embed_tokens): Embedding(256000, 2048, padding_idx=0)
    (layers): ModuleList(
      (0-17): 18 x GemmaDecoderLayer(
        (self_attn): GemmaSdpaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=256, bias=False)
          (v_proj): Linear(in_features=2048, out_features=256, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (rotary_emb): GemmaRotaryEmbedding()
        )
        (mlp): GemmaMLP(
          (gate_proj): Linear(in_features=2048, out_features=16384, bias=False)
          (up_proj): Linear(in_features=2048, out_features=16384, bias=False)
          (down_proj): Linear(in_features=16384, out_features=2048, bias=False)
          (act_fn): PytorchGELUTanh()
        )
        (input_layernorm): GemmaRMSNorm()
        (post_attention_layernorm): GemmaRMSNorm()
      )
    )
    (norm): GemmaR

In [None]:
#get the number of parameters
def get_params(model: torch.nn.Module):
    """Return the total number of scalar elements across all parameters of `model`."""
    total = 0
    for tensor in model.parameters():
        total += tensor.numel()
    return total

get_params(llm_model)

2506172416

In [None]:
#get some info about the model
def get_model_size(model: torch.nn.Module):
    """Report the memory taken by a model's parameters and buffers.

    Returns:
        A dict with the exact size in bytes ("model_mem_bytes") and the same
        size in megabytes ("model_mem_mb") and gigabytes ("model_mem_gb"),
        both rounded to two decimal places.
    """
    def _total_bytes(tensors):
        # element count times bytes-per-element, summed over every tensor
        return sum(t.nelement() * t.element_size() for t in tensors)

    size_bytes = _total_bytes(model.parameters()) + _total_bytes(model.buffers())

    return {
        "model_mem_bytes": size_bytes,
        "model_mem_mb": round(size_bytes / (1024**2), 2),
        "model_mem_gb": round(size_bytes / (1024**3), 2),
    }

get_model_size(llm_model)

{'model_mem_bytes': 5012344832, 'model_mem_mb': 4780.14, 'model_mem_gb': 4.67}

### Generating text with LLM

In [None]:
# Question sent to the model on its own, without any retrieved context
input_text = "What is the meaning of christian religous education?"
print(f"Input text: \n {input_text}")

# A single user turn: the conversation format the chat template consumes
dialogue_template = [{"role": "user", "content": input_text}]

# Render the conversation to a prompt string (no tokenization yet) and append
# the generation prompt so the model's turn is opened
prompt = tokenizer.apply_chat_template(conversation=dialogue_template,
                                       tokenize=False,
                                       add_generation_prompt=True)

print(f"\nPrompt (formatted):\n {prompt}")

Input text: 
 What is the meaning of christian religous education?

Prompt (formatted):
 <bos><start_of_turn>user
What is the meaning of christian religous education?<end_of_turn>
<start_of_turn>model



In [None]:
#tokenize the input text and send it to GPU
input_ids = tokenizer(
    prompt, return_tensors="pt").to("cuda")
print(f"Model input (tokenized):\n {input_ids}\n")

#generate outputs passed on the tokenized input
outputs = llm_model.generate(
    **input_ids, max_new_tokens = 256)
print(f"Model output (tokens):\n{outputs[0]}n")

Model input (tokenized):
 {'input_ids': tensor([[     2,      2,    106,   1645,    108,   1841,    603,    573,   6996,
            576,  48234,   5919,    819,   5404, 235336,    107,    108,    106,
           2516,    108]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]],
       device='cuda:0')}

Model output (tokens):
tensor([     2,      2,    106,   1645,    108,   1841,    603,    573,   6996,
           576,  48234,   5919,    819,   5404, 235336,    107,    108,    106,
          2516,    108,  29098,  11527,   5404,    603,    476,  17540,   5688,
           577,  93189,   9278,   1105,  32136, 235269,   3359,   1277,   4281,
        235269,  65806, 235269,  29013, 235269,    578,  12317, 235265,   1165,
         22346,    577,  29181,    476,   5271,   8377,    578,  29379,    604,
         32136, 235269,  30864,   9278,    577,   1501,  15335,  12013,   1105,
          1024,   8278, 235265,    109,    688,   2469,

In [None]:
outputs_decoded = tokenizer.decode(outputs[0])
print(f"Model output (decoded):\n{outputs_decoded}\n")

Model output (decoded):
<bos><bos><start_of_turn>user
What is the meaning of christian religous education?<end_of_turn>
<start_of_turn>model
Christian religious education is a comprehensive approach to educating individuals about Christianity, including its history, doctrines, beliefs, and practices. It aims to foster a deep understanding and appreciation for Christianity, encouraging individuals to make informed decisions about their faith.

**Key elements of Christian religious education include:**

* **Historical perspective:** Exploring the origins and development of Christianity, including the life and teachings of Jesus Christ, the early church, and the development of Christian theology.
* **Biblical study:** Reading and interpreting the Bible, including the Old and New Testaments, to gain a deeper understanding of its meaning and message.
* **Theology:** Exploring Christian doctrines, beliefs, and practices, such as salvation, grace, sin, and the nature of God.
* **Ethics and Ch

In [None]:
print(f"Input text: {input_text}\n")
print(f"Output text:\n{outputs_decoded.replace(prompt, '').replace('<bos>','').replace('<eos>', '')}")

Input text: What is the meaning of christian religous education?

Output text:
Christian religious education is a comprehensive approach to educating individuals about Christianity, including its history, doctrines, beliefs, and practices. It aims to foster a deep understanding and appreciation for Christianity, encouraging individuals to make informed decisions about their faith.

**Key elements of Christian religious education include:**

* **Historical perspective:** Exploring the origins and development of Christianity, including the life and teachings of Jesus Christ, the early church, and the development of Christian theology.
* **Biblical study:** Reading and interpreting the Bible, including the Old and New Testaments, to gain a deeper understanding of its meaning and message.
* **Theology:** Exploring Christian doctrines, beliefs, and practices, such as salvation, grace, sin, and the nature of God.
* **Ethics and Christian living:** Developing a moral framework based on Christ

### Response augmentation
Augment the model's response with retrieved context

In [None]:
#create questions for testing the retrieval and generation steps
questions = [
    "Outline six categories of prophets",
    "Outline reasons why the bible is referred to as the word of God",
    "Give reasons for naming children"
]

In [None]:
#check if our retrieve function works with our list of queries
import random
query = random.choice(questions)

print(f"Query: {query}")

#get just scores and indices of top related results
scores, indices = retrieve_resources(query = query,
                                    embeddings = embeddings)
scores, indices

Query: Outine reasons why the bible is referred to as the word of God


(tensor([0.7982, 0.7791, 0.5643], device='cuda:0'),
 tensor([1023,    2,    4], device='cuda:0'))

#### Augmenting our prompt with context items

In [None]:
def prompt_formatter(query: str,
                    context_items: list[dict]) -> str:
    """Build the chat-formatted RAG prompt for a query and its retrieved context.

    Args:
        query: The user question to answer.
        context_items: Retrieved chunks; each item must provide a "text" entry.

    Returns:
        The prompt string rendered by the global `tokenizer`'s chat template
        (not tokenized, with the generation prompt appended).
    """
    # One "- " bullet per context item, each on its own line
    bullet_texts = (chunk["text"] for chunk in context_items)
    context = "- " + "\n- ".join(bullet_texts)

    # Few-shot instruction template; {context} and {query} are substituted below
    template = """
    Based on the following context items, please answer the query.
Give yourself room to think by extract relevant passages from the context before answering the query.
Don't return the thinking, only return the answer.
Return the answer as points. Make sure the answers are exhaustive while still accurate
Use the following examples as references for the ideal answer style.
\nExample 1:
Query: Give seven responsibilities of the living towards ancestors in African Traditional Communities
Answer:
Naming children after them
Pouring libation for them
Taking care of their graveyards
Making sacrifices to honour them
Consulting/ communicating to them in times of need
Inviting/ involving them in ceremonies
Invoking their names during prayers
Transmitting their wishes/visions
By holding commermoration ceremonies for them
Managing their property wisely
Building shrines for them
Teaching children about them
\nExample 2:
Query: Give seven similarities between Jewish and traditional African practice of circumcision
Answer:
In both cases, it promotes one into full membership of the community.
In both cases, it is a mark of identification of a person to a particular community.
In both cases, it is carried out on male children.
In both cases, circumcision has a religious significance.
In both cases, special people/ religious leaders/heads of the community carry out the operations.
In both cases, it unites the members with the ancestors.
In both cases, members receive new names.
In both cases, the rite is carried on from generation to generation/ is compulsory/ whoever fails to observe it is considered an outcast.
In both cases, the ritual is a communal affair.
In both cases, it involves the cutting of the foreskin.
\nExample 3:
Query: State six similarites between the First and the second account of creation
Answer:
In both, God is portrayed as the only sole creator.
In both, man is portrayed as 2 special creatures; man was created in God's image and likeness, and there was nothing else created in that way
Both outline the creation of the living and non-living things, i.e., heaven, man, plants, and animals, etc.
In both, Man shares in with God. God breathed e!l into man' s nostrils and created him in his own image.
In both cases, Man is given special privileges and responsibilities, and is to multiply and fill the earth.
In both stories, God existed before creation.
In both mankind is created into full sexuality (male and female).
\nNow use the following context items to answer the user query:
{context}
\nRelevant passages: <extract relevant passages from the context here>
User query: {query}
Answer:
"""
    user_message = template.format(context=context, query=query)

    # Send the filled template as a single user turn through the chat template
    return tokenizer.apply_chat_template(
        conversation=[{"role": "user", "content": user_message}],
        tokenize=False,
        add_generation_prompt=True)

In [None]:
query = random.choice(questions)
print(f"Query: {query}")

#get relevant resources
scores, indices = retrieve_resources(query=query,
                                    embeddings = embeddings)

#create a list of context items
context_items = [chunks_text[i] for i in indices]

#format prompt with context items
prompt = prompt_formatter(query=query,
                         context_items = context_items)

print(prompt)

Query: Outline six categories of prophets
<bos><start_of_turn>user
Based on the following context items, please answer the query.
Give yourself room to think by extract relevant passages from the context before answering the query.
Don't return the thinking, only return the answer.
Return the answer as points. Make sure the answers are exhaustive while still accurate
Use the following examples as references for the ideal answer style.

Example 1:
Query: Give seven responsibilities of the living towards ancestors in African Traditional Communities
Answer:
Naming children after them
Pouring libation for them
Taking care of their graveyards
Making sacrifices to honour them
Consulting/ communicating to them in times of need
Inviting/ involving them in ceremonies
Invoking their names during prayers
Transmitting their wishes/visions
By holding commermoration ceremonies for them
Managing their property wisely
Building shrines for them
Teaching children about them

Example 2:
Query: Give seven

In [None]:
#tokenize and pass it straight to our LLM
input_ids = tokenizer(prompt, return_tensors="pt").to("cuda")

#generate an output of tokens
outputs = llm_model.generate(**input_ids,
                            temperature = 0.6,
                            do_sample=True,
                            max_new_tokens=256)

#turn the output tokens into human readable text
output_text = tokenizer.decode(outputs[0])

print(f"Query: {query}")
print(f"RAG answer: \n {output_text.replace(prompt, '').replace('<bos>', '').replace('<eos>', '')}")

Query: Outline six categories of prophets
RAG answer: 
 The context does not provide relevant passages about six categories of prophets, so I cannot answer this query from the provided context.


In [None]:
def ask(query,
       temperature = 0.4,
       max_new_tokens = 512,
       format_answer_text=True,
       return_answer_only = False):
    """Answer `query` with retrieval-augmented generation.

    Retrieves the top matching chunks, formats them into a prompt, samples a
    completion from the global `llm_model` and decodes it.

    Args:
        query: The question to answer.
        temperature: Sampling temperature passed to `generate`.
        max_new_tokens: Maximum number of tokens to generate.
        format_answer_text: When True, strip the prompt, the <bos>/<eos>
            markers and the model's "Sure, here is the answer..." preamble
            from the decoded text.
        return_answer_only: When True, return only the answer text.

    Returns:
        The answer text, or a tuple `(answer_text, context_items)` where each
        context item is a copy of the retrieved chunk with an added "score".
    """
    import re

    #get just the scores and indices of the top related results
    scores, indices = retrieve_resources(query = query,
                                        embeddings = embeddings)

    #copy each retrieved chunk and attach its score to the copy, so the shared
    #`chunks_text` entries are not modified by asking a question
    context_items = [{**chunks_text[index], "score": score.cpu()}
                     for score, index in zip(scores, indices)]

    #format the prompt with context items
    prompt = prompt_formatter(query=query,
                             context_items=context_items)

    #tokenize the prompt; the chat template already added <bos>, so the
    #tokenizer must not add a second one. Tensors go to the model's device.
    input_ids = tokenizer(prompt,
                          return_tensors="pt",
                          add_special_tokens=False).to(llm_model.device)

    #generate an output of tokens
    outputs = llm_model.generate(**input_ids,
                          temperature = temperature,
                          do_sample = True,
                          max_new_tokens=max_new_tokens)

    #turn the output to human readable text
    output_text = tokenizer.decode(outputs[0])

    if format_answer_text:
        output_text = output_text.replace(prompt, '').replace('<bos>', '').replace('<eos>', '')
        #the model words its preamble in more than one way ("here is" / "here's",
        #"user query" / "user's query"), so match the variants with a pattern
        output_text = re.sub(r"Sure, here(?:'s| is) the answer to the user(?:'s)? query:\s*",
                             "",
                             output_text)

    #only return the answer without the context items
    if return_answer_only:
        return output_text

    return output_text, context_items

In [None]:
#test the function
query = "Outline categories of true prophets"
print(f"Question: {query}")

#answer query with context
answer= ask(query=query,
            temperature=0.4,
            max_new_tokens = 512,
            format_answer_text=True,
            return_answer_only = True)

print(f"Answer:\n{answer}")
print("\n\n\n\n")
#print(f"Context items: {context_items}")

Question: Outline categories of true prophets
Answer:
Sure, here's the answer to the user's query:

**Categories of True Prophets in the Old Testament**

1. **Major Prophets**: These prophets are called major because their books are long and contain clearly written messages. They bear the names of the writers.
2. **Minor Prophets**: These prophets are short and contain less important messages than those in major books. They are collectively referred to as canonical prophets.
3. **Canonical Prophets**: Both the major and minor prophets are together referred to as canonical prophets.
4. **Early Prophets**: They belonged to guilds or schools. They lived together in communities under a chief prophet. Elijah and Elisha are examples.
5. **Cultic Prophets**: They worked side by side with priests and said prayers especially people’s petition to Yahweh.







In [None]:
print("Exam setting")
query = input("Enter a question:")
print(f"Question: {query}")


#answer query with context
answer, context_items = ask(query=query,
            temperature=0.1,
            max_new_tokens = 1024,
            format_answer_text=True,
            return_answer_only = False)

print(f"Answer:\n{answer}")
print("\n\n\n\n")
#print(f"Context items: {context_items}")



Exam setting
Enter a question:Outline seven reasons why prophet Amos condemned idolatry in Israel
Question: Outline seven reasons why prophet Amos condemned idolatry in Israel
Answer:
**Q3. Reasons why Prophet Amos condemned idol worship in Israel**

- It broke the covenant way of life.
- It was against the 10 commandments, which directed the Israelites to worship one God.
- It was a sign of disobedience to God.
- It promoted immorality e.g. temple prostitution.
- It promoted falsehood in the society.
- Religion became commercialized.
- Led to false prophecy.





