In [1]:
from llama_index.llms.ollama import Ollama
from llama_index.llms.openai import OpenAI
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.llms import ChatMessage, MessageRole
from llama_index.core import ChatPromptTemplate
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Ollama model tag used as the answering LLM.
MODEL_NAME = "phi3:mini"
# Folder (searched recursively) that holds the source documents to index.
DATA_DIR = "Data"

# Read every document under DATA_DIR into memory.
reader = SimpleDirectoryReader(input_dir=DATA_DIR, recursive=True)
docs = reader.load_data()

In [3]:
# Embedding model: BGE base (English, v1.5), loaded through the Hugging Face wrapper.
EMBED_MODEL_NAME = "BAAI/bge-base-en-v1.5"
# Instructions handed to the LLM as its system prompt.
SYSTEM_PROMPT = "You are an AI language model trained to answer questions on a product called - Milli-Q® IQ 7000 Ultrapure Water System. It is a product of M, a Life Science company. You are given the user question and the external knowledge based on product specifications to answer that question. Keep your answers concise and based on facts – do not hallucinate features. If you do not know, respond by saying that you do not know."

# Register the embedding model and the LLM on the global LlamaIndex settings.
Settings.embed_model = HuggingFaceEmbedding(model_name=EMBED_MODEL_NAME)
Settings.llm = Ollama(
    model=MODEL_NAME,
    temperature=0.2,
    request_timeout=360.0,
    system_prompt=SYSTEM_PROMPT,
)

# Build a vector index over the loaded documents and expose it as a
# context-mode chat engine.
index = VectorStoreIndex.from_documents(docs)
rag_application = index.as_chat_engine(
    chat_mode="context",
    verbose=True,
    streaming=True,
)


In [27]:
# An example input to your RAG application.
# The un-suffixed names bound here (`user_input`, `actual_output`,
# `retrieval_context`) are the ones the inspection cells that follow read,
# so the notebook works on a fresh kernel run from top to bottom.
user_input = "What is the resistivity of the water produced by the Milli-Q® IQ 7000?"

response_object = rag_application.chat(user_input)

# Defaults keep both names bound even when the engine returns nothing.
actual_output = None
retrieval_context = []
if response_object is not None:
    # Answer text produced by the LLM.
    actual_output = response_object.response
    # Text of every source node attached to the response.
    retrieval_context = [node.get_content() for node in response_object.source_nodes]

In [17]:
actual_output

'The resistivity of the ultrapure water delivered by the Milli-Q® IQ 7000 system is **18.2 MΩ·cm at 25°C**. This high level of purity ensures that there are no ionic contaminants in the water, which could potentially interfere with sensitive laboratory procedures and experiments.'

In [18]:
retrieval_context

['Water Quality\nThe Milli-Q® IQ 7000 delivers ultrapure water with a resistivity of **18.2 MΩ·cm at 25°C**, ensuring that the water is free from ionic contamination. The **Total Organic Carbon (TOC)** level is kept at **≤ 5 ppb**, which is vital for sensitive applications such as HPLC or mass spectrometry. \r\n\r\nIn terms of microbiological purity, the system ensures that the water contains **less than 0.01 CFU/mL of bacteria**, making it ideal for molecular biology applications. The system also removes **particulates larger than 0.22 µm**, ensuring that the water is clear of any particulate matter. \r\n\r\nFor researchers working with endotoxins, the system, when equipped with appropriate filters, can produce water with endotoxin levels lower than **0.001 EU/mL**. Additionally, the Milli-Q® IQ 7000 can deliver **RNase- and DNase-free water** when used with specialized final filters, making it suitable for sensitive molecular biology and cell culture work.',
 'Milli-Q® IQ 7000 Ultrap

In [19]:
len(retrieval_context)

2

In [16]:
# Scratch check: appending a list to a list of lists adds it as ONE nested
# element (it is not flattened into the outer list).
l = [1, 2]
w = [[1, 3], l]
w

[[1, 3], [1, 2]]

In [22]:
# Wrap the retrieved passages in an outer list, then assemble a one-row
# table for the single example query (question, answer, wrapped context).
rc = [retrieval_context]
df = pd.DataFrame(
    {
        'Question': [user_input],
        'Actual Output': [actual_output],
        'Retreived Context': [rc],
    }
)

In [10]:
# NOTE(review): `s` is not assigned in any cell visible in this notebook, so
# this line depends on leftover kernel state — looks like stray debugging;
# confirm whether it can be removed.
print(s)

None


In [23]:
df

Unnamed: 0,Question,Actual Output,Retreived Context
0,What is the resistivity of the water produced ...,The resistivity of the ultrapure water deliver...,[[Water Quality\nThe Milli-Q® IQ 7000 delivers...


In [28]:
def query_chat_get_response(user_input):
    """Send one question to the chat engine and unpack its reply.

    Args:
        user_input: The question text passed to ``rag_application.chat``.

    Returns:
        A ``(answer, contexts)`` pair: the reply's ``response`` attribute and
        a list with ``get_content()`` of each of its ``source_nodes``.
        ``(None, [])`` when the engine returns ``None``.
    """
    reply = rag_application.chat(user_input)

    # Nothing came back: hand the caller empty placeholders.
    if reply is None:
        return None, []

    answer = reply.response
    contexts = [source.get_content() for source in reply.source_nodes]
    return answer, contexts

In [29]:
# Questions to run through the RAG chat engine.
user_list = ["Does the Milli-Q® IQ 7000 remove endotoxins from water?", "What is the resistivity of the water produced by the Milli-Q® IQ 7000?"]
# Parallel result lists: entry i belongs to user_list[i].
actual_output_list = []
retreival_final_list = []
# The loop variable is named `question`, not `input`, so the built-in
# input() is not shadowed for the rest of the kernel session.
for question in user_list:
    actual_output_str, retreival_list = query_chat_get_response(question)
    actual_output_list.append(actual_output_str)
    retreival_final_list.append(retreival_list)


In [30]:
actual_output_list

['Yes, when properly equipped with specialized filters that are capable of removing endotoxins effectively, the Milli-Q® IQ 7000 Ultrapure Water System can produce ultrapure water wherein the levels of endotoxins have been reduced to less than **0.001 EU/mL**. This is particularly important for laboratory applications that are sensitive to even trace amounts of contaminants, such as molecular biology research and cell culture work, ensuring a high degree of cleanliness in the water used within these environments.',
 'The ultrapure water system known as the Milli-Q® IQ 7000 produces water with a very high level of purity. Specifically, it delivers water that has a resistivity value of **18.2 MΩ·cm at 25°C**. This indicates an exceptionally low presence of ionic impurities in the water, which is essential for applications requiring ultrapure conditions to prevent contamination and interference with sensitive experiments or processes.']

In [31]:
retreival_final_list

[['Water Quality\nThe Milli-Q® IQ 7000 delivers ultrapure water with a resistivity of **18.2 MΩ·cm at 25°C**, ensuring that the water is free from ionic contamination. The **Total Organic Carbon (TOC)** level is kept at **≤ 5 ppb**, which is vital for sensitive applications such as HPLC or mass spectrometry. \r\n\r\nIn terms of microbiological purity, the system ensures that the water contains **less than 0.01 CFU/mL of bacteria**, making it ideal for molecular biology applications. The system also removes **particulates larger than 0.22 µm**, ensuring that the water is clear of any particulate matter. \r\n\r\nFor researchers working with endotoxins, the system, when equipped with appropriate filters, can produce water with endotoxin levels lower than **0.001 EU/mL**. Additionally, the Milli-Q® IQ 7000 can deliver **RNase- and DNase-free water** when used with specialized final filters, making it suitable for sensitive molecular biology and cell culture work.',
  'Milli-Q® IQ 7000 Ultr

In [32]:
# Results table: one row per question, holding the question text, the
# generated answer, and the list of retrieved passages for that question.
df = pd.DataFrame(
    {
        'Question': user_list,
        'Actual Output': actual_output_list,
        'Retreived Context': retreival_final_list,
    }
)

In [36]:
df

Unnamed: 0,Question,Actual Output,Retreived Context
0,Does the Milli-Q® IQ 7000 remove endotoxins fr...,"Yes, when properly equipped with specialized f...",[Water Quality\nThe Milli-Q® IQ 7000 delivers ...
1,What is the resistivity of the water produced ...,The ultrapure water system known as the Milli-...,[Water Quality\nThe Milli-Q® IQ 7000 delivers ...


In [34]:
# Save the results table as CSV in the working directory. `index` is left at
# its pandas default, so the row index is written as the first column.
df.to_csv("Testdataset_output.csv")

In [37]:
# Save the same `df` as an Excel workbook in the working directory.
# NOTE(review): pandas needs an Excel writer engine installed for this —
# confirm it is part of the environment.
df.to_excel("Testdataset_output.xlsx")

In [1]:
# Scratch check: regroup two equal-length lists position by position, so
# r1 collects the first items and r2 the second items.
rc = [1, 2]
fr = [3, 4]

r1, r2 = (list(pair) for pair in zip(rc, fr))

In [2]:
r1

[1, 3]

In [None]:
# An example input to your RAG application
user_input = "What is the resistivity of the water produced by the Milli-Q® IQ 7000?"

# The chat engine returns a response object that contains
# both the output string and the retrieved nodes.
# (Query-engine alternative: rag_application.query(user_input))
response_object = rag_application.chat(user_input)

# Process the response object to get the output string and retrieved nodes.
# Defaults keep both names bound even when the engine returns nothing.
actual_output = None
retrieval_context = []
if response_object is not None:
    actual_output = response_object.response
    retrieval_context = [node.get_content() for node in response_object.source_nodes]


# Fields noted on the response object:
#   sources List[ToolOutput], unformatted_response str, List[NodeWithScore] source nodes

# list.append() requires exactly one argument; the bare `rc.append()` raised
# TypeError. Append the retrieved passages, as the earlier table-building
# cell does.
rc = []
rc.append(retrieval_context)
df = pd.DataFrame()
df['Actual Output'] = [actual_output]

In [3]:
from sentence_transformers import SentenceTransformer, util
# Sentence-embedding model used to score how close two texts are.
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Encode a placeholder generated/reference pair and compare the two vectors
# with cosine similarity.
embeddings1, embeddings2 = (
    model.encode(text) for text in ("Generated sentence", "Reference sentence")
)
cosine_similarity = util.cos_sim(embeddings1, embeddings2)

  from tqdm.autonotebook import tqdm, trange


In [4]:
cosine_similarity

tensor([[0.4525]])

In [5]:
# Repeat the comparison on two sentences that say nearly the same thing.
embeddings1, embeddings2 = map(
    model.encode,
    (
        "A cat is sitting under the bed.",
        "There is a cat sitting under the bed.",
    ),
)
cosine_similarity = util.cos_sim(embeddings1, embeddings2)

In [6]:
cosine_similarity

tensor([[0.9876]])