In [1]:
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.llms.lmstudio import LMStudio
import json
import phoenix as px
from llama_index.core import (
    Settings,
    set_global_handler,
)
import nest_asyncio
from phoenix.evals import (
    HallucinationEvaluator,
    OpenAIModel,
    QAEvaluator,
    RelevanceEvaluator,
    run_evals,
)
from phoenix.session.evaluation import (
    get_qa_with_reference,
    get_retrieved_documents,
)
from phoenix.trace import DocumentEvaluations, SpanEvaluations
from tqdm import tqdm

nest_asyncio.apply()

In [2]:
session = px.launch_app()
set_global_handler("arize_phoenix")

🌍 To view the Phoenix app in your browser, visit http://localhost:6006/
📖 For more information on how to use Phoenix, check out https://docs.arize.com/phoenix


In [4]:
llm = Ollama(model="llama3.1:8b", request_timeout=60.0)
embedder = OllamaEmbedding(
    model_name="nomic-embed-text",
)

Settings.llm = llm
Settings.embed_model = embedder

In [5]:
llm.metadata.is_function_calling_model = True

In [6]:
llm.metadata

LLMMetadata(context_window=3900, num_output=256, is_chat_model=True, is_function_calling_model=True, model_name='llama3.1:8b', system_role=<MessageRole.SYSTEM: 'system'>)

In [7]:
llm.complete("hello")

CompletionResponse(text='Hello! How can I assist you today?', additional_kwargs={'tool_calls': []}, raw={'model': 'llama3.1:8b', 'created_at': '2025-01-30T09:25:04.611157Z', 'done': True, 'done_reason': 'stop', 'total_duration': 7996159750, 'load_duration': 829651708, 'prompt_eval_count': 11, 'prompt_eval_duration': 6805000000, 'eval_count': 10, 'eval_duration': 360000000, 'message': Message(role='assistant', content='Hello! How can I assist you today?', images=None, tool_calls=None), 'usage': {'prompt_tokens': 11, 'completion_tokens': 10, 'total_tokens': 21}}, logprobs=None, delta=None)

In [7]:
embedder.get_text_embedding("hello")

[0.42144137620925903,
 -0.13775025308132172,
 -4.1203837394714355,
 -0.3233436644077301,
 0.8000353574752808,
 1.0508289337158203,
 0.2911536395549774,
 -0.06047961860895157,
 -0.3482259511947632,
 -0.9226974248886108,
 -0.21647962927818298,
 1.2151598930358887,
 1.3565224409103394,
 1.343890905380249,
 1.057612419128418,
 -1.2590347528457642,
 0.6766302585601807,
 -1.1090795993804932,
 -0.9175370335578918,
 0.632752001285553,
 0.21995720267295837,
 -1.5684449672698975,
 0.11625935137271881,
 -0.13814355432987213,
 4.047388076782227,
 -0.10346291959285736,
 0.4391018748283386,
 1.9591352939605713,
 0.03767038881778717,
 -0.5290690660476685,
 0.4780785143375397,
 -0.440297394990921,
 0.3601878881454468,
 0.07514747977256775,
 0.6475369930267334,
 -0.44028440117836,
 -0.08981560915708542,
 0.2652130722999573,
 0.3800978362560272,
 0.45303770899772644,
 -0.4143969416618347,
 -0.22600185871124268,
 0.21730023622512817,
 -0.6651782393455505,
 1.207326054573059,
 0.1712249219417572,
 -0.5195

In [8]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

In [9]:
documents = SimpleDirectoryReader(input_files=["data/Phone_and_Internet_Allowance_Policy_abrd.pdf"]).load_data()

In [10]:
index = VectorStoreIndex.from_documents(documents, embed_model=embedder)
query_engine = index.as_query_engine(llm=llm, similarity_top_k=3)

In [11]:
response = query_engine.query("What does the term 'Social Media' refer to in the document?")
print(response)

In a professional setting, social media typically refers to online platforms where individuals or organizations share information, ideas, and experiences with a wide audience. These platforms often enable users to create and manage their own content, engage with others through comments and messages, and build relationships with people they may not know personally.


In [12]:
for source in response.source_nodes:
    print(source, end="\n")

Node ID: baa65c3d-7b9d-4e5b-bfb9-af0276bd2901
Text: EMPLOYEE  COMMUNICATIONS  POLICY    Document  owner:
Reference:    Author:   Communications  and  Human  Resources   Date:
14/08/2024   Approved  By:    Head  Human  Resources    Approval
Date:   14/01/2025   Classification:   Internal   Version:  1.0
Effective  Date:  14/01/2025    Record  of  Change   Name  Designation
Reason  for  Ch...
Score:  0.655

Node ID: 32e97649-cfac-427e-b904-a5b5e242638c
Text: 1.2.5  Members  of  employees  are  to  always  treat  others
with  respect  on  social  media.  If  someone  questions  or
criticizes   the   work   we   do   at   Helium   Health,   responses
should   be   thoughtful   and   kind.   If   the   criticism   is
especially   aggressive   or   inconsiderate,   it   is   recommended
that   ...
Score:  0.646

Node ID: ef8b46a3-3c17-4cde-953d-4a28ca43cbcc
Text:
Score:  0.631



In [13]:
response = query_engine.query("List some social media guidelines for usage?")
print(response)

for source in response.source_nodes:
    print(source, end="\n")

Here are some social media guidelines for usage:

1. Be ambassadors of the company by being respectful and professional online.
2. Employees are responsible for the content they put out or curate on their personal social media accounts, even if they are not affiliated with the company.
3. Avoid posting or sharing inflammatory, violent, or spreading misinformation.
4. Treat others with respect on social media, especially when responding to criticism of the company's work.
5. Refrain from responding to aggressive or inconsiderate comments.
6. Be mindful that personal social media accounts are public spaces and actions taken on them may have repercussions at work.
Node ID: 32e97649-cfac-427e-b904-a5b5e242638c
Text: 1.2.5  Members  of  employees  are  to  always  treat  others
with  respect  on  social  media.  If  someone  questions  or
criticizes   the   work   we   do   at   Helium   Health,   responses
should   be   thoughtful   and   kind.   If   the   criticism   is
especially   aggr

In [14]:
response = query_engine.query("What are professional websites?")
print(response)

for source in response.source_nodes:
    print(source, end="\n")

Websites that provide legitimate online services or resources for job or professional growth.
Node ID: 32e97649-cfac-427e-b904-a5b5e242638c
Text: 1.2.5  Members  of  employees  are  to  always  treat  others
with  respect  on  social  media.  If  someone  questions  or
criticizes   the   work   we   do   at   Helium   Health,   responses
should   be   thoughtful   and   kind.   If   the   criticism   is
especially   aggressive   or   inconsiderate,   it   is   recommended
that   ...
Score:  0.532

Node ID: bb36f5b9-4f31-4214-9ea2-9bee37aa12fe
Text: 4.2.8.  Helium  Health  understands  the  benefits  of  a
diverse  workforce  and  expects  that  every  employee  should
foster   an   understanding   of   others’   differences   in   order
to   create   an   environment   where   those   differences
contribute   to   a   better   organization.    5.  COMPANY  EMAIL
USE  5.1.  Definiti...
Score:  0.517

Node ID: d00f1d5b-d6ac-4428-8f96-2a01dddf33fc
Text: 5.5.  Email  Etiquette   5.5.1.  Al

## PDF Readers

In [15]:
# pip install PyMuPDF

In [16]:
documents[2]

Document(id_='14c9dc1d-296e-47e4-9b06-e108ed79ffde', embedding=None, metadata={'page_label': '3', 'file_name': 'Employee Communication Policy v1.pdf', 'file_path': 'data/Employee Communication Policy v1.pdf', 'file_type': 'application/pdf', 'file_size': 229997, 'creation_date': '2025-01-22', 'last_modified_date': '2025-01-21'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text=" \nThis  Employee  Communication  Policy  (“ Policy ”)  contains  the  following  sections:   1.  SOCIAL  MEDIA  GUIDELINES   2.  PROFESSIONAL  WEBSITES  3.  THOUGHT  LEADERSHIP  PRACTICES  4.  INTERNAL  COMMUNICATION  PLATFORMS   5.  COMPANY  EMAIL  USE   1.  SOCIAL  

In [11]:
from llama_index.readers.file import PyMuPDFReader, PDFReader

In [70]:
# PyMuPDF Reader example
parser = PyMuPDFReader()
file_extractor = {".pdf": parser}
documents = SimpleDirectoryReader(
    input_files=["./data/transport allowance.pdf"], file_extractor=file_extractor, encoding="utf8"
).load_data()

# PDF Reader with `SimpleDirectoryReader`
# parser = PDFReader()
# file_extractor = {".pdf": parser}
# documents = SimpleDirectoryReader(
#     "./data", file_extractor=file_extractor
# ).load_data()

In [75]:
documents[0].text

'One Global Medical Technology Ltd.\nRC: 1264820\nMEMO\nTO: Chief Operating Officer (COO), Finance Team, HR Team\nFROM: Head Human Resources\nDATE: 24th August 2023\nSUBJECT: Lagos Team Transport Allowance (Update on Travel Policy for Lagos Team Members)\nThis memo is being developed in response to the recent hike in transportation and petrol costs in Nigeria\nand is to apply to Helium Team Members who operate hybrid workdays of a minimum of 2 days in the\noffice. As at the time of this memo, this will apply to Lagos Team members only who are not receiving any\nkind of transport allowance. This memo is effective on the date stated, transport costs to the office within\nLagos will be subsidised by the Company at a monthly flat fee rate.\nPolicy Statement.\n●\nHelium Health will pay a Transport Allowance as a subsidised allowance for Lagos based employees\nonly, who are not receiving any kind of transport allowance.\n●\nAn allowance for transportation for employees and for personal use h

In [32]:
import unicodedata

In [76]:
normalized = unicodedata.normalize("NFKC", documents[0].text).strip().replace('\n', '').strip()
normalized

'One Global Medical Technology Ltd.RC: 1264820MEMOTO: Chief Operating Officer (COO), Finance Team, HR TeamFROM: Head Human ResourcesDATE: 24th August 2023SUBJECT: Lagos Team Transport Allowance (Update on Travel Policy for Lagos Team Members)This memo is being developed in response to the recent hike in transportation and petrol costs in Nigeriaand is to apply to Helium Team Members who operate hybrid workdays of a minimum of 2 days in theoffice. As at the time of this memo, this will apply to Lagos Team members only who are not receiving anykind of transport allowance. This memo is effective on the date stated, transport costs to the office withinLagos will be subsidised by the Company at a monthly flat fee rate.Policy Statement.●Helium Health will pay a Transport Allowance as a subsidised allowance for Lagos based employeesonly, who are not receiving any kind of transport allowance.●An allowance for transportation for employees and for personal use has been provided under the“Transpo

In [42]:
import re

In [46]:
re.split(r'[ ]{3,}', normalized)

['3.6. An employee whose device either becomes fully or partially inoperative or is otherwise unusable shall attempt to',
 'repair or replace their device or notify the HR Team within 5 days through any appropriate and available mode',
 'of contact.  4.PROCESSING 4.1. The department head will determine those employees in their department for whom the Phone and / or Internet',
 'Allowance is justified and the level of need for that employee. To request the allowance the department head',
 'will send the list of eligible employees to HR indicating the usage level.',
 '4.2. The employee will receive a Phone Allowance in the form of credit purchase to be paid monthly by the Finance',
 'department directly to the appropriate network providers; and Internet Allowance to be paid to the employee. 4.3. This allowance would come in before the end of the first week of every new month. 4.4. This allowance does not constitute an increase to base pay and will not be included in the calculation of',


In [97]:
from pydrive2.fs import GDriveFileSystem

# replace `root` with ID of a drive or directory and give service account access to it
fs = GDriveFileSystem("1a59en4nkmKnfI7vxO7_CyMEBSmEAeNFM", client_id="",
        client_secret="")

for fnames in fs.walk("1a59en4nkmKnfI7vxO7_CyMEBSmEAeNFM/"):
    print(fnames, end="w")

('1a59en4nkmKnfI7vxO7_CyMEBSmEAeNFM', [], ['Memo_ Lagos Team Transport Allowance abridged.pdf', 'Phone_and_Internet_Allowance_Policy_abrd.pdf', 'Full_Policy_Jan_2021.pdf', 'Contract_Signatory_Authority_Policy.pdf', 'Leave_Handover_Form_shrd.docx', 'Intellectual_Property_and_Copyright_Compliance_Policy_17062022.pdf', 'Guideline_document_on_the_use_of_Generative_AI_clean_28072023.pdf', 'Helium_Health_Employee_Handbook_V1.0_20210825 (2).pdf', 'Full_Policy_updated_Aug_2023 (2).pdf', 'Memo__Meeting_and_Email_Guidelines_2023.pdf', 'HH_ISMS0502_POL_Information_Security_Management_System_Policy_V3.0_20240710_FIN (1).docx', 'HH_ISMSA.5.1_POL_Policies_for_Information_Security_V3.0_20240820_FIN (1).docx', 'Org Structure shared Nov 2024.pdf', 'Employee Communication Policy v1.pdf'])w

In [112]:
from llama_index.readers.file import DocxReader

In [117]:
reader = SimpleDirectoryReader(
    input_files=['1a59en4nkmKnfI7vxO7_CyMEBSmEAeNFM/Leave_Handover_Form_shrd.docx'],
    fs=fs,
    file_extractor={".pdf": DocxReader()}
    # recursive=True,  # recursively searches all subdirectories
)

In [118]:
docs = reader.load_data(fs=fs)

Failed to load file 1a59en4nkmKnfI7vxO7_CyMEBSmEAeNFM/Leave_Handover_Form_shrd.docx with error: File is not a zip file. Skipping...


In [111]:
docs[0]

Document(id_='ada6e74d-2c4a-4e20-889b-4453198de206', embedding=None, metadata={'page_label': '1', 'file_name': 'Memo_ Lagos Team Transport Allowance abridged.pdf', 'file_path': '1a59en4nkmKnfI7vxO7_CyMEBSmEAeNFM/Memo_ Lagos Team Transport Allowance abridged.pdf', 'file_type': 'application/pdf', 'file_size': 109667}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='OneGlobal Medical TechnologyLtd.RC:1264820\nMEMO\nTO: Chief OperatingOfficer(COO), FinanceTeam, HRTeamFROM: HeadHumanResourcesDATE: 24thAugust2023SUBJECT: LagosTeamTransportAllowance(UpdateonTravel PolicyforLagosTeamMembers)\nThis memois being developedinresponse tothe recent hike

### Clean Text 

In [47]:
def clean_text(text):
    # normalize text for unicode
    clean_text = unicodedata.normalize("NFKD", text).strip()
    clean_text = clean_text.replace('\n', '')
    clean_text = " ".join(re.split(r'[ ]{3,}', normalized))
    return clean_text


In [48]:
clean_text(documents[2].text)

'3.6. An employee whose device either becomes fully or partially inoperative or is otherwise unusable shall attempt to repair or replace their device or notify the HR Team within 5 days through any appropriate and available mode of contact.  4.PROCESSING 4.1. The department head will determine those employees in their department for whom the Phone and / or Internet Allowance is justified and the level of need for that employee. To request the allowance the department head will send the list of eligible employees to HR indicating the usage level. 4.2. The employee will receive a Phone Allowance in the form of credit purchase to be paid monthly by the Finance department directly to the appropriate network providers; and Internet Allowance to be paid to the employee. 4.3. This allowance would come in before the end of the first week of every new month. 4.4. This allowance does not constitute an increase to base pay and will not be included in the calculation of percentage increases to bas

## Document Splitters

In [20]:
from llama_index.core.node_parser import SentenceSplitter

splitter = SentenceSplitter(
    chunk_size=256,
    chunk_overlap=10,
)

print(f"Len documents: ", len(documents))
nodes = splitter.get_nodes_from_documents(documents)
print(f"Len nodes: ", len(nodes))

Len documents:  7
Len nodes:  18


In [21]:
print(nodes[4])

Node ID: 858eb49e-695b-42d6-b42b-8669725369d8
Text: 1.2.4​ Employees' personal social media accounts are online safe
zones and are not policed by Helium Health. It is  however the duty of
the employee to remember that these are very public spaces and actions
taken on these  spaces may have repercussions at Helium Health if they
are considered to be egregious.  1.2.5​ Members of employees are to
a...


In [22]:
index = VectorStoreIndex(nodes=nodes, embed_model=embedder)
query_engine = index.as_query_engine(llm=llm, similarity_top_k=5)

In [23]:
response = query_engine.query("What does the term 'Social Media' refer to in the document?")
print(response, end="\n")

for source in response.source_nodes:
    print(source, end="\n")

The term "Social Media" refers to online platforms which aid interactions among people, including but not limited to X (formerly Twitter), Facebook, Tiktok, Snapchat, or any other platform by which individuals engage for the purpose(s) of creating or curating content.
Node ID: 9193f6d4-133b-431f-afdb-c513aefb9ffa
Text: This Employee Communication Policy (“Policy”) contains the
following sections:    1.​ SOCIAL MEDIA GUIDELINES   2.​ PROFESSIONAL
WEBSITES  3.​ THOUGHT LEADERSHIP PRACTICES  4.​ INTERNAL COMMUNICATION
PLATFORMS   5.​ COMPANY EMAIL USE    1.​ SOCIAL MEDIA GUIDELINES
1.1.​ Definitions  1.1.1.​ The usage of the term “Social Media” now
and hitherto...
Score:  0.748

Node ID: b42b40ac-b7a7-41d5-b38b-5c3facd82e14
Text: If  you provide us with any contact information for customer
service purposes, it will only be used to  manage your enquiry and
will not be used for any other reason.”  OR  “Any personal data shared
in public by the user on social platforms is shared at their own

In [24]:
response = query_engine.query("List some social media guidelines for usage?")
print(response, end="\n\n")

for source in response.source_nodes:
    print(source, end="\n")

The company encourages members of the team to be ambassadors of the company on social media platforms. When engaging online, employees are viewed by customers, partners, and other external parties, placing an even greater responsibility on them.

Employees are responsible for the content they put out or curate on these platforms, and are prohibited from posting or sharing anything that may be considered inflammatory, inciting of violence (in any form) or spreading of misinformation.

Node ID: 9193f6d4-133b-431f-afdb-c513aefb9ffa
Text: This Employee Communication Policy (“Policy”) contains the
following sections:    1.​ SOCIAL MEDIA GUIDELINES   2.​ PROFESSIONAL
WEBSITES  3.​ THOUGHT LEADERSHIP PRACTICES  4.​ INTERNAL COMMUNICATION
PLATFORMS   5.​ COMPANY EMAIL USE    1.​ SOCIAL MEDIA GUIDELINES
1.1.​ Definitions  1.1.1.​ The usage of the term “Social Media” now
and hitherto...
Score:  0.768

Node ID: b42b40ac-b7a7-41d5-b38b-5c3facd82e14
Text: If  you provide us with any contact informa

In [25]:
response = query_engine.query("What are professional websites?")
print(response, end="\n\n")

for source in response.source_nodes:
    print(source, end="\n")

Professional networking platforms such as LinkedIn, Angel List, Crunchbase, and others that allow individuals to share or curate content related to their work or professional progress.

Node ID: b42b40ac-b7a7-41d5-b38b-5c3facd82e14
Text: If  you provide us with any contact information for customer
service purposes, it will only be used to  manage your enquiry and
will not be used for any other reason.”  OR  “Any personal data shared
in public by the user on social platforms is shared at their own
risk.”    1.3.2​ The Admin(s) should delete (where possible) any
personal data that...
Score:  0.662

Node ID: 9193f6d4-133b-431f-afdb-c513aefb9ffa
Text: This Employee Communication Policy (“Policy”) contains the
following sections:    1.​ SOCIAL MEDIA GUIDELINES   2.​ PROFESSIONAL
WEBSITES  3.​ THOUGHT LEADERSHIP PRACTICES  4.​ INTERNAL COMMUNICATION
PLATFORMS   5.​ COMPANY EMAIL USE    1.​ SOCIAL MEDIA GUIDELINES
1.1.​ Definitions  1.1.1.​ The usage of the term “Social Media” now
and hithert

In [26]:
from llama_index.core.node_parser import SemanticSplitterNodeParser

splitter = SemanticSplitterNodeParser(
    buffer_size=1, breakpoint_percentile_threshold=95, embed_model=embedder
)

print(f"Len documents: ", len(documents))
nodes = splitter.get_nodes_from_documents(documents)
print(f"Len nodes: ", len(nodes))

Len documents:  7
Len nodes:  12


In [27]:
documents[2]

Document(id_='3f7f2aea-22c3-425f-a9fb-7cf68d4af628', embedding=None, metadata={'file_path': '/Users/sasuusen/Projects/Helium/lola/notebooks/data/Employee Communication Policy v1.pdf', 'file_name': 'Employee Communication Policy v1.pdf', 'file_type': 'application/pdf', 'file_size': 229997, 'creation_date': '2025-01-22', 'last_modified_date': '2025-01-21', 'total_pages': 7, 'source': '3'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text=" \nThis Employee Communication Policy (“Policy”) contains the following sections: \n \n1.\u200b\nSOCIAL MEDIA GUIDELINES  \n2.\u200b\nPROFESSIONAL WEBSITES \n3.\u200b\nTHOUGHT LEADERSHIP PRACTICES \n4.\u200b\

In [28]:
nodes[3]

TextNode(id_='b7430188-5e5e-4de6-85bc-2b68c4aece98', embedding=None, metadata={'file_path': '/Users/sasuusen/Projects/Helium/lola/notebooks/data/Employee Communication Policy v1.pdf', 'file_name': 'Employee Communication Policy v1.pdf', 'file_type': 'application/pdf', 'file_size': 229997, 'creation_date': '2025-01-22', 'last_modified_date': '2025-01-21', 'total_pages': 7, 'source': '3'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='3f7f2aea-22c3-425f-a9fb-7cf68d4af628', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'file_path': '/Users/sasuusen/Projects/Helium/lola/notebooks/data/Employee Communication Policy v1.pdf', 'file_name': 'Employee Communication Policy v1.pdf', 'file_type': 'application/pdf', 

In [29]:
index = VectorStoreIndex(nodes=nodes, embed_model=embedder)
query_engine = index.as_query_engine(llm=llm, similarity_top_k=5)

In [30]:
response = query_engine.query("What can i post on my personal twitter?")
print(response, end="\n")

for source in response.source_nodes:
    print(source, end="\n")

You should refrain from posting anything that may be considered inflammatory, inciting of violence, spreading of misinformation, or lacking in basic human decency.
Node ID: 13c4b871-6e93-4c9c-a478-5fab39d12db4
Text: 1.2.8​ Employees are not to share, reshare or endorse any news
about Helium Health on their social media without  confirming its
authenticity from line managers or the members of the Communications
team as they may be  false or unconfirmed.    1.3​Data Protection
1.3.1​ Where a social media user expresses their intention to share
their personal...
Score:  0.611

Node ID: ba19a985-ce17-48a0-a19a-57c882b5c000
Text: This Employee Communication Policy (“Policy”) contains the
following sections:    1.​ SOCIAL MEDIA GUIDELINES   2.​ PROFESSIONAL
WEBSITES  3.​ THOUGHT LEADERSHIP PRACTICES  4.​ INTERNAL COMMUNICATION
PLATFORMS   5.​ COMPANY EMAIL USE    1.​ SOCIAL MEDIA GUIDELINES
1.1.​ Definitions  1.1.1.​ The usage of the term “Social Media” now
and hitherto...
Score:  0.608

Nod

In [33]:
response.response

'You should refrain from posting anything that may be considered inflammatory, inciting of violence, spreading of misinformation, or lacking in basic human decency.'

## 

In [119]:
from llama_index.storage.docstore.redis import RedisDocumentStore
from llama_index.vector_stores.redis import RedisVectorStore
from llama_index.storage.index_store.redis import RedisIndexStore
from redisvl.schema import IndexSchema
from llama_index.core.storage import StorageContext

In [120]:
REDIS_HOST = "localhost"
REDIS_PORT = 6379
LLM = Ollama(model="llama3-groq-tool-use", request_timeout=60.0)
EMBED_MODEL = OllamaEmbedding(
    model_name="nomic-embed-text",
)
DOC_STORE = RedisDocumentStore.from_host_and_port(
    REDIS_HOST, REDIS_PORT, namespace="lola_document_store"
)
VECTOR_STORE = RedisVectorStore(
    schema=IndexSchema.from_dict(dict(json.load(open("../custom_redis_vector_schema.json", "r")))),
    redis_url=f"redis://{REDIS_HOST}:{REDIS_PORT}",
)
INDEX_STORE = RedisIndexStore.from_host_and_port(
    host=REDIS_HOST, port=REDIS_PORT, namespace="lola_index"
)
STORAGE_CONTEXT = StorageContext.from_defaults(
    index_store=INDEX_STORE,
    docstore=DOC_STORE,
)

14:46:08 redisvl.index.index INFO   Index already exists, not overwriting.


In [150]:
from llama_index.core import load_index_from_storage, load_indices_from_storage

In [151]:
vector_index = load_indices_from_storage(
    STORAGE_CONTEXT
)

In [160]:
vector_index[2].summary

'None'

In [161]:
vector_engine = vector_index[1].as_retriever(similarity_top_k=3)

In [162]:
await vector_engine.aretrieve("phone and internet policy?")

[]

## Evaluation

In [18]:
from llama_index.llms.lmstudio import LMStudio

In [19]:
from phoenix.evals import LiteLLMModel
from litellm import completion

* 'fields' has been removed


In [20]:
import os

os.environ['LM_STUDIO_API_BASE'] = "http://127.0.0.1:1234/v1"
os.environ['LM_STUDIO_API_KEY'] = "sk-123"

In [21]:
eval_model = LiteLLMModel(model="lm_studio/phi4-latest", _verbose=True)

In [22]:
response = eval_model("Hey there, what is 2+2?")
print(str(response))

2 + 2 equals 4. If you have any more questions or need further assistance, feel free to ask!


In [23]:
queries_df = get_qa_with_reference(px.Client())
retrieved_documents_df = get_retrieved_documents(px.Client())

In [24]:
queries_df

Unnamed: 0_level_0,input,output,reference
context.span_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
78c28db07ff9886c,What does the term 'Social Media' refer to in ...,"In the document, ""social media"" refers to onli...",EMPLOYEE COMMUNICATIONS POLICY Document ...
2afccf2582b4cae4,List some social media guidelines for usage?,The organization encourages team members to ac...,1.2.5 Members of employees are to always...
b886d4efd22d34fb,What are professional websites?,Professional websites refer to online platform...,1.2.5 Members of employees are to always...
35fab9bee5eb6300,What does the term 'Social Media' refer to in ...,"The term ""Social Media"" in the document refers...",Definitions 1.1.1. The usage of the term...
3354bc4371077c9f,List some social media guidelines for usage?,Employees are encouraged to act as ambassadors...,Definitions 1.1.1. The usage of the term...
1e5c387c5d636d65,What are professional websites?,Professional websites refer to platforms desig...,Definitions 2.1.1. The usage of the term...


In [25]:
retrieved_documents_df

Unnamed: 0_level_0,Unnamed: 1_level_0,context.trace_id,input,reference,document_score
context.span_id,document_position,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
530af4522d6d8273,0,d51ccb08c0fedf13cb7faadc0ef8709b,What does the term 'Social Media' refer to in ...,EMPLOYEE COMMUNICATIONS POLICY Document ...,0.655043
530af4522d6d8273,1,d51ccb08c0fedf13cb7faadc0ef8709b,What does the term 'Social Media' refer to in ...,1.2.5 Members of employees are to always...,0.645901
530af4522d6d8273,2,d51ccb08c0fedf13cb7faadc0ef8709b,What does the term 'Social Media' refer to in ...,,0.630648
fbefcb4215e8cdc8,0,66a9aafd70cbf0ad6fbedfc2ac36a95b,List some social media guidelines for usage?,1.2.5 Members of employees are to always...,0.696991
fbefcb4215e8cdc8,1,66a9aafd70cbf0ad6fbedfc2ac36a95b,List some social media guidelines for usage?,4.2.8. Helium Health understands the bene...,0.628873
fbefcb4215e8cdc8,2,66a9aafd70cbf0ad6fbedfc2ac36a95b,List some social media guidelines for usage?,This Employee Communication Policy (“ Poli...,0.61594
ab83ed62670696ff,0,9d6a962b94adafa8b989b7b5939b443c,What are professional websites?,1.2.5 Members of employees are to always...,0.53155
ab83ed62670696ff,1,9d6a962b94adafa8b989b7b5939b443c,What are professional websites?,4.2.8. Helium Health understands the bene...,0.516587
ab83ed62670696ff,2,9d6a962b94adafa8b989b7b5939b443c,What are professional websites?,5.5. Email Etiquette 5.5.1. All Helium ...,0.509712
fed4ecdba15db354,0,bf17d1e72b5ff31b1e0d6cf680e10b0f,What does the term 'Social Media' refer to in ...,Definitions 1.1.1. The usage of the term...,0.807887


In [26]:
hallucination_evaluator = HallucinationEvaluator(eval_model)
qa_correctness_evaluator = QAEvaluator(eval_model)
relevance_evaluator = RelevanceEvaluator(eval_model)

In [27]:
%%time
hallucination_eval_df, qa_correctness_eval_df = run_evals(
    dataframe=queries_df,
    evaluators=[hallucination_evaluator, qa_correctness_evaluator],
    provide_explanation=True,
    # concurrency=1
)
relevance_eval_df = run_evals(
    dataframe=retrieved_documents_df,
    evaluators=[relevance_evaluator],
    provide_explanation=True,
    # concurrency=1
)[0]

I0000 00:00:1737543835.687621    9601 fork_posix.cc:75] Other threads are currently calling into gRPC, skipping fork() handlers


run_evals |          | 0/12 (0.0%) | ⏳ 00:00<? | ?it/s


[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m


[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m


[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m

Exception in worker on attempt 1: raised litellm.Timeout: APITimeoutError - Request timed out. Error_str: Request timed out.
Requeuing...
Exception in worker on attempt 1: raised litellm.Timeout: APITimeoutError - Request timed out. Error_str: Request timed out.
Requeuing...
Exception in worker on attempt 

run_evals |          | 0/24 (0.0%) | ⏳ 00:00<? | ?it/s


[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m

Exception in worker on attempt 1: raised litellm.BadRequestError: Lm_studioException - Error code: 400 - {'error': 'The model has crashed without additional information. (Exit code: 11)'}
Requeuing...
CPU times: user 20.5 s, sys: 4.17 s, total: 24.7 s
Wall time: 51min 15s


In [28]:
px.Client().log_evaluations(
    SpanEvaluations(
        eval_name="Hallucination", dataframe=hallucination_eval_df
    ),
    SpanEvaluations(
        eval_name="QA Correctness", dataframe=qa_correctness_eval_df
    ),
    DocumentEvaluations(eval_name="Relevance", dataframe=relevance_eval_df),
)