In [1]:
import pandas as pd
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document 
from sentence_transformers import SentenceTransformer
import chromadb
from typing import List

## Vector Database

### Creating the Vector DB `RUN ONCE`

`Creating sentences out of the CSV`

In [None]:
# NOTE: machine-specific absolute path; point it at your local copy of the
# cleaned medical-network CSV before running this cell.
csv_path = r"C:\Users\dana.alnadi\Documents\Visual Studio 2022\Code Snippets\Python\HRChatbot\data\csvs\medical_network_cleaned.csv"

# Read every column as text so phone numbers keep their exact digits (no lost
# leading zeros, no "123.0" floats), and blank out missing cells so they are
# not rendered as the literal string "nan" inside the generated sentences.
df = pd.read_csv(csv_path, dtype=str).fillna("")

documents = []

name_col = "Name"
loc_col = "location"
phone_col = "Phone"
main_cat_col = "Main Category"
subcat_col = "Sub Category"

# Selecting the columns up front fails fast (KeyError) if the CSV header changed.
fields = df[[name_col, loc_col, phone_col, main_cat_col, subcat_col]]

# One descriptive sentence per CSV row; these sentences are what gets embedded.
for record in fields.itertuples(index=False, name=None):
    name, loc, phone, main_cat, subcat = (str(value).strip() for value in record)

    sentence = (
        f"{name} ูู ุฌูุฉ ูู ูุฆุฉ {main_cat} ุถูู {subcat} "
        f"ูููุน ูู ููุทูุฉ {loc} ููููู ุงูุชูุงุตู ุนุจุฑ ุงูุฑูู {phone}."
    )
    documents.append(sentence)

print(f"โ Created {len(documents)} documents with full Arabic details!")

โ Created 4905 documents with full Arabic details!


`Create and save embeddings in Chroma using Sentence Transformer` 

In [None]:
# Load the sentence-transformer model used to embed the documents
model_name = 'Snowflake/snowflake-arctic-embed-l-v2.0'
model = SentenceTransformer(model_name)

# Encode the documents as unit-length vectors. The collection below is created
# without an explicit distance setting, so Chroma's default (L2) applies; on
# normalized vectors L2 ranks neighbours the same way cosine similarity does.
embeddings = model.encode(documents, normalize_embeddings=True)

# Connect to (or create) a local persistent Chroma database
client = chromadb.PersistentClient(path="./my_chroma_db")

# Create or get a collection
collection = client.get_or_create_collection(name="medical_network_collection")

# Write documents + embeddings in batches. `upsert` (instead of `add`) keeps the
# cell idempotent: re-running it overwrites doc_<i> rather than colliding with
# ids that already exist. Batching keeps each request small.
batch_size = 1000
ids = [f"doc_{i}" for i in range(len(documents))]
for start in range(0, len(documents), batch_size):
    end = start + batch_size
    collection.upsert(
        ids=ids[start:end],
        embeddings=embeddings[start:end],
        documents=documents[start:end],
    )

print("โ Embeddings saved successfully to local Chroma database.")


### Loading the VDB

In [3]:
def load_chroma_collection(db_path="./my_chroma_db", collection_name="medical_network_collection"):
    """Open the persistent Chroma database at ``db_path`` and return one collection.

    Args:
        db_path: Directory of the on-disk Chroma database.
        collection_name: Name of the collection to look up.

    Returns:
        Whatever ``client.get_collection`` returns for ``collection_name``.
    """
    chroma_client = chromadb.PersistentClient(path=db_path)
    loaded = chroma_client.get_collection(name=collection_name)
    return loaded

# Re-open the persisted collection and report how many documents it holds
collection = load_chroma_collection("./my_chroma_db", "medical_network_collection")
doc_count = collection.count()
print(f"โ Loaded collection '{collection.name}' with {doc_count} documents.")

โ Loaded collection 'medical_network_collection' with 4905 documents.


In [4]:
# Pull every stored document together with its embedding out of the collection
stored = collection.get(include=["documents", "embeddings"])
documents, document_embeddings = stored["documents"], stored["embeddings"]

`Testing the embeddings`

In [None]:
def embed_queries(queries, model, n_results=10):
    """Embed search queries, look them up in the Chroma collection and print the hits.

    Uses the module-level ``collection`` as the search target.

    Args:
        queries: A single query string or a list of query strings.
        model: Object with a SentenceTransformer-style ``encode`` method.
        n_results: Number of top matches requested per query (default 10).

    Returns:
        The raw result dict returned by ``collection.query``.
    """
    # A bare string would be encoded as one vector but iterated per character
    # when printing, so wrap it into a one-element list.
    if isinstance(queries, str):
        queries = [queries]

    # Encode exactly like the documents were encoded at index time
    # (normalize_embeddings=True) so the reported distances are comparable.
    # NOTE(review): the Snowflake arctic-embed v2.0 model card appears to
    # recommend prompt_name="query" for queries — confirm before enabling.
    query_embeddings = model.encode(queries, normalize_embeddings=True).tolist()

    # Query Chroma collection directly
    results = collection.query(
        query_embeddings=query_embeddings,
        n_results=n_results,  # number of top matches per query
    )

    # Display the results: one header per query, then its ranked matches
    for query, docs, scores in zip(queries, results["documents"], results["distances"]):
        print(f"\n๐ Query: {query}")
        for doc, score in zip(docs, scores):
            print(f"Score: {score:.4f} | {doc}")

    return results

In [None]:
# Load the embedding model used to encode the sample queries
model_name = 'Snowflake/snowflake-arctic-embed-l-v2.0'
model = SentenceTransformer(model_name)

# Sample queries used to sanity-check the vector search
queries = [
    "ุฏูุชูุฑ ุฌูุฏูู ูู ุงููุฑู",
    "ุฏูุชูุฑ ุฃุทูุงู ูู ุงูุฒุฑูุงุก",
]

# Prints the ranked matches; as the last expression, the raw result dict is displayed too
embed_queries(queries, model=model)


๐ Query: ุฏูุชูุฑ ุฌูุฏูู ูู ุงููุฑู
Score: 0.9794 | ูุฑูุฒ ุงููุฑู ููุนูุงุฌ ุงูุทุจูุนู ูู ุฌูุฉ ูู ูุฆุฉ ูุฑุงูุฒ ูุชุฎุตุตุฉ ุถูู ูุฑุงูุฒ ุงูุนูุงุฌ ุงูุทุจูุนู ูููุน ูู ููุทูุฉ ุงููุฑู ููููู ุงูุชูุงุตู ุนุจุฑ ุงูุฑูู 2351447.
Score: 1.0669 | ูุฎุชุจุฑุงุช ุงููุฑู ููุชุญุงููู ุงูุทุจูุฉ ูู ุฌูุฉ ูู ูุฆุฉ ูุฎุชุจุฑุงุช ุถูู ุงููุฎุชุจุฑุงุช ุงููุนุชูุฏุฉ ูููุน ูู ููุทูุฉ ุงููุฑู ููููู ุงูุชูุงุตู ุนุจุฑ ุงูุฑูู 2353545.
Score: 1.1053 | ูุฑูุฒ ุงูุบููุฉ ุงูุทุจู ูู ุฌูุฉ ูู ูุฆุฉ ุฃุทุจุงุก ุถูู ุงูุทุจ ุงูุนุงู ูููุน ูู ููุทูุฉ ุงููุฑู ููููู ุงูุชูุงุตู ุนุจุฑ ุงูุฑูู 0798226322.
Score: 1.1110 | ุฎูุฏูู ุนูุงูู ูู ุฌูุฉ ูู ูุฆุฉ ุฃุทุจุงุก ุถูู ุงูุงูุฑุงุถ ุงูุฏุงุฎููุฉ (ุงูุจุงุทููุฉ) ูููุน ูู ููุทูุฉ ุงููุฑู ููููู ุงูุชูุงุตู ุนุจุฑ ุงูุฑูู 0795880580.
Score: 1.1117 | ููุซ ุนุจูุฏุง

{'ids': [['doc_4686',
   'doc_4566',
   'doc_231',
   'doc_568',
   'doc_2246',
   'doc_1681',
   'doc_4146',
   'doc_1970',
   'doc_471',
   'doc_4142'],
  ['doc_1385',
   'doc_1387',
   'doc_1389',
   'doc_1395',
   'doc_1390',
   'doc_1392',
   'doc_1393',
   'doc_1396',
   'doc_1386',
   'doc_1394']],
 'distances': [[0.9794231057167053,
   1.066909909248352,
   1.105273723602295,
   1.1109833717346191,
   1.1117273569107056,
   1.1210353374481201,
   1.1249890327453613,
   1.1353000402450562,
   1.1382639408111572,
   1.1450440883636475],
  [1.020609974861145,
   1.0285881757736206,
   1.065293550491333,
   1.0667290687561035,
   1.0719636678695679,
   1.0851032733917236,
   1.105173110961914,
   1.1399930715560913,
   1.1540724039077759,
   1.1593173742294312]],
 'metadatas': [[None, None, None, None, None, None, None, None, None, None],
  [None, None, None, None, None, None, None, None, None, None]],
 'embeddings': None,
 'documents': [['ูุฑูุฒ ุงููุฑู ููุนูุงุฌ ุงูุทุจู

## RAG Chain

In [7]:
from langchain_community.llms import Ollama
from langchain.chains import RetrievalQA

In [8]:
# Test the LLM language support
llm = Ollama(model="command-r7b-arabic")

# Use .invoke(): calling the LLM object directly (llm("...")) is deprecated in
# LangChain and emits a deprecation warning.
print(llm.invoke("ูู ุชููู ุงูุนุฑุจููุ"))

  warn_deprecated(


ูุนูุ ุฃูุง ุฃุณุชุทูุน ููู ุงููุบุฉ ุงูุนุฑุจูุฉ. ุฃูุง ูุตูู ููุณุงุนุฏุฉ ุงููุณุชุฎุฏููู ุจุงููุบุงุช ุงููุฎุชููุฉุ ุจูุง ูู ุฐูู ุงููุบุฉ ุงูุนุฑุจูุฉ. ุฅุฐุง ูุงู ูุฏูู ุฃู ุฃุณุฆูุฉ ุฃู ุงุณุชูุณุงุฑุงุช ุจุงููุบุฉ ุงูุนุฑุจูุฉุ ูููููู ูุณุงุนุฏุชู ูุชูุฏูู ุงูุฅุฌุงุจุงุช ุงูููุงุณุจุฉ.


In [12]:
# Query-side embedding function for LangChain. It requests normalized vectors
# because the stored document embeddings were produced with
# normalize_embeddings=True; both sides must be encoded the same way for the
# distances to be meaningful.
embedding_function = HuggingFaceEmbeddings(
    model_name='Snowflake/snowflake-arctic-embed-l-v2.0',
    encode_kwargs={"normalize_embeddings": True},
)

# Attach LangChain's Chroma wrapper to the collection already persisted on disk
vectordb = Chroma(
    collection_name="medical_network_collection",
    persist_directory="./my_chroma_db",
    embedding_function=embedding_function
)

In [None]:
# Retriever over the vector store, asking for the 10 closest documents
retrieval_settings = {"k": 10}
retriever = vectordb.as_retriever(search_kwargs=retrieval_settings)

# RetrievalQA chain wiring the LLM to the retriever; source documents are
# returned alongside the answer.
qa_chain_options = dict(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)
qa_chain = RetrievalQA.from_chain_type(**qa_chain_options)

In [14]:
query = "ุงููุฎุชุจุฑุงุช ุงููุนุชูุฏุฉ"
# Use .invoke(): calling the chain object directly (qa_chain(query)) is
# deprecated in LangChain and emits a deprecation warning.
response = qa_chain.invoke({"query": query})
print("Answer:", response["result"])

  warn_deprecated(


Answer: ููุงู ุงูุนุฏูุฏ ูู ุงููุฎุชุจุฑุงุช ุงูุทุจูุฉ ุงููุนุชูุฏุฉ ูู ูุฎุชูู ุงูููุงุทู ูู ุงูุฃุฑุฏูุ ูุชุถู ูุฐู ุงููุงุฆูุฉ:

1. ุงููุคูุฏุฉ ูููุฎุชุจุฑุงุช ุงูุทุจูุฉ - ููุทูุฉ ูุฑุฌ ุงูุญูุงูุ ุฑูู ุงูุชูุงุตู: 0770041416.
2. ุงุณู ุงููุฎุชุจุฑ - ููุทูุฉ ุงูุนููุงูุ ุฑูู ุงููุงุชู ุบูุฑ ูุชููุฑ.
3. ุงููุฎุชุจุฑุงุช ุงูุทุจูุฉ ุงูุชุฎุตุตูุฉ - ููุทูุฉ ุงูุฒุฑูุงุกุ ุฑูู ุงูุชูุงุตู: 3993446.
4. ุงููุฎุชุจุฑุงุช ุงููุชููุฒุฉ ููุชุญุงููู ุงูุทุจูุฉ - ููุทูุฉ ุฌุจู ุงูุญุณููุ ุฑูู ุงูุชูุงุตู: 5695190.
5. ูุฎุชุจุฑุงุช ุงูุนูููุฉ ุงูุทุจูุฉ - ููุทูุฉ ุงููุฑูุ ุฑูู ุงูุชูุงุตู: 2353901.
6. ูุฎุชุจุฑุงุช ููุณุทูู ููุชุญุงููู ุงูุทุจูุฉ - ููุทูุฉ ุงููุญุฏุงุชุ ุฑูู ุงูุชูุงุตู: 4786906.
7. ูุฎุชุจุฑุงุช ุงูุนูุจุฉ ุงูุทุจูุฉ - ููุทูุฉ ุงูุนูุจุฉุ ุฑูู ุงูุชูุงุตู: 2016396.
8. ุงููุฎุชุจุฑุงุช ุงูุฐููุฉ ู

In [16]:
query = "ุฏูุชูุฑ ุฌูุฏูู ูู ุงููุฑู ู"
# Use .invoke() with the chain's "query" input key: calling the chain object
# directly is deprecated in LangChain. The answer text is under "result".
response = qa_chain.invoke({"query": query})
print(response["result"])


ููุงู ุฑุงุชุจ ุตุจุญู ุงููุงุจูุณู ูู ุฌูุฉ ูู ูุฆุฉ ุฃุทุจุงุก ุถูู ุงูุงูุฑุงุถ ุงูุฌูุฏูุฉ ูุงูุชูุงุณููุฉ ูููุน ูู ููุทูุฉ ุงููุฑู ููููู ุงูุชูุงุตู ุนุจุฑ ุงูุฑูู 0799197232.
