### Install library ###

In [None]:
%pip install ollama chromadb langchain_community pypdf fastapi uvicorn

### Import ###

In [3]:
import ollama
import chromadb
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.llms import Ollama

### Initial Ollama model ###

In [17]:
llm = Ollama(model="llama3.2")  # Or other model

### Read PDF ###

In [18]:
folder_path = "data"  # Change to your folder path
text = ""
doc = []
for filename in os.listdir(folder_path):
    if filename.lower().endswith(".pdf"):
        pdf_path = os.path.join(folder_path, filename)
        print("reading ",pdf_path)
        reader = PyPDFLoader(pdf_path)
    
        pages = reader.load()
        # doc.extend(reader.load())
        for page in pages:
            text += page.page_content + "\n"
            # print(f"{doc[0].page_content}")
            # print(f"text:",text)



reading  data/iot-leaflet-v10-en.pdf


### Split into smaller chunks ###

In [19]:
# 2. Split into smaller chunks
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
docs = splitter.split_text(text)


### Create Ollama embedding model ###

In [4]:
embeddings = OllamaEmbeddings(model="mxbai-embed-large")  # Or other Ollama embedding model

  embeddings = OllamaEmbeddings(model="mxbai-embed-large")  # Or other Ollama embedding model


### Initialize ChromaDB ###

In [5]:
client = chromadb.PersistentClient(path="./chroma_db")
collection = client.get_or_create_collection(name="pdf_docs")

### Store in ChromaDB ###

In [None]:

vectors = [embeddings.embed_query(chunk) for chunk in docs]
ids = [str(i) for i in range(len(docs))]
metadatas = [
    {"title": "iot-leaflet-v10-en", "type": "pdf", "chunk_index": i}
    for i in range(len(docs))
]

collection.add(
    embeddings=vectors,
    documents=docs,
    metadatas=metadatas,
    ids=ids,
)

In [59]:
#search in chromadb
question = "What is vh-001?"
q_vector = embeddings.embed_query(question)
print(q_vector)
results = collection.query(
    query_embeddings=[q_vector],
    n_results=3
)
print(results)



[0.06161545589566231, 0.27872204780578613, -0.5239893198013306, 0.8287005424499512, -0.7302275896072388, -0.3845903277397156, 0.06885894387960434, 0.2874869406223297, 0.199640154838562, 0.003152400255203247, 0.02832014113664627, 0.788447916507721, -0.11683638393878937, 0.28650185465812683, -0.9586089253425598, 0.42641186714172363, 0.3532472848892212, -0.13316896557807922, -1.0392723083496094, 0.28765052556991577, -0.6271764039993286, -0.026553090661764145, -1.0619559288024902, -0.6783238649368286, -0.25833630561828613, 0.3143206238746643, -0.4371732175350189, -0.6914133429527283, 0.7145335674285889, 0.9485552310943604, 0.1778549998998642, -0.184513658285141, 0.339548796415329, -0.3821694850921631, 0.19049949944019318, -0.44373393058776855, 0.5176495313644409, -0.7122499942779541, -0.5138477683067322, -0.19765037298202515, -0.09576530009508133, 0.8037701845169067, 0.7702277898788452, -1.109739899635315, -0.6688625812530518, 0.3161863088607788, 0.16511079668998718, -0.3203842043876648, -

### Log collect database ###

In [51]:
# List all collections
collections = client.list_collections()

# Print collection names
for col in collections:
    print(col.name)

pdf_docs


### Delete Collection database ###

In [42]:
client.delete_collection(name="pdf_docs")

In [6]:
all_data = collection.get()

print(all_data)




### Query data from database ###

In [None]:


# 1. User question
question = "what is vh-002?"

# 2. Embed question
q_vector = embeddings.embed_query(question)

# 3. Search in ChromaDB
results = collection.query(
    query_embeddings=[q_vector],
    n_results=5
)

# print("search in chromaDB:",results)

# 4. Combine retrieved chunks
context = results
print(context)


{'ids': [['13', '10', '18']], 'embeddings': None, 'documents': [[None, None, None]], 'uris': None, 'included': ['metadatas', 'documents', 'distances'], 'data': None, 'metadatas': [[None, None, None]], 'distances': [[175.74383544921875, 182.60958862304688, 182.89749145507812]]}


In [None]:

prompt = f"You are aliencheckbot and a question-answering assistant. Answer briefly,short and accurately.using the following context:\n\n{context}\n\nQuestion: {question}.If you don't know the answer, say 'I don't know.'"
answer = llm(prompt)

print("Answer:", answer)

In [42]:
# an example input
input = "who are we at VP.Start"
text = """1. Who are we at VP. Start?
In 2014, a young electrical engineer had some questions. 'What if we had a reliable energy grid in Cambodia?
What if we could solve issues with the grid remotely and immediately? What if we could use our existing
infrastructure, upgrade it affordably?
These questions gave birth to an innovative idea, the SIMA Journey. The engineer gathered a few friends
in a small rented space and they started chasing their dreams, working late nights and weekends and often
packing everything in their car to go to some remote rural locations to test out their ideas. These were the first
steps on the SIMA Journey."""

# generate a response combining the prompt and data we retrieved in step 2
output = ollama.generate(
  model="llama3.2",
  prompt=f"Using this data: {text}. Respond to this prompt: {input}"
)

print(output['response'])

Based on the provided data, it appears that at VP.Start, we are a group of innovators and engineers who came together with a shared vision to create a reliable energy grid in Cambodia. Our mission is to address issues with the existing grid, solve problems remotely, and upgrade our infrastructure in an affordable manner.

From what I understand, our team was founded by a young electrical engineer in 2014, who was driven by questions about how we could improve the energy infrastructure in Cambodia. Since then, we've grown into a dedicated group of like-minded individuals who are passionate about using technology to make a positive impact on our community.

We're currently based in a rented space and have been working tirelessly to bring our vision to life, often putting in long hours and traveling to remote rural locations to test out our ideas. Our journey, dubbed the "SIMA Journey", has just begun, and we're excited to see where it takes us!
