In [44]:
from langchain.document_loaders.pdf import PyPDFDirectoryLoader

def load_documents():
    document_loader = PyPDFDirectoryLoader('./pdf_data')
    return document_loader.load()

In [45]:
documents = load_documents()
print(documents[0])

page_content='6:
Energy Efficiency 
Program Best Practices 
Energy efficiency programs have been operating successfully in some parts of the country since the late 
1980s. Fr om the exper ience of these successful pr ograms, a number of best practice strategies have 
evolved for making energy efficiency a resource, developing a cost-effective portfolio of energy efficiency pro­
grams for all customer classes, designing and delivering energy efficiency programs that optimize budgets, 
and ensuring that programs deliver results. 
Overview
Cost-effective energy ef ficiency pr ograms have been 
delivered by large and small utilities and thir d-party pro­
gram administrators in some parts of the country since 
the late 1980s. The rationale for utility investment in effi­
ciency programming is that within certain existing mar­
kets for energy-efficient products and services, there are 
barriers that can be over come to ensure that customers 
from all sectors of the economy choose mor e energ

In [46]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema.document import Document

def split_documents(documents : list[Document]):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, 
            chunk_overlap=300,
            length_function=len,
            is_separator_regex=False,
            )
    return text_splitter.split_documents(documents)



In [47]:
chunks = split_documents(documents)
chunks

[Document(metadata={'source': 'pdf_data/three.pdf', 'page': 0}, page_content='6:\nEnergy Efficiency \nProgram Best Practices \nEnergy efficiency programs have been operating successfully in some parts of the country since the late \n1980s. Fr om the exper ience of these successful pr ograms, a number of best practice strategies have \nevolved for making energy efficiency a resource, developing a cost-effective portfolio of energy efficiency pro\xad\ngrams for all customer classes, designing and delivering energy efficiency programs that optimize budgets, \nand ensuring that programs deliver results. \nOverview\nCost-effective energy ef ficiency pr ograms have been \ndelivered by large and small utilities and thir d-party pro\xad\ngram administrators in some parts of the country since \nthe late 1980s. The rationale for utility investment in effi\xad\nciency programming is that within certain existing mar\xad\nkets for energy-efficient products and services, there are \nbarriers that ca

In [48]:
from langchain_community.embeddings.ollama import OllamaEmbeddings
def get_embedding_function():
    embeddings = OllamaEmbeddings(model="nomic-embed-text")
    return embeddings 


In [51]:
from langchain.vectorstores.chroma import Chroma

def add_to_chroma(chunks: list[Document]):
    unique_ids = []
    prev = None
    index = 0
    for chunk in chunks:
        source = chunk.metadata.get("source")
        page = chunk.metadata.get("page")
        curr_id = f"{source}:{page}"
        
        if curr_id == prev:
            index += 1
        else:
            index = 0
        unique_id = f"{curr_id}:{index}"
        unique_ids.append(unique_id)
        prev = curr_id

    db = Chroma(
        persist_directory="./chroma_db", 
        embedding_function=get_embedding_function()
    )
    
    db.add_documents(chunks, ids=unique_ids)
    db.persist()
    print(f"Added {len(chunks)} documents to Chroma and persisted to {"./chroma_db"}")




In [52]:
add_to_chroma(chunks)

Added 640 documents to Chroma and persisted to ./chroma_db


  db.persist()


In [65]:
from llama_index.llms.ollama import Ollama
def query_rag(query: str):
    db = Chroma(
        persist_directory="./chroma_db", 
        embedding_function=get_embedding_function()
    )
    results = db.similarity_search_with_score(query, k=10)
    context = "\n".join([result[0].page_content for result in results])
    return context

llm=Ollama(model="hf.co/bartowski/Llama-3.2-3B-Instruct-GGUF:latest", request_timeout=120.0)

PROMPT_TEMPLATE = """
Answer the question using the following context to help you.

Context:
{context}

Question: {question}

Let's think about this step by step:
1. First, analyze the relevant information from the context
2. Consider the specific requirements
3. Form a detailed recommendation

Now provide your final answer:
"""

context = query_rag("What is the purpose of the building energy simulator?")
response = llm.complete(PROMPT_TEMPLATE.format(context=context, question="My team noticed that the power usage of the building is higher than expected. What should we do to reduce it and we believe the HVAC system is the problem. What should we do?"))
print(response)

Based on the provided context, here's a step-by-step analysis and recommendation:

1. **Analyze the relevant information**: The context mentions that the building's power usage is higher than expected, and the team suspects that the HVAC system is the problem.
2. **Consider the specific requirements**: The text does not explicitly mention specific requirements for reducing power usage in relation to HVAC systems. However, it does provide guidance on energy efficiency measures (EEMs) for various building types, including those related to ventilation, load management, and plug loads.
3. **Form a detailed recommendation**:
	* **Conduct an energy audit**: Perform a thorough analysis of the building's energy usage patterns, focusing on the HVAC system. Identify areas of inefficiency and opportunities for improvement.
	* **Evaluate the current HVAC system**: Assess the existing HVAC system's performance, including its energy consumption, efficiency, and any existing energy-saving measures (e