In [1]:
import os
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from dotenv import load_dotenv
from utils import *
from langchain.schema import Document
#according to docs, Document is a class for storing a piece of text and associated metadata

load_dotenv()

embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))

# all_texts = load_pdfs_from_folder("data/insurance")
# all_text_chunks = chunk_texts(all_texts)

documents = list()
for file in os.listdir("data/insurance"):
    all_texts = load_text_from_pdf("data/insurance/" + file)
    all_text_chunks = chunk_texts(all_texts)
    documents.extend([
        Document(page_content=chunk, metadata={"source": file, "content": "insurance"}) 
        for chunk in all_text_chunks
    ])


db = FAISS.from_documents(documents, embeddings)
db.save_local("faiss_index") 


  embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))


In [6]:
from langchain_openai import ChatOpenAI
import json


with open("news_sources.json") as f:
    metadata_lookup = json.load(f)

# embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))
llm = ChatOpenAI(openai_api_key=os.getenv("OPENAI_API_KEY"), model_name="gpt-4o")

db = FAISS.load_local(
    "faiss_index", 
    embeddings, 
    allow_dangerous_deserialization = True
    )

def ask(query):
    docs = db.similarity_search(query, k=3)

    context = "\n\n".join(doc.page_content for doc in docs)
    
    prompt = f"""Answer the question based only on the context below.

Context:
{context}

Question: {query}"""

    response = llm.invoke(prompt).content

    source_list = set()
    for doc in docs:
        src = doc.metadata["source"]
        source_list.add(metadata_lookup.get(src))
    
    source_list = list(source_list)
    source_str = ", ".join(source_list)
    
    response += f"\n\nsources: {source_str}"

    return response

if __name__ == "__main__":
    q = "what is insurance and how does it work?"
    print("\nAnswer:", ask(q))
    q = "What are the main types of insurance?"
    print("\nAnswer:", ask(q))
    q= "How do insurance companies make money"
    print("\nAnswer:", ask(q))



Answer: Insurance is a contract represented by a policy where a policyholder receives financial protection or reimbursement against losses from an insurance company. The company pools clients' risks to make payments more manageable. There are many types of insurance policies, with common ones being auto, health, homeowners, and life insurance. The core components of most insurance policies include the premium (the amount paid for the policy), the deductible (the amount paid out of pocket by the policyholder before the insurance kicks in), and policy limits (the maximum amount the insurance will pay). Most individuals in the United States have at least one type of personal insurance, and car insurance is required by state law.

sources: https://www.investopedia.com/terms/i/insurance.asp

Answer: The main types of insurance are auto, health, homeowners, and life insurance.

sources: https://www.investopedia.com/terms/i/insurance.asp, https://www.investopedia.com/ask/answers/051915/how-d

In [7]:
llm.config.hidden_size

AttributeError: 'ChatOpenAI' object has no attribute 'config'