In [None]:
# Load environment variables
from dotenv import load_dotenv
load_dotenv()

# Standard library imports
import os
import glob

# Third-party imports
import gradio as gr
from langchain.document_loaders import DirectoryLoader, TextLoader, UnstructuredPDFLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_chroma import Chroma
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

# Constants
MODEL = "gpt-4o-mini"
DB_NAME = "vector_db"

In [None]:
# Load environment variables and ensure OpenAI API key is set
from dotenv import load_dotenv
import os

load_dotenv(override=True)

# Ensure the OpenAI API key is loaded into the environment
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise EnvironmentError("OPENAI_API_KEY not found in environment variables.")

os.environ["OPENAI_API_KEY"] = openai_api_key

In [133]:
folders = glob.glob('../knowledge-base-Amir/*')

In [None]:

documents = []
for folder in folders:
    doc_type = os.path.basename(folder)
    loader = DirectoryLoader(folder, glob="**/*.md", loader_cls=TextLoader, loader_kwargs={'encoding': 'utf-8'} )
    folder_docs= loader.load()
    for doc in folder_docs:
        doc.metadata["doc_type"] = doc_type
        documents.append(doc)


Error loading file ../knowledge-base-Amir/publications/s11257-024-09418-w.pdf


ModuleNotFoundError: No module named 'pdfminer'

In [109]:
documents

[Document(metadata={'source': '../knowledge-base-Amir/future_plan/future.md', 'doc_type': 'future_plan'}, page_content='## 🎯 Ultimate Life Goals (Age 40)\n\n**Deadline:** By Age 40\n\n“Designing a life of freedom, impact, and legacy—where family, wealth, and purpose align.”\n\n✅ **Mission:** Master AI, Build Wealth, Create a Legacy.\n\n---\n\n## 🏆 Dream Life Snapshot:\n\n- 🏡 Living in a luxurious mansion\n- 🚘 Driving a Rolls-Royce\n- 👨\u200d👩\u200d👧\u200d👦 With wife & kids\n- 🤖💰 Running AI businesses while giving back to the world. 🌍\n\n---\n\n## 🔥 🏆 The Big Goals\n\n- 💰 **$100 Million Net Worth** – Building AI & Automation Wealth 🚀\n- 💸 **$2 Million Monthly Passive Income** – Investing, Business & AI 💡\n- ❤️ **Find & Build a Loving Family** – Wife + 4 kids 👩\u200d❤️\u200d👨👧👦👧👦\n- 🏠 **Buy Dream Mansion & Luxury Cars** for Parents and Myself 🎁\n- 🛂 **Secure a U.S. Green Card** 🇺🇸\n- 🇨🇦 **Canadian Citizenship** – Permanent Global Freedom 🌎\n- 🏛️ **PhD at a Top 50 University** – AI & Rese

In [110]:
text_splitter = CharacterTextSplitter(chunk_size=1000,chunk_overlap=200)
chunks = text_splitter.split_documents(documents)

In [None]:
print(chunks[0])

IndexError: list index out of range

In [113]:
len(chunks)

24

In [114]:
embeddings = OpenAIEmbeddings()
vecotrestore = Chroma.from_documents(documents=chunks, embedding=embeddings, persist_directory=db_name)


In [115]:
vecotrestore._collection.count()

270

In [116]:
len(vecotrestore._collection.get(limit=4,include=['embeddings'])['embeddings'][3])

1536

In [117]:
llm = ChatOpenAI(temperature=0.7)

memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

retriever = vecotrestore.as_retriever()

conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

In [120]:
result = conversation_chain.invoke({'question':'give me a some bulletpoints about trideeprec'})

In [121]:
result['answer']

'I\'m sorry, but there is no information provided about "Trideeprec" in the context given. If you have more context or details, I\'d be happy to help with that.'

In [99]:
conversation_chain.invoke({'question':'who is avery'})['answer']

'Avery Lancaster is the Co-Founder & Chief Executive Officer (CEO) of Insurellm, a leading Insurance Tech provider based in San Francisco, California. Avery has a background as a Senior Product Manager at Innovate Insurance Solutions before co-founding Insurellm in 2015. Throughout her career, Avery has demonstrated resilience, adaptability, and innovative leadership strategies that have positioned Insurellm as a key player in the insurance technology landscape.'

In [100]:
def chat(message, history):
    result = conversation_chain.invoke({"question": message})
    return result["answer"]

In [102]:
view = gr.ChatInterface(chat,type='messages').launch()

* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.
