In [1]:
from dotenv import load_dotenv
# Read a local .env file into the environment before any client is created.
# NOTE(review): presumably this is where the Groq API key used by ChatGroq
# comes from — confirm.
load_dotenv()

True

In [2]:
import warnings
# NOTE(review): with no category or module filter this silences every warning
# for the rest of the session, including deprecation notices from the
# libraries imported below.
warnings.filterwarnings("ignore")
from langchain_groq import ChatGroq

# Chat model shared by the rest of the notebook; connect_chains reads it as
# the global `llm`.
# NOTE(review): confirm that the "llama3-8b-8192" model id is still served.
llm = ChatGroq(
    model="llama3-8b-8192",
    temperature=0,  # presumably chosen for repeatable answers — confirm
    max_tokens=None,
    timeout=None,
    max_retries=2
)

In [3]:
from langchain.prompts.prompt import PromptTemplate

In [5]:
from langchain_community.document_loaders import PyPDFLoader

def load_pdf_data(pdf_path):
    """
    this function reads a pdf file and hands back its content as documents

    pdf_path: location of the pdf file, forwarded to PyPDFLoader as file_path
    returns: whatever PyPDFLoader.load() produces for that file
    """
    return PyPDFLoader(file_path=pdf_path).load()

In [7]:
# Load the source PDF; the result feeds the chunking step below.
# NOTE(review): the `react_` prefix does not match the file being loaded
# (Episode_1.pdf) — looks like a leftover name; confirm before renaming.
react_docs = load_pdf_data(pdf_path = "../data/Training_docs/Episode_1.pdf")

In [10]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

def split_documents(documents, chunk_size=800, chunk_overlap=80):
    """
    this function cuts documents into overlapping chunks

    documents: the documents to split
    chunk_size: forwarded to RecursiveCharacterTextSplitter (default 800)
    chunk_overlap: forwarded to RecursiveCharacterTextSplitter (default 80)
    returns: the chunks produced by the splitter's split_documents()
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    return splitter.split_documents(documents=documents)

In [11]:
# Chunk the loaded PDF with the defaults of split_documents
# (chunk_size=800, chunk_overlap=80).
react_chunks = split_documents(react_docs)

In [15]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
import os

def create_embedding_vector_db(
    chunks,
    db_name,
    target_directory="../vector_databases",
    model_name='sentence-transformers/all-mpnet-base-v2'
):
    """
    this function uses the open-source embedding model HuggingFaceEmbeddings
    to create embeddings and store those in a vector database called FAISS,
    which allows for efficient similarity search

    chunks: the document chunks to embed; must not be empty
    db_name: prefix of the saved folder, which is named "<db_name>_vector_db"
    target_directory: parent directory of the saved folder; created if missing
    model_name: embedding model passed to HuggingFaceEmbeddings; a database
        must later be loaded with the same model it was built with
    returns: None (the database is written to disk)
    raises: ValueError if `chunks` is an empty collection
    """
    # Fail before the (slow) model load: an empty list cannot be indexed and
    # would otherwise surface as an obscure error inside the vector store.
    if isinstance(chunks, (list, tuple)) and not chunks:
        raise ValueError("chunks is empty: nothing to embed")

    # instantiate embedding model
    embedding = HuggingFaceEmbeddings(model_name=model_name)
    # create the vector store
    vectorstore = FAISS.from_documents(
        documents=chunks,
        embedding=embedding
    )
    # save vector database locally; exist_ok avoids the check-then-create race
    os.makedirs(target_directory, exist_ok=True)
    vectorstore.save_local(os.path.join(target_directory, f"{db_name}_vector_db"))

In [39]:
# Embed the chunks and write the index to ../vector_databases/doc_vector_db
# (default target_directory plus the "<db_name>_vector_db" naming).
# NOTE(review): this cell's execution count (39) is higher than that of the
# cells after it, so it was last run out of order — verify with Restart & Run All.
create_embedding_vector_db(chunks=react_chunks, db_name="doc")

In [17]:
def retrieve_from_vector_db(vector_db_path, model_name='sentence-transformers/all-mpnet-base-v2'):
    """
    this function loads a local FAISS vector database and returns a
    retriever object for it

    vector_db_path: folder that holds the saved vector database
    model_name: embedding model passed to HuggingFaceEmbeddings; it has to be
        the model the database was built with, otherwise query vectors and
        stored vectors are not comparable
    returns: the result of as_retriever() on the loaded vector store
    """
    # instantiate embedding model
    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    # SECURITY: allow_dangerous_deserialization=True lets the loader unpickle
    # data from vector_db_path, which can execute arbitrary code. Only point
    # this at databases created locally by this project, never at downloads.
    vectorstore = FAISS.load_local(
        folder_path=vector_db_path,
        embeddings=embeddings,
        allow_dangerous_deserialization=True
    )
    return vectorstore.as_retriever()

In [32]:
# Load the index written by create_embedding_vector_db(..., db_name="doc")
# above. That call saves to "<target_directory>/doc_vector_db", so the folder
# name here must match it for a clean Restart & Run All.
doc_retriever = retrieve_from_vector_db("../vector_databases/doc_vector_db")

In [33]:
from langchain import hub
from langchain.chains.combine_documents import create_stuff_documents_chain

In [34]:
from langchain.chains.retrieval import create_retrieval_chain

In [35]:
def connect_chains(retriever):
    """
    this function builds a retrieval chain on top of a stuff-documents chain

    retriever: passed to create_retrieval_chain as the retriever
    returns: the chain created by create_retrieval_chain

    The stuff-documents chain uses the notebook-level `llm` and the
    "langchain-ai/retrieval-qa-chat" prompt pulled from the hub on every call.
    """
    qa_prompt = hub.pull("langchain-ai/retrieval-qa-chat")
    combine_docs = create_stuff_documents_chain(llm=llm, prompt=qa_prompt)
    return create_retrieval_chain(retriever=retriever, combine_docs_chain=combine_docs)

In [36]:
# Build the question-answering chain on top of the retriever loaded from the
# local FAISS database.
doc_retrieval_chain = connect_chains(doc_retriever)

In [37]:
def print_output(
    inquiry,
    retrieval_chain=None
):
    """
    this function sends an inquiry through a retrieval chain and prints the answer

    inquiry: the question; passed to the chain under the "input" key
    retrieval_chain: object whose invoke() returns a mapping with an "answer"
        string; when omitted, the notebook-level `doc_retrieval_chain` is used
    returns: None (the answer is printed with surrounding newlines stripped)
    """
    # Resolve the default at call time. A default written in the signature is
    # evaluated once, when the cell defining this function runs, and would keep
    # pointing at the old chain after `doc_retrieval_chain` is rebuilt.
    if retrieval_chain is None:
        retrieval_chain = doc_retrieval_chain
    result = retrieval_chain.invoke({"input": inquiry})
    print(result['answer'].strip("\n"))

In [38]:
# Smoke-test the pipeline end to end by asking for a short summary; this uses
# the default retrieval chain of print_output.
print_output("Give me the summary the text in 3 sentences.")

The text discusses the significance of understanding the causes of diseases and aging, with the speaker arguing that it is a turning point in medical history. The speaker's research has shown that aging is controllable and can be slowed down or reversed, and that this technology will fundamentally change the course of human history. The goal is not just to prevent old age, but to prevent aging throughout one's entire lifespan, and the speaker plans to share practical tips and information on how to achieve this in future episodes.
