In [51]:
import os

from langchain import hub
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.prompts import ChatPromptTemplate,MessagesPlaceholder
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_openai import ChatOpenAI
from langchain_core.chat_history import BaseChatMessageHistory


In [44]:
# Chat model used by both the question-rewriting retriever and the answering chain.
# temperature=0 requests the least random sampling the model offers.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
)

In [17]:


# PDFs to index; the paths are relative to the notebook's working directory.
file_paths = ["./data/paper_1.pdf", "./data/paper_2.pdf"]

# Load each PDF in order and flatten the per-file results into one list,
# so `all_pages` holds everything PyPDFLoader returned for both files.
all_pages = [
    page
    for pdf_path in file_paths
    for page in PyPDFLoader(pdf_path).load()
]


In [20]:
# Break the loaded pages into overlapping chunks before embedding them.
# chunk_size / chunk_overlap are measured by the splitter's length function.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(all_pages)

In [24]:
# Embed every chunk with OpenAI embeddings and index the vectors in Chroma.
# NOTE(review): no persist directory or collection name is passed, so storage
# lifetime and re-run behaviour follow Chroma's defaults — confirm that is intended.
vectors_storer = Chroma.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(),
)

In [25]:
# Retriever interface over the vector store, using as_retriever()'s default settings.
retreive = vectors_storer.as_retriever()

In [30]:
# Query the vector store directly (no LLM involved) to see which chunks match.
query = "what is the most famous emotion model currently present ?"

result = vectors_storer.similarity_search(query)

In [34]:
# System instructions for the question-rewriting step: turn a follow-up that
# leans on earlier turns into a standalone question, without answering it.
contextualize_q_system_prompt = " ".join(
    (
        "Given a chat history and the latest user question",
        "which might reference context in the chat history,",
        "formulate a standalone question which can be understood",
        "without the chat history. Do NOT answer the question,",
        "just reformulate it if needed and otherwise return it as is.",
    )
)

In [38]:
# Message layout for the question-rewriting prompt: system instructions,
# then the prior conversation turns, then the latest user message.
contextualize_q_messages = [
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
]
contextualize_q_prompt = ChatPromptTemplate.from_messages(contextualize_q_messages)

In [45]:
# Retriever that is given the LLM and the rewriting prompt so the question can be
# reformulated against the chat history before documents are fetched.
history_aware_retreiver = create_history_aware_retriever(
    llm=llm,
    retriever=retreive,
    prompt=contextualize_q_prompt,
)

In [46]:
# System instructions for the answering step. The trailing "{context}" is the
# template slot where the retrieved document text is inserted.
system_prompt = (
    "You are an assistant for question-answering tasks. Use the following "
    "pieces of retrieved context to answer the question. If you don't know "
    "the answer, say that you don't know. Use three sentences maximum and "
    "keep the answer concise.\n\n{context}"
)

In [47]:
# Message layout for the answering prompt: system instructions (with the
# retrieved context), then the prior conversation turns, then the user question.
qa_messages = [
    ("system", system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
]
qa_prompt = ChatPromptTemplate.from_messages(qa_messages)

In [48]:
# Chain that passes the retrieved documents to the LLM through `qa_prompt`.
stuff = create_stuff_documents_chain(
    llm=llm,
    prompt=qa_prompt,
)

In [49]:
# Full RAG pipeline: history-aware retrieval followed by the answering chain.
rag_chain = create_retrieval_chain(
    retriever=history_aware_retreiver,
    combine_docs_chain=stuff,
)

In [52]:
# In-memory registry of chat histories, keyed by session id.
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history registered for ``session_id``.

    The first request for an unknown id creates a new, empty
    ``ChatMessageHistory``, records it in ``store`` and returns it; later
    requests for the same id return that same stored object.

    Args:
        session_id: Key identifying the conversation.

    Returns:
        The history object held in ``store`` under ``session_id``.
    """
    try:
        return store[session_id]
    except KeyError:
        # First use of this session id: register a fresh history for it.
        fresh_history = ChatMessageHistory()
        store[session_id] = fresh_history
        return fresh_history

In [56]:
# Wrap the RAG chain with per-session message history.
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,  # looks up (or creates) the history for a session id
    input_messages_key="input",  # key of the user question in the input dict
    history_messages_key="chat_history",  # placeholder name used by both prompts
    output_messages_key="answer",  # key of the generated reply in the output
)

In [58]:
# Per-call settings; "session_id" is the key handed to get_session_history,
# so calls sharing this config share one chat history.
config = dict(configurable=dict(session_id="abc123"))

In [59]:
# First question of the session; display only the generated answer text.
eeg_response = conversational_rag_chain.invoke(
    {"input": "what is EEG?"},
    config=config,
)
eeg_response["answer"]

Parent run 0841801d-2686-458f-b6e6-02edb6e249c8 not found for run fe2b9e87-f40e-48ce-a413-c52814c24690. Treating as a root run.


'EEG stands for Electroencephalography, which is a standard method used to record human brain activity in the form of electric pulses over time. EEG data is valuable for various clinical applications like quantifying anesthesia levels, diagnosing epilepsy, predicting seizures, and aiding in neurofeedback applications for conditions like autism and neuro-rehabilitation. EEG signals are directly generated by the central nervous system and are closely related to human emotions, making them popular in emotion recognition studies.'

In [60]:
# Follow-up question in the same session (same `config`); display only the answer text.
emotion_question = "what is the most famous emotion model present and what is the highest accuracy acheived?"
emotion_response = conversational_rag_chain.invoke(
    {"input": emotion_question},
    config=config,
)
emotion_response["answer"]

Parent run cc7bc212-a1ba-4478-8fe8-da7035b30a5c not found for run 682343b9-ce95-4799-a645-32f0826a0f56. Treating as a root run.


"The most famous emotion model is Russell's Circumplex Model of Affect, which categorizes emotions based on arousal and valence dimensions. In recent studies, the Softmax algorithm has been commonly used for emotion recognition, showing high efficiency and accuracy compared to other classification algorithms. While specific accuracy values may vary across studies, Softmax has consistently demonstrated superior performance in emotion recognition tasks."