In [44]:
import os

import streamlit as st
from langchain.chains import ConversationalRetrievalChain, create_history_aware_retriever, create_retrieval_chain, LLMChain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate, ChatPromptTemplate, MessagesPlaceholder
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_core.messages import HumanMessage, AIMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_groq.chat_models import ChatGroq
from PyPDF2 import PdfReader

def ensure_env_var(name, default=""):
    """Make sure ``os.environ[name]`` exists without overwriting a real value.

    Args:
        name: Environment variable to check.
        default: Value stored only when ``name`` is not set at all.

    Returns:
        The value of ``os.environ[name]`` after the call.
    """
    # setdefault leaves an already-exported key untouched; a plain assignment
    # of "" would replace a working credential with an empty one.
    return os.environ.setdefault(name, default)


# Credentials are taken from the environment; never paste real keys into the
# notebook. A blank placeholder is only written when the variable is missing.
ensure_env_var("OPENAI_API_KEY")
ensure_env_var("GROQ_API_KEY")

In [25]:
def get_pdf_text(pdf_path):
    """Return the extracted text of every page of a PDF as one string.

    The file at ``pdf_path`` is opened in binary mode and read with
    ``PdfReader``. Page texts are concatenated in page order with no
    separator between pages.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        str: Concatenated page text; ``""`` when no page yields any text.
    """
    with open(pdf_path, 'rb') as pdf_file:
        pdf_reader = PdfReader(pdf_file)
        # `or ""` guards against a page whose extraction yields None, which
        # would make string concatenation raise TypeError. A single join
        # also avoids rebuilding the string once per page.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)

def get_text_chunks(text, chunk_size=1000, chunk_overlap=200, separator='\n'):
    """Split ``text`` into overlapping chunks with ``CharacterTextSplitter``.

    Args:
        text: The full text to split.
        chunk_size: Value passed as the splitter's ``chunk_size``
            (measured with ``len``). Defaults to 1000.
        chunk_overlap: Value passed as the splitter's ``chunk_overlap``.
            Defaults to 200.
        separator: String the splitter breaks the text on. Defaults to a
            newline.

    Returns:
        list: The chunks returned by ``split_text``; an empty list when
        ``text`` is empty.
    """
    # Nothing to split: skip building a splitter for empty input.
    if not text:
        return []

    text_splitter = CharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        separator=separator,
        length_function=len,
    )
    return text_splitter.split_text(text)

def get_vectorstore(text_chunks, embeddings=None):
    """Build a FAISS vector store from text chunks.

    Args:
        text_chunks: Iterable of strings to index.
        embeddings: Optional embeddings object handed to
            ``FAISS.from_texts`` as ``embedding``. When omitted, a fresh
            ``OpenAIEmbeddings()`` is used. Pass another backend here
            instead of editing this function.

    Returns:
        The object returned by ``FAISS.from_texts``.

    Raises:
        ValueError: If ``text_chunks`` contains no chunks.
    """
    chunks = list(text_chunks)
    # Fail early with a readable message: an index cannot be built from zero
    # texts (for example a scanned PDF that produced no extractable text).
    if not chunks:
        raise ValueError("text_chunks is empty; cannot build a vector store")

    if embeddings is None:
        embeddings = OpenAIEmbeddings()
    return FAISS.from_texts(texts=chunks, embedding=embeddings)

In [50]:
# Chat model shared by every chain in this notebook.
llm = ChatGroq()

# PDF -> raw text -> chunks -> FAISS index.
pdf_text = get_pdf_text("COMBUSTION_2023.pdf")
pdf_chunk = get_text_chunks(pdf_text)
vectorstore = get_vectorstore(pdf_chunk)

# Retriever over the index: at most k=2 documents, filtered by score_threshold.
retriever = vectorstore.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs=dict(k=2, score_threshold=0.6),
)

In [51]:
# Buffer memory exposed to the chain under the "chat_history" key, as message objects.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

# Conversational retrieval chain wired to the shared llm, retriever and memory.
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm,
    retriever,
    memory=memory,
)

In [53]:
# Run a single question through the memory-backed conversation_chain and keep
# the chain's result in `x` for inspection.
x = conversation_chain.invoke("what is combustion?")

In [27]:
# System instruction for the question-rewriting step: a single line of text
# built from adjacent string literals.
contextualization_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, formulate a standalone question "
    "which can be understood without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

# System instruction for the answering step. The {context} placeholder sits on
# its own line, directly after the last sentence.
qa_system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, just say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\n{context}"
)

# Template for rewriting a follow-up question: system instruction, the prior
# turns ("chat_history"), then the latest user message ("input").
contextualize_q_prompt = ChatPromptTemplate.from_messages([
    ("system", contextualization_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
])

# Template for answering: same message layout, but with the QA system
# instruction, which carries the {context} slot.
qa_prompt = ChatPromptTemplate.from_messages([
    ("system", qa_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
])

In [28]:
# Step 1: retriever that first passes the question and history through
# contextualize_q_prompt.
history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)

# Step 2: chain that feeds the retrieved documents and the question to qa_prompt.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=qa_prompt)

# Step 3: full RAG pipeline = retrieval followed by answering.
rag_chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=question_answer_chain,
)

In [29]:
# Conversation so far, as alternating HumanMessage / AIMessage objects. It is
# passed to rag_chain explicitly on every turn.
chat_history = []
# NOTE(review): rag_chain is not given this memory object anywhere in this
# cell; history is carried by `chat_history` above. Confirm whether the
# rebinding of `memory` is still needed.
memory = ConversationBufferMemory(
        memory_key='chat_history', return_messages=True)

for turn in range(2):
    question = input("Enter your question").strip()
    # A blank line would otherwise be sent to the retriever/LLM and recorded
    # as a turn; skip it instead.
    if not question:
        print("Empty question skipped.\n")
        continue

    response = rag_chain.invoke({"input": question, "chat_history": chat_history})
    answer = response["answer"]
    # Record both sides of the exchange so the next turn can refer back to it.
    chat_history.extend([HumanMessage(content=question), AIMessage(content=answer)])
    print(f"Human: {question}\n")
    print(f"AI Response: {answer}\n")

[]


In [19]:
from langchain_core.runnables import RunnablePassthrough

In [12]:
# Same construction as the earlier RAG cell; re-running it rebinds these names
# to fresh, equivalent chains.
history_aware_retriever = create_history_aware_retriever(
    llm, retriever, contextualize_q_prompt
)
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

# Prompt for the plain LCEL chain below. Its input mapping produces the keys
# "context" and "question", so the human turn uses {question}. qa_prompt cannot
# be reused here because it expects "input" and "chat_history".
retrieval_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        ("human", "{question}"),
    ]
)

# History-free variant: retrieve for the raw question, fill the prompt, call
# the shared llm, and return the reply as a plain string. The previous version
# referenced `prompt` and `model`, which are not defined in this notebook.
retrieval_chain = (
    {
        "context": retriever.with_config(run_name="Docs"),
        "question": RunnablePassthrough(),
    }
    | retrieval_prompt
    | llm
    | StrOutputParser()
)

<faiss.swigfaiss_avx2.IndexFlatL2; proxy of <Swig Object of type 'faiss::IndexFlatL2 *' at 0x78f4eaeb5b00> >