In [14]:
import streamlit as stlit
from PyPDF2 import PdfReader 
from langchain.text_splitter import RecursiveCharacterTextSplitter as recur_split
import os
from langchain_google_genai import GoogleGenerativeAIEmbeddings as google_embeddings
import google.generativeai as google_gen
from langchain.vectorstores import FAISS as fais
from langchain_google_genai import ChatGoogleGenerativeAI as llm_model
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv as ld_env
from langchain.tools.base import StructuredTool
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory as conbuff
from langchain.retrievers import ContextualCompressionRetriever as concom
from langchain.retrievers.document_compressors import LLMChainExtractor as conllm
from langchain.chains import RetrievalQA as rtrqa


# Load variables from a .env file (if present) into the process environment,
# then hand GOOGLE_API_KEY to the google.generativeai client. The key is read
# once, directly where it is used.
ld_env()
google_gen.configure(api_key=os.getenv("GOOGLE_API_KEY"))



In [15]:
def extract_text(docs):
    """Concatenate the text of every page of the given PDF documents.

    Args:
        docs: An iterable of PDF sources that ``PdfReader`` accepts (file
            paths or binary file-like objects such as Streamlit uploads).
            A single source is also accepted, and ``None`` (nothing
            uploaded) is treated as "no documents".

    Returns:
        str: The text of all pages of all documents, joined in order with no
        separator. Empty string when there are no documents.
    """
    if docs is None:
        return ""
    # A lone path or file-like object must be wrapped: iterating over it
    # directly would walk its characters/lines instead of a list of documents.
    if isinstance(docs, (str, os.PathLike)) or hasattr(docs, "read"):
        docs = [docs]

    page_texts = []
    for doc in docs:
        for page in PdfReader(doc).pages:
            # Guard against pages that yield no text (None/empty), e.g.
            # scanned images, so the join below never sees a non-string.
            page_texts.append(page.extract_text() or "")
    return "".join(page_texts)

def generate_vectors(extracted_text, index_dir="faiss_index"):
    """Split text into chunks, embed them and persist a FAISS index to disk.

    Args:
        extracted_text: The raw text to index.
        index_dir: Directory the FAISS index is written to. Defaults to
            ``"faiss_index"``, the directory ``user_input`` loads from.

    Raises:
        ValueError: If ``extracted_text`` produces no chunks (empty or
            whitespace-only text), since an index cannot be built from nothing.
    """
    splitter = recur_split(chunk_size=5000, chunk_overlap=1000)
    chunks = splitter.split_text(extracted_text or "")
    if not chunks:
        raise ValueError("no text available to build the vector index from")

    # Embeddings need a dedicated embedding model; the Gemini chat models
    # (e.g. gemini-1.5-pro) cannot be used to embed text.
    embeddings = google_embeddings(model="models/embedding-001")
    vector_store = fais.from_texts(chunks, embedding=embeddings)
    vector_store.save_local(index_dir)



In [16]:
def user_input(user_prompt):
    """Answer ``user_prompt`` from the saved FAISS index and render the reply.

    Loads the index stored in ``faiss_index``, retrieves chunks that pass a
    relevance threshold, compresses them with the LLM, runs a map_reduce
    RetrievalQA chain over them and writes the answer to the Streamlit page.
    Each call is stateless: no conversation history is carried between calls.

    Args:
        user_prompt: The question typed by the user.
    """
    # The placeholders must be exactly {context} and {question}: those are the
    # variables the map step of the map_reduce QA chain fills in.
    prt_temp = """
    respond to the prompts based on the documents, if the right answer can be found give the relevant source document
    {context} {question}
    """
    model = llm_model(model="gemini-1.5-pro-latest", temperature=0.6)
    # Must be an embedding model (and the same one the index was built with);
    # the Gemini chat models cannot embed text.
    embeddings = google_embeddings(model="models/embedding-001")

    # SECURITY: allow_dangerous_deserialization=True lets load_local unpickle
    # data from disk. Only ever point this at an index directory this app
    # created itself, never at an untrusted one.
    fais_db = fais.load_local(
        "faiss_index", embeddings, allow_dangerous_deserialization=True
    )
    # A score threshold only takes effect through this search_type and
    # search_kwargs; passed as a bare keyword argument it is not applied.
    fais_retriever = fais_db.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"score_threshold": 0.7},
    )
    compressor = conllm.from_llm(model)
    comp_rtr = concom(base_compressor=compressor, base_retriever=fais_retriever)

    question_prompt = PromptTemplate(
        template=prt_temp, input_variables=["context", "question"]
    )

    # For chain_type="map_reduce" the per-document prompt is supplied under
    # the "question_prompt" key.
    chain = rtrqa.from_chain_type(
        llm=model,
        chain_type="map_reduce",
        retriever=comp_rtr,
        input_key="query",
        chain_type_kwargs={"question_prompt": question_prompt},
    )

    answer = chain.invoke({"query": user_prompt})
    # Show only the generated answer, not the whole result dictionary.
    stlit.write("Reply: ", answer["result"])

In [19]:
def main():
    """Build the Streamlit page: PDF upload/indexing sidebar plus a question box.

    The sidebar is rendered (and any requested indexing is run) before the
    question is answered, so a failing query can never hide the upload
    controls and a query can use an index built in the same rerun.
    """
    stlit.set_page_config("customized Gemini pro ")
    stlit.header("Ask your questions after uploading the pdf file")

    query = stlit.text_input("give prompts relevant to the pdf")

    with stlit.sidebar:
        stlit.title("documents uploading")
        # accept_multiple_files=True makes the uploader return a list, which
        # is what extract_text iterates over.
        user_docs = stlit.file_uploader(
            "please upload the necessary documents you are interested in",
            type=["pdf"],
            accept_multiple_files=True,
        )
        if stlit.button("generate vectors"):
            if not user_docs:
                stlit.warning("please upload at least one pdf file first")
            else:
                with stlit.spinner("creating embeddings "):
                    processed_text = extract_text(user_docs)
                    if processed_text.strip():
                        generate_vectors(processed_text)
                        stlit.success("you can now ask the questions or upload another file")
                    else:
                        stlit.error("no readable text was found in the uploaded pdf files")

    if query:
        user_input(query)


In [None]:
# Entry point: build the page by calling main() only when this code runs as
# the top-level module, not when it is imported.
if __name__ == "__main__":
    main()

In [18]:
# NOTE(review): this launches Streamlit on ipykernel_launcher.py (presumably
# the Jupyter kernel's own launcher) through a machine-specific absolute path,
# not on a script that contains main(). It likely should target an exported
# .py copy of this notebook via a relative path — confirm the intended file.
!streamlit run c:\Users\jashi\anaconda3\lib\site-packages\ipykernel_launcher.py

^C
