In [1]:
import os
import shutil
import warnings

# Installed before the third-party imports so their import-time warnings are silenced too.
warnings.filterwarnings('ignore')

import gradio as gr
from dotenv import load_dotenv
from langchain.chains import ConversationalRetrievalChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI

from PyPDF2 import PdfReader

# import google.generativeai as genai

In [2]:
# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()
GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')

# Mirror GEMINI_API_KEY into GOOGLE_API_KEY unless the latter is already set
# (presumably the variable the langchain_google_genai clients read — confirm).
if "GOOGLE_API_KEY" not in os.environ:
    if not GEMINI_API_KEY:
        # os.environ only accepts strings; assigning None would raise an opaque
        # TypeError, so fail with an actionable message instead.
        raise RuntimeError(
            "No API key found: set GEMINI_API_KEY or GOOGLE_API_KEY in the "
            "environment or in a .env file before running this notebook."
        )
    os.environ["GOOGLE_API_KEY"] = GEMINI_API_KEY

In [3]:
def get_embedds(file_path, filename):
    """Return a FAISS vector store for a PDF, building and caching it on first use.

    The index is cached under ``doc_embeddings/<filename>``. If that path already
    exists it is loaded; otherwise the PDF text is extracted, split into
    overlapping chunks, embedded, and the resulting index is saved there.

    Args:
        file_path: PDF to index; passed straight to ``PdfReader``.
        filename: Name of the cache entry under ``doc_embeddings/``.

    Returns:
        The FAISS vector store built from, or loaded for, the document.

    Raises:
        ValueError: If no text could be extracted from the PDF, since there
            would be nothing to embed.
    """
    store_root = 'doc_embeddings'
    index_path = os.path.join(store_root, filename)
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

    if os.path.exists(store_root):
        print('searching for doc embedding')
    else:
        # exist_ok avoids a crash if the folder appears between the check and the call.
        os.makedirs(store_root, exist_ok=True)

    if os.path.exists(index_path):
        print('loading from vectorstore')
        # SECURITY NOTE: allow_dangerous_deserialization opts in to loading
        # serialized data from disk. Only acceptable while doc_embeddings/
        # contains nothing but indexes written by save_local below.
        return FAISS.load_local(index_path, embeddings=embeddings, allow_dangerous_deserialization=True)

    print('not found in vectorstore, creating and loading....')
    reader = PdfReader(file_path)

    # Extract each page once (extraction is the expensive step) and skip
    # pages that yield no text.
    page_texts = []
    for page in reader.pages:
        page_text = page.extract_text()
        if page_text:
            page_texts.append(page_text)

    # Join with a newline so the last word of one page is not glued to the
    # first word of the next.
    corpus = '\n'.join(page_texts)

    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_text(corpus)
    if not chunks:
        raise ValueError(f'no extractable text found in {file_path!r}; nothing to embed')

    vectors = FAISS.from_texts(chunks, embeddings)
    vectors.save_local(index_path)
    return vectors


In [None]:
# Build (or load from the on-disk cache) the vector store for the PDF to chat with.
# NOTE(review): `file` and `file_name` are not assigned in any cell visible in this
# notebook, so this line fails with NameError on a fresh kernel unless they are
# defined elsewhere — presumably the PDF path and its cache-entry name; confirm.
vectors = get_embedds(file, file_name)
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash")
# Conversational retrieval chain over the vector store. `qa` is module-level on
# purpose: the Gradio `respond` handler further down calls it as well.
qa = ConversationalRetrievalChain.from_llm(llm, retriever=vectors.as_retriever(), return_source_documents=True)

# Manual one-off query against the chain.
# NOTE(review): 'alamalc' looks like a placeholder test query — confirm or remove.
query = 'alamalc'
chat_history = []
result = qa({"question": query, "chat_history": chat_history})
# Record the turn as a (question, answer) tuple so it can be passed back as history.
chat_history.append((query, result["answer"]))

In [None]:
def upload_file(file):
    """Copy an uploaded PDF into ``./data/`` unless a file of that name is already there.

    Args:
        file: Path of the uploaded file as a string. It may live in any
            directory; only its base name is used for the destination.

    Returns:
        None. Outcomes are reported via ``print`` and, on a successful copy,
        a ``gr.Info`` notification.
    """
    UPLOAD_FOLDER = "./data/"
    # exist_ok avoids a crash if the folder appears between a check and the call.
    os.makedirs(UPLOAD_FOLDER, exist_ok=True)

    if file.endswith('.pdf'):
        print(file)
        # Compare against the destination name, not UPLOAD_FOLDER + the full
        # source path: the latter never exists for uploads coming from another
        # directory, so duplicates went undetected and were overwritten.
        destination = os.path.join(UPLOAD_FOLDER, os.path.basename(file))
        if not os.path.exists(destination):
            shutil.copy(file, destination)
            gr.Info("File Uploaded!!!")
        else:
            print('already there')
    else:
        print('error')

    

# History passed to the retrieval chain as (question, answer) tuples. It was
# referenced by `respond` but never defined, so the first message raised NameError.
llm_chat_history = []

with gr.Blocks() as demo:
    gr.Markdown(
        """    
        # Upload a PDF here:
        """)
    upload_button = gr.UploadButton("Click to Upload a File")
    upload_button.upload(upload_file, upload_button)

    # Chat window
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, chatbot])

    def respond(message, chat_history):
        """Answer one chat message with the `qa` chain and update both histories.

        Args:
            message: Text submitted from the textbox.
            chat_history: Current contents of the chatbot component.

        Returns:
            A pair ``("", chat_history)``: an empty string to clear the textbox
            and the chatbot history extended with the new turn.
        """
        result = qa({"question": message, "chat_history": llm_chat_history})
        answer = result["answer"]
        llm_chat_history.append((message, answer))
        chat_history.append((message, answer))
        return "", chat_history

    def reset_llm_history():
        """Empty the chain-side history so it matches the cleared chat window."""
        llm_chat_history.clear()

    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    # The clear button only wipes the visible components; also drop the history
    # given to the chain, otherwise old turns keep influencing new answers.
    clear.click(reset_llm_history)