In [None]:
import warnings 
warnings.filterwarnings('ignore')
from dotenv import load_dotenv
import os

In [None]:
# Pull variables from a local .env file (if present) into the process environment.
load_dotenv()
GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')

# Mirror the Gemini key into GOOGLE_API_KEY unless one is already set
# (presumably the variable langchain_google_genai reads -- confirm against its docs).
if "GOOGLE_API_KEY" not in os.environ:
    if not GEMINI_API_KEY:
        # os.environ only accepts strings; assigning None would fail with an
        # unhelpful "str expected, not NoneType" TypeError.
        raise RuntimeError(
            "No API key found: set GEMINI_API_KEY (e.g. in a .env file) "
            "or GOOGLE_API_KEY before running this notebook."
        )
    os.environ["GOOGLE_API_KEY"] = GEMINI_API_KEY

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain

from PyPDF2 import PdfReader

# import google.generativeai as genai

In [None]:
# TODO:
# 1. store doc embeddings [DONE]
# 2. check google embeddings on larger corpus
# 3. github instructions
# 4. py script

In [None]:
def get_embedds(file_path, filename):
    """Return a FAISS vector store for a PDF, building and caching it on first use.

    The index is stored under ``doc_embeddings/<filename>``. If that path
    already exists it is loaded; otherwise the PDF is read, split into
    overlapping chunks, embedded, and saved there.

    Args:
        file_path: Path of the PDF to read when no cached index exists.
        filename: Name of the entry under ``doc_embeddings/`` used to cache
            the index for this document.

    Returns:
        The FAISS vector store (freshly built or loaded from disk).

    Raises:
        ValueError: If no text could be extracted from the PDF, so there is
            nothing to embed.
    """
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

    if os.path.exists('doc_embeddings'):
        print('searching for doc embedding')
    else:
        # makedirs(exist_ok=True) avoids a crash if the folder appears between
        # the check above and the creation.
        os.makedirs('doc_embeddings', exist_ok=True)

    index_path = f'doc_embeddings/{filename}'

    if os.path.exists(index_path):
        print('loading from vectorstore')
        # NOTE(security): load_local unpickles data from disk, which can execute
        # arbitrary code. Only acceptable because this index was written locally
        # by this function; never point it at an index from an untrusted source.
        return FAISS.load_local(index_path, embeddings=embeddings, allow_dangerous_deserialization=True)

    print('not found in vectorstore, creating and loading....')
    reader = PdfReader(file_path)

    # Extract each page once (extraction is the expensive step) and keep only
    # pages that produced text.
    page_texts = []
    for page in reader.pages:
        page_text = page.extract_text()
        if page_text:
            page_texts.append(page_text)

    # Join with a newline so the last word of one page is not fused with the
    # first word of the next.
    corpus = '\n'.join(page_texts)
    if not corpus.strip():
        raise ValueError(f'no extractable text found in {file_path!r}')

    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_text(corpus)

    vectors = FAISS.from_texts(chunks, embeddings)
    vectors.save_local(index_path)
    return vectors


In [None]:
# Choose the document to index: the first PDF (alphabetically) in ./sample_pdfs/.
files = os.listdir('./sample_pdfs/')

# os.listdir order is arbitrary and the folder may hold non-PDF files, so
# filter and sort to make the choice deterministic.
pdf_files = sorted(name for name in files if name.lower().endswith('.pdf'))
if not pdf_files:
    raise FileNotFoundError('no PDF files found in ./sample_pdfs/')

# splitext removes only the final extension, so dots inside the name survive
# ("v1.2 report.pdf" -> "v1.2 report").
file_name = os.path.splitext(pdf_files[0])[0]
file = './sample_pdfs/' + pdf_files[0]
print('filename: ', file_name, str(file))

In [None]:
# Build the vector store for the selected PDF, or load it if already cached.
vectors = get_embedds(file_path=file, filename=file_name)

In [None]:
# with open('dd.txt', 'r', encoding='utf-8') as f:
#     corpus = f.readlines()

# ###### CHANGE THIS ######
# corpus2 = corpus[:100]
# corpus2 = ' '.join(corpus)
# len(corpus2)

In [None]:
# Chat model that generates the answers.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash")

# Conversational retrieval chain over the document's vector store; the source
# chunks used for each answer are returned alongside it.
qa = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=vectors.as_retriever(),
    return_source_documents=True,
)

In [None]:
# query = 'tell me about unsupervised Anomaly Detection'
# Question passed to the QA chain in the next cell.
query = 'explain Figure 1'

In [None]:
# Start with an empty conversation and ask the first question.
chat_history = []
result = qa(
    {
        "question": query,
        "chat_history": chat_history,
    }
)
# Store the turn as a (question, answer) pair for later follow-up questions.
chat_history += [(query, result["answer"])]

In [None]:
# History entries are (question, answer) tuples; print the latest answer.
print(chat_history[-1][1])

In [None]:
import gradio as gr

# Conversation memory fed to the chain; reset whenever this cell is re-run.
chat_history = []


def echo(message, history):
    """Answer a chat message with the QA chain and record the turn.

    Args:
        message: Mapping whose ``'text'`` entry holds the user's question.
        history: Chat history supplied by the UI; unused, because the
            module-level ``chat_history`` list is what the chain receives.

    Returns:
        The chain's answer text.
    """
    question = message['text']
    response = qa({"question": question, "chat_history": chat_history})
    answer = response["answer"]
    chat_history.append((question, answer))
    return answer


# Chat UI wired to `echo`; the example prompts are offered as quick inputs.
demo = gr.ChatInterface(
    fn=echo,
    title="Echo Bot",
    multimodal=True,
    examples=[{"text": greeting} for greeting in ("hello", "hola", "merhaba")],
)
demo.launch()

In [None]:
# Shut down the Gradio server started for `demo`.
demo.close()

In [None]:
from pathlib import Path
import gradio as gr
import time 

# Conversation memory handed to the chain, kept separately from the
# history that the Chatbot widget displays.
llm_chat_history = []

with gr.Blocks() as demo:

    # Chat window
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, chatbot])

    def respond(message, chat_history):
        """Answer `message` with the QA chain and update both histories.

        Returns an empty string (to clear the textbox) together with the
        widget history extended by the new (question, answer) turn.
        """
        answer = qa({"question": message, "chat_history": llm_chat_history})["answer"]
        turn = (message, answer)
        llm_chat_history.append(turn)
        chat_history.append(turn)
        return "", chat_history

    msg.submit(fn=respond, inputs=[msg, chatbot], outputs=[msg, chatbot])

# Launch the UI only when this code runs as the main module, not when imported.
if __name__ == "__main__":
    demo.launch()

In [119]:
# Shut down the Gradio server started for `demo`.
demo.close()

Closing server running on port: 7879


In [114]:
import os    

def upload_file(file):
    """Copy an uploaded PDF into the local ``./data/`` folder.

    Args:
        file: Path of the uploaded file. An object exposing that path as
            ``.name`` is also accepted.

    Returns:
        None. Progress is reported with ``print`` and, after a successful
        copy, a ``gr.Info`` notification.
    """
    # Imported locally because the surrounding cell only imports ``os``;
    # without this the copy below raised NameError.
    import shutil

    UPLOAD_FOLDER = "./data/"
    os.makedirs(UPLOAD_FOLDER, exist_ok=True)

    # Nothing was uploaded (e.g. the dialog was cancelled).
    if not file:
        print('error')
        return

    src_path = os.fspath(getattr(file, "name", file))
    if not src_path.lower().endswith('.pdf'):
        print('error')
        return

    print(src_path)

    # Compare on the bare file name: the upload arrives as a full temp-dir
    # path, so appending it to the folder never matched an existing copy.
    destination = os.path.join(UPLOAD_FOLDER, os.path.basename(src_path))
    if os.path.exists(destination):
        print('already there')
        return

    shutil.copy(src_path, destination)
    gr.Info("File Uploaded!!!")

with gr.Blocks() as demo:
    # Heading shown above the upload control.
    gr.Markdown(
        """    
        # Upload a PDF here:
        """)

    # Each upload is passed to upload_file, which stores PDFs under ./data/.
    upload_button = gr.UploadButton("Click to Upload a File")
    upload_button.upload(fn=upload_file, inputs=upload_button)

    # Chat window
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, chatbot])

    def respond(message, chat_history):
        """Answer `message` with the QA chain and update both histories.

        Returns an empty string (to clear the textbox) together with the
        widget history extended by the new (question, answer) turn.
        """
        reply = qa({"question": message, "chat_history": llm_chat_history})["answer"]
        exchange = (message, reply)
        llm_chat_history.append(exchange)
        chat_history.append(exchange)
        return "", chat_history

    msg.submit(fn=respond, inputs=[msg, chatbot], outputs=[msg, chatbot])

In [117]:
# Start the local Gradio server for the upload + chat UI defined in the previous cell.
demo.launch()

Running on local URL:  http://127.0.0.1:7879

To create a public link, set `share=True` in `launch()`.




C:\Users\Hanish\AppData\Local\Temp\gradio\d9cb449af41b6da76c98475ff268b3ef06d6dec1\A Deep Neural Network for Unsupervised Anomaly Detection and Diagnosis in Multivariate Time Series Data.pdf
