In [1]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
import os
import google.generativeai as genai
import pandas as pd
from sklearn.metrics.pairwise import cosine_distances

In [2]:
# Read the source PDF and flatten it into one string for chunking.
# `load()` is used instead of `load_and_split()`: the latter runs LangChain's
# default text splitter first, and joining those pre-split pieces would repeat
# the overlap region of any page the splitter had to cut. The text is chunked
# explicitly later with the notebook's own splitter settings.
loader = PyPDFLoader("nust.pdf")
pages = loader.load()
text = "\n".join(page.page_content for page in pages)

In [81]:
# Chunk the document text: chunk_size=2000 and chunk_overlap=200, both
# measured with `len`, and separators treated as plain strings (not regex).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2000, chunk_overlap=200, length_function=len, is_separator_regex=False
)
docs = text_splitter.create_documents([text])

# Replace each chunk's metadata with its position in the chunk list.
for chunk_index, chunk in enumerate(docs):
    chunk.metadata = {"doc_id": chunk_index}

In [None]:
# Never commit API keys to a notebook: read the key from the environment.
# Export GOOGLE_API_KEY before starting the kernel; a missing variable raises
# KeyError here instead of failing later inside an API call.
# NOTE(review): the key that used to be hard-coded in this cell has been
# exposed and should be revoked/rotated.
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

In [86]:
def get_embeddings(text, task_type="retrieval_document", model='models/text-embedding-004'):
    """Embed `text` with the Gemini embedding API.

    Args:
        text: Content to embed; passed unchanged as `content` to
            `genai.embed_content`.
        task_type: Embedding task type forwarded to the API. Defaults to
            "retrieval_document" (the value previously hard-coded), so
            existing callers behave exactly as before. Callers embedding a
            search query can pass "retrieval_query".
        model: Embedding model name. Defaults to the model previously
            hard-coded here.

    Returns:
        The value stored under the 'embedding' key of the API response
        (presumably a list of floats for a single string input).
    """
    response = genai.embed_content(model=model, content=text, task_type=task_type)
    return response['embedding']

In [87]:
# Raw text of every chunk, in chunk order.
content_list = [chunk.page_content for chunk in docs]

In [88]:
# One embedding request per chunk, results kept in the same order as content_list.
embeddings = list(map(get_embeddings, content_list))

In [89]:
# Retrieval table: one row per chunk, pairing its text with its embedding.
df = pd.DataFrame(dict(page_content=content_list, embeddings=embeddings))

In [90]:
def get_relevant_docs(user_query, top_k=7):
    """Return the text of the chunks closest to `user_query`.

    The query is embedded with `get_embeddings`, compared against every stored
    chunk embedding in the module-level `df` using cosine distance, and the
    `top_k` rows with the smallest distance are returned.

    Args:
        user_query: The user's question.
        top_k: Number of chunks to return. Defaults to 7, the value that was
            previously hard-coded.

    Returns:
        A list of `page_content` strings ordered from most to least similar;
        empty if `df` has no rows.
    """
    # cosine_distances rejects an empty matrix, so short-circuit explicitly.
    if df.empty:
        return []

    # NOTE(review): get_embeddings embeds with task_type="retrieval_document";
    # the Gemini docs describe a separate "retrieval_query" task type for
    # queries -- confirm whether the query should use it.
    query_embedding = get_embeddings(user_query)

    # One batched distance computation instead of one sklearn call per row.
    distances = cosine_distances([query_embedding], df['embeddings'].tolist())[0]

    # Score on a temporary frame so the shared `df` is not mutated by a
    # 'dist' column on every query.
    scored = df.assign(dist=distances)
    return scored.nsmallest(top_k, 'dist')['page_content'].tolist()

In [118]:
def make_rag_prompt(query, relevant_passage):
    """Build the prompt sent to the generative model.

    Args:
        query: The user's question, inserted verbatim into the prompt.
        relevant_passage: Retrieved context. Either an iterable of strings
            (joined with single spaces) or a single string (used as-is).

    Returns:
        The full prompt: instructions, the question, the retrieved data and a
        trailing "ANSWER:" cue.
    """
    # A bare string would otherwise be joined character by character.
    if isinstance(relevant_passage, str):
        passage_text = relevant_passage
    else:
        passage_text = ' '.join(relevant_passage)

    # Adjacent string literals are concatenated with no separator, so the first
    # instruction needs its own ". " or it runs into the next sentence
    # ("...provided textIf the data...").
    prompt = (
        f"Just give me the answer donot state that you are quoting it from the provided text. "
        f"If the data has no relevant information then just say you dont know the answer. If is a general question then answer according to your intuition\n\n"
        f"QUESTION: '{query}'\n"
        f"relevant data: '{passage_text}'\n\n"
        f"ANSWER:"
    )
    return prompt

In [108]:
def generate_response(user_prompt):
    """Send `user_prompt` to the 'gemini-1.5-pro' model and return the reply's `.text`."""
    llm = genai.GenerativeModel('gemini-1.5-pro')
    return llm.generate_content(user_prompt).text

In [154]:
def generate_answer(query):
    """Answer `query`: retrieve relevant chunks, build the prompt, ask the model.

    Returns whatever `generate_response` returns for the assembled prompt.
    """
    context_chunks = get_relevant_docs(query)
    rag_prompt = make_rag_prompt(query, relevant_passage=context_chunks)
    return generate_response(rag_prompt)

In [110]:
import tkinter as tk
from tkinter import scrolledtext

In [None]:
def create_app():
    """Build and run a small Tkinter window for querying the RAG pipeline.

    The window has a single-line query entry, a scrollable response area and a
    "Send" button (Enter in the entry field works too). This function blocks
    in `root.mainloop()` until the window is closed.
    """
    root = tk.Tk()
    root.title("Query Interface")

    tk.Label(root, text="Enter your query:").pack(pady=5)
    prompt_box = tk.Entry(root, width=50)
    prompt_box.pack(pady=5)

    tk.Label(root, text="Response:").pack(pady=5)
    response_box = scrolledtext.ScrolledText(root, width=50, height=10, wrap=tk.WORD)
    response_box.pack(pady=5)

    def show(message):
        # Replace the whole content of the response area with `message`.
        response_box.delete("1.0", tk.END)
        response_box.insert(tk.END, message)

    def on_send(event=None):
        # `event` is supplied when triggered by the <Return> key binding.
        query = prompt_box.get().strip()
        if not query:
            # Do not spend API calls on an empty question.
            show("Please enter a query.")
            return

        # generate_answer runs synchronously in the UI thread, so paint a
        # placeholder before the window stops responding.
        show("Generating answer...")
        root.update_idletasks()

        try:
            response = generate_answer(query)
        except Exception as exc:
            # Surface failures in the UI; otherwise Tk only prints the
            # traceback and the window shows nothing.
            response = f"Error while generating the answer: {exc}"
        show(response)

    send_button = tk.Button(root, text="Send", command=on_send)
    send_button.pack(pady=10)
    prompt_box.bind("<Return>", on_send)

    root.mainloop()

In [119]:
# Launch the query window; this call blocks until the window is closed.
create_app()