In [1]:
import fitz  
import os
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import OllamaEmbeddings
from langchain.docstore.document import Document

# Define the PDF path
pdf_path = "your path.pdf"

# Function to extract text from PDF using PyMuPDF
def extract_text_from_pdf(pdf_path):
    doc = fitz.open(pdf_path)
    text = ""
    for page_num in range(len(doc)):
        page = doc.load_page(page_num)
        text += page.get_text()
    return text

# Extract text from the PDF
the_text = extract_text_from_pdf(pdf_path)

# Wrap the extracted text in a Document object
document = Document(page_content=the_text, metadata={"source": pdf_path})

# Split the extracted text into documents
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
docs = text_splitter.split_documents([document])


In [2]:

# Define the embedding model
embedding_model = OllamaEmbeddings(model='nomic-embed-text')

# Create the vector store from the documents
vectorstore = Chroma.from_documents(
    documents=docs,
    collection_name="ollama_embeds",
    embedding=embedding_model,
)

# Create the retriever
retriever = vectorstore.as_retriever()

In [4]:
import os

os.environ['MY_API_KEY'] = 'your_api'

# Access the environment variable later in your code
groq_api_key = os.environ['MY_API_KEY']

In [5]:
from langchain_groq import ChatGroq
llm = ChatGroq(
            groq_api_key=groq_api_key,
            model_name='mixtral-8x7b-32768'
    )


In [7]:
from langchain_core.prompts import ChatPromptTemplate
rag_template = """Answer the question based only on the following context:
{context}
Question: {question}
"""
rag_prompt = ChatPromptTemplate.from_template(rag_template)
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | rag_prompt
    | llm
    | StrOutputParser()
)

In [8]:
import os
import time
import textwrap
import gradio as gr

response=rag_chain.invoke("What is this document about?")
print(textwrap.fill(response,width=80))

Based on the provided document, it appears to be a brochure or document related
to the LVM3-M4/Chandrayaan-3 mission, which is a space mission consisting of
several components:  1. A launch vehicle with solid rocket boosters and stages
2. A propulsion module to carry the lander and rover from injection orbit to a
100 km lunar orbit 3. A lander module to carry a spectro-polarimetry of
Habitable Planetary payload and to descend to the lunar surface 4. A rover to
explore the lunar surface  The document includes specifications for the lander
and rover, such as mass, power, dimensions, communication capabilities, and
payloads. The lander's payloads include a Langmuir Probe to measure the near-
surface plasma density and changes over time, an Alpha Particle X-Ray
Spectrometer to derive the chemical composition and infer mineralogical
composition of the lunar surface, a Laser Induced Breakdown Spectroscope to
determine the elemental composition of lunar soil and rocks, a Spectro-
polarimetry 

In [12]:
def process_question(user_question):
    start_time=time.time()
    
    response=rag_chain.invoke(user_question)
    end_time=time.time()
    response_time=f"Response time: {end_time-start_time: .2f} seconds."
    full_response=f"{response}\n\n{response_time}"
    

    return full_response

iface=gr.Interface(fn=process_question,
                  inputs=gr.Textbox(lines=2,placeholder="Type your question here..."),
                   outputs=gr.Textbox(),
                   title="RAG Chat App",
                   description="Ask any question about your document and get an answer")
iface.launch(share=True)

Running on local URL:  http://127.0.0.1:7860


OSError: [WinError 225] Operation did not complete successfully because the file contains a virus or potentially unwanted software: 'C:\\Users\\tessa\\anaconda3\\Lib\\site-packages\\gradio\\frpc_windows_amd64_v0.2'