In [1]:
# Core runtime dependencies. `%pip` installs into the environment of the running
# kernel, whereas `!pip` runs whichever pip happens to be first on PATH.
%pip install langchain-groq streamlit



In [2]:
# Install LangChain and dependencies
%pip install langchain langchain-community langchain-core langchain-groq

# Install FAISS (for vector storage)
%pip install faiss-cpu

# OpenAI (needed for some parts like token counting, even if not used directly)
%pip install openai

# Streamlit (though not typically used in Colab, included because it's in your code)
%pip install streamlit

# Ollama embeddings dependencies (requires HTTPX, etc.)
%pip install httpx

# If you're using PDFs
%pip install pypdf

# python-dotenv provides `load_dotenv` and Pillow provides `PIL.Image`; both are
# imported by later cells but are not installed explicitly anywhere else.
%pip install python-dotenv pillow


Collecting faiss-cpu
  Using cached faiss_cpu-1.11.0-cp310-cp310-win_amd64.whl.metadata (5.0 kB)
Using cached faiss_cpu-1.11.0-cp310-cp310-win_amd64.whl (15.0 MB)
Installing collected packages: faiss-cpu
Successfully installed faiss-cpu-1.11.0
Collecting openai
  Downloading openai-1.93.2-py3-none-any.whl.metadata (29 kB)
Collecting jiter<1,>=0.4.0 (from openai)
  Downloading jiter-0.10.0-cp310-cp310-win_amd64.whl.metadata (5.3 kB)
Downloading openai-1.93.2-py3-none-any.whl (755 kB)
   ---------------------------------------- 0.0/755.1 kB ? eta -:--:--
   ---------------------------------------- 755.1/755.1 kB 8.0 MB/s eta 0:00:00
Downloading jiter-0.10.0-cp310-cp310-win_amd64.whl (207 kB)
Installing collected packages: jiter, openai

   -------------------- ------------------- 1/2 [openai]
   -------------------- ------------------- 1/2 [openai]
   -------------------- ------------------- 1/2 [openai]
   -------------------- ------------------- 1/2 [openai]
   -------------------- ---

In [3]:
import streamlit as st
import os
from langchain_groq import ChatGroq
# from langchain_openai import OpenAIEmbeddings
from langchain_community.embeddings import OllamaEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFDirectoryLoader
# import openai

In [4]:
from dotenv import load_dotenv
# Read key=value pairs from a local .env file into the process environment so that
# later os.getenv("GROQ_API_KEY") lookups can find the key. The recorded output of
# this cell shows the call returned True.
load_dotenv()

True

In [None]:
 ## Load the Groq API KEYS 

# Read the Groq API key from the environment exactly once and fail fast with a
# readable message when it is missing. Assigning the result of os.getenv() back
# into os.environ is unnecessary when the key exists and raises
# "TypeError: str expected, not NoneType" when it does not.
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Add it to your .env file or export it "
        "before running this cell."
    )

# With the key available, build the Groq-hosted chat model used by the chains later on.
llm = ChatGroq(groq_api_key=groq_api_key, model_name="Llama3-8b-8192")

In [6]:
 ## In Prompt Template we will use the LLM to answer the question.
 
 prompt=ChatPromptTemplate.from_template(
    """
    Answer the questions based on the provided context only.
    Please provide the most accurate respone based on the question
    <context>
    {context}
    <context>
    Question:{input}

    """

)

##### create_stuff_documents_chain
##### create_retrieval_chain

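Each of these has its own role in getting the context into the prompt: `create_stuff_documents_chain` inserts a list of documents into the prompt's `{context}` slot, and `create_retrieval_chain` first retrieves the relevant documents for the question and then passes them to that chain.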

In [None]:
## SO WE HAVE TO CREATE THE EMBEDDINGS FOR THE QUESTION AND CONTEXT.

## I HAVE TO READ THE PDF FILES FROM THE DIRECTORY AND LOAD THEM.
## THEN APPLY RECURSIVE CHARACTER TEXT SPLITTER TO SPLIT THE TEXT INTO SMALLER CHUNKS.

## THEN STORE THEM IN VECTOR STORE DATA BASE USING FAISS.



In [15]:
def create_vector_embedding():
    """Build the FAISS vector index for the PDF corpus once per Streamlit session.

    Loads the PDFs found in the ``PDfs`` directory, splits the first 50 loaded
    documents into chunks (``chunk_size=1000``, ``chunk_overlap=200``), embeds
    the chunks with ``OllamaEmbeddings`` and stores the resulting index in
    ``st.session_state.vectors``. The intermediate objects (``embeddings``,
    ``loader``, ``docs``, ``text_splitter``, ``final_docs``) are kept in
    ``st.session_state`` as well.

    Does nothing when ``st.session_state`` already contains ``"vectors"``.
    Returns ``None``.
    """
    # session_state is used so the index built here is still available later in
    # the session; build it only when it is not there yet.
    if "vectors" not in st.session_state:
        st.session_state.embeddings = OllamaEmbeddings()

        # Load the PDF files from the directory.
        st.session_state.loader = PyPDFDirectoryLoader('PDfs')
        st.session_state.docs = st.session_state.loader.load()

        # Split the text into smaller chunks so they can be embedded and retrieved.
        st.session_state.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

        # Only the first 50 loaded documents are split and indexed; the chunks are
        # kept in session state so they can be inspected or reused later.
        st.session_state.final_docs = st.session_state.text_splitter.split_documents(st.session_state.docs[:50])

        # Build the vector store from the chunks stored just above. This must read
        # `final_docs`, the same attribute that was assigned on the previous line.
        st.session_state.vectors = FAISS.from_documents(st.session_state.final_docs, st.session_state.embeddings)

In [16]:
## Page header shown at the top of the app (st.title).
st.title("RAG Document Q&A With Groq And Lama3")

## Free-text box where the user types the question (st.text_input).
user_prompt = st.text_input("Enter your query from the research paper")

## Button that triggers building the vector store (st.button).
embedding_requested = st.button("Document Embedding")
if embedding_requested:
    ## Build the vector store database, then confirm it on the page (st.write).
    create_vector_embedding()
    st.write("Vector Database is ready")

2025-07-08 22:54:44.034 
  command:

    streamlit run c:\Users\ASUS\OneDrive\Desktop\sid\Sidd_project\venv\lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
2025-07-08 22:54:44.043 Session state does not function when running a script without `streamlit run`


## What actually happens: the list of retrieved documents is inserted at `{context}`, and whatever input we give is inserted at `Question:{input}` in the prompt.


In [None]:
# import time

In [21]:
import time

if user_prompt:
    if "vectors" not in st.session_state:
        # The index only exists after create_vector_embedding() has run (the
        # "Document Embedding" button). Without this guard the retriever lookup
        # below fails with AttributeError on a fresh session.
        st.warning("Please click 'Document Embedding' first to build the vector database.")
    else:
        ## create_stuff_documents_chain: builds a chain that passes a list of documents to the model.
        document_chain = create_stuff_documents_chain(llm, prompt)

        ## The retriever looks up chunks in the stored vector index for each query.
        retriever = st.session_state.vectors.as_retriever()
        retrieval_chain = create_retrieval_chain(retriever, document_chain)

        # perf_counter() measures elapsed wall-clock time. process_time() counts only
        # this process's CPU time, so time spent waiting on the remote LLM call
        # would not be included.
        start = time.perf_counter()
        response = retrieval_chain.invoke({'input': user_prompt})
        print(f"Response time :{time.perf_counter()-start}")

        st.write(response['answer'])

        ## With a streamlit expander: show the retrieved chunks behind the answer.
        with st.expander("Document similarity Search"):
            for doc in response['context']:
                st.write(doc.page_content)
                st.write('------------------------')

In [25]:
# Explicit %pip magic: does not depend on IPython automagic being enabled and
# installs into the running kernel's environment.
%pip install streamlit-jupyter

Collecting streamlit-jupyter
  Downloading streamlit_jupyter-0.2.1-py3-none-any.whl.metadata (8.1 kB)
Collecting fastcore (from streamlit-jupyter)
  Downloading fastcore-1.8.5-py3-none-any.whl.metadata (3.7 kB)
Collecting ipywidgets==7.7.2 (from streamlit-jupyter)
  Downloading ipywidgets-7.7.2-py2.py3-none-any.whl.metadata (1.9 kB)
Collecting jupyter (from streamlit-jupyter)
  Downloading jupyter-1.1.1-py2.py3-none-any.whl.metadata (2.0 kB)
Collecting stqdm (from streamlit-jupyter)
  Downloading stqdm-0.0.5-py3-none-any.whl.metadata (3.0 kB)
Collecting tabulate (from streamlit-jupyter)
  Downloading tabulate-0.9.0-py3-none-any.whl.metadata (34 kB)
Collecting ipython-genutils~=0.2.0 (from ipywidgets==7.7.2->streamlit-jupyter)
  Downloading ipython_genutils-0.2.0-py2.py3-none-any.whl.metadata (755 bytes)
Collecting widgetsnbextension~=3.6.0 (from ipywidgets==7.7.2->streamlit-jupyter)
  Downloading widgetsnbextension-3.6.10-py2.py3-none-any.whl.metadata (1.3 kB)
Collecting jupyterlab-wid

In [27]:
# Explicit %pip magic: does not depend on IPython automagic being enabled and
# installs into the running kernel's environment.
%pip install streamlit streamlit-jupyter

Note: you may need to restart the kernel to use updated packages.


In [7]:
## Load the Image from a file. 

from PIL import Image

# A forward slash is accepted on Windows as well as POSIX systems, and it avoids
# the invalid "\S" escape sequence that a backslash inside a normal string
# literal would produce.
img = Image.open('Result/Screenshot (130).png')
# Display the loaded screenshot.
img.show()


In [8]:
# Display the screenshot again; `img` is the image opened in the previous cell.
img.show()