In [1]:
import os
# Set TOKENIZERS_PARALLELISM to 'false' for this process before the later cells import anything.
# NOTE(review): presumably silences the Hugging Face tokenizers parallelism/fork warning - confirm.
os.environ['TOKENIZERS_PARALLELISM'] = 'false'

In [2]:
# Install the core dependencies of this notebook.
# %pip (instead of !pip) installs into the environment of the running kernel,
# not into whichever `pip` happens to be first on the shell PATH.
%pip install langchain langchain-huggingface sentence-transformers faiss-cpu transformers torch



In [3]:
# Install the community integrations package (loaders, Ollama wrappers) into the kernel's environment.
%pip install langchain_community




In [4]:
from langchain_community.embeddings.ollama import OllamaEmbeddings



In [5]:
import os
import sys
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.agents import initialize_agent, Tool, AgentType
from langchain.prompts import PromptTemplate
from langchain_community.llms import Ollama
from langchain_community.embeddings import OllamaEmbeddings
from langchain.docstore.document import Document
from langchain.docstore.in_memory import InMemoryDocstore

In [6]:
# Install pypdf into the kernel's environment (PyPDFLoader is used in the next cell).
%pip install pypdf




In [10]:
# Read the PDF from disk into a list of LangChain documents
pdf_path = r"iphone_manual.pdf"  # Update this path
loader = PyPDFLoader(file_path=pdf_path)
documents = loader.load()


In [11]:
# Create a docstore that can look up loaded pages by key.
# Every page of one PDF carries the same metadata["source"], so keying by source
# alone keeps a single page and silently drops the rest. Each page is therefore
# also stored under "<source>#page=<n>".
docstore_entries = {}
for page_index, doc in enumerate(documents):
    source = doc.metadata["source"]
    # Fall back to the position in the list if the loader did not record a page number.
    page = doc.metadata.get("page", page_index)
    docstore_entries[f"{source}#page={page}"] = doc
    # Keep the bare source key (last page wins, as before) so existing lookups still resolve.
    docstore_entries[source] = doc
docstore = InMemoryDocstore(docstore_entries)

In [12]:
# Split every loaded page into small, overlapping text chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=50,
    chunk_size=200,
)
chunks = text_splitter.split_documents(documents)

In [13]:
# Report how many chunks were produced and preview the first one.
print(f"Number of chunks: {len(chunks)}")
if chunks:
    print(f"First chunk: {chunks[0].page_content[:100]}...")
else:
    # A PDF with no extractable text yields no chunks; indexing chunks[0] would raise IndexError.
    print("First chunk: <none - no text was extracted from the PDF>")

Number of chunks: 538
First chunk: Before using iPhone, review the iPhone User Guide  at  
support.apple.com/en-gb/guide/iphone . You c...


In [14]:
# Model availability check: create the Ollama LLM and send it one smoke-test prompt.
llm = None  # defined up front so later cells see None (not a NameError) if construction fails
try:
    llm = Ollama(model="gemma2:2b")
    test_input = "What is the capital of Haryana?"
    # invoke() replaces the deprecated predict() and returns the completion text for an LLM.
    test_output = llm.invoke(test_input)
    print(f"Model test prediction: {test_output}")
except Exception as e:
    print(f"Error in loading or using the model: {e}")

  warn_deprecated(


Model test prediction: The capital of Haryana is **Chandigarh**. 



In [15]:
# Vector store and embeddings check: embed the chunks, index them in FAISS,
# and run one sample retrieval.
try:
    # NOTE(review): gemma2:2b is also the generation model; a dedicated embedding
    # model may give better retrieval quality - confirm.
    embed_model = OllamaEmbeddings(model="gemma2:2b")
    vectorstore = FAISS.from_documents(chunks, embed_model)
    retriever = vectorstore.as_retriever()

    # Test retrieval with a sample query
    query = "What is the document about?"
    results = retriever.invoke(query)  # invoke() replaces the deprecated get_relevant_documents()
    print(f"Vector store retrieval result: {results}")
except Exception as e:
    print(f"Error in creating or using vector store: {e}")

  warn_deprecated(


Vector store retrieval result: [Document(metadata={'source': 'C:\\Users\\Mugdhi Saxena\\Documents\\Mugdhi S\\cognizant\\iphone_manual.pdf', 'page': 44}, page_content='Before using iPhone, review the iPhone User Guide  at support.apple.com/\nguide/iphone . You can also download the guide from Apple Books (where \navailable). Retain documentation for future reference.'), Document(metadata={'source': 'C:\\Users\\Mugdhi Saxena\\Documents\\Mugdhi S\\cognizant\\iphone_manual.pdf', 'page': 8}, page_content='“Important safety information” in the iPhone User Guide.'), Document(metadata={'source': 'C:\\Users\\Mugdhi Saxena\\Documents\\Mugdhi S\\cognizant\\iphone_manual.pdf', 'page': 20}, page_content='“Important safety information” in the iPhone User Guide.'), Document(metadata={'source': 'C:\\Users\\Mugdhi Saxena\\Documents\\Mugdhi S\\cognizant\\iphone_manual.pdf', 'page': 52}, page_content='“Important safety information” in the iPhone User Guide.')]


In [16]:
# QA chain check: build a "stuff" RetrievalQA chain over the retriever and run one test query.
try:
    prompt_template = PromptTemplate(
        input_variables=["context", "question"],
        template="Context: {context}\nQuestion: {question}\nAnswer:"
    )

    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        chain_type_kwargs={"prompt": prompt_template}
    )

    test_context = "This is a test context to verify QA chain functionality."
    test_question = "What is this context about?"
    # The chain accepts only a "query". test_context therefore becomes part of the
    # question text; the prompt's {context} slot is filled from the retrieved chunks.
    # invoke() replaces the deprecated run() and returns a dict holding the answer under "result".
    test_answer = qa_chain.invoke({"query": f"{test_context} {test_question}"})["result"]
    print(f"QA chain test answer: {test_answer}")

except Exception as e:
    print(f"Error in creating or using QA chain: {e}")
    qa_chain = None  # Set qa_chain to None if there's an error

  warn_deprecated(


QA chain test answer: This context is about **Apple's product warranties and environmental commitments**. 

Here's why:

* **Warranty information:** The text provides links to Apple's warranty terms and support pages, indicating a focus on product usage and potential issues.
* **Environmental initiatives:** Mentions of recycling programs, restricted substances, and other "environmental initiatives" suggest an emphasis on sustainability and the impact of technology on the environment. 

Therefore, this context aims to inform users about their product's legal protection and Apple's commitment towards environmentally responsible practices. 



In [17]:
# Build the agent tools; the list stays empty when the QA chain could not be created.
def _answer_from_pdf(question):
    """Answer `question` with the retrieval QA chain and return only the answer text."""
    # invoke() replaces the deprecated run(); the answer is under the "result" key.
    return qa_chain.invoke({"query": question})["result"]


tools = []
if qa_chain:
    tools = [
        Tool(
            name="PDF QA System",
            func=_answer_from_pdf,
            description="Useful for answering questions about the PDF document"
        ),
        Tool(
            name="Document Lookup",
            # docstore.search already takes the key as its single argument; no wrapper needed.
            func=docstore.search,
            description="Useful for looking up specific documents by source"
        )
    ]


In [18]:
# Initialise a zero-shot ReAct agent over the tools (only when tools were created).
agent = None  # stays None when there are no tools or initialisation fails
try:
    if tools:  # Only proceed if tools are defined
        # The ReAct agent builds its own prompt (tool list, format instructions,
        # scratchpad). Custom instructions therefore go in `prefix`; a bare
        # "prompt" entry in agent_kwargs is not used to build that prompt.
        agent_prefix = (
            "Answer the following question directly and concisely. "
            "If you need to use a tool, do so without explaining the process. "
            "Only provide the final answer. "
            "You have access to the following tools:"
        )
        agent = initialize_agent(
            tools,
            llm,
            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
            verbose=False,  # Set to False to reduce output
            # max_iterations is an executor option, so it is passed here rather
            # than inside agent_kwargs, where it had no effect.
            max_iterations=3,  # Reduce max iterations for faster responses
            agent_kwargs={"prefix": agent_prefix},
        )
except Exception as e:
    print(f"Error in initializing agent: {e}")


  warn_deprecated(


In [19]:
# Answer one user question with the retrieval QA chain
def run_agent(user_input):
    """Send `user_input` to the module-level `qa_chain` and return the answer text.

    Args:
        user_input: The question to ask, as a string.

    Returns:
        The chain's answer (the "result" entry of its output), or the string
        "Error: <message>" if the chain raised an exception.
    """
    try:
        # invoke() replaces the deprecated run(); the chain expects the question under "query".
        response = qa_chain.invoke({"query": user_input})
        return response["result"]
    except Exception as e:
        # Returned rather than raised so the interactive loop keeps running.
        return f"Error: {e}"

In [20]:
# Interactive question loop; runs only if the QA chain was created successfully.
if qa_chain:
    while True:
        try:
            # Strip whitespace so inputs such as "exit " still end the session.
            user_input = input("Ask a question (or type 'exit' to quit): ").strip()
        except (EOFError, KeyboardInterrupt):
            # No stdin available or the cell was interrupted: leave the loop cleanly.
            break
        if user_input.lower() == 'exit':
            break
        if not user_input:
            # Do not send an empty question to the chain.
            continue
        response = run_agent(user_input)
        print("Answer:", response)
else:
    print("QA chain was not created successfully, so the agent will not run.")

Ask a question (or type 'exit' to quit): ihone 13 details
Answer: Let's talk about iPhone 13 details! 

I need you to be a little more specific about what kind of details you want. 

For example, do you want to know about:

* **Technical specifications:** processor, RAM, storage options, cameras etc.?
* **Design features:** size, weight, colors, materials?
* **Performance and battery life:** how it handles demanding apps, how long the battery lasts?
* **Availability and pricing:** release date, where to buy, cost? 

Tell me what you're curious about, and I can give you more detailed answers! 😄 

Ask a question (or type 'exit' to quit): exit


In [21]:
# UI implementation
# Read app.py and print its contents for inspection.
# The encoding is explicit: without it, open() uses the platform locale codec,
# which on Windows can fail or mis-decode non-ASCII characters in the source.
with open("app.py", "r", encoding="utf-8") as file:
    content = file.read()

print(content)


# Define the code as a string
streamlit_code = """
import streamlit as st
from langchain_community.embeddings.ollama import OllamaEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain_community.llms import Ollama
from langchain_community.document_loaders import PyPDFLoader

# Initialize the components (assuming you've set them up in your notebook)
def setup_components():
    embedding_model = OllamaEmbeddings()
    return embedding_model

st.title("Document Retrieval System")

uploaded_file = st.file_uploader("Upload a PDF document", type="pdf")
if uploaded_file:
    # Load and process the document
    loader = PyPDFLoader(uploaded_file)
    documents = loader.load()
    st.write(f"Loaded {len(documents)} pages.")

    # Set up embeddings and vectorstore
    embedding_model = setup_components()
    vectorstore = FAISS.from_documents(documents, embedding_model)

    st.write("Documents processed and vector store created.")

    # Add

In [23]:
# Install Streamlit into the kernel's environment (%pip targets the running kernel).
%pip install streamlit




ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tables 3.8.0 requires blosc2~=2.0.0, which is not installed.
tables 3.8.0 requires cython>=0.29.21, which is not installed.
python-lsp-black 1.2.1 requires black>=22.3.0, but you have black 0.0 which is incompatible.



Collecting packaging<24,>=14.1 (from streamlit)
  Obtaining dependency information for packaging<24,>=14.1 from https://files.pythonhosted.org/packages/ec/1a/610693ac4ee14fcdf2d9bf3c493370e4f2ef7ae2e19217d7a237ff42367d/packaging-23.2-py3-none-any.whl.metadata
  Using cached packaging-23.2-py3-none-any.whl.metadata (3.2 kB)
Using cached packaging-23.2-py3-none-any.whl (53 kB)
Installing collected packages: packaging
  Attempting uninstall: packaging
    Found existing installation: packaging 24.1
    Uninstalling packaging-24.1:
      Successfully uninstalled packaging-24.1
Successfully installed packaging-23.2


In [25]:
# Upgrade Streamlit to the latest release in the kernel's environment.
%pip install --upgrade streamlit


Collecting streamlit
  Obtaining dependency information for streamlit from https://files.pythonhosted.org/packages/b0/68/cf905fd2db4a84dc9b46803512b9765a3e9a6dfaa378a67c8db910c44ab3/streamlit-1.37.1-py2.py3-none-any.whl.metadata
  Downloading streamlit-1.37.1-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Obtaining dependency information for pydeck<1,>=0.8.0b4 from https://files.pythonhosted.org/packages/ab/4c/b888e6cf58bd9db9c93f40d1c6be8283ff49d88919231afe93a6bcf61626/pydeck-0.9.1-py2.py3-none-any.whl.metadata
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.37.1-py2.py3-none-any.whl (8.7 MB)
   ---------------------------------------- 0.0/8.7 MB ? eta -:--:--
   ---------------------------------------- 0.0/8.7 MB 1.4 MB/s eta 0:00:07
    --------------------------------------- 0.2/8.7 MB 2.6 MB/s eta 0:00:04
   - -------------------------------------- 0.4/8.7 MB 3.7 MB/s eta 0:00:03
   -- -------------

In [30]:
# Print the installed Streamlit version.
!streamlit --version


Streamlit, version 1.37.1


In [45]:
# Install PyPDF2 using the explicit pip magic
%pip install PyPDF2


Collecting PyPDF2Note: you may need to restart the kernel to use updated packages.

  Obtaining dependency information for PyPDF2 from https://files.pythonhosted.org/packages/8e/5e/c86a5643653825d3c913719e788e41386bee415c2b87b4f955432f2de6b2/pypdf2-3.0.1-py3-none-any.whl.metadata
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
   ---------------------------------------- 0.0/232.6 kB ? eta -:--:--
   - -------------------------------------- 10.2/232.6 kB ? eta -:--:--
   ---------------------------------------- 232.6/232.6 kB 3.5 MB/s eta 0:00:00
Installing collected packages: PyPDF2
Successfully installed PyPDF2-3.0.1


In [4]:
# Launch the Streamlit app in app3.py; the cell stays busy for as long as the command runs.
!streamlit run app3.py


^C


In [6]:
# Install Chainlit using the explicit pip magic
%pip install chainlit


ERROR: Could not install packages due to an OSError: [WinError 5] Access is denied: 'C:\\Users\\Mugdhi Saxena\\anaconda3\\Lib\\site-packages\\~umpy\\core\\_multiarray_tests.cp311-win_amd64.pyd'
Consider using the `--user` option or check the permissions.



Collecting chainlit
  Obtaining dependency information for chainlit from https://files.pythonhosted.org/packages/f8/db/ed38770febc1355441dad14137b4f06cd7f81f5bda48411a614f6a3c85b5/chainlit-1.1.402-py3-none-any.whl.metadata
  Downloading chainlit-1.1.402-py3-none-any.whl.metadata (6.3 kB)
Collecting aiofiles<24.0.0,>=23.1.0 (from chainlit)
  Obtaining dependency information for aiofiles<24.0.0,>=23.1.0 from https://files.pythonhosted.org/packages/c5/19/5af6804c4cc0fed83f47bff6e413a98a36618e7d40185cd36e69737f3b0e/aiofiles-23.2.1-py3-none-any.whl.metadata
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting asyncer<0.0.3,>=0.0.2 (from chainlit)
  Obtaining dependency information for asyncer<0.0.3,>=0.0.2 from https://files.pythonhosted.org/packages/01/61/92a12d4a53f4b88178c36209188ba2478ca51862210edb5d22c9236a9c34/asyncer-0.0.2-py3-none-any.whl.metadata
  Downloading asyncer-0.0.2-py3-none-any.whl.metadata (6.8 kB)
Collecting click<9.0.0,>=8.1.3 (from chainlit)
  Ob

Collecting chainlit
  Obtaining dependency information for chainlit from https://files.pythonhosted.org/packages/f8/db/ed38770febc1355441dad14137b4f06cd7f81f5bda48411a614f6a3c85b5/chainlit-1.1.402-py3-none-any.whl.metadata
  Using cached chainlit-1.1.402-py3-none-any.whl.metadata (6.3 kB)
Collecting aiofiles<24.0.0,>=23.1.0 (from chainlit)
  Obtaining dependency information for aiofiles<24.0.0,>=23.1.0 from https://files.pythonhosted.org/packages/c5/19/5af6804c4cc0fed83f47bff6e413a98a36618e7d40185cd36e69737f3b0e/aiofiles-23.2.1-py3-none-any.whl.metadata
  Using cached aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting asyncer<0.0.3,>=0.0.2 (from chainlit)
  Obtaining dependency information for asyncer<0.0.3,>=0.0.2 from https://files.pythonhosted.org/packages/01/61/92a12d4a53f4b88178c36209188ba2478ca51862210edb5d22c9236a9c34/asyncer-0.0.2-py3-none-any.whl.metadata
  Using cached asyncer-0.0.2-py3-none-any.whl.metadata (6.8 kB)
Collecting click<9.0.0,>=8.1.3 (from chainlit)
 

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
gensim 4.3.0 requires FuzzyTM>=0.4.0, which is not installed.
tables 3.8.0 requires blosc2~=2.0.0, which is not installed.
tables 3.8.0 requires cython>=0.29.21, which is not installed.
anaconda-cloud-auth 0.1.3 requires pydantic<2.0, but you have pydantic 2.8.2 which is incompatible.
numba 0.57.1 requires numpy<1.25,>=1.21, but you have numpy 1.26.4 which is incompatible.
python-lsp-black 1.2.1 requires black>=22.3.0, but you have black 0.0 which is incompatible.
ypy-websocket 0.8.2 requires aiofiles<23,>=22.1.0, but you have aiofiles 23.2.1 which is incompatible.


In [11]:
# Run app4.py as a plain Python script in a subprocess.
!python app4.py

Traceback (most recent call last):
  File "C:\Users\Mugdhi Saxena\cognizant\app4.py", line 10, in <module>
    cl.main()  # This may replace cl.run(), depending on the latest API
    ^^^^^^^
  File "C:\Users\Mugdhi Saxena\anaconda3\Lib\site-packages\chainlit\utils.py", line 70, in __getattr__
    module_path = registry[name]
                  ~~~~~~~~^^^^^^
KeyError: 'main'


In [10]:
# Upgrade Chainlit using the explicit pip magic
%pip install --upgrade chainlit


Note: you may need to restart the kernel to use updated packages.


In [1]:
# Start the Chainlit app in main.py through the chainlit command-line tool.
!chainlit run main.py


Traceback (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "C:\Users\Mugdhi Saxena\anaconda3\Scripts\chainlit.exe\__main__.py", line 7, in <module>
  File "C:\Users\Mugdhi Saxena\anaconda3\Lib\site-packages\click\core.py", line 1157, in __call__
    return self.main(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Mugdhi Saxena\anaconda3\Lib\site-packages\click\core.py", line 1078, in main
    rv = self.invoke(ctx)
         ^^^^^^^^^^^^^^^^
  File "C:\Users\Mugdhi Saxena\anaconda3\Lib\site-packages\click\core.py", line 1688, in invoke
    return _process_result(sub_ctx.command.invoke(sub_ctx))
                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Mugdhi Saxena\anaconda3\Lib\site-packages\click\core.py", line 1434, in invoke
    return ctx.invoke(self.callback, **ctx.params)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Mugdhi Saxena\

In [17]:
# Install langchain_community using the explicit pip magic
%pip install langchain_community

Note: you may need to restart the kernel to use updated packages.


In [None]:
# Launch the Streamlit app in app2.py; the cell stays busy for as long as the command runs.
!streamlit run app2.py