<a href="https://colab.research.google.com/github/Mohamed-Bencheikh/RAG/blob/main/RAG_PDF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ***RAG: Chat with your documents***

# **DEPENDENCIES**

In [None]:
!pip install langchain langchain_community

In [None]:
# !pip install unstructured[all-docs] unstructured
!pip install pypdf

In [None]:
!pip install chromadb

In [None]:
!pip install pyngrok ngrok

In [None]:
!pip install gradio
!pip install streamlit

In [None]:
!curl -fsSL https://ollama.com/install.sh | sh

In [1]:
# !cat /usr/local/bin/ollama

# **IMPORTS**

In [8]:
from langchain_community.document_loaders import PyPDFLoader, WebBaseLoader

In [9]:
from langchain_community.llms import Ollama

In [10]:
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma

In [11]:
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever

In [12]:
import os
import threading
import asyncio
from aiohttp import ClientSession
import time
from pyngrok import ngrok
import queue
from threading import Thread

In [13]:
import gradio as gr
import streamlit as st

# **LOADING**

In [None]:
file_path = "/content/LLMs enhanced CF.pdf"

In [None]:
loader = UnstructuredPDFLoader(file_path)
data = loader.load()

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


In [None]:
data[0].page_content

'4 2 0 2\n\nr a\n\nM 6 2\n\n]\n\nR\n\nI . s c [\n\n1 v 8 8 6 7 1 . 3 0 4 2 : v i X r a\n\nLarge Language Models Enhanced Collaborative Filtering\n\nZhongxiang Sun Gaoling School of Artificial Intelligence Renmin University of China Beijing, China sunzhongxiang@ruc.edu.cn\n\nZihua Si Gaoling School of Artificial Intelligence Renmin University of China Beijing, China zihua_si@ruc.edu.cn\n\nXiaoxue Zang Kuaishou Technology Co., Ltd. Beijing, China zangxiaoxue@kuaishou.com\n\nKai Zheng Kuaishou Technology Co., Ltd. Beijing, China zhengkai@kuaishou.com\n\nYang Song Kuaishou Technology Co., Ltd. Beijing, China yangsong@kuaishou.com\n\nXiao Zhang Jun Xu Gaoling School of Artificial Intelligence Renmin University of China Beijing, China {zhangx89,junxu}@ruc.edu.cn\n\nABSTRACT Recent advancements in Large Language Models (LLMs) have at- tracted considerable interest among researchers to leverage these models to enhance Recommender Systems (RSs). Existing work predominantly utilizes LLMs to gene

# **SPLITTING DOCUMENT**

In [None]:
text_sp = RecursiveCharacterTextSplitter(chunk_size=7500, chunk_overlap=100)
chunks = text_sp.split_documents(data)

In [None]:
chunks[0].page_content

'4 2 0 2\n\nr a\n\nM 6 2\n\n]\n\nR\n\nI . s c [\n\n1 v 8 8 6 7 1 . 3 0 4 2 : v i X r a\n\nLarge Language Models Enhanced Collaborative Filtering\n\nZhongxiang Sun Gaoling School of Artificial Intelligence Renmin University of China Beijing, China sunzhongxiang@ruc.edu.cn\n\nZihua Si Gaoling School of Artificial Intelligence Renmin University of China Beijing, China zihua_si@ruc.edu.cn\n\nXiaoxue Zang Kuaishou Technology Co., Ltd. Beijing, China zangxiaoxue@kuaishou.com\n\nKai Zheng Kuaishou Technology Co., Ltd. Beijing, China zhengkai@kuaishou.com\n\nYang Song Kuaishou Technology Co., Ltd. Beijing, China yangsong@kuaishou.com\n\nXiao Zhang Jun Xu Gaoling School of Artificial Intelligence Renmin University of China Beijing, China {zhangx89,junxu}@ruc.edu.cn\n\nABSTRACT Recent advancements in Large Language Models (LLMs) have at- tracted considerable interest among researchers to leverage these models to enhance Recommender Systems (RSs). Existing work predominantly utilizes LLMs to gene

# **EMBEDDINGS**

In [None]:
embeddings = OllamaEmbeddings(model="nomic-embed-text", show_progress=True)

In [None]:
vector_db = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    collection_name="local-rag"
)

In [None]:
import pickle as pk

In [None]:
# pk.dump(vector_db,open('vector_db.pkl','wb'))
#   # pk.dump(vector_db, file)

# **Retreival**

In [None]:
#LLM from Ollama
llm = ChatOllama(model="mistral")

NameError: name 'ChatOllama' is not defined

In [None]:
query_prompt = PromptTemplate(
    input_variables= ['question'],
    template= "You are an AI language model assistant, your task is to generate a response for the question: {question}"
)

In [None]:
retreiver = MultiQueryRetriever.from_llm(
    retriever=vector_db.as_retriever(),
    llm=llm,
    prompt=query_prompt
)

In [None]:
# RAG Prompt
template = """Answer the question based only on the following context:
{context}
Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

In [None]:
chain = (
    {"context": retreiver, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

NameError: name 'retreiver' is not defined

In [None]:
import streamlit
def main():
  st.title("Ollama Chatbot with RAG")

  chat_input = st.text_input("Ask your question:")
  if chat_input:
    response = chain.run(context="", question=chat_input)
    st.write("Chatbot Response:")
    st.write(response)

if __name__ == '__main__':
  main()

In [None]:
chain.invoke(input("> "))

# **PORT FORWARDING**

In [17]:
# !curl -fsSL https://ollama.com/install.sh | sh

In [52]:
# Get your ngrok token from your ngrok account:
# https://dashboard.ngrok.com/get-started/your-authtoken
token="2fojkzjyEaSY0fGVPQpPAVEf1G5_3DgCWGRS2t6JCzvDiLBm1"
ngrok.set_auth_token(token)

# set up a stoppable thread (not mandatory, but cleaner if you want to stop this later
class StoppableThread(threading.Thread):
    def __init__(self, *args, **kwargs):
        super(StoppableThread, self).__init__(*args, **kwargs)
        self._stop_event = threading.Event()

    def stop(self):
        self._stop_event.set()

    def is_stopped(self):
        return self._stop_event.is_set()

def start_ngrok(q, stop_event):
    try:
        # Start an HTTP tunnel on the specified port
        public_url = ngrok.connect(11434)
        # Put the public URL in the queue
        q.put(public_url)
        # Keep the thread alive until stop event is set
        while not stop_event.is_set():
            time.sleep(1)  # Adjust sleep time as needed
    except Exception as e:
        print(f"Error in start_ngrok: {e}")

In [55]:
# Create a queue to share data between threads
url_queue = queue.Queue()
# Start ngrok in a separate thread
ngrok_thread = StoppableThread(target=start_ngrok, args=(url_queue, StoppableThread.is_stopped))
ngrok_thread.start()

Error in start_ngrok: 'function' object has no attribute 'is_set'


In [56]:
# Wait for the ngrok tunnel to be established
while True:
    try:
        public_url = url_queue.get()
        if public_url:
            break
        print("Waiting for ngrok URL...")
        time.sleep(1)
    except Exception as e:
        print(f"Error in retrieving ngrok URL: {e}")

print("Ngrok tunnel established at:", public_url)

Ngrok tunnel established at: NgrokTunnel: "https://2d47-34-126-132-182.ngrok-free.app" -> "http://localhost:11434"


In [57]:
!export OLLAMA_HOST=https://2d47-34-126-132-182.ngrok-free.app/

In [58]:
import os
import asyncio

# NB: You may need to set these depending and get cuda working depending which backend you are running.
# Set environment variable for NVIDIA library
# Set environment variables for CUDA
os.environ['PATH'] += ':/usr/local/cuda/bin'
# Set LD_LIBRARY_PATH to include both /usr/lib64-nvidia and CUDA lib directories
os.environ['LD_LIBRARY_PATH'] = '/usr/lib64-nvidia:/usr/local/cuda/lib64'

async def run_process(cmd):
    print('>>> starting', *cmd)
    process = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE
    )

    # define an async pipe function
    async def pipe(lines):
        async for line in lines:
            print(line.decode().strip())

        await asyncio.gather(
            pipe(process.stdout),
            pipe(process.stderr),
        )

    # call it
    await asyncio.gather(pipe(process.stdout), pipe(process.stderr))

In [59]:
import asyncio
import threading

async def start_ollama_serve():
    await run_process(['ollama', 'serve'])

def run_async_in_thread(loop, coro):
    asyncio.set_event_loop(loop)
    loop.run_until_complete(coro)
    loop.close()

# Create a new event loop that will run in a new thread
new_loop = asyncio.new_event_loop()

# Start ollama serve in a separate thread so the cell won't block execution
thread = threading.Thread(target=run_async_in_thread, args=(new_loop, start_ollama_serve()))
thread.start()

>>> starting ollama serve
time=2024-05-02T06:42:40.474Z level=INFO source=images.go:817 msg="total blobs: 9"
time=2024-05-02T06:42:40.475Z level=INFO source=images.go:824 msg="total unused blobs removed: 0"
time=2024-05-02T06:42:40.475Z level=INFO source=routes.go:1143 msg="Listening on 127.0.0.1:11434 (version 0.1.32)"
time=2024-05-02T06:42:40.476Z level=INFO source=payload.go:28 msg="extracting embedded files" dir=/tmp/ollama461969675/runners


# **PULLING MODELS**

In [None]:
!ollama pull mistral

In [None]:
!ollama pull nomic-embed-text

In [None]:
!ollama list

# **CHAT INTERFACE**

In [33]:
def process(url, question):
  ##load data
  # loader = PyPDFLoader(document)
  loader = WebBaseLoader(url)
  data = loader.load()
  ## splitting
  text_sp = RecursiveCharacterTextSplitter(chunk_size=7500, chunk_overlap=100)
  chunks = text_sp.split_documents(data)
  ## embedding
  embeddings = OllamaEmbeddings(model="nomic-embed-text", show_progress=True)
  ## Vector store
  vector_db = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    collection_name="local-rag"
  )
  ## Retreival
  llm = ChatOllama(model="mistral")
  query_prompt = PromptTemplate(
    input_variables= ['question'],
    template= "You are an AI language model assistant, your task is to generate a response for the question: {question}"
  )
  retreiver = MultiQueryRetriever.from_llm(
    retriever=vector_db.as_retriever(),
    llm=llm,
    prompt=query_prompt
  )
  # RAG Prompt
  template = """Answer the question based only on the following context:
  {context}
  Question: {question}
  """
  prompt = ChatPromptTemplate.from_template(template)
  chain = (
    {"context": retreiver, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
  )
  return chain.invoke(question)

In [48]:
# Create a Gradio interface for file upload
iface = gr.Interface(
    fn=process,
    inputs=["text", "text"],
    outputs="text",
    title="ChatDoc",
    description="Ask about a webpage."
 )


In [None]:
# Launch the Gradio interface
iface.launch(debug=True, share=True)

In [44]:
# Create a Streamlit app
st.title("ChatDoc")

# Add a text input widget to get the user's input
url = st.text_input("Enter the URL of the web page:")
question = st.text_input("Enter your question:")

if st.button("Get answer"):
  # Call the function to process the user's input
  answer = process(url, question)

  # Display the answer to the user
  st.write("Answer:", answer)
st.rerun()


In [None]:
# prompt: how to run the code above as a streamlit app on colab

!pip install streamlit
!streamlit run app.py


# **EXECUTING**

In [28]:
file_path = "intro.pdf"

In [29]:
loader = PyPDFLoader(file_path)
data = loader.load()

In [31]:
question = "What is this about?"

In [None]:
process(file_path, question)

In [22]:
LLM = Ollama(model='mistral')

In [24]:
parser = StrOutputParser()
chain = LLM | parser

In [None]:
chain.invoke("what is the first country recognized the Independence of the United States?")

In [31]:
my_context = "My name is Mohamed, I was born at July, 24 2000, I am from Morocco."
my_question = "what is my nationality?"

In [29]:
test_template = "Answer the question: {question} based on the context: {context}"
test_prompt = PromptTemplate.from_template(test_template)
test_prompt.format(question= my_question, context= my_context)

'Answer the question: How old am I? based on the context: My name is Mohamed, I was born at July, 24 2000, I am from Morocco.'

In [None]:
test_chain = test_prompt | LLM | parser
test_chain.invoke({"question": my_question, "context": my_context})

In [62]:
import logging
# Set logging to show only warnings and errors
logging.basicConfig(level=logging.WARNING)


In [None]:
process(url="https://medium.com/riskified-technology/3-ways-to-break-into-data-science-6a7a8fd679b3", question="Who is the author of this blog?")