In [None]:
!pip install -q sentence-transformers langchain  chromadb PyPDF2

In [2]:
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7 xformers

In [3]:
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import RetrievalQA
import transformers
from langchain.llms import HuggingFacePipeline
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

In [None]:
from transformers import LlamaTokenizer, LlamaForCausalLM, pipeline

# Checkpoint used for both the tokenizer and the causal language model.
model_name = "NousResearch/llama-2-7b-chat-hf"

tokenizer = LlamaTokenizer.from_pretrained(model_name)

# Model loading options: 4-bit loading is requested and device placement is
# delegated to `device_map='auto'`.
model_load_kwargs = dict(load_in_4bit=True, device_map='auto')
base_model = LlamaForCausalLM.from_pretrained(model_name, **model_load_kwargs)

# Generation settings forwarded to the text-generation pipeline.
generation_kwargs = dict(
    max_length=2048,
    temperature=0.5,
    top_p=0.95,
    repetition_penalty=1.15,
)
pipe = pipeline(
    "text-generation",
    model=base_model,
    tokenizer=tokenizer,
    **generation_kwargs,
)

# Wrap the transformers pipeline so LangChain chains can use it as an LLM.
llm = HuggingFacePipeline(pipeline=pipe)

In [None]:
# Sentence-embedding model used to vectorise the text chunks.
embed_model_id = "sentence-transformers/all-MiniLM-L6-v2"
embed_model = HuggingFaceEmbeddings(
    model_name=embed_model_id,
)

In [7]:
def load_pdf(pdf_path, chunk_size=1000, chunk_overlap=200):
    """Extract the text of a PDF and split it into overlapping chunks.

    Args:
        pdf_path: Location of the PDF; passed straight to ``PdfReader``.
        chunk_size: Maximum chunk length, measured with ``len`` (characters).
        chunk_overlap: Number of characters shared by consecutive chunks
            (the stride overlap).

    Returns:
        The chunks returned by ``CharacterTextSplitter.split_text``.
    """
    doc_reader = PdfReader(pdf_path)

    # Keep only pages that actually yielded text (extract_text() may be falsy).
    page_texts = [
        text
        for text in (page.extract_text() for page in doc_reader.pages)
        if text
    ]
    # Join pages with a newline so the last word of one page is not fused with
    # the first word of the next, and so the splitter (which separates on "\n")
    # can break at page boundaries.
    raw_text = "\n".join(page_texts)

    # Split the text into smaller chunks for indexing.
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    return text_splitter.split_text(raw_text)

In [8]:
# PDF to index, chunked with load_pdf's default settings.
pdf_path = "/content/2307.09288.pdf"
texts = load_pdf(pdf_path)

In [None]:
# Preview a few chunks only; echoing the whole list floods the notebook output.
print(f"{len(texts)} chunks")
texts[:3]

In [10]:
# Embed every chunk and store the vectors in a Chroma collection under "DB".
# NOTE(review): presumably re-running this cell adds the same chunks to the
# existing "DB" directory again - verify before relying on Restart & Run All.
vectordb = Chroma.from_texts(
    texts,
    embedding=embed_model,
    persist_directory="DB",
)

In [17]:
# Retriever over the vector store, configured with k=5.
retriever = vectordb.as_retriever(search_kwargs={"k": 5})

# Question-answering chain combining the retriever with the LLM
# using the 'stuff' chain type.
rag_pipeline = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=retriever,
)

In [None]:
# Smoke-test the chain with a single question and print the answer text.
query = "what is llama2 ?"
rag_output = rag_pipeline(query)
llm_response = rag_output["result"]
print(llm_response)

In [None]:
! pip install gradio

In [None]:
# Define a function to generate responses from your chatbot model
def chatbot_response(input_text):
    """Answer a user message by running it through the RAG pipeline.

    Args:
        input_text: The message to pass to ``rag_pipeline``.

    Returns:
        The ``"result"`` entry of the value returned by ``rag_pipeline``.
    """
    rag_output = rag_pipeline(input_text)
    return rag_output["result"]

In [None]:
import gradio as gr

# Create a Gradio interface: one text box in, plain text out.
iface = gr.Interface(
    fn=chatbot_response,  # Function to generate responses
    # Textbox has no `text` argument; the hint shown in the empty box is
    # set with `placeholder`.
    inputs=gr.Textbox(placeholder="Enter your message here"),
    outputs="text",  # Display the response as text
)

# Launch the Gradio interface.
# NOTE(review): share=True requests a publicly reachable link and debug=True
# keeps the cell running - confirm both are intended outside of Colab demos.
iface.launch(share=True, debug=True)
