<a href="https://colab.research.google.com/github/JASONZ777/RAG-llm-langchain-interface/blob/main/rag_llm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install packages

In [None]:
# Colab-only setup: mount Google Drive and switch into the project folder so
# that the relative paths used later in the notebook resolve inside Drive.
import os

from google.colab import drive

drive.mount('/content/gdrive')
os.chdir('/content/gdrive/MyDrive/rag-chatbot')

Mounted at /content/gdrive


In [None]:
!pip install gradio --quiet
!pip install chromadb --quiet
!pip install langchain --quiet
!pip install accelerate --quiet
!pip install transformers --quiet
!pip install tiktoken --quiet
!pip install bitsandbytes --quiet
!pip install openai --quiet
!pip install unstructured[pdf] --quiet
!pip install pypdf --quiet
!pip install optimum --quiet
!pip install auto-gptq==0.4.2 --extra-index-url --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/

In [None]:
import torch
import gradio as gr
import shutil
import openai
from textwrap import fill
from IPython.display import Markdown, display

from langchain import PromptTemplate
from langchain import HuggingFacePipeline
from langchain.vectorstores import Chroma
from langchain.memory import ConversationBufferMemory
from langchain.embeddings import HuggingFaceEmbeddings, OpenAIEmbeddings
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import LLMChain, SimpleSequentialChain, RetrievalQA, ConversationChain

from transformers import BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer, GenerationConfig, pipeline
# Directory in which the Chroma vector store is persisted.
CHROMA_PATH = './chroma'
# Directory that holds the uploaded PDF files to be indexed.
DATA_PATH = './data'
# Prompt sent to the LLM. Placeholders: {chat_history} (previous turns),
# {query} (the user's question) and {context} (retrieved document chunks).
PROMPT_TEMPLATE = """
You are a helpful and professional AI Assistant. Given the
following conversation history: {chat_history}, please answer the follow up question with help of the context.
Question: {query}
Context: {context}
"""

In [None]:
from getpass import getpass

# Use the key from the environment when it is already set; otherwise prompt
# for it without echoing, so the secret is never written into the notebook.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")
openai.api_key = os.environ["OPENAI_API_KEY"]

# Upload pdf files & chunk

File uploading is written as a separate function because it is exposed through its own interface (a separate tab) in the UI.

In [None]:
def upload(file_list, dest_dir='./data'):
  """Copy the uploaded files into the local data directory.

  Args:
    file_list: Files handed over by the upload widget. Each entry may be a
      path (``str`` / ``os.PathLike``) or an object that exposes its path
      through a ``name`` attribute (e.g. a temporary-file wrapper). ``None``
      or an empty list means nothing was selected.
    dest_dir: Directory that receives the copies; created if missing.
      Defaults to ``'./data'``, the folder the notebook indexes.

  Returns:
    A status message suitable for display in a text output.
  """
  if not file_list:
    # Submitting the form without selecting files must not crash the UI.
    return 'No files were uploaded'

  os.makedirs(dest_dir, exist_ok=True)
  for file in file_list:
    # Accept both plain paths and file-like objects carrying a ``name`` path.
    src_path = file if isinstance(file, (str, os.PathLike)) else file.name
    new_path = os.path.join(dest_dir, os.path.basename(src_path))
    shutil.copy2(src_path, new_path)
  return 'Successfully uploaded'

In [None]:
def pdf2chunk(DATA_PATH):
    """Load the PDFs found in a directory and split them into text chunks.

    Args:
        DATA_PATH: Directory searched for files matching ``*.pdf``.

    Returns:
        The list of chunks produced by the text splitter.
    """
    documents = DirectoryLoader(DATA_PATH, glob='*.pdf').load()

    # Lengths are measured with len(); consecutive chunks overlap by 100 and
    # each chunk records its start index in the source document.
    splitter_options = dict(
        chunk_size=300,
        chunk_overlap=100,
        length_function=len,
        add_start_index=True,
    )
    chunks = RecursiveCharacterTextSplitter(**splitter_options).split_documents(documents)

    print(f"Split {len(documents)} documents into {len(chunks)} chunks.")
    return chunks

# Retrieve & construct prompts

In [None]:
def chunk2context(chunks,query):
  """Index the chunks in Chroma and build the retrieval context for a query.

  Args:
    chunks: Document chunks to embed and store (as produced by pdf2chunk).
    query: The question used for the similarity search.

  Returns:
    A ``(db, context)`` tuple: the Chroma store persisted under CHROMA_PATH
    and the text of the three best-matching chunks joined by ``---`` lines.
  """
  # NOTE(review): every call adds `chunks` to the same persist directory;
  # confirm whether duplicates accumulate when several questions are asked.
  db = Chroma.from_documents(chunks, OpenAIEmbeddings(), persist_directory=CHROMA_PATH)
  db.persist()  # store in the hard disk

  # The freshly built store is queried directly; reopening it from disk with
  # a second embedding client is unnecessary.
  matches = db.similarity_search_with_relevance_scores(query, k=3)
  # "\n---\n" replaces the former "\n\--\n", whose invalid escape sequence
  # left a stray backslash in the context handed to the model.
  context = "\n---\n".join(doc.page_content for doc, _score in matches)

  return db, context

# Pipeline model: llama2-7b-chat

In [None]:
def model(model_id):
  """Build a LangChain LLM wrapper around a text-generation pipeline.

  Args:
    model_id: Model identifier passed to the ``from_pretrained`` loaders.

  Returns:
    A ``HuggingFacePipeline`` wrapping the configured generation pipeline.
  """
  tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)

  # 4-bit NF4 quantization with double quantization and float16 compute.
  bnb_config = BitsAndBytesConfig(
      load_in_4bit=True,
      bnb_4bit_compute_dtype=torch.float16,
      bnb_4bit_quant_type="nf4",
      bnb_4bit_use_double_quant=True,
  )
  causal_lm = AutoModelForCausalLM.from_pretrained(
      model_id,
      torch_dtype=torch.float16,
      trust_remote_code=True,
      device_map="auto",
      quantization_config=bnb_config,
  )

  # Start from the model's own generation defaults, then override sampling.
  generation_config = GenerationConfig.from_pretrained(model_id)
  sampling_overrides = {
      "max_new_tokens": 1024,
      "temperature": 0.1,
      "top_p": 0.95,
      "do_sample": True,
      "repetition_penalty": 1.15,
  }
  for option, value in sampling_overrides.items():
    setattr(generation_config, option, value)

  return HuggingFacePipeline(
      pipeline=pipeline(
          "text-generation",
          model=causal_lm,
          tokenizer=tokenizer,
          generation_config=generation_config,
      )
  )

# Follow-up Q/A

In [None]:
# Conversation memory shared by every chatbot() call in this kernel session.
# It lives at module level because a memory created inside the function
# would start empty on each call and no follow-up question could see earlier
# turns. `input_key` names the input to record, since the chain receives two
# inputs (query and context).
_CHAT_MEMORY = ConversationBufferMemory(memory_key="chat_history", input_key="query")

def chatbot(model,context,query,memory=None):
  """Answer a question using the retrieved context and the chat history.

  Args:
    model: LangChain LLM used to generate the answer.
    context: Retrieved document text inserted into the prompt.
    query: The user's (follow-up) question.
    memory: Optional conversation memory. It must expose ``chat_history`` as
      its memory key and use ``query`` as its input key. Defaults to the
      session-wide memory defined above.

  Returns:
    The text generated by the model.
  """
  prompt_template = PromptTemplate.from_template(PROMPT_TEMPLATE)
  # LLMChain is used because ConversationChain neither accepts a
  # `condense_question_prompt` argument nor supports the two inputs
  # (context and query) that PROMPT_TEMPLATE requires.
  qa_chain = LLMChain(
      llm=model,
      prompt=prompt_template,
      memory=_CHAT_MEMORY if memory is None else memory,
  )
  response = qa_chain.predict(context=context,query=query)
  return response

# Combine all components & UI

In [None]:
# Start from a clean slate: drop the vector store and the uploaded files left
# over from a previous run, so the UI supports multiple questions.
for stale_dir in (CHROMA_PATH, DATA_PATH):
    if os.path.exists(stale_dir):
        shutil.rmtree(stale_dir)

# The upload tab copies files into this folder, so it has to exist up front.
os.mkdir(DATA_PATH)

# Loaded language models keyed by model id, so the model is loaded once per
# kernel session instead of on every chat message.
_LLM_CACHE = {}

def Arabica(query, history):
  """Chat callback: answer `query` from the uploaded PDF files.

  Args:
    query: The message typed by the user.
    history: Previous turns supplied by the chat interface; not used here.

  Returns:
    The response text produced by chatbot().
  """
  # The documents are re-read on every message so that files uploaded between
  # two questions are taken into account.
  chunks = pdf2chunk(DATA_PATH)
  _, context = chunk2context(chunks,query)

  model_id = "TheBloke/Llama-2-7b-Chat-GPTQ"
  if model_id not in _LLM_CACHE:
    _LLM_CACHE[model_id] = model(model_id)
  llm = _LLM_CACHE[model_id]

  response_text = chatbot(llm, context, query)
  return response_text

# Tab 1: upload form that accepts one or more PDF files and shows a status text.
file_upload = gr.Interface(
    fn=upload,
    inputs=gr.File(file_count='multiple', file_types=['.pdf']),
    outputs=gr.Text(),
)

# Tab 2: chat window backed by the retrieval pipeline.
chat = gr.ChatInterface(fn=Arabica, title="Arabicabot")

# Combine both tabs and start the app.
demo = gr.TabbedInterface(
    [file_upload, chat],
    ["Additional files", "Arabicabot"],
)
demo.launch(debug=True)

