In [None]:
# Mount Google Drive at /content/drive; the paths used by later cells
# (PDF folder, vector store, model file) all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Install necessary Python packages for the project using pip
!pip install PyPDF2
!pip install faiss-cpu
!pip install faiss-gpu
!pip install accelerate
!pip install CTransformers[cuda]
!pip install sentence-transformers
!pip install langchain
!pip install streamlit --quiet
!pip install streamlit_chat

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyPDF2
Successfully installed PyPDF2-3.0.1
Collecting faiss-cpu
  Downloading faiss_cpu-1.7.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.6/17.6 MB[0m [31m40.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.7.4
Collecting faiss-gpu
  Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (85.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-gpu
Successfully installed faiss-gpu-1.7.2
Collecting accelerate
  Downloading accelerate-0.25.0-py3-none-any.whl (26

In [None]:
# Importing necessary libraries and modules
import os
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.llms import CTransformers
from accelerate import Accelerator
from langchain.chains import RetrievalQA
from langchain import PromptTemplate


In [None]:
# Google Drive locations used by the indexing and QA cells:
#   pdf_folder_path - folder that holds the source PDF files
#   DB_FAISS_PATH   - folder where the FAISS vector store is saved
pdf_folder_path="/content/drive/MyDrive/Projectl/work/PDF's"
DB_FAISS_PATH = "/content/drive/MyDrive/Projectl/work/vectorstores/db_faiss"

In [None]:
# Function to load PDF documents from a specified path
def load_pdfs(data_path):
    """Load every PDF file found directly inside ``data_path``.

    The directory is scanned non-recursively. File names are matched
    case-insensitively on the ``.pdf`` suffix and processed in sorted order so
    that repeated runs produce the documents in the same order.

    Args:
        data_path: Path of the directory to scan.

    Returns:
        list: The concatenation of ``PyPDFLoader(path).load()`` for each PDF,
        in file-name order. Empty when the directory contains no PDF files.

    Raises:
        OSError: If ``data_path`` cannot be listed (propagated from ``os.listdir``).
    """
    loaded_documents = []

    # os.listdir() returns entries in arbitrary order; sort for reproducibility.
    for file in sorted(os.listdir(data_path)):
        # After lower() a single lowercase suffix covers .pdf, .PDF, .Pdf, ...
        if not file.lower().endswith('.pdf'):
            continue

        pdf_path = os.path.join(data_path, file)
        # Skip sub-directories that merely happen to be named "*.pdf".
        if not os.path.isfile(pdf_path):
            continue

        loader = PyPDFLoader(pdf_path)
        loaded_documents.extend(loader.load())
        print(f"Loaded PDF: {file}")

    return loaded_documents

In [None]:
# Function to create a vector database from PDF documents

def create_vector_db(data_path, db_path):
    """Build a FAISS vector store from the PDFs in ``data_path`` and save it.

    Steps: load the PDFs, split them into 500-character chunks with a
    50-character overlap, embed the chunks with
    ``sentence-transformers/all-MiniLM-L6-v2`` on CPU, index them with FAISS
    and persist the index to ``db_path``.

    Args:
        data_path: Directory containing the source PDF files.
        db_path: Directory the FAISS index is written to.

    Raises:
        ValueError: If no documents could be loaded from ``data_path``.
    """
    # Load PDFs
    documents = load_pdfs(data_path)
    if not documents:
        # Fail early with a clear message instead of an obscure error from
        # the FAISS index construction on an empty list.
        raise ValueError(f"No PDF content found in {data_path!r}; nothing to index.")

    # len(documents) is NOT the number of PDFs: the loader returns several
    # documents per file (the original run reported 1140 for 5 files).
    # Count distinct source files instead.
    # Assumes each document carries its file path under metadata['source'],
    # as PyPDFLoader documents do - TODO confirm if the loader is swapped.
    num_pdfs_loaded = len({doc.metadata.get('source') for doc in documents})

    # Text Splitting
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    texts = text_splitter.split_documents(documents)

    # Embedding Setup
    embeddings = HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-MiniLM-L6-v2',
        model_kwargs={'device': 'cpu'},
    )

    # Vector Database Creation with FAISS
    db = FAISS.from_documents(texts, embeddings)

    # Saving the Vector Database
    db.save_local(db_path)

    print(f"Total PDFs loaded: {num_pdfs_loaded}")
    print(f"Total pages loaded: {len(documents)}; chunks indexed: {len(texts)}")


In [None]:
# Build the FAISS index from the PDFs in pdf_folder_path and save it to DB_FAISS_PATH
create_vector_db(data_path=pdf_folder_path, db_path=DB_FAISS_PATH)

Loaded PDF: 02-Biggby Coffee April 30, 2021 FDD-clean-final v2.pdf
Loaded PDF: Atomic Wings AR FDD-Complete Clean.pdf
Loaded PDF: 2021 Amazing Athletes FDD.ISSUED.4.30.21.pdf
Loaded PDF: 2019 Bloomin Blinds FDD.pdf
Loaded PDF: 08-21 Wahlburgers Tribal Casino FDD (Corrected 080521).PDF
Total PDFs loaded: 1140


In [None]:
# Function to load a language model
def load_llm(
    model_path="/content/drive/MyDrive/Projectl/Model/llama-2-7b-chat.Q4_K_M.gguf",
    config=None,
):
    """Load the quantised Llama-2 chat model through LangChain's CTransformers wrapper.

    Args:
        model_path: Path of the GGUF model file. Defaults to the model stored
            on Google Drive.
        config: Optional dict of generation settings; its entries override the
            defaults (``max_new_tokens=512``, ``repetition_penalty=1.1``,
            ``temperature=0.5``, ``gpu_layers=50``).

    Returns:
        The configured ``CTransformers`` LLM instance.
    """
    settings = {'max_new_tokens': 512, 'repetition_penalty': 1.1, 'temperature': 0.5, 'gpu_layers': 50}
    if config:
        settings.update(config)

    # No Accelerator().prepare() here: prepare() only wraps torch objects
    # (models, optimizers, dataloaders, schedulers) and hands anything else
    # back unchanged, so it was a no-op for this LLM wrapper and a plain dict.
    # GPU offload is requested through the `gpu_layers` setting instead.
    return CTransformers(
        model=model_path,
        model_type="llama",
        config=settings,
    )

In [None]:
# Prompt text for the expert chatbot.
# {context} and {question} are placeholders that are filled in when the prompt
# is formatted; the remaining text instructs the model to answer only from the
# supplied context and to admit when it does not know the answer.
custom_prompt_template = """As an expert chatbot, Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [None]:
# Function to set a custom prompt template for QA retrieval
def set_custom_promot():
    """Build the prompt template used for QA retrieval.

    Returns:
        PromptTemplate: Wraps ``custom_prompt_template`` and declares
        ``context`` and ``question`` as its input variables.
    """
    # The function name is misspelt ("promot") but is kept as-is for existing callers.
    return PromptTemplate(
        input_variables=['context', 'question'],
        template=custom_prompt_template,
    )

In [None]:
def retrieval_qa_chain(llm, prompt, db):
    """Assemble a ``RetrievalQA`` chain of type "stuff" over the vector store.

    Args:
        llm: Language model that generates the answer.
        prompt: Prompt template handed to the chain via ``chain_type_kwargs``.
        db: Vector store; its retriever is configured with ``k=2``.

    Returns:
        The chain, configured to return the source documents with each answer.
    """
    top_k_retriever = db.as_retriever(search_kwargs={'k': 2})
    chain_options = {'prompt': prompt}
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=top_k_retriever,
        return_source_documents=True,
        chain_type_kwargs=chain_options,
    )

In [None]:
# Function to set up a QA bot using embeddings, vector store, language model, and retrieval QA chain
def qa_bot():
    """Wire together embeddings, the saved FAISS index, the LLM and the prompt.

    Returns:
        The retrieval QA chain produced by ``retrieval_qa_chain``.
    """
    index_dir = "/content/drive/MyDrive/Projectl/work/vectorstores/db_faiss"
    embedder = HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-MiniLM-L6-v2',
        model_kwargs={'device': 'cpu'},
    )
    # The index is loaded with the same embedding model settings as above.
    vector_store = FAISS.load_local(index_dir, embedder)
    return retrieval_qa_chain(load_llm(), set_custom_promot(), vector_store)

### Model *performance* checking without using Streamlit.

In [None]:
# Function to obtain the final result from the QA bot based on a user query
def final_result(query, refresh=False):
    """Answer ``query`` with the QA bot.

    The QA chain is built on the first call and then reused: rebuilding it
    reloads the embedding model, the FAISS index and the LLM, which is far too
    expensive to repeat for every question.

    Args:
        query: The user's question.
        refresh: When True, rebuild the chain before answering (e.g. after the
            vector store on disk has been regenerated). Defaults to False.

    Returns:
        The chain's response for ``{'query': query}``.
    """
    # The chain is cached on the function object, so re-running this cell
    # (which redefines the function) also discards the cached chain.
    chain = getattr(final_result, "_qa_chain", None)
    if refresh or chain is None:
        chain = qa_bot()
        final_result._qa_chain = chain
    return chain({'query': query})

In [None]:
# Smoke test: ask a single question and print the raw response of the chain
user_query = "what is Wahlburgers Franchising?"
result = final_result(query=user_query)
print(result)

.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

train_script.py:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

{'query': 'what is Wahlburgers Franchising?', 'result': 'Wahlburgers Franchising refers to the process of granting franchises for the establishment of Wahlburgers restaurants. This includes the marketing and sale of franchise agreements to independent business owners who will operate Wahlburgers restaurants under a licensing agreement with Wahlburgers Worldwide, LLC. The franchisees will be responsible for operating their restaurants according to the standards and guidelines set by Wahlburgers Worldwide, LLC, which may include using the Wahlburgers brand name, menu items, and marketing materials. In exchange for a fee and ongoing royalties, the franchisees will have the right to operate their Wahlburgers restaurants under the umbrella of the Wahlburgers brand.', 'source_documents': [Document(page_content='establishment of a third party facility for customizing local advertising; accounting costs; and \nholding an annual franchise convention.  Wahlburgers will not use the Brand Fund for

# Streamlit code

In [None]:
%%writefile app.py
import streamlit as st
from langchain.document_loaders import PyMuPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import CTransformers
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from accelerate import Accelerator
from streamlit_chat import message
import time

# Paths
DB_FAISS_PATH = '/content/drive/MyDrive/Projectl/work/vectorstores/db_faiss'
MODEL_PATH = "/content/drive/MyDrive/Projectl/Model/llama-2-7b-chat.Q4_K_M.gguf"

# Prompt text; {context} and {question} are filled in by the QA chain.
custom_prompt_template = """As an expert chatbot, Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""


@st.cache_resource
def load_qa_chain():
    """Build the retrieval QA chain once per server process.

    Streamlit re-executes this script on every user interaction. Without
    caching, the LLM, the embedding model and the FAISS index would be
    reloaded for each chat message.

    Returns:
        The ``RetrievalQA`` chain used to answer chat queries.
    """
    config = {'max_new_tokens': 1024, 'repetition_penalty': 1.1, 'temperature': 0.75, 'gpu_layers': 40}
    # No Accelerator().prepare() here: it only wraps torch objects and returns
    # anything else unchanged, so it was a no-op for this LLM wrapper.
    llm = CTransformers(
        model=MODEL_PATH,
        model_type="llama",
        config=config
    )

    # Create a HuggingFaceEmbeddings instance
    embeddings = HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-MiniLM-L6-v2',
        model_kwargs={'device': 'cpu'},
    )

    # Load the FAISS vector store saved by the notebook
    db = FAISS.load_local(DB_FAISS_PATH, embeddings=embeddings)

    prompt = PromptTemplate(template=custom_prompt_template, input_variables=['context', 'question'])

    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=db.as_retriever(search_kwargs={'k': 2}),
        return_source_documents=True,
        chain_type_kwargs={'prompt': prompt}
    )


# Create a QA chain (served from the cache on every rerun after the first)
qa_chain = load_qa_chain()

# Function for conversational chat
def conversational_chat(queries):
    """Run each query through the QA chain and return the answer texts.

    Args:
        queries: Iterable of question strings.

    Returns:
        list[str]: One stripped answer per query. If inference raises, the
        error is shown in the UI and every answer is the string "Error"
        (at least one entry, so callers reading the first answer still work).
    """
    queries = list(queries)
    try:
        # No artificial delay: the spinner stays visible for as long as
        # inference actually takes.
        with st.spinner("Bot is working..."):
            results = [qa_chain({"query": query}) for query in queries]
    except Exception as e:
        st.error(f"Error during model inference: {e}")
        # Keep the fallback aligned with the number of queries asked.
        results = [{"result": "Error"}] * max(1, len(queries))

    return [result.get('result', '').strip() for result in results]

# Seed the session state on the first run of a session: a chat history list
# (not read anywhere in this script) plus the opening bot and user messages.
for state_key, initial_value in (
    ('history', []),
    ('generated', ["Hello ! Ask me (LLAMA2  Chatbot) 🤗"]),
    ('past', ["Hey ! 👋"]),
):
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value

# UI code for chat interaction: the transcript container is created before the
# container that holds the input form.
response_container = st.container()
container = st.container()

with container:
    with st.form(key='my_form', clear_on_submit=True):
        user_input = st.text_input("Query:", placeholder="Talk to LLAMA2 🧮", key='input')
        submit_button = st.form_submit_button(label='Send')

    # Only query the model when the form was submitted with a non-empty question.
    if submit_button and user_input:
        replies = conversational_chat([user_input])
        st.session_state['past'].append(user_input)
        st.session_state['generated'].append(replies[0])

# Render the transcript: each user message followed by the matching bot reply.
if st.session_state['generated']:
    with response_container:
        for idx, bot_text in enumerate(st.session_state['generated']):
            message(st.session_state["past"][idx], is_user=True, key=f"{idx}_user", avatar_style="big-smile")
            message(bot_text, key=str(idx), avatar_style="thumbs")


Writing app.py


In [None]:
# Start the Streamlit app in the background (&) and expose port 8501 through a localtunnel URL.
!streamlit run /content/app.py & npx localtunnel --port 8501

[..................] | fetchMetadata: sill resolveWithNewModule localtunnel@2.0[0m[K
Collecting usage statistics. To deactivate, set browser.gatherUsageStats to False.
[0m
[K[?25hnpx: installed 22 in 6.355s
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://35.233.161.155:8501[0m
[0m
your url is: https://honest-pillows-dream.loca.lt
.gitattributes: 100% 1.18k/1.18k [00:00<00:00, 5.94MB/s]
1_Pooling/config.json: 100% 190/190 [00:00<00:00, 1.13MB/s]
README.md: 100% 10.6k/10.6k [00:00<00:00, 46.8MB/s]
config.json: 100% 612/612 [00:00<00:00, 2.92MB/s]
config_sentence_transformers.json: 100% 116/116 [00:00<00:00, 637kB/s]
data_config.json: 100% 39.3k/39.3k [00:00<00:00, 655kB/s]
pytorch_model.bin: 100% 90.9M/90.9M [00:02<00:00, 30.7MB/s]
sentence_bert_config.json: 100% 53.0/53.0 [00:00<00:00, 319kB/s]
special_tokens_map.json: 100% 112/112 [00:00<00:00, 624kB/s]
token

## Sample questions I wrote that relate to the provided PDFs



In [None]:
#1.What is the total investment range for a BIGGBY® COFFEE franchise, including payments to the franchisor?
#2.What precaution is advised before signing a BIGGBY® COFFEE franchise agreement?
#3.What is the total investment range to become a Wahlburgers master franchisee?
#4.What precaution is advised before signing a Wahlburgers master franchise agreement?
#5.What is the total investment range to start a Bloomin’ Blinds franchise?
#6.What precaution is recommended before signing a Bloomin’ Blinds franchise agreement?
#7.What is the total investment range for the "Complete AA Program" franchise?
#8.What is the total investment range for the "Basic Package" franchise?
#9.What is the total investment range for starting an Atomic Wings area representative business?
#10.What services does an Atomic Wings area representative provide in their defined territory?
