# Installing Modules

In [2]:

!pip install langchain chromadb pypdf sentence_transformers InstructorEmbedding streamlit bitsandbytes ctransformers[cuda] accelerate einops safetensors xformers
!pip install -q -U git+https://github.com/huggingface/peft.git


Collecting langchain
  Downloading langchain-0.0.352-py3-none-any.whl (794 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m794.4/794.4 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting chromadb
  Downloading chromadb-0.4.21-py3-none-any.whl (508 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m508.6/508.6 kB[0m [31m46.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pypdf
  Downloading pypdf-3.17.4-py3-none-any.whl (278 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m278.2/278.2 kB[0m [31m31.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sentence_transformers
  Downloading sentence-transformers-2.2.2.tar.gz (85 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting InstructorEmbedding
  Downloading InstructorEmbedding-1.0.1-py2.py3-none-any.whl (19 kB)
Collecting streamli

In [3]:
import torch
from transformers import BitsAndBytesConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

from langchain import HuggingFacePipeline
from langchain import PromptTemplate, LLMChain
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import DirectoryLoader
from langchain.llms import LlamaCpp
from langchain.document_loaders.csv_loader import CSVLoader

from InstructorEmbedding import INSTRUCTOR
from langchain.embeddings import HuggingFaceInstructEmbeddings


# Defining the LLM

In [4]:
from langchain.llms import CTransformers


# Generation settings forwarded to the ctransformers backend.
# context_length is the size of the model's context window. It has to hold the retrieval
# prompt (question plus the stuffed documents) *and* the generated answer, so it must be
# larger than max_new_tokens. The window size belongs here in `config`: `n_ctx` is a
# LlamaCpp argument and has no effect when passed to CTransformers.
config = {'max_new_tokens': 1024, 'temperature': 0, 'context_length': 2048}
llm = CTransformers(model='TheBloke/Mistral-7B-Instruct-v0.1-GGUF',
                    model_file="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
                    config=config)

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

config.json:   0%|          | 0.00/31.0 [00:00<?, ?B/s]

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

mistral-7b-instruct-v0.1.Q4_K_M.gguf:   0%|          | 0.00/4.37G [00:00<?, ?B/s]

# Loading the Data

In [5]:
# Location of the customer-support CSV (an absolute path under /content;
# NOTE(review): presumably the Colab runtime's upload directory - adjust when running elsewhere).
file_path = "/content/Bitext_Sample_Customer_Support_Training_Dataset_27K_responses-v11.csv"
loader = CSVLoader(file_path)

# Load the CSV into LangChain documents; `document` is the input of the splitting cell below.
document = loader.load()

# Splitting data

In [6]:
# Break the loaded documents into overlapping chunks (chunk_size=1024, chunk_overlap=300,
# measured with the splitter's default length function) so each piece can be embedded
# and retrieved independently.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=300)
texts = text_splitter.split_documents(document)

In [7]:
# Number of chunks produced by the splitter (this is how many texts will be embedded).
len(texts)

32824

# Embeddings

In [8]:
# Run the embedding model on the first GPU when PyTorch can see one; otherwise fall back
# to the CPU so this cell still works on a CPU-only runtime (slower, but it does not crash).
embedding_device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# Instructor embedding model used both to index the chunks and to embed incoming queries.
instructor_embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-large",
                                                      model_kwargs={'device': embedding_device})

.gitattributes:   0%|          | 0.00/1.48k [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/270 [00:00<?, ?B/s]

2_Dense/config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/3.15M [00:00<?, ?B/s]

README.md:   0%|          | 0.00/66.3k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.53k [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.41k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/461 [00:00<?, ?B/s]

load INSTRUCTOR_Transformer
max_seq_length  512


# Creating the Database

In [9]:
# # Embed and store the texts
# # Supplying a persist_directory will store the embeddings on disk
# persist_directory = 'db'

# ## Here are the new embeddings being used
# embedding = instructor_embeddings

# vectordb = Chroma.from_documents(documents=texts,
# embedding=embedding,
# persist_directory=persist_directory)

# # persist the db to disk
# vectordb.persist()
# vectordb = None

In [10]:
# # Now we can load the persisted database from disk, and use it as normal.
# vectordb = Chroma(persist_directory=persist_directory,
# embedding_function=embedding)

In [11]:
# # Embed and store the texts
# # Supplying a persist_directory will store the embeddings on disk
# persist_directory = 'db'

# ## Here are the new embeddings being used
# embedding = instructor_embeddings

# vectordb = Chroma.from_documents(documents=texts,
# embedding=embedding,
# persist_directory=persist_directory)

# # persist the db to disk
# vectordb.persist()
# vectordb = None
# # Now we can load the persisted database from disk, and use it as normal.
# vectordb = Chroma(persist_directory=persist_directory,
# embedding_function=embedding)

In [14]:
import os  # Used only to detect an index left on disk by a previous run

# Directory in which Chroma keeps the index (documents together with their vectors).
# Persisting through Chroma itself is what makes the expensive embedding step reusable:
# pickling `vectordb.embeddings` would store the embedding *model* object, not the
# computed vectors, and Chroma cannot be rebuilt from a pickled matrix.
persist_directory = 'db'

if os.path.isdir(persist_directory) and os.listdir(persist_directory):
    # An index already exists: reopen it instead of re-embedding every chunk.
    # The same embedding function must be supplied so queries are embedded consistently.
    vectordb = Chroma(persist_directory=persist_directory,
                      embedding_function=instructor_embeddings)
else:
    # First run: embed all chunks (slow for a large corpus) and store them on disk.
    vectordb = Chroma.from_documents(documents=texts,
                                     embedding=instructor_embeddings,
                                     persist_directory=persist_directory)
    # Flush to disk explicitly; harmless on Chroma versions that persist automatically.
    vectordb.persist()


KeyboardInterrupt: ignored

# Building a Retriever

In [15]:
# Wrap the vector store as a retriever; search_kwargs={"k": 3} asks for three results
# per query (the recorded answers below each list three sources).
retriever = vectordb.as_retriever(search_kwargs={"k": 3})

# Building a Chain

In [16]:
# Create the question-answering chain: `retriever` supplies the supporting documents and
# `llm` writes the answer, combined via the "stuff" chain type.
# return_source_documents=True keeps the retrieved documents in the response under
# "source_documents", which process_llm_response prints as the list of sources.
qa_chain = RetrievalQA.from_chain_type(llm=llm,
chain_type="stuff",
retriever=retriever,
return_source_documents=True)

In [17]:
## Cite sources

import textwrap

def wrap_text_preserve_newlines(text, width=110):
    """Wrap ``text`` to ``width`` columns without merging its existing lines.

    The text is cut at every newline character, each piece is wrapped on its
    own with ``textwrap.fill``, and the wrapped pieces are glued back together
    with newlines, so line breaks already present in ``text`` are kept.

    Args:
        text: The string to wrap.
        width: Maximum line width passed to ``textwrap.fill`` (default 110).

    Returns:
        The wrapped string.
    """
    return '\n'.join(
        textwrap.fill(segment, width=width) for segment in text.split('\n')
    )


In [18]:
def process_llm_response(llm_response):
    """Print a chain response: the wrapped answer, then one source per document.

    Args:
        llm_response: Mapping with a 'result' entry (the answer text) and a
            "source_documents" entry (an iterable of objects whose ``metadata``
            mapping has a 'source' key).
    """
    answer = wrap_text_preserve_newlines(llm_response['result'])
    print(answer)
    print('\n\nSources:')
    for doc in llm_response["source_documents"]:
        origin = doc.metadata['source']
        print(origin)


# Prompting

In [19]:
%%time

# Example 1: refund request. Run the question through the retrieval chain, then print
# the wrapped answer followed by the sources of the retrieved documents.
query = "I Want a refund of my cancelled order, where should i begin?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

 If you want to request a refund for a cancelled order, you should first check the vendor's or service
provider's refund policy to understand their specific process. You can usually find this information on their
website or by contacting their customer service directly. Once you have determined the appropriate steps,
follow the instructions provided and be prepared to provide any necessary documentation, such as your order
number or proof of purchase. If you encounter any issues during the process, don't hesitate to reach out to
the vendor's customer service for assistance.


Sources:
/content/Bitext_Sample_Customer_Support_Training_Dataset_27K_responses-v11.csv
/content/Bitext_Sample_Customer_Support_Training_Dataset_27K_responses-v11.csv
/content/Bitext_Sample_Customer_Support_Training_Dataset_27K_responses-v11.csv
CPU times: user 16min 51s, sys: 2.62 s, total: 16min 54s
Wall time: 9min 55s


In [20]:
%%time

# Example 2: cancelling an order and placing a different one. Run the question through
# the retrieval chain, then print the wrapped answer and its sources.
query = "Please Cancel my previous burger order and give me sandwich"
llm_response = qa_chain(query)
process_llm_response(llm_response)

 To cancel your previous burger order and place a new sandwich order, please follow these steps:

1. Log in to your {{Online Company Portal Info}}.
2. Navigate to the '{{Online Order Interaction}}' or '{{Online Order Interaction}}' section.
3. Locate the specific order you wish to cancel.
4. Click on the order to open the details page.
5. Look for the option to '{{Online Order Interaction}}'.
6. Select the cancellation option and follow any additional prompts or instructions.
7. Once your burger order has been cancelled, you can place a new sandwich order by following the same steps
as above.


Sources:
/content/Bitext_Sample_Customer_Support_Training_Dataset_27K_responses-v11.csv
/content/Bitext_Sample_Customer_Support_Training_Dataset_27K_responses-v11.csv
/content/Bitext_Sample_Customer_Support_Training_Dataset_27K_responses-v11.csv
CPU times: user 13min 18s, sys: 1.79 s, total: 13min 20s
Wall time: 7min 52s


In [21]:
%%time

# Example 3: a two-part question (order tracking and return policy). Run it through the
# retrieval chain, then print the wrapped answer and its sources.
query = "How can i track my Chips and Chicken Order? What is the return policy incase im not satisfied"
llm_response = qa_chain(query)
process_llm_response(llm_response)

 To track your Chips and Chicken order, please provide me with the {{Order Number}}. As for the return policy,
our refund policy is designed to protect your satisfaction and ensure a positive experience with our products
or services. If you have any specific concerns or need help with a particular order, please provide me with
the relevant details such as the {{Order Number}} or any other pertinent information. I'm here to ensure your
satisfaction and address any further questions you may have.


Sources:
/content/Bitext_Sample_Customer_Support_Training_Dataset_27K_responses-v11.csv
/content/Bitext_Sample_Customer_Support_Training_Dataset_27K_responses-v11.csv
/content/Bitext_Sample_Customer_Support_Training_Dataset_27K_responses-v11.csv
CPU times: user 8min 27s, sys: 1.21 s, total: 8min 28s
Wall time: 4min 57s
