In [None]:
!pip install langchain openai chromadb tiktoken sentence_transformers accelerate einops transformers xformers bitsandbytes faiss-gpu




In [None]:
import os
from getpass import getpass

import transformers
from torch import cuda, bfloat16
from langchain.chains import LLMChain
from langchain.llms import OpenAI
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate

# Hugging Face repository of the chat model to load (gated: needs an access token).
model_id = 'meta-llama/Llama-2-7b-chat-hf'

# Device string reused later for tensors created by hand and for the status print.
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# set quantization configuration to load large model with less GPU memory
# this requires the `bitsandbytes` library
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)

# begin initializing HF items, you need an access token.
# SECURITY: never commit the token to the notebook. It is read from the
# HF_TOKEN environment variable, falling back to an interactive prompt.
# Any token that was previously hard-coded here must be revoked.
hf_auth = os.environ.get('HF_TOKEN') or getpass('Hugging Face access token: ')
if not hf_auth:
    raise RuntimeError(
        'A Hugging Face access token is required to download ' + model_id
    )

model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

# `trust_remote_code` is intentionally not enabled: the Llama architecture is
# implemented inside `transformers`, so no repository-supplied code has to run.
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',  # let accelerate place the weights on available devices
    use_auth_token=hf_auth
)

# make sure the model is in evaluation mode (inference only, no training behaviour)
model.eval()

print(f"Model loaded on {device}")



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



Model loaded on cuda:0


In [None]:
import torch
from transformers import StoppingCriteria, StoppingCriteriaList

# Tokenizer matching the model loaded above (same repository, same credentials).
tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

# Text sequences whose appearance at the end of the output should stop generation.
stopping_words = ['\n\n', '\n']

# Encode every stop sequence as a LongTensor on `device`.
# `add_special_tokens=False` keeps BOS/EOS markers out of the ids; this replaces
# slicing off the first id, which silently drops a real token whenever the
# tokenizer does not prepend exactly one special token.
# NOTE(review): some sentencepiece tokenizers emit a leading word-boundary token
# for these strings, which the model may not generate itself - inspect
# `stop_token_ids` and confirm the sequences can actually match.
stop_token_ids = [
    torch.LongTensor(
        tokenizer(word, add_special_tokens=False)['input_ids']
    ).to(device)
    for word in stopping_words
]

# define custom stopping criteria object
class StopOnTokens(StoppingCriteria):
    """Stop generation once the output ends with one of the stop sequences.

    Args:
        stop_sequences: optional iterable of 1-D LongTensors of token ids.
            When omitted, the module-level ``stop_token_ids`` is looked up at
            call time, which is what ``StopOnTokens()`` has always done.
    """

    def __init__(self, stop_sequences=None):
        super().__init__()
        self.stop_sequences = stop_sequences

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        """Return True when the first sequence in ``input_ids`` ends with a stop sequence."""
        sequences = stop_token_ids if self.stop_sequences is None else self.stop_sequences
        generated = input_ids[0]  # only the first sequence of the batch is inspected
        for stop_ids in sequences:
            width = len(stop_ids)
            # An empty stop sequence, or one longer than the text produced so
            # far, cannot match; comparing anyway would broadcast/mis-compare.
            if width == 0 or generated.shape[-1] < width:
                continue
            tail = generated[-width:]
            # Compare on the device of the generated ids to avoid a
            # cross-device error when the model lives on another device.
            if torch.eq(tail, stop_ids.to(tail.device)).all():
                return True
        return False

# Wrap the custom criterion in the list type that `generate` expects.
stopping_criteria = StoppingCriteriaList([StopOnTokens()])

# Text-generation pipeline around the already-loaded model and tokenizer.
# No `device` argument is passed here; the model was loaded with
# device_map='auto', so its placement is already decided.
generate_text = transformers.pipeline(
    task='text-generation',
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,  # langchain expects the full text
    # we pass model parameters here too
    stopping_criteria=stopping_criteria,  # without this model rambles during chat
    # temperature / top_p / top_k only take effect when sampling is enabled,
    # so request it explicitly instead of relying on the model's defaults.
    do_sample=True,
    temperature=0.2,  # 'randomness' of outputs: must be > 0, lower is more deterministic
    top_p=0.15,  # nucleus sampling: keep the smallest token set with this cumulative probability
    top_k=5,  # sample only among the 5 most likely tokens
    max_new_tokens=512,  # max number of tokens to generate in the output
    repetition_penalty=1.2  # without this output begins repeating
)

In [None]:
from langchain.llms import HuggingFacePipeline
from langchain.embeddings import HuggingFaceEmbeddings

# Embedding model for documents and queries, built with the library defaults.
embeddings_model = HuggingFaceEmbeddings()

# Expose the local text-generation pipeline through LangChain's LLM interface.
llm_llama = HuggingFacePipeline(
    pipeline=generate_text,
)

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.agents import initialize_agent, Tool
from langchain.document_loaders.recursive_url_loader import RecursiveUrlLoader
from langchain.document_loaders import WebBaseLoader
from langchain.memory import ConversationBufferMemory
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.schema.output_parser import OutputParserException
from bs4 import BeautifulSoup as Soup
from langchain.vectorstores.utils import filter_complex_metadata
from langchain.vectorstores import FAISS

class chatbot:
  """Retrieval-augmented question answering bot about Moveworks.

  The class body downloads and chunks the source pages when the class is
  defined (network I/O); every instance then indexes those chunks in a Chroma
  collection and answers questions through a `RetrievalQA` chain that keeps
  the conversation history in memory.
  """

  # --- Corpus: built once, when the class body executes ---------------------
  url = "https://www.moveworks.com/"
  # Crawl the site starting at `url`, reducing each page to its visible text.
  loader = RecursiveUrlLoader(url=url, extractor=lambda x: Soup(x, "html.parser").text)
  loader2 = WebBaseLoader("https://en.wikipedia.org/wiki/Moveworks")
  blog = loader.load()
  blog += loader2.load()
  # NOTE(review): "\n\n" is the only separator, so text without blank lines
  # cannot be split further and may exceed chunk_size - confirm this is intended.
  splitter = RecursiveCharacterTextSplitter(
      chunk_size=1000, chunk_overlap=100, separators=["\n\n"])

  # Drop metadata values the vector store cannot hold, then chunk the pages.
  blog_split = splitter.split_documents(filter_complex_metadata(blog))

  # Prompt for the "stuff" chain. Lines ending in a backslash continue on the
  # next line, so each must end with a space to keep the words separated.
  blog_template = """You are a Moveworks Customer Support chatbot, you are here to help answer any queries \
a user might be facing regarding the organization and the people associated with Moveworks. \
Please provide as much detail as possible about their queries.

You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe.  \
Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. \
Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. \
If you don't know the answer to a question, please don't share false information. \
If the queries is not regarding Moveworks, or related to what Moveworks does, you must politely refuse to answer. Do not answer questions unrelated to Moveworks.

You are given the context you are required to answer the user query, use the information provided to answer the question in a brief and concise manner.

Context: {context}

History: {chat_history}

Question: {question}

Answer: \
"""

  def __init__(self, llm, embeddings):
    """Build the vector store, the chat model and the retrieval chain.

    Args:
      llm: language model supplied by the caller.
        NOTE(review): currently ignored - answers are always generated by
        OpenAI's gpt-3.5-turbo. Kept in the signature for compatibility;
        decide whether it should replace the hosted model.
      embeddings: embedding model used to index and query the documents.

    The OpenAI API key is read from the OPENAI_API_KEY environment variable,
    falling back to an interactive prompt. It must never be hard-coded.
    """
    print("Chatbot initialized\n\n")
    self.embeddings = embeddings
    # NOTE(review): the collection is persisted under ./data; check whether
    # re-running adds the same chunks again to an existing collection.
    self.blog_store = Chroma.from_documents(
        chatbot.blog_split,
        self.embeddings,
        collection_name="blog",
        persist_directory='./data')
    self.blog_store.persist()

    api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")
    if not api_key:
      raise RuntimeError("An OpenAI API key is required (set OPENAI_API_KEY).")
    self.llm = ChatOpenAI(
        model_name="gpt-3.5-turbo",
        temperature=0.2,
        openai_api_key=api_key,
        max_tokens=512,)

    self.BLOG_PROMPT = PromptTemplate(
        template=chatbot.blog_template,
        input_variables=["context", "chat_history", "question"],
    )
    # The memory object fills the {chat_history} slot of the prompt; it is
    # keyed on "question" so only the user's question is recorded as input.
    self.blog_qa = RetrievalQA.from_chain_type(
        llm=self.llm,
        chain_type="stuff",
        retriever=self.blog_store.as_retriever(),
        chain_type_kwargs={"prompt": self.BLOG_PROMPT,
                          "memory": ConversationBufferMemory(
                              memory_key="chat_history",
                              input_key="question")
                          })

  def ask(self, question):
    """Run `question` through the retrieval chain and return its answer."""
    output = self.blog_qa.run(question)
    return output




In [None]:
# SECURITY: API keys must not be written into the notebook. The value is read
# from the OPENAI_API_KEY environment variable (None when unset); any key that
# was previously committed here must be revoked.
openai_key = os.environ.get("OPENAI_API_KEY")
bot = chatbot(llm=llm_llama, embeddings=embeddings_model)
# NOTE(review): nothing in this cell ever appends to feedback_list.
feedback_list = []
print("Welcome! You may interact with the chatbot now.\nNOTE: Leave the field empty to exit the bot.\n")

while True:
  try:
    question = input("User:  ")
  except (EOFError, KeyboardInterrupt):
    # Closed input stream or interrupted kernel: leave the chat cleanly.
    break
  # A blank (or whitespace-only) line ends the conversation.
  if question.strip() == "":
    break
  output = bot.ask(question)
  print("\nAI: ", output, end='\n\n')


print("Exited the conversation")

if feedback_list:
  print(feedback_list)



Chatbot initialized


Welcome! You may interact with the chatbot now.
NOTE: Leave the field empty to exit the bot.

User:  who all are the cofounders of moveworks

AI:  The co-founders of Moveworks are Bhavin Shah, Vaibhav Nivargi, and Varun Singh.

User:  ig there are more cofounders 

AI:  No, there are only three co-founders of Moveworks: Bhavin Shah, Vaibhav Nivargi, and Varun Singh.

User:  What is the address for the bangalore office?

AI:  The address for the Bangalore office is not provided in the given context. I apologize for the inconvenience.

User:  What is the address for the bangaluru office?

AI:  I apologize, but the address for the Bangalore office is not provided in the given context.

User:  What is the address for the bengaluru office?

AI:  I apologize, but the address for the Bengaluru office is not provided in the given context.

User:  
Exited the conversation
