In [None]:
! pip install -q transformers accelerate sentencepiece langchain chroma unstructured chromadb
from langchain import HuggingFaceHub
from langchain.embeddings import HuggingFaceHubEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.chains import VectorDBQA
from langchain.document_loaders import DirectoryLoader

In [None]:
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Load the tokenizer and the seq2seq model from the same "google/flan-t5-large"
# checkpoint; both are used as notebook-level globals by later cells.
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-large")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-large")

In [None]:
from langchain.document_loaders import DirectoryLoader
# Folder handed to load_docs() below as the location of the source documents.
directory = "/content/Docs"  # Update the directory path accordingly

def load_docs(directory):
  """Load the documents found under `directory`.

  Args:
    directory: Path handed to `DirectoryLoader`.

  Returns:
    Whatever `DirectoryLoader(directory).load()` returns.
  """
  return DirectoryLoader(directory).load()

# Load everything under `directory`; the trailing expression makes the cell
# display how many documents were loaded.
documents = load_docs(directory)
len(documents)

8

In [None]:
import os

# Embedding client built with the library defaults (no model or task is chosen here).
# NOTE(review): presumably needs a Hugging Face Hub API token in the environment — confirm.
embeddings = HuggingFaceHubEmbeddings()

You're using a different task than the one specified in the repository. Be sure to know what you're doing :)


In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

def split_docs(documents,chunk_size=1000,chunk_overlap=20):
  """Break `documents` into chunks with `RecursiveCharacterTextSplitter`.

  Args:
    documents: Loaded documents to split.
    chunk_size: Forwarded as the splitter's `chunk_size` argument.
    chunk_overlap: Forwarded as the splitter's `chunk_overlap` argument.

  Returns:
    The result of the splitter's `split_documents(documents)` call.
  """
  splitter = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap,
  )
  return splitter.split_documents(documents)

# Split with the default chunk_size / chunk_overlap and print the number of chunks.
docs = split_docs(documents)
print(len(docs))

14


In [None]:
from langchain.vectorstores import Chroma
# Build the Chroma vector store from the split chunks, using `embeddings`
# as the embedding client; later cells query it through `db.similarity_search`.
db = Chroma.from_documents(docs, embeddings)

In [None]:
# Sample retrieval: look up stored chunks similar to a fixed query and print them.
# `num` is passed as the second positional argument of similarity_search
# (presumably the number of results, `k` — verify against the Chroma API).
query = "How to track my package"
num = 3
matching_docs = db.similarity_search(query,num)
print(matching_docs)

[Document(page_content="You can find tracking information in your order details. If an order includes multiple items, each may have separate delivery dates and tracking information.\n\nGo to Your Orders. Go to the order you want to track. Select Track Package next to your order. Select See all updates to view delivery updates. Depending on the shipping method you chose, it's possible that the tracking information might not be visible immediately.\n\nItems sent by third-party sellers from the Amazon Marketplace can in some cases also be tracked. It's possible if the seller has shared this information with Amazon and you have chosen a traceable shipping method.", metadata={'source': '/content/Docs/Track Your Package.txt'}), Document(page_content="Note: Some packages, such as standard international deliveries, aren't trackable. Other reasons why tracking information might not be available are listed on Missing Tracking Information. If the estimated delivery date for your package has passe

In [None]:
# Summarization prompt: the instruction header followed by the page content of
# every retrieved document, appended back-to-back with no separator.
# NOTE(review): the header contains an opening double quote that is never
# closed; it is part of the text sent to the model.
input_text = """
  "Summarize:
""" + "".join(doc.page_content for doc in matching_docs)
print(input_text)



  "Summarize:
You can find tracking information in your order details. If an order includes multiple items, each may have separate delivery dates and tracking information.

Go to Your Orders. Go to the order you want to track. Select Track Package next to your order. Select See all updates to view delivery updates. Depending on the shipping method you chose, it's possible that the tracking information might not be visible immediately.

Items sent by third-party sellers from the Amazon Marketplace can in some cases also be tracked. It's possible if the seller has shared this information with Amazon and you have chosen a traceable shipping method.Note: Some packages, such as standard international deliveries, aren't trackable. Other reasons why tracking information might not be available are listed on Missing Tracking Information. If the estimated delivery date for your package has passed and your tracking information hasn't changed, allow an additional day or two for the package to be 

## Summary using LangChain



In [None]:
def summary(input_text):
    """Generate text from `input_text` with the notebook's model and print it.

    The text is encoded with the notebook-level `tokenizer` (truncated and
    padded to 1024 tokens, returned as PyTorch tensors), passed to
    `model.generate`, and the first generated sequence is decoded without
    special tokens.

    Args:
        input_text: Prompt string to encode.

    Returns:
        None. The decoded text is printed.
    """
    token_limit = 1024  # Adjust the max_length value as needed
    encoded = tokenizer.encode(
        input_text,
        truncation=True,
        max_length=token_limit,
        padding='max_length',
        return_tensors='pt',
    )
    generated = model.generate(encoded)
    print(tokenizer.decode(generated[0], skip_special_tokens=True))

# Run the summarizer on the prompt assembled in the previous cell.
summary(input_text)

Go to Your Orders. Check your tracking information. Allow for late delivery. Find missing items


## Generation using LangChain

In [None]:
# Ask the user for a question; this rebinds the notebook-level `query`
# that an earlier cell set to a fixed sample string.
query = input("Enter your query : ")

def generate(query):
    """Answer `query` with the notebook's model, grounded on retrieved passages.

    Retrieves passages similar to `query` from the notebook-level `db`,
    builds one prompt out of the instructions, the passages' page content
    and the question, runs it through the notebook-level `tokenizer` and
    `model`, and prints the decoded answer.

    Args:
        query: The user's question as a string.

    Returns:
        None. The answer is printed.
    """
    num = 3  # passed to similarity_search as its second positional argument
    matching_docs = db.similarity_search(query, num)
    # Page contents are appended back-to-back, as in the original prompt.
    context = "".join(document.page_content for document in matching_docs)
    prompt = "".join([
        """You are a helpful assistant that that can answer questions based on : """,
        context,
        """ Only use the factual information from the transcript to answer the question.Your answers should be verbose and detailed.""",
        """Question : """,
        query,
    ])
    # Encode the prompt built above. The previous code encoded the global
    # `input_text` (the summarization prompt from an earlier cell), so the
    # question and its retrieved context never reached the model.
    tokens = tokenizer.encode(prompt, return_tensors='pt')
    outputs = model.generate(tokens)
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(answer)
# Retrieve context for the entered query and print the generated answer.
generate(query)

Enter your queryfind my package




Go to Your Orders. Check your tracking information. Allow for late delivery. Find missing items


## Prompt using LLMChain


In [None]:
from langchain import PromptTemplate, HuggingFaceHub, LLMChain
from langchain.prompts.chat import ( ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate)

In [None]:
def get_response_from_query(db, query, k=4):
    """Answer `query` with an LLMChain over passages retrieved from `db`.

    The page contents of the passages returned by
    `db.similarity_search(query, k=k)` are joined with single spaces and
    supplied as `docs` to the system prompt; `query` is supplied as
    `question` to the human prompt. The chain runs against
    "google/flan-t5-large" through `HuggingFaceHub`. The raw system
    template is printed before the chain is built.

    Original author's note: setting the chunk size to 1000 and k to 4
    maximizes the number of tokens to analyze.

    Args:
        db: Vector store exposing `similarity_search(query, k=...)`.
        query: The user's question.
        k: Forwarded to `similarity_search` as `k`.

    Returns:
        A `(response, docs)` tuple: the chain output with newline characters
        removed, and the passages as returned by `similarity_search`.
    """
    retrieved = db.similarity_search(query, k=k)
    context = " ".join(passage.page_content for passage in retrieved)

    llm = HuggingFaceHub(
        repo_id="google/flan-t5-large",
        model_kwargs={"temperature":1e-10},
    )

    # System prompt text; the {docs} placeholder is filled when the chain runs.
    template = """
        You are a helpful assistant that that can answer questions
        based on :  {docs}
        Only use the factual information from the transcript to answer the question.
        Your answers should be verbose and detailed.
        """
    print(template)

    # System message first, then the human question message.
    chat_prompt = ChatPromptTemplate.from_messages([
        SystemMessagePromptTemplate.from_template(template),
        HumanMessagePromptTemplate.from_template("Answer the following question: {question}"),
    ])

    answer = LLMChain(llm=llm, prompt=chat_prompt).run(question=query, docs=context)
    return answer.replace("\n", ""), retrieved

In [None]:
import textwrap
# Ask for a question, answer it through the LLMChain helper, and print the answer.
# NOTE: this rebinds the notebook-level `query` and `docs`; `docs` previously
# held the chunks produced by split_docs(), so re-running earlier cells that
# read `docs` after this one will see the retrieved passages instead.
query = input("Enter your query : ")
response, docs = get_response_from_query(db, query)
print(response)

Enter your query : find my package

        You are a helpful assistant that that can answer questions 
        based on :  {docs}
        Only use the factual information from the transcript to answer the question. 
        Your answers should be verbose and detailed.
        
If you received your package and an item is missing, do the following: Go to Your Order
