# 1. Installing the packages

In [None]:
!pip install langchain
!pip install huggingface_hub
!pip install sentence_transformers
!pip install faiss-cpu
!pip install unstructured
!pip install chromadb
!pip install Cython
!pip install tiktoken
!pip install unstructured[local-inference]

# 2. Importing the packages

In [None]:
import os
import requests
from getpass import getpass

# Do not hardcode the token in the notebook. Reuse a token that is already
# exported in the environment; otherwise ask for it interactively so it is
# never saved with the notebook source.
if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass("Enter Huggingface API Token: ")

from langchain.document_loaders import TextLoader  # for text files
from langchain.text_splitter import CharacterTextSplitter  # text splitter
from langchain.embeddings import HuggingFaceEmbeddings  # for using Hugging Face models
from langchain.vectorstores import FAISS  # FAISS vector store
from langchain.chains.question_answering import load_qa_chain
from langchain import HuggingFaceHub
from langchain.indexes import VectorstoreIndexCreator  # vectorstore index creator
from langchain.chains import RetrievalQA

# 3. Pulling the txt file

In [None]:
import requests

# Remote location of the transcript; it is saved next to the notebook as
# "Trump_Speech.txt", which is the path the document-loader cell reads.
url = "https://raw.githubusercontent.com/HesamSalehiACN/Documents/main/Trump_Speech.txt"

# Use a timeout so a stalled connection cannot hang the cell forever.
res = requests.get(url, timeout=30)
# Fail loudly on 4xx/5xx responses; otherwise the error page body would be
# written to disk and later indexed as if it were the speech.
res.raise_for_status()

with open("Trump_Speech.txt", "w") as f:
    f.write(res.text)

# 4. Document loader

In [None]:
from langchain.document_loaders import TextLoader

# Path of the transcript file on disk, relative to the notebook.
speech_path = './Trump_Speech.txt'

# Build the loader for that file and read it into `documents`.
loader = TextLoader(speech_path)
documents = loader.load()
import textwrap
def wrap_text_preserve_newlines(text, width=110):
    """Wrap ``text`` to ``width`` columns while keeping its existing line breaks.

    The text is split on ``'\\n'``; each resulting line is wrapped on its own
    with ``textwrap`` and the pieces are joined back together with ``'\\n'``.

    Args:
        text: The string to wrap.
        width: Maximum line width passed to ``textwrap`` (default 110).

    Returns:
        The wrapped text as a single string.
    """
    # One wrapper configured with the requested width, reused for every line.
    wrapper = textwrap.TextWrapper(width=width)
    # Wrap line by line so the original newlines survive the re-flow.
    return '\n'.join(wrapper.fill(segment) for segment in text.split('\n'))

# 5. Splitting the text

In [None]:
from langchain.text_splitter import CharacterTextSplitter

# Configure the splitter with the chunk size and overlap used in this notebook.
text_splitter = CharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=10,
)

# Split the loaded documents into the chunks that get embedded later.
docs = text_splitter.split_documents(documents)

# 6. Embeddings

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
# Create the embeddings object with no arguments, i.e. with the library's
# defaults; the vectorstore cell passes it to FAISS.
embeddings = HuggingFaceEmbeddings()

# 7. Creating a vectorstore

In [None]:
from langchain.vectorstores import FAISS

# Build the FAISS vector store from the split chunks and the embeddings object.
db = FAISS.from_documents(documents=docs, embedding=embeddings)

# 8. Creating the chain/agent

In [None]:
from langchain import HuggingFaceHub
from langchain.chains.question_answering import load_qa_chain

# Generation settings forwarded to the hosted model.
generation_kwargs = {"temperature":0, "max_length":512}

# Alternative LLM: use repo_id="declare-lab/flan-alpaca-large" with the same
# model_kwargs.
llm = HuggingFaceHub(
    repo_id="MBZUAI/LaMini-Flan-T5-783M",
    model_kwargs=generation_kwargs,
)

# Question-answering chain over the LLM, using the "stuff" chain type.
chain = load_qa_chain(llm, chain_type="stuff")

# 9. Query the data
Remember that the first run may be a little slow. If it takes more than 5 minutes, an error from the Hugging Face API will most probably occur. Stop the cell, run another cell, and then run this one again.

In [None]:
# Ask the user for the question to answer from the indexed speech.
query = input("What is your question: ")

# Store the search hits under their own name. Reusing `docs` would overwrite
# the chunk list produced by the splitting cell, so re-running the vectorstore
# cell after a query would index only these few hits.
relevant_docs = db.similarity_search(query)

# Last expression of the cell, so the answer is shown as the cell output.
chain.run(input_documents=relevant_docs, question=query)

# -----------------------------------------------------------------------------------------------------------------------------------------