In [None]:
# install required libraries 
! pip install langchain langchain-community faiss-cpu transformers sentence-transformers PyPDF2 

In [None]:
import os 
from langchain import HuggingFaceHub, LLMChain
from langchain.prompts import PromptTemplate

import getpass

# Never hardcode the token in the notebook: reuse the one already exported in
# the environment, and only prompt (without echoing) when it is missing.
if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass.getpass(
        "Hugging Face API token: "
    )

# Hosted model used by every chain below.
llm = HuggingFaceHub(
    repo_id="google/flan-t5-large",
    model_kwargs={"temperature": 0.5, "max_length": 512},
)

In [None]:
# Prompt that inserts the user's question and nudges the model to reason
# step by step before answering.
template = (
    "Question: {question}\n"
    "Answer: Let's think step by step."
)

prompt = PromptTemplate(input_variables=["question"], template=template)
llm_chain = LLMChain(llm=llm, prompt=prompt)

In [None]:
# Sanity check: a simple factual question sent through the prompt chain.
question = "What is the capital of Ecuador? "
llm_chain.run(question)

In [None]:
# Domain question sent through the plain prompt chain, i.e. without any
# documents retrieved from the PDF.
question = "what are some key advantages of LoRA for LLM? "
llm_chain.run(question)

In [None]:
from langchain.vectorstores import FAISS
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings

def process_text(text, chunk_size=256, chunk_overlap=64):
    """Split raw text into overlapping chunks and index them in a FAISS store.

    Args:
        text: The full document text to index.
        chunk_size: Target chunk size passed to the splitter, measured in
            characters (``len``). Defaults to 256.
        chunk_overlap: Overlap between consecutive chunks passed to the
            splitter, in characters. Defaults to 64.

    Returns:
        The FAISS vector store built from the embedded chunks.

    Raises:
        ValueError: If ``text`` is empty or contains only whitespace.
    """
    # Fail early with a clear message: with nothing to split there is nothing
    # to index, and the failure would otherwise surface further down as a
    # less obvious error.
    if not text or not text.strip():
        raise ValueError("Cannot build a knowledge base from empty text.")

    # Split the text on newlines into chunks using Langchain's CharacterTextSplitter
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    chunks = text_splitter.split_text(text)

    # Convert the chunks of text into embeddings to form a knowledge base
    embeddings = HuggingFaceEmbeddings()
    knowledge_base = FAISS.from_texts(chunks, embeddings)

    return knowledge_base

In [None]:
import PyPDF2
import requests
from io import BytesIO

# Download the arXiv PDF and open it from memory (nothing is written to disk).
pdf_url = "https://arxiv.org/pdf/2106.09685.pdf"
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
response = requests.get(pdf_url, timeout=60)
# Stop on an HTTP error instead of handing an error page to the PDF parser.
response.raise_for_status()
pdf_file = BytesIO(response.content)
pdf_reader = PyPDF2.PdfReader(pdf_file)

def get_vectorstore(reader=None):
    """Extract the text of every PDF page and build the knowledge base from it.

    Args:
        reader: Object exposing ``pages``, where each page has an
            ``extract_text()`` method. Defaults to the module-level
            ``pdf_reader``.

    Returns:
        Whatever ``process_text`` returns for the combined page text.
    """
    if reader is None:
        reader = pdf_reader

    # Join pages with a newline so the last line of one page is not fused with
    # the first line of the next (the splitter breaks the text on "\n").
    # `or ""` guards against a page yielding no text at all.
    # NOTE(review): confirm whether extract_text() can return None in the
    # installed PyPDF2 version.
    text = "\n".join(page.extract_text() or "" for page in reader.pages)

    # Create the knowledge base object
    return process_text(text)


db = get_vectorstore()

In [None]:
from langchain.chains.question_answering import load_qa_chain
# Question to answer from the PDF knowledge base.
query = "what are some key advantages of LoRA for LLM?"

# Look up the stored chunks most similar to the question.
docs = db.similarity_search(query)

# Build the Q&A chain (chain_type="stuff") and run it on the retrieved chunks.
chain = load_qa_chain(llm, chain_type="stuff")
chain.run(input_documents=docs, question=query)