# Building GenAI App using Langchain and HuggingFace

In [17]:
from langchain_community.document_loaders import PyPDFLoader, PyPDFDirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.prompts import PromptTemplate
from langchain_classic.chains import RetrievalQA, ConversationalRetrievalChain
from dotenv import load_dotenv
import numpy as np
import os

## Reading the pdfs

In [18]:
loader = PyPDFDirectoryLoader("./research")
documents = loader.load()

In [19]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
split_docs = text_splitter.split_documents(documents)
split_docs[0]

Document(metadata={'producer': 'Adobe PDF Library 10.0.1', 'creator': 'Adobe InDesign CS6 (Windows)', 'creationdate': '2020-01-22T14:34:12+05:30', 'moddate': '2021-06-12T15:25:17+03:00', 'trapped': '/False', 'source': 'research\\AI.pdf', 'total_pages': 339, 'page': 1, 'page_label': 'i'}, page_content='ArtificiAl intelligence \nMAchine leArning \nAnd \ndeep leArning')

In [20]:
print("Length of document splitted:", len(split_docs))

Length of document splitted: 1210


## Creating Embeddings using HuggingFace

In [21]:
hug_embed = HuggingFaceEmbeddings(
    model_name="BAAI/bge-small-en-v1.5",
    model_kwargs={'device': 'cpu'},
    encode_kwargs={'normalize_embeddings': True}
)
hug_embed

HuggingFaceEmbeddings(model_name='BAAI/bge-small-en-v1.5', cache_folder=None, model_kwargs={'device': 'cpu'}, encode_kwargs={'normalize_embeddings': True}, query_encode_kwargs={}, multi_process=False, show_progress=False)

In [22]:
vct_store = FAISS.from_documents(
    split_docs[:300], hug_embed
)
vct_store

<langchain_community.vectorstores.faiss.FAISS at 0x1ea5b5bf690>

In [23]:
query = "What is the reason behind the rise of Artificial Intellegence?"
res_quer = vct_store.similarity_search(query, k=5)

for i, res in enumerate(res_quer):
    print(f"*********Result {i+1}************:\n {res.page_content}\n")

*********Result 1************:
 Preface xv
Chapter 1:  Introduction to AI 1
 What is Artificial Intelligence?  2
Strong AI versus Weak AI  4
 The Turing Test 5
Definition of the Turing Test 5
An Interrogator Test 6
 Heuristics 6
Genetic Algorithms 8
 Knowledge Representation 8
Logic-based Solutions 9
Semantic Networks 9
 AI and Games 10
The Success of AlphaZero 11
 Expert Systems  12
 Neural Computing 13
 Evolutionary Computation 14
 Natural Language Processing 14
 Bioinformatics 17
 Major Parts of AI  18
Machine Learning 18
Deep Learning  19
Reinforcement Learning  19
Robotics 20
 Code Samples 21
 Summary 22
CONTENTS

*********Result 2************:
 4 • Artifici Al intelligence , MAchine  leArning , Deep leArning
Now consider the following question: can inanimate objects, such as 
computers, possess intelligence? The declared goal of artificial Intelligence 
is to create computer software and/or hardware systems that exhibit think -
ing comparable to that of humans, in other words, to

In [24]:
retriever = vct_store.as_retriever(search_type='similarity', k=3)
retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001EA5B5BF690>, search_kwargs={})

In [25]:
load_dotenv()

hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")

## Integrating with LLM for generation

In [26]:
from langchain_huggingface import HuggingFaceEndpoint

llm = HuggingFaceEndpoint(model="google/flan-t5-base")

In [27]:

chat_qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
)

In [None]:
chat_history = []
result = chat_qa.invoke({"question": query, "chat_history": chat_history})
print(result["answer"])

chat_history.append((query, result["answer"]))
query2 = "How can i use Agentic AI in healthcare domain?"
result2 = chat_qa.invoke({"question": query2, "chat_history": chat_history})
print(result2["answer"])