In [4]:
import os
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())

True

In [6]:
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
os.environ['LANGCHAIN_PROJECT'] = 'test-rag'
os.environ['LANGCHAIN_API_KEY'] = os.getenv("LANGCHAIN_API_KEY")
os.environ['GROQ_API_KEY'] = os.getenv("GROQ_API_KEY")

In [30]:
from langchain_community.document_loaders import WebBaseLoader
loader = WebBaseLoader( web_paths=("https://techcrunch.com/2024/01/30/chatgpt-users-can-now-invoke-gpts-directly-in-chats/",))
data = loader.load()
print(data)

[Document(metadata={'source': 'https://techcrunch.com/2024/01/30/chatgpt-users-can-now-invoke-gpts-directly-in-chats/', 'title': 'ChatGPT users can now invoke GPTs directly in chats | TechCrunch', 'description': "Users of ChatGPT can now invoke custom GPTs, chatbots built on top of OpenAI's GenAI models, in conversations. GPTs will have the full context of those convos.", 'language': 'en-US'}, page_content="\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nChatGPT users can now invoke GPTs directly in chats | TechCrunch\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nLogin\n\n\n\n\n\nSearchStartupsVentureAppleSecurityAIAppsEventsStartup BattlefieldMoreFintechCloud ComputingLayoffsHardwareGoogleMicrosoftTransportationEVsMetaInstagramAmazonTikTokNewslettersPodcastsPartner ContentCrunchboard JobsContact UsSearch\n\n\n\n\n\n\n\n\n\n\n\n\n\t\tAI\t\nCh

In [31]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
split_text = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size = 800, chunk_overlap = 100)
split = split_text.split_documents(data)


In [32]:
question = "What is India?"
context = "India is a co"

In [33]:
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
model_name = "BAAI/bge-small-en"
model_kwargs = {"device" : "cpu"}
encode_kwargs = {"normalize_embeddings" : True}
hf_embeddings = HuggingFaceBgeEmbeddings(model_name=model_name,model_kwargs=model_kwargs,encode_kwargs=model_kwargs)

query_embedding = hf_embeddings.embed_query(question)
document_embedding = hf_embeddings.embed_query(context)

In [34]:
from langchain_community.vectorstores import FAISS
vector_store = FAISS.from_documents(documents= split, embedding=hf_embeddings)
retriever = vector_store.as_retriever()

In [35]:
retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceBgeEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001DF3A3AFC80>)

In [38]:
docs = retriever.get_relevant_documents("What is hallucination?")

In [39]:
docs

[Document(metadata={'source': 'https://techcrunch.com/2024/01/30/chatgpt-users-can-now-invoke-gpts-directly-in-chats/', 'title': 'ChatGPT users can now invoke GPTs directly in chats | TechCrunch', 'description': "Users of ChatGPT can now invoke custom GPTs, chatbots built on top of OpenAI's GenAI models, in conversations. GPTs will have the full context of those convos.", 'language': 'en-US'}, page_content='More TechCrunch\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nGet the industry’s biggest tech news\nExplore all newsletters\n\n\n\n\n\n\nTechCrunch Daily News\nEvery weekday and Sunday, you can get the best of TechCrunch’s coverage.\n\n\n\n\n\nStartups Weekly\nStartups are the core of TechCrunch, so get our best coverage delivered weekly.\n\n\n\n\n\nTechCrunch Fintech\nThe latest Fintech news and analysis, delivered every Tuesday.\n\n\n\n\n\nTechCrunch Mobility\nTechCrunch Mobility is your destination for transportation news and insight.\n\n\n\n\n\n\n\nEmail address (required)\n\n\n\nSubscribe\n\n

In [41]:
from langchain_groq import ChatGroq
from langchain.prompts import ChatPromptTemplate

template = """ answer the question based on the context.
{context}
Question: {question}"""

prompt = ChatPromptTemplate.from_template(template)
prompt

ChatPromptTemplate(input_variables=['context', 'question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template=' answer the question based on the context.\n{context}\nQuestion: {question}'))])

In [42]:
llm = ChatGroq(model="llama3-8b-8192", temperature=0)

In [47]:
chain = prompt | llm
chain.invoke({"context":docs,"question":"What is Hallucination?"})

AIMessage(content='Based on the context of the article, there is no mention of "Hallucination". The article discusses ChatGPT, a text-generating AI chatbot, and its ability to invoke custom GPTs (chatbots built on top of OpenAI\'s GenAI models) in conversations. It also mentions the challenges of moderation and monetization for developers who create GPTs.', response_metadata={'token_usage': {'completion_tokens': 78, 'prompt_tokens': 3775, 'total_tokens': 3853, 'completion_time': 0.062550671, 'prompt_time': 0.720834241, 'queue_time': None, 'total_time': 0.783384912}, 'model_name': 'llama3-8b-8192', 'system_fingerprint': 'fp_873a560973', 'finish_reason': 'stop', 'logprobs': None}, id='run-9dc1471a-161e-416a-8ceb-73c14f83955e-0', usage_metadata={'input_tokens': 3775, 'output_tokens': 78, 'total_tokens': 3853})