In [1]:
from langchain import hub
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.document_loaders import PyPDFLoader
from langchain_chroma import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter,TokenTextSplitter,CharacterTextSplitter
from langchain_cohere import CohereEmbeddings

In [2]:
import os

from dotenv import load_dotenv

# Load environment variables from a .env file before any client is constructed
# (presumably this supplies the Cohere / Groq API keys, since none are passed
# explicitly in the cells below -- confirm against the .env contents).
# The location can be overridden with the DOTENV_PATH environment variable so the
# notebook is not tied to a single machine; the original path stays the default.
DOTENV_PATH = os.environ.get(
    "DOTENV_PATH", '/Users/eshantdas/Desktop/YOutube_scripts/myenv/.env'
)
# Kept as the last expression so the cell still displays load_dotenv's return value.
load_dotenv(DOTENV_PATH)

True

In [6]:
# Source PDF for the knowledge base (absolute local path; adjust for your machine).
PDF_PATH = "/Users/eshantdas/Downloads/field-guide-to-data-science.pdf"

# Load the PDF into LangChain documents, then split them into overlapping chunks.
loader = PyMuPDFLoader(PDF_PATH)
data = loader.load()
# NOTE(review): chunk_size is counted in characters; 10,000-character chunks may
# exceed the embedding model's input limit and be truncated at embed time --
# confirm against the Cohere model limits before relying on retrieval quality.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=600)
splits_character = text_splitter.split_documents(data)

embeddings = CohereEmbeddings(
    model="embed-english-light-v3.0"
)

# Embed every chunk and index it in a Chroma vector store.
# NOTE(review): no collection name or persist directory is given; re-running this
# cell in the same kernel may insert the same chunks again -- confirm, and restart
# the kernel if duplicate hits show up.
vectorstore = Chroma.from_documents(documents=splits_character, embedding=embeddings)

# The number of hits has to be passed through `search_kwargs`. A bare `k=5`
# keyword on `as_retriever` is not applied to the search, so the retriever would
# fall back to its default result count instead of returning 5 chunks.
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

# Prompt template pulled from the LangChain Hub; the chain built later feeds it
# the `context` and `question` inputs.
prompt = hub.pull("rlm/rag-prompt")

In [7]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

# Chat model used to generate answers in both the baseline and the reranked chain.
# temperature=0 requests the least random sampling the backend offers.
# NOTE(review): confirm this model id is still served by Groq before re-running.
llm = ChatGroq(model="llama3-70b-8192", temperature=0)


def format_docs(docs):
    """Merge retrieved documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` of every document, in input order, separated by a
        blank line. An empty iterable yields an empty string.
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)


# Baseline RAG pipeline:
#   1. fan the incoming question out into the two prompt inputs -- `context`
#      (retrieved chunks joined by format_docs) and `question` (passed through),
#   2. fill the prompt, 3. call the chat model, 4. unwrap the reply to a string.
# RunnableParallel is what the bare dict literal is coerced to when piped.
rag_chain = (
    RunnableParallel(
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
    )
    | prompt
    | llm
    | StrOutputParser()
)

In [8]:
# Smoke-test the baseline chain (no reranking); the answer string is the cell output.
rag_chain.invoke(input="Tell me about Data Science?")

'Data Science is the art of turning data into actions through the creation of data products, which provide actionable information without exposing decision makers to the underlying data or analytics. It involves extracting timely, actionable information from diverse data sources to drive data products, such as movie recommendations, weather forecasts, and stock market predictions.'

## Cohere Reranker

In [26]:
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
from langchain_cohere import CohereRerank
from langchain_community.llms import Cohere

# Second-stage reranker: the base retriever fetches candidate chunks and the
# Cohere rerank model re-scores them against the query (each returned document
# carries a `relevance_score` in its metadata, as the cell output shows).
compressor = CohereRerank(model="rerank-english-v3.0")
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=compressor,
)

# Inspect the reranked documents for a sample query; displayed as the cell output.
compressed_docs = compression_retriever.invoke("Tell me about Data Science")
compressed_docs

INFO:httpx:HTTP Request: POST https://api.cohere.com/v1/rerank "HTTP/1.1 200 OK"


[Document(metadata={'author': 'Booz Allen Hamilton', 'creationDate': "D:20170501173004Z00'00'", 'creator': 'Adobe InDesign CC 2015 (Macintosh)', 'file_path': '/Users/eshantdas/Downloads/field-guide-to-data-science.pdf', 'format': 'PDF 1.4', 'keywords': 'The Field Guide to Data Science', 'modDate': "D:20170501173004Z00'00'", 'page': 12, 'producer': 'Mac OS X 10.12.4 Quartz PDFContext', 'source': '/Users/eshantdas/Downloads/field-guide-to-data-science.pdf', 'subject': 'The Field Guide to Data Science', 'title': 'The Field Guide to Data Science', 'total_pages': 126, 'trapped': '', 'relevance_score': 0.9987157}, page_content='MEET your G U I D E S\nFred Blackburn\n(@boozallen)\nData Science is a field that is  \nevolving at a very rapid pace…be  \npart of the journey. \nAngela Zutavern\n(@angelazutavern)\nData Science is about asking bigger \nquestions, seeing future possibilities, \nand creating outcomes you desire. \nJosh Sullivan \n(@joshdsullivan)\nLeading our Data Science team \nshows

In [27]:
from langchain.chains import RetrievalQA

In [28]:
# Question-answering chain that retrieves through the reranking retriever.
# "stuff" is the default chain type; it is spelled out here for readability.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=compression_retriever,
)

In [31]:
# Ask the reranked chain; the cell output is a dict with `query` and `result` keys.
chain.invoke(input="tell me about Data Science")

INFO:httpx:HTTP Request: POST https://api.cohere.com/v1/rerank "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


{'query': 'tell me about Data Science',
 'result': 'Based on the provided context, here\'s what I can tell you about Data Science:\n\n**Definition:** Data Science is the art of turning data into actions. It involves creating data products that provide actionable information without exposing decision makers to the underlying data or analytics.\n\n**Goal:** The goal of Data Science is to extract timely, actionable information from diverse data sources to drive data products.\n\n**Data Products:** Data products are the outcome of Data Science. They provide actionable information and can take many forms, such as:\n\n* Movie recommendations\n* Weather forecasts\n* Stock market predictions\n* Production process improvements\n* Health diagnosis\n* Flu trend predictions\n* Targeted advertising\n\n**History:** The term "Data Science" has been around since the 1960s-1980s, but it wasn\'t until the late 1990s that the field as we know it today began to emerge from the statistics and data mining c