In [1]:
!pip install langchain_community langchainhub chromadb langchain langchain-cohere



In [2]:
from google.colab import userdata
import os
os.environ['COHERE_API_KEY'] = userdata.get('Coherellm')

In [3]:
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader(["https://crex.com/series/7W/indian-premier-league-2020" , "https://crex.com/series/19W/indian-premier-league-2023"])

docs = loader.load()



In [4]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50)

splits = text_splitter.split_documents(docs)
print(splits[0].page_content)

IPL 2020 Fixtures and Stats | Indian Premier League 2020 - crex.com


In [5]:
print(len(splits))

14


In [6]:
from langchain.embeddings import CohereEmbeddings
from langchain.vectorstores import Chroma

embedding_model = CohereEmbeddings(user_agent="langchain")
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=embedding_model,
    persist_directory="./chroma_db"
)

vectorstore.persist()

  embedding_model = CohereEmbeddings(user_agent="langchain")
  vectorstore.persist()


In [7]:
print(vectorstore._collection.count())

14


In [8]:
print(vectorstore._collection.get())

{'ids': ['dbb3ffab-7bdf-4d58-a638-fd4839ba556e', '949f3721-478e-48f6-ba3e-fb1f76806678', 'dc745dac-1af6-47aa-8cba-c87268a9d1b7', '2ba10941-fe7a-4b32-b336-7fffadf711ed', '45b0d863-7f95-42ca-b283-6ecb5f379666', '2a0f5434-7a5a-4dc2-96b3-6d71703ef1b2', '70ac0b9b-99a1-444f-b79b-5f0bc4a7f7f5', 'e32950fd-d22e-4611-9f4c-1748c72e3dae', 'deca0a45-e344-4fce-833b-5b63b9abb655', 'b930aa17-ab8b-40e8-b3d1-8cbcf5bf8fd1', 'dc86f48f-b22a-477f-8ea7-d3121dd07744', '5f7f5ee5-b461-4f98-926f-dc3fcfd4308e', '9dbf8127-2aec-4e09-a010-4071f6dbfa00', '12da6b71-8495-40e9-9dd5-3431bf8a73f1'], 'embeddings': None, 'documents': ['IPL 2020 Fixtures and Stats | Indian Premier League 2020 - crex.com', 'CREX Home(current) Series Fixtures\uf8ffüèè Stats CornerRankings LCP Element', 'Indian Premier League 2020 Sep 19 to Nov 10More Seasons >IPL 2025IPL 2024 Overview  Matches  Squads  Points Table  News  Info  Featured Matches  All Matches > MI157/518.4 Mumbai Indians won  Final  IPL 2020DC156/720.0DC189/320.0 Delhi Capitals 

In [10]:
print(vectorstore._collection.get(ids=['dbb3ffab-7bdf-4d58-a638-fd4839ba556e'], include=["embeddings","documents"]))

{'ids': ['dbb3ffab-7bdf-4d58-a638-fd4839ba556e'], 'embeddings': array([[ 0.43652344, -1.4296875 ,  1.12792969, ..., -0.17138672,
        -1.08105469,  1.52441406]]), 'documents': ['IPL 2020 Fixtures and Stats | Indian Premier League 2020 - crex.com'], 'uris': None, 'included': ['embeddings', 'documents'], 'data': None, 'metadatas': None}


In [11]:
retriever = vectorstore.as_retriever()

In [12]:
from langchain import hub
prompt = hub.pull("rlm/rag-prompt-mistral")



In [13]:
from langchain_cohere import ChatCohere

llm = ChatCohere()

In [14]:
def join_doc(docs):
  return "\n".join(doc.page_content for doc in docs)

In [15]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda

In [16]:
join_docs_runnable = RunnableLambda(join_doc)


In [17]:
rag_chain = ({
    "context" : retriever | join_docs_runnable,
    "question" : RunnablePassthrough()} | prompt | llm | StrOutputParser() )

In [24]:
rag_chain.invoke("Who is Winner of IPL 2020 Season?")

'The Mumbai Indians won the IPL 2020 season. They defeated the Delhi Capitals in the final match. The context clearly states this information.'

In [25]:
rag_chain.invoke("Who won final match of season 2020 IPL?")

'The Mumbai Indians won the final match of the 2020 IPL season. They defeated the Delhi Capitals, scoring 157/5 in 18.4 overs. The match was part of the Indian Premier League 2020, which ran from September 19 to November 10.'

In [26]:
rag_chain.invoke("Who won the final in IPL 2023?")

'The context provided indicates that CSK won the final in IPL 2023 with a score of 171/5 in 15.0 overs. However, there is also a mention of GT winning a match with a score of 214/4 in 20.0 overs, but it is not explicitly stated as the final. Based on the available information, it appears that CSK won the final in IPL 2023.'