In [1]:
from langchain_community.document_loaders import TextLoader

loader = TextLoader("IPLData.txt",  encoding="utf-8")

docs = loader.load()

In [2]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(
    chunk_size = 300,
    chunk_overlap=0,
    separators=["\n\n","\n","_",""]
)

docs_splitter = splitter.split_documents(docs)

In [3]:
from langchain.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings

vector_store = Chroma(
    embedding_function=HuggingFaceEmbeddings(model="sentence-transformers/all-MiniLM-L6-v2"),
    persist_directory="my_chroma_db",
    collection_name="sample_vector_data"
)

vector_store.add_documents(docs_splitter)

  from .autonotebook import tqdm as notebook_tqdm
  vector_store = Chroma(


['12ee3659-81da-483c-af4f-7d4e6e1ebdd5',
 '9d56c527-3850-4934-9ce0-c3cb12c7c29b',
 'b657189f-825a-42c3-84aa-b7af9158b2d0',
 'e1e518a3-4c7c-42ae-82b3-7f5b9ae7853b',
 'fa2ed8f8-faa7-4807-9de7-c8a0bdd2c4bf',
 'd2823536-387b-483d-89c7-4333da12cea5',
 '50a9a557-4b84-40b6-904a-16ed49e60b7e',
 'cb4d9420-9d72-4b6c-9540-d5fd1f3de897',
 '45f67b5a-3307-41e3-8bf5-fe65791b30d4',
 'a6c45324-5ab5-401e-9ce6-6ca2efd11785',
 '62603718-06c3-41ea-a755-009451f2cdd0',
 'fdd0c97c-cef3-4f84-970b-2bad1de96595',
 '577b0e4b-ffe0-48bf-917c-cbf9ab9973de',
 'd8c215a2-e8f1-4fb4-8267-6daf3868b238',
 '6c845e74-3407-4f4d-a360-617b9ca998f7',
 '6fef386e-d84f-4881-a37f-2a09e3665c59',
 '9d5e4bc5-699a-40ea-97b7-6130de2e19de',
 'f79ab97c-704b-49cf-ae5d-6f0cd14f69ac',
 '4901859b-3128-40b3-a617-dc3427ebf2d0',
 'e810c770-4c40-417c-918e-98dc5181d1d8']

In [4]:
vector_store.get(include=['embeddings','documents','metadatas'])

{'ids': ['12ee3659-81da-483c-af4f-7d4e6e1ebdd5',
  '9d56c527-3850-4934-9ce0-c3cb12c7c29b',
  'b657189f-825a-42c3-84aa-b7af9158b2d0',
  'e1e518a3-4c7c-42ae-82b3-7f5b9ae7853b',
  'fa2ed8f8-faa7-4807-9de7-c8a0bdd2c4bf',
  'd2823536-387b-483d-89c7-4333da12cea5',
  '50a9a557-4b84-40b6-904a-16ed49e60b7e',
  'cb4d9420-9d72-4b6c-9540-d5fd1f3de897',
  '45f67b5a-3307-41e3-8bf5-fe65791b30d4',
  'a6c45324-5ab5-401e-9ce6-6ca2efd11785',
  '62603718-06c3-41ea-a755-009451f2cdd0',
  'fdd0c97c-cef3-4f84-970b-2bad1de96595',
  '577b0e4b-ffe0-48bf-917c-cbf9ab9973de',
  'd8c215a2-e8f1-4fb4-8267-6daf3868b238',
  '6c845e74-3407-4f4d-a360-617b9ca998f7',
  '6fef386e-d84f-4881-a37f-2a09e3665c59',
  '9d5e4bc5-699a-40ea-97b7-6130de2e19de',
  'f79ab97c-704b-49cf-ae5d-6f0cd14f69ac',
  '4901859b-3128-40b3-a617-dc3427ebf2d0',
  'e810c770-4c40-417c-918e-98dc5181d1d8'],
 'embeddings': array([[ 0.00385157,  0.05127826, -0.05572198, ..., -0.09904915,
          0.03224577,  0.03322845],
        [-0.04329655, -0.00142281,  

In [None]:
vector_store.similarity_search(
    query="who is the captain of Delhi Capitals?",
    k=2
)

[Document(metadata={'source': 'IPLData.txt'}, page_content='Yuzvendra Chahal is one of the best leg-spinners in the IPL. Playing for Rajasthan Royals, he is known for taking crucial wickets and has won the Purple Cap for being the top wicket-taker.'),
 Document(metadata={'source': 'IPLData.txt'}, page_content='Axar Patel, an all-rounder for Delhi Capitals, is known for his tight left-arm spin and handy batting. He often plays a key role in restricting runs and breaking partnerships in the middle overs.')]

In [9]:
vector_store.similarity_search_with_score(
    query="who is the captain of Delhi Capitals?",
    k=2
)

[(Document(metadata={'source': 'IPLData.txt'}, page_content='Rishabh Pant, captain of Delhi Capitals, is known for his explosive batting and fearless attitude. His quick scoring ability makes him one of the most dangerous wicketkeeper-batsmen in the IPL.'),
  0.7929402589797974),
 (Document(metadata={'source': 'IPLData.txt'}, page_content='MS Dhoni, the legendary captain of Chennai Super Kings, is known for his calmness and sharp captaincy. He has led CSK to several IPL trophies and is admired for his finishing skills and leadership.'),
  0.9595600962638855)]