In [3]:
import os
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())
openai_api_key=os.environ["OPENAI_API_KEY"]

In [4]:
from langchain_community.document_loaders import TextLoader
loader = TextLoader("data/rwi.txt")
loaded_data=loader.load()

In [3]:
loaded_data[0].page_content

'About Me\n\n\nHi, I’m Rwirub Swargiary, a passionate full-stack developer with a strong foundation in Java, Python, JavaScript, and C.\n My journey into the world of development started with a curiosity for how technology works behind the scenes, and over time, it grew into a deep enthusiasm for crafting seamless, user-friendly web and app experiences.\n  I love building things that not only work efficiently but also feel intuitive and enjoyable for users.\n\nCurrently, I’m focused on deepening my knowledge in MongoDB and Data Structures & Algorithms. \nI’ve completed several MongoDB certifications covering topics like data modeling, CRUD operations, indexes, and aggregations, and I’m continuously exploring ways to apply this knowledge in real-world projects.\n My DSA practice helps me strengthen the logical and problem-solving skills that are essential for building scalable systems.\n\nOver the years, I’ve worked on a variety of projects that reflect my versatility and curiosity. I’v

## splitter

##character text splitter

In [17]:
from langchain_text_splitters import CharacterTextSplitter

text_splitter = CharacterTextSplitter(
  separator="n\n",
  chunk_size=1000,
  chunk_overlap=200,
  length_function=len,
  is_separator_regex=False,
)

In [18]:
texts = text_splitter.create_documents([loaded_data[0].page_content])

In [19]:
texts

[Document(metadata={}, page_content='About Me\n\n\nHi, I’m Rwirub Swargiary, a passionate full-stack developer with a strong foundation in Java, Python, JavaScript, and C.\n My journey into the world of development started with a curiosity for how technology works behind the scenes, and over time, it grew into a deep enthusiasm for crafting seamless, user-friendly web and app experiences.\n  I love building things that not only work efficiently but also feel intuitive and enjoyable for users.\n\nCurrently, I’m focused on deepening my knowledge in MongoDB and Data Structures & Algorithms. \nI’ve completed several MongoDB certifications covering topics like data modeling, CRUD operations, indexes, and aggregations, and I’m continuously exploring ways to apply this knowledge in real-world projects.\n My DSA practice helps me strengthen the logical and problem-solving skills that are essential for building scalable systems.\n\nOver the years, I’ve worked on a variety of projects that refle

In [20]:
len(texts)

1

##Recursive CharacterTextSplitter

In [21]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
recursive_splitter= RecursiveCharacterTextSplitter(
  chunk_size=25,
  chunk_overlap=4
)
text = recursive_splitter.create_documents([loaded_data[0].page_content])


In [22]:
text

[Document(metadata={}, page_content='About Me'),
 Document(metadata={}, page_content='Hi, I’m Rwirub'),
 Document(metadata={}, page_content='Swargiary, a passionate'),
 Document(metadata={}, page_content='full-stack developer'),
 Document(metadata={}, page_content='with a strong foundation'),
 Document(metadata={}, page_content='in Java, Python,'),
 Document(metadata={}, page_content='JavaScript, and C.'),
 Document(metadata={}, page_content='My journey into the'),
 Document(metadata={}, page_content='the world of development'),
 Document(metadata={}, page_content='started with a curiosity'),
 Document(metadata={}, page_content='for how technology works'),
 Document(metadata={}, page_content='behind the scenes, and'),
 Document(metadata={}, page_content='and over time, it grew'),
 Document(metadata={}, page_content='into a deep enthusiasm'),
 Document(metadata={}, page_content='for crafting seamless,'),
 Document(metadata={}, page_content='user-friendly web and'),
 Document(metadata={}

## embeddings

In [2]:
from langchain_openai import OpenAIEmbeddings

embeddings_model = OpenAIEmbeddings()


In [4]:
chunks_of_text = [
  "hi there!.",
  "Hello!",
  "whats's your name?",
  "bond, james bond",
  "Hello Bond!"
  
]

In [5]:
embeddings = embeddings_model.embed_documents(chunks_of_text)

In [6]:
embeddings

[[-0.040008626878261566,
  -0.020771123468875885,
  -0.022129856050014496,
  -0.03737187758088112,
  -0.01499987207353115,
  0.01571287028491497,
  -0.003527996828779578,
  -0.018659032881259918,
  0.00417709443718195,
  -0.02001776546239853,
  0.01261872611939907,
  0.001334349624812603,
  -0.01883392035961151,
  -0.01623752899467945,
  0.010842956602573395,
  -0.02013884112238884,
  0.021672461181879044,
  -0.007984236814081669,
  0.02311190962791443,
  -0.012679263949394226,
  -0.01608954928815365,
  0.005774614401161671,
  0.015282380394637585,
  -0.013842931017279625,
  -0.012497651390731335,
  -0.002519036876037717,
  -0.0011830056319013238,
  -0.015981925651431084,
  0.0049405405297875404,
  -0.019963955506682396,
  -0.001843033591285348,
  0.01019049622118473,
  -0.010399014689028263,
  -0.01316356472671032,
  -0.006783574353903532,
  -0.020394444465637207,
  0.003164771245792508,
  -0.017892222851514816,
  0.01930476725101471,
  -0.009295884519815445,
  0.018430335447192192,
 

In [7]:
len(embeddings[0])

1536

one sentence is created 1536 dimension

In [27]:
print(embeddings[0][:5])

[-0.040008626878261566, -0.020771123468875885, -0.022129856050014496, -0.03737187758088112, -0.01499987207353115]


## vector store/ vector database

In [None]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma

loaded_document = TextLoader("data/Indian politics is an intricate tap.txt").load()
text_splitter=CharacterTextSplitter(chunk_size=200,chunk_overlap=0)
chunks_of_text=text_splitter.split_documents(loaded_document)
vector_db = Chroma.from_documents(chunks_of_text, OpenAIEmbeddings())

Created a chunk of size 1190, which is longer than the specified 200
Created a chunk of size 805, which is longer than the specified 200
Created a chunk of size 766, which is longer than the specified 200


In [21]:
chunks_of_text

[Document(metadata={'source': 'data/Indian politics is an intricate tap.txt'}, page_content='Indian politics is an intricate tapestry where tradition and modernity constantly collide, evolve, and reshape the nation’s destiny. At its core lies the belief in the sovereignty of the people, as enshrined in the Constitution, which came into force on 26th January 1950, making India a sovereign, socialist, secular, democratic republic. The political sphere is shaped by a unique combination of grassroots mobilization, federal dynamics, and the strategic role of central leadership. Villages, towns, and cities each contribute to the political pulse of the nation, with the Panchayati Raj system empowering local governance and urban municipal bodies handling the complex needs of growing cities. Yet, while decentralization aims to strengthen democracy at the ground level, political patronage, corruption, and bureaucratic inefficiency often limit its effectiveness. Caste remains a deeply influential

In [9]:
len(chunks_of_text)

4

In [10]:
vector_db = Chroma.from_documents(chunks_of_text, OpenAIEmbeddings())

In [11]:
vector_db

<langchain_chroma.vectorstores.Chroma at 0x141fc64f650>

In [12]:
question = "How does the passage describe the ongoing tension in Indian politics between tradition and modernity, and what factors contribute to this dynamic?"
response=vector_db.similarity_search(question)
print(response[0].page_content)

In the international arena, Indian politics is influenced by the country’s growing role as a global power, balancing relations with the United States, China, Russia, and neighboring South Asian countries, while also asserting leadership in the Global South. Domestic politics often intertwine with foreign policy, particularly on issues like border disputes, trade agreements, and diaspora engagement. As India moves further into the 21st century, the political journey remains unpredictable—a constant tug-of-war between progressive aspirations and entrenched systems, between the urge to modernize and the pull of traditional loyalties. It is this very complexity, with its chaos and contradictions, that makes Indian politics not only a subject of study but also a living, breathing drama in which every citizen is both a participant and an observer.


## vector store as retriever
findind the embedding the best answer your question

In [13]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma

loaded_document = TextLoader("data/Indian politics is an intricate tap.txt").load()
text_splitter=CharacterTextSplitter(chunk_size=200,chunk_overlap=0)
chunks_of_text=text_splitter.split_documents(loaded_document)
vector_db = Chroma.from_documents(chunks_of_text, OpenAIEmbeddings())

Created a chunk of size 1190, which is longer than the specified 200
Created a chunk of size 805, which is longer than the specified 200
Created a chunk of size 766, which is longer than the specified 200


In [14]:
vector_db

<langchain_chroma.vectorstores.Chroma at 0x141fc6cc550>

In [15]:
retriever = vector_db.as_retriever()

In [16]:
res=retriever.invoke("According to the passage, how have shifts in political leadership and party dynamics shaped India’s political narrative since independence?")
res

[Document(metadata={'source': 'data/Indian politics is an intricate tap.txt'}, page_content='The country’s political narrative has been punctuated by landmark electoral contests and transformative policies. The first few decades after independence were dominated by the Indian National Congress under leaders like Jawaharlal Nehru, Lal Bahadur Shastri, and Indira Gandhi, who oversaw nation-building, wars, and economic planning. The 1975–77 Emergency marked a turning point, highlighting the fragility of democratic freedoms. The late 20th century saw coalition politics emerge, with regional parties like DMK, AIADMK, TDP, SP, BSP, and TMC shaping national governments. In recent years, the Bharatiya Janata Party, under leaders like Atal Bihari Vajpayee and Narendra Modi, has redefined political discourse with a strong emphasis on nationalism, economic development, and centralized leadership.'),
 Document(metadata={'source': 'data/Indian politics is an intricate tap.txt'}, page_content='The c

In [17]:
len(res)

4

In [24]:
res=retriever=vector_db.as_retriever(search_kwargs={"k":10})

In [None]:
res=retriever.invoke("1What role do caste and religion play in shaping political alliances and voting patterns in India, as described in the passage?")
res

[Document(metadata={'source': 'data/Indian politics is an intricate tap.txt'}, page_content='Indian politics is an intricate tapestry where tradition and modernity constantly collide, evolve, and reshape the nation’s destiny. At its core lies the belief in the sovereignty of the people, as enshrined in the Constitution, which came into force on 26th January 1950, making India a sovereign, socialist, secular, democratic republic. The political sphere is shaped by a unique combination of grassroots mobilization, federal dynamics, and the strategic role of central leadership. Villages, towns, and cities each contribute to the political pulse of the nation, with the Panchayati Raj system empowering local governance and urban municipal bodies handling the complex needs of growing cities. Yet, while decentralization aims to strengthen democracy at the ground level, political patronage, corruption, and bureaucratic inefficiency often limit its effectiveness. Caste remains a deeply influential