In [1]:
from dotenv import load_dotenv
import os
from langchain_neo4j import Neo4jGraph

from langchain_core.runnables import (
    RunnableBranch,
    RunnableLambda,
    RunnableParallel,
    RunnablePassthrough,
)
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts.prompt import PromptTemplate
from pydantic import BaseModel, Field
# from langchain_core.pydantic_v1 import BaseModel, Field
from typing import Tuple, List
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import WikipediaLoader
from langchain_text_splitters import TokenTextSplitter
from langchain_openai import ChatOpenAI
from langchain_experimental.graph_transformers import LLMGraphTransformer

from langchain_neo4j import Neo4jVector
from langchain_openai import OpenAIEmbeddings
from langchain_neo4j.vectorstores.neo4j_vector import remove_lucene_chars
import tiktoken
load_dotenv()

PyTorch was not found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.


True

In [2]:
# Load the data from Wikipedia.
raw_documents = WikipediaLoader(query="The story of Indian Economy").load()

# Positions, within the search results, of the articles to keep. The trailing
# labels are the article titles recorded by the notebook author.
# NOTE(review): search ranking may differ between runs, so the labels are
# presumably only valid for the run they were written against — verify the
# titles in raw_documents[i].metadata before trusting them.
SELECTED_DOC_INDICES = (
    0,  # Economy of India
    1,  # Economic history of India
    3,  # 1991 Indian economic crisis
    4,  # Economy of South Asia
    5,  # Indian National Congress
    8,  # Economy of the British Empire
)

# Fail with an explicit message (same exception type as plain indexing would
# raise) when the search returned fewer documents than the selection needs.
if len(raw_documents) <= max(SELECTED_DOC_INDICES):
    raise IndexError(
        f"Wikipedia search returned {len(raw_documents)} documents; "
        f"index {max(SELECTED_DOC_INDICES)} is required by SELECTED_DOC_INDICES"
    )

# Documents selected for splitting; single source of truth for the indices.
to_split_docs = [raw_documents[i] for i in SELECTED_DOC_INDICES]

# Construct one text blob from the summaries of the selected documents.
# The layout (three summaries per line, second line indented by 12 spaces,
# outer whitespace stripped) is kept exactly as before so the resulting
# string is unchanged.
summaries = [doc.metadata['summary'] for doc in to_split_docs]
naive_tag_text = f"""
            {summaries[0]} {summaries[1]} {summaries[2]}
            {summaries[3]} {summaries[4]} {summaries[5]}
                """.strip()

In [None]:
import os
import uuid
import chromadb
from openai import OpenAI


# =====================================================
# CONFIG
# =====================================================

# Chunking parameters passed to chunk_string() by ingest_text(). Both are
# measured in characters, because chunk_string slices the text by index.

CHUNK_SIZE = 512
OVERLAP = 24
# Number of chunks sent per embeddings request in create_embeddings().
BATCH_SIZE = 100

# Name of the Chroma collection that stores the chunks.
COLLECTION_NAME = "naive_rag_chunks"

# OpenAI embedding model used by create_embeddings().
EMBED_MODEL = "text-embedding-3-small"


# =====================================================
# INIT CLIENTS
# =====================================================

# OpenAI client constructed with default arguments.
# NOTE(review): presumably reads its API key from the environment populated
# by load_dotenv() in the first cell — confirm.
client = OpenAI()

# ---- In-Memory Chroma ----
# NOTE(review): an in-memory client presumably loses its data when the kernel
# restarts, so the ingestion cell must be re-run after a restart — confirm.
chroma_client = chromadb.Client()

# Fetch the collection if it already exists in this client, else create it.
collection = chroma_client.get_or_create_collection(
    name=COLLECTION_NAME
)


# =====================================================
# CHUNKING FUNCTION
# =====================================================

def chunk_string(text, chunk_size=512, overlap=24):
    step = chunk_size - overlap
    chunks = []
    metadata = []

    for i in range(0, len(text), step):
        chunk = text[i:i + chunk_size]

        if not chunk:
            continue

        chunks.append(chunk)

        metadata.append({
            "chunk_index": len(chunks) - 1,
            "start_char": i,
            "end_char": i + len(chunk),
            "source": "naive_rag_text"
        })

    return chunks, metadata


# =====================================================
# EMBEDDING FUNCTION (BATCHED)
# =====================================================

def create_embeddings(chunks):
    """Embed ``chunks`` with the OpenAI embeddings endpoint, in batches.

    The chunks are sent ``BATCH_SIZE`` at a time to the module-level
    ``client`` using ``EMBED_MODEL``; a progress line is printed after each
    batch.

    Args:
        chunks: List of strings to embed.

    Returns:
        A list with one embedding per chunk, collected batch by batch in the
        order the API response lists them.
    """
    vectors = []
    batch_starts = range(0, len(chunks), BATCH_SIZE)

    for batch_number, start in enumerate(batch_starts, start=1):
        reply = client.embeddings.create(
            model=EMBED_MODEL,
            input=chunks[start:start + BATCH_SIZE],
        )
        vectors += [record.embedding for record in reply.data]

        print(f"Embedded batch {batch_number}")

    return vectors


# =====================================================
# STORE IN CHROMA
# =====================================================

def store_in_chroma(chunks, embeddings, metadatas):
    """Add chunk texts, their vectors and metadata to ``collection``.

    Every chunk is stored under a freshly generated random UUID4 string.

    Args:
        chunks: Chunk texts, stored as the documents.
        embeddings: One embedding per chunk, in the same order.
        metadatas: One metadata dict per chunk, in the same order.

    NOTE(review): because the ids are random, calling this twice with the
    same chunks presumably stores them twice — confirm whether that matters
    when the ingestion cell is re-run.
    """
    record_ids = []
    for _ in chunks:
        record_ids.append(str(uuid.uuid4()))

    collection.add(
        documents=chunks,
        embeddings=embeddings,
        metadatas=metadatas,
        ids=record_ids,
    )

    print(f"Stored {len(chunks)} chunks in Chroma (In-Memory)")


# =====================================================
# PIPELINE RUNNER
# =====================================================

def ingest_text(text):
    """Chunk ``text``, embed the chunks and store them in Chroma.

    Runs the three pipeline stages in order — ``chunk_string`` (with
    ``CHUNK_SIZE`` / ``OVERLAP``), ``create_embeddings`` and
    ``store_in_chroma`` — printing a progress line before each stage.

    Args:
        text: The raw text to ingest.

    Returns:
        None. When ``text`` produces no chunks (e.g. it is empty) the
        function reports that and returns before embedding or storing.
    """
    print("Chunking text...")
    chunks, metadatas = chunk_string(
        text,
        chunk_size=CHUNK_SIZE,
        overlap=OVERLAP
    )

    print(f"Total chunks: {len(chunks)}")

    if not chunks:
        # Nothing to embed or store. Skipping avoids handing empty id/document
        # lists to the vector store, which is expected to reject them
        # (see Chroma's Collection.add documentation).
        print("Nothing to ingest")
        return

    print("Creating embeddings...")
    embeddings = create_embeddings(chunks)

    print("Storing in Chroma...")
    store_in_chroma(chunks, embeddings, metadatas)

    print("Ingestion complete")


# =====================================================
# EXAMPLE USAGE
# =====================================================

# Ingest the concatenated Wikipedia summaries built in the data-loading cell.
# NOTE(review): store_in_chroma assigns random UUID ids, so re-running this
# cell in the same kernel presumably adds a second copy of every chunk.
ingest_text(naive_tag_text)


Chunking text...
Total chunks: 38
Creating embeddings...
Embedded batch 1
Storing in Chroma...
Stored 38 chunks in Chroma (In-Memory)
Ingestion complete


In [7]:
# Re-bind the collection by its configured name rather than repeating the
# string literal, so a change to COLLECTION_NAME is picked up here as well.
collection = chroma_client.get_collection(COLLECTION_NAME)
def embed_query(query):
    """Return the embedding vector for a single query string.

    Uses ``EMBED_MODEL``, the same model ``create_embeddings`` uses for the
    stored chunks, so query and document vectors always come from one model
    even if the configured model is changed later.

    Args:
        query: The text to embed.

    Returns:
        The embedding of ``query`` (first item of the API response).
    """
    res = client.embeddings.create(
        model=EMBED_MODEL,
        input=query
    )
    return res.data[0].embedding

    
def retrieve_context(query, k=10):
    """Fetch the stored chunks closest to ``query``.

    The query is embedded with ``embed_query`` and used to query the
    module-level ``collection`` for ``k`` results. The full ``documents``
    entry of the result is printed for inspection.

    Args:
        query: The user's question.
        k: Number of results requested from the collection.

    Returns:
        The first element of the result's ``documents`` entry, i.e. the
        documents returned for this single query embedding.
    """
    hits = collection.query(
        n_results=k,
        query_embeddings=[embed_query(query)],
    )

    print(f"Context:\n {hits['documents']}")
    return hits["documents"][0]

def build_context(chunks):
    """Concatenate ``chunks`` into one string, separated by blank lines."""
    separator = "\n\n"
    return separator.join(chunks)



In [8]:
def rag_query(user_query):
    """Answer ``user_query`` using only context retrieved from the collection.

    Retrieves five chunks with ``retrieve_context``, joins them with
    ``build_context``, embeds them in a prompt that tells the model to answer
    only from that context, and sends the prompt to the chat completions
    endpoint of the module-level ``client``.

    Args:
        user_query: The question to answer.

    Returns:
        The message content of the first choice in the completion.
    """
    context = build_context(retrieve_context(user_query, k=5))

    prompt = f"""
Answer ONLY using the provided context.
If answer is not present, say "I don't know".

Context:
{context}

Question:
{user_query}
"""

    conversation = [
        {"role": "system", "content": "Answer only from context."},
        {"role": "user", "content": prompt},
    ]
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=conversation,
        temperature=0.2,
    )

    first_choice = completion.choices[0]
    return first_choice.message.content


In [9]:
# Ask the naive RAG pipeline about the 1991 crisis. rag_query prints the
# retrieved context (via retrieve_context) before the answer is printed here.
query = "Comment on 1991 financial crisis?"

answer = rag_query(query)

print("\n\n ANSWER:",answer)

Context:
 [["er US$8.5 trillion of foreign exchange reserves – more than half of the world's total, and adding tertiary and quaternary sectors to expand in the share of Asia's economy.\n\n The 1991 Indian economic crisis was an economic crisis in India resulting from a balance of payments deficit due to excess reliance on imports and other external factors. India's economic problems started worsening in 1985 as imports swelled, leaving the country in a twin deficit: the Indian trade balance was in deficit at a time when t", "try's economy. The International Monetary Fund (IMF) suspended its loan program to India, and the World Bank also discontinued its assistance. These actions limited the government's options to address the crisis and forced it to take drastic measures to avoid defaulting on its payments.\nTo address the economic crisis, the government implemented various measures, including the pledge of a significant portion of India's gold reserves to the Bank of England and the U

In [10]:
# Ask about the Indian economy under the British Empire and print the answer.
query = "How did Indian Economy fare under British Empire? "

answer = rag_query(query)

print("\n\n ANSWER:",answer)

Context:
 [["aft industries under British rule, which along with fast economic and population growth in the Western world, resulted in India's share of the world economy declining from 24.4% in 1700 to 4.2% in 1950, and its share of global industrial output declining from 25% in 1750 to 2% in 1900. Due to its ancient history as a trading zone and later its colonial status, colonial India remained economically integrated with the world, with high levels of trade, investment and migration.\nFrom 1850 to 1947, India's GDP i", "The economy of India is a developing mixed economy with a notable public sector in strategic sectors. It is the world's fourth-largest economy by nominal GDP and the third-largest by purchasing power parity (PPP); on a per capita income basis, India ranked 136th by nominal GDP and 119th by PPP-adjusted GDP. From independence in 1947 until 1991, successive governments followed the Soviet model and promoted protectionist economic policies, with extensive Sovietization

MULTIHOP QUESTIONS

In [11]:
# Multi-hop question spanning the colonial, socialist and liberalised periods.
query = "How did India move from: Colonial dependency economy and Mixed socialist economy, to Liberalized global economy and Regional economic leader in South Asia?"

answer = rag_query(query)

print("\n\n ANSWER:",answer)


Context:
 [['lonial era, South Asia has grown significantly, with India advancing because of economic liberalisation from the 1980s onwards, and extreme poverty now below 15% in the region. South Asia has been the fastest-growing region of the world since 2014.\nDespite projected growth rates of about 6.0-6.1 percent for 2024-2025, South Asia continues to face significant economic challenges. A notable slowdown in private investment, especially in key sectors such as manufacturing and services, poses a major concern. Add', 't began a policy of economic liberalisation. The economy of South Asia comprises 2 billion people (25% of the world population) living in eight countries (though Afghanistan is sometimes excluded). The Indian subcontinent was historically one of the richest regions in the world, comprising 25% of world GDP as recently as 1700, but experienced significant de-industrialisation and a doubling of extreme poverty during the colonial era of the late 18th to mid-20th centu

In [12]:
# Multi-hop question linking British-era policy to the 1991 crisis.
query = "How did policies from the British Empire economy influence the economic structure India inherited at independence, and how did that contribute to the 1991 crisis??"

answer = rag_query(query)

print("\n\n ANSWER:",answer)

Context:
 [["ed by the World Bank and the IMF, which included structural reforms. As a result, the Indian economy was opened up to foreign participation in various sectors, including state-owned enterprises. This move towards liberalisation was seen by some as necessary to secure much-needed funds and prevent defaults on its loan payments. However, it also led to concerns about the impact of foreign entities on India's economy and the potential loss of control over vital industries.\nIndia's liberalisation policies since", 'on, demand-side economics, natural resources, bureaucrat-driven enterprises and economic regulation. This was a form of the Licence Raj. The end of the Cold War and an acute balance of payments crisis in 1991 led to the adoption of a broad economic liberalisation in India and indicative planning. India has about 1,900 public sector companies, with the Indian state having complete control and ownership of railways. While the Indian government retains ownership throug

COMPARATIVE QUESTIONS

In [13]:
# Comparative question: Congress-era economic goals versus the inherited
# colonial economic structure.
query = "Where do the economic goals of the Indian National Congress post-independence conflict with the structural economic realities inherited from the British colonial economy?"

answer = rag_query(query)

print("\n\n ANSWER:",answer)

Context:
 [['on, demand-side economics, natural resources, bureaucrat-driven enterprises and economic regulation. This was a form of the Licence Raj. The end of the Cold War and an acute balance of payments crisis in 1991 led to the adoption of a broad economic liberalisation in India and indicative planning. India has about 1,900 public sector companies, with the Indian state having complete control and ownership of railways. While the Indian government retains ownership through the National Highways Authority of India', "aft industries under British rule, which along with fast economic and population growth in the Western world, resulted in India's share of the world economy declining from 24.4% in 1700 to 4.2% in 1950, and its share of global industrial output declining from 25% in 1750 to 2% in 1900. Due to its ancient history as a trading zone and later its colonial status, colonial India remained economically integrated with the world, with high levels of trade, investment and mi

In [14]:
# NOTE(review): this query string is identical to the one in the preceding
# cell — presumably a different comparative question was intended; confirm
# or delete this cell.
query = "Where do the economic goals of the Indian National Congress post-independence conflict with the structural economic realities inherited from the British colonial economy?"

answer = rag_query(query)

print("\n\n ANSWER:",answer)

Context:
 [['on, demand-side economics, natural resources, bureaucrat-driven enterprises and economic regulation. This was a form of the Licence Raj. The end of the Cold War and an acute balance of payments crisis in 1991 led to the adoption of a broad economic liberalisation in India and indicative planning. India has about 1,900 public sector companies, with the Indian state having complete control and ownership of railways. While the Indian government retains ownership through the National Highways Authority of India', "aft industries under British rule, which along with fast economic and population growth in the Western world, resulted in India's share of the world economy declining from 24.4% in 1700 to 4.2% in 1950, and its share of global industrial output declining from 25% in 1750 to 2% in 1900. Due to its ancient history as a trading zone and later its colonial status, colonial India remained economically integrated with the world, with high levels of trade, investment and mi

Counterfactual (What-If Scenario) Questions

In [15]:
# Counterfactual question: a British Empire that industrialised India.
query = "If the British Empire had industrialized India instead of structuring it as an extraction economy, which economic constraints leading to the 1991 crisis would likely not have existed?"

answer = rag_query(query)

print("\n\n ANSWER:",answer)

Context:
 [["aft industries under British rule, which along with fast economic and population growth in the Western world, resulted in India's share of the world economy declining from 24.4% in 1700 to 4.2% in 1950, and its share of global industrial output declining from 25% in 1750 to 2% in 1900. Due to its ancient history as a trading zone and later its colonial status, colonial India remained economically integrated with the world, with high levels of trade, investment and migration.\nFrom 1850 to 1947, India's GDP i", 'uring centers in international trade. This growth of manufacturing has been seen as a form of proto-industrialization, similar to 18th-century Western Europe prior to the Industrial Revolution. Indian subcontinent went through a period of deindustrialization in the latter half of the 18th century as an indirect outcome of the collapse of the Mughal Empire, and that British rule later caused further deindustrialization.\nIndia experienced deindustrialisation and cess

In [16]:
# Counterfactual question: protectionism instead of liberalisation after 1991.
query = "If India had responded to the 1991 crisis with protectionism instead of liberalization, would its economy still dominate South Asia today?"

answer = rag_query(query)

print("\n\n ANSWER:",answer)

Context:
 [["ed by the World Bank and the IMF, which included structural reforms. As a result, the Indian economy was opened up to foreign participation in various sectors, including state-owned enterprises. This move towards liberalisation was seen by some as necessary to secure much-needed funds and prevent defaults on its loan payments. However, it also led to concerns about the impact of foreign entities on India's economy and the potential loss of control over vital industries.\nIndia's liberalisation policies since", 't began a policy of economic liberalisation. The economy of South Asia comprises 2 billion people (25% of the world population) living in eight countries (though Afghanistan is sometimes excluded). The Indian subcontinent was historically one of the richest regions in the world, comprising 25% of world GDP as recently as 1700, but experienced significant de-industrialisation and a doubling of extreme poverty during the colonial era of the late 18th to mid-20th centu

The BIG PICTURE Questions

In [17]:
# Big-picture question combining ideology, historical constraints and crises.
query = "How did political ideology, historical constraints, and economic crises collectively shape India’s modern economic identity?"

answer = rag_query(query)

print("\n\n ANSWER:",answer)

Context:
 [["ed by the World Bank and the IMF, which included structural reforms. As a result, the Indian economy was opened up to foreign participation in various sectors, including state-owned enterprises. This move towards liberalisation was seen by some as necessary to secure much-needed funds and prevent defaults on its loan payments. However, it also led to concerns about the impact of foreign entities on India's economy and the potential loss of control over vital industries.\nIndia's liberalisation policies since", 'on, demand-side economics, natural resources, bureaucrat-driven enterprises and economic regulation. This was a form of the Licence Raj. The end of the Cold War and an acute balance of payments crisis in 1991 led to the adoption of a broad economic liberalisation in India and indicative planning. India has about 1,900 public sector companies, with the Indian state having complete control and ownership of railways. While the Indian government retains ownership throug

In [18]:
# Big-picture question asking for one causal narrative across the whole period.
query = "What single unified causal narrative best explains India’s economic evolution from colonial rule to global integration?"

answer = rag_query(query)

print("\n\n ANSWER:",answer)

Context:
 [["aft industries under British rule, which along with fast economic and population growth in the Western world, resulted in India's share of the world economy declining from 24.4% in 1700 to 4.2% in 1950, and its share of global industrial output declining from 25% in 1750 to 2% in 1900. Due to its ancient history as a trading zone and later its colonial status, colonial India remained economically integrated with the world, with high levels of trade, investment and migration.\nFrom 1850 to 1947, India's GDP i", "0 to 1947, India's GDP in 1990 international dollar terms grew from $125.7 billion to $213.7 billion, a 70% increase, or an average annual growth rate of 0.55%. In 1820, India's GDP was 16% of the global GDP. By 1870, it had fallen to 12%, and by 1947 to 4%.\nThe Republic of India, founded in 1947, adopted central planning for most of its independent history, with extensive public ownership, regulation, red tape and trade barriers. After the 1991 economic crisis, th

TEMPORAL CAUSAL LINKAGE QUESTIONS

In [19]:
# Temporal/causal question: trace the path from colonial policy to 1991.
query = "Trace the causal path from British colonial economic policies to the 1991 Indian economic crisis, naming intermediate economic structures?"

answer = rag_query(query)

print("\n\n ANSWER:",answer)

Context:
 [['on, demand-side economics, natural resources, bureaucrat-driven enterprises and economic regulation. This was a form of the Licence Raj. The end of the Cold War and an acute balance of payments crisis in 1991 led to the adoption of a broad economic liberalisation in India and indicative planning. India has about 1,900 public sector companies, with the Indian state having complete control and ownership of railways. While the Indian government retains ownership through the National Highways Authority of India', "er US$8.5 trillion of foreign exchange reserves – more than half of the world's total, and adding tertiary and quaternary sectors to expand in the share of Asia's economy.\n\n The 1991 Indian economic crisis was an economic crisis in India resulting from a balance of payments deficit due to excess reliance on imports and other external factors. India's economic problems started worsening in 1985 as imports swelled, leaving the country in a twin deficit: the Indian tr

DEPENDENCY/RELATIONSHIPS QUESTIONS

In [20]:
# Dependency question: which modern components rely on colonial-era
# arrangements, and which do not.
query = "Which components of India’s modern economic structure are most dependent on colonial-era trade and institutional arrangements, and which are least dependent?"

answer = rag_query(query)

print("\n\n ANSWER:",answer)

Context:
 [["aft industries under British rule, which along with fast economic and population growth in the Western world, resulted in India's share of the world economy declining from 24.4% in 1700 to 4.2% in 1950, and its share of global industrial output declining from 25% in 1750 to 2% in 1900. Due to its ancient history as a trading zone and later its colonial status, colonial India remained economically integrated with the world, with high levels of trade, investment and migration.\nFrom 1850 to 1947, India's GDP i", 'uring centers in international trade. This growth of manufacturing has been seen as a form of proto-industrialization, similar to 18th-century Western Europe prior to the Industrial Revolution. Indian subcontinent went through a period of deindustrialization in the latter half of the 18th century as an indirect outcome of the collapse of the Mughal Empire, and that British rule later caused further deindustrialization.\nIndia experienced deindustrialisation and cess

In [21]:
# Relationship question: Congress policies that reinforced colonial-era
# economic structures.
query = "Identify economic policies where the Indian National Congress indirectly reinforced colonial-era economic structures"

answer = rag_query(query)

print("\n\n ANSWER:",answer)

Context:
 [['on, demand-side economics, natural resources, bureaucrat-driven enterprises and economic regulation. This was a form of the Licence Raj. The end of the Cold War and an acute balance of payments crisis in 1991 led to the adoption of a broad economic liberalisation in India and indicative planning. India has about 1,900 public sector companies, with the Indian state having complete control and ownership of railways. While the Indian government retains ownership through the National Highways Authority of India', "aft industries under British rule, which along with fast economic and population growth in the Western world, resulted in India's share of the world economy declining from 24.4% in 1700 to 4.2% in 1950, and its share of global industrial output declining from 25% in 1750 to 2% in 1900. Due to its ancient history as a trading zone and later its colonial status, colonial India remained economically integrated with the world, with high levels of trade, investment and mi