In [1]:
import ollama
import chromadb
import csv
from langchain.schema import Document

In [2]:
def load_documents():
    docs = []
    # Define the columns we want to embed vs which ones we want in metadata
    columns_to_embed = ["College","Branch","Category","Gender","OpeningRank","ClosingRank","Year", "Round"]
    columns_to_metadata = ["College","Branch","Category","Gender","Year", "Round"]
    with open('iit-closing-ranks.csv', newline="", encoding='utf-8-sig') as csvfile:
        csv_reader = csv.DictReader(csvfile)
        for i, row in enumerate(csv_reader):
            to_metadata = {col: row[col] for col in columns_to_metadata if col in row}
            values_to_embed = {k: row[k] for k in columns_to_embed if k in row}
            to_embed = "\n".join(f"{k.strip()}: {v.strip()}" for k, v in values_to_embed.items())
            newDoc = Document(page_content=to_embed, metadata=to_metadata)
            docs.append(newDoc)
    print(f"Loaded {len(docs)} documents.")
    return docs

In [67]:
client = chromadb.PersistentClient("chroma_db_v2")
collection = client.get_or_create_collection(name="docs_corrected_v2")

In [68]:
documents = load_documents()

Loaded 30458 documents.


In [69]:
documents[0]

Document(metadata={'College': 'Indian Institute  of Technology Bhubaneswar', 'Branch': 'Civil Engineering (4 Years Bachelor of Technology)', 'Category': 'OPEN', 'Gender': 'Gender-Neutral', 'Year': '2023'}, page_content='College: Indian Institute  of Technology Bhubaneswar\nBranch: Civil Engineering (4 Years Bachelor of Technology)\nCategory: OPEN\nGender: Gender-Neutral\nOpeningRank: 9462\nClosingRank: 12911\nYear: 2023')

In [7]:
# Step-1 - Create Embeddings
# # store each document in a vector embedding database
print("Creating embeddings for documents...")
for i, d in enumerate(documents):
#   print(d.type)
#   print(d.page_content)
  response = ollama.embed(model="nomic-embed-text:latest", input=d.page_content)
  embeddings = response["embeddings"]
  collection.add(
    ids=[str(i)],
    embeddings=embeddings,
    documents=[d.page_content],
    metadatas=[d.metadata]
  )
  # add log for every 1000 documents
  if i % 1000 == 0:
      print(f"Processed {i} documents.")

print("Embeddings created and stored in the collection.")

Creating embeddings for documents...
Processed 0 documents.
Processed 1000 documents.
Processed 2000 documents.
Processed 3000 documents.
Processed 4000 documents.
Processed 5000 documents.
Processed 6000 documents.
Processed 7000 documents.
Processed 8000 documents.
Processed 9000 documents.
Processed 10000 documents.
Processed 11000 documents.
Processed 12000 documents.
Processed 13000 documents.
Processed 14000 documents.
Processed 15000 documents.
Processed 16000 documents.
Processed 17000 documents.
Processed 18000 documents.
Processed 19000 documents.
Processed 20000 documents.
Processed 21000 documents.
Processed 22000 documents.
Processed 23000 documents.
Processed 24000 documents.
Processed 25000 documents.
Processed 26000 documents.
Processed 27000 documents.
Processed 28000 documents.
Processed 29000 documents.
Processed 30000 documents.
Embeddings created and stored in the collection.


In [56]:
input_prompt = "What is cutoff for IIT Bhubaneswar Civil in 2023 for EWS category female?"


In [57]:
# generate an embedding for the input and retrieve the most relevant doc
input_prompt_embedding = ollama.embed(
  model="nomic-embed-text:latest",
  input=input_prompt
)

In [58]:
input_prompt_embedding

EmbedResponse(model='nomic-embed-text:latest', created_at=None, done=None, done_reason=None, total_duration=48263250, load_duration=8804958, prompt_eval_count=22, prompt_eval_duration=None, eval_count=None, eval_duration=None, embeddings=[[-0.013225602, 0.017746378, -0.14077987, 0.0068097888, 0.04106113, 0.07477679, 0.064623944, -0.035706736, -0.022891425, 0.06475187, -0.014319079, 0.03102917, 0.107436955, 0.010460729, 0.010427102, -0.017433656, -0.004301423, -0.042128216, -0.02142775, 0.010631569, -0.06982596, -0.015255871, -0.041722927, 0.008139004, 0.10330227, 0.06252138, 0.044583928, 0.04567022, -0.009122506, 0.02202899, 0.0139455255, -0.0024045806, 0.0051151216, -0.0633241, 0.013715263, -0.0410565, 0.05018297, -0.014090066, -0.033158254, -0.06405512, 0.11343658, -0.034406826, 0.015446071, -0.03649666, 0.012203893, 0.03649368, 0.05556488, -0.0071874065, 0.05837849, 0.016281119, -0.008885176, -0.0057657557, 0.01649967, -0.009789251, 0.023660036, 0.009418236, -0.09176736, 0.010364407

In [59]:
print(f"Retrieving relevant document for the input prompt {input_prompt}.")
results = collection.query(
  query_embeddings=input_prompt_embedding["embeddings"],
  n_results=10
)

Retrieving relevant document for the input prompt What is cutoff for IIT Bhubaneswar Civil in 2023 for EWS category female?.


In [60]:
data = results['documents'][0][0]

In [61]:
results['documents']

[['College: Indian Institute  of Technology Bhubaneswar\nBranch: Civil Engineering (4 Years Bachelor of Technology)\nCategory: EWS\nGender: Female-only (including Supernumerary)\nOpeningRank: 3056\nClosingRank: 3056\nYear: 2023',
  'College: Indian Institute  of Technology Bhubaneswar\nBranch: Civil Engineering (4 Years Bachelor of Technology)\nCategory: EWS\nGender: Female-only (including Supernumerary)\nOpeningRank: 3056\nClosingRank: 3056\nYear: 2023',
  'College: Indian Institute  of Technology Bhubaneswar\nBranch: Civil Engineering (4 Years Bachelor of Technology)\nCategory: EWS\nGender: Female-only (including Supernumerary)\nOpeningRank: 3056\nClosingRank: 3056\nYear: 2023',
  'College: Indian Institute  of Technology Bhubaneswar\nBranch: Civil Engineering (4 Years Bachelor of Technology)\nCategory: EWS\nGender: Female-only (including Supernumerary)\nOpeningRank: 3056\nClosingRank: 3056\nYear: 2023',
  'College: Indian Institute  of Technology Bhubaneswar\nBranch: Civil Engineeri

In [62]:
print(f"Retrieved data: {data}")

Retrieved data: College: Indian Institute  of Technology Bhubaneswar
Branch: Civil Engineering (4 Years Bachelor of Technology)
Category: EWS
Gender: Female-only (including Supernumerary)
OpeningRank: 3056
ClosingRank: 3056
Year: 2023


In [63]:
## Step-3 - generate a response combining the prompt and data we retrieved in step 2
print(f"Generating response using the retrieved data and input prompt.")

output = ollama.generate(
  model="gemma3:12b",
  prompt=f"You are given below information about closing rank in a college: {data}. \
    Using above information, respond to this prompt: {input_prompt}"
)

print(output['response'])

Generating response using the retrieved data and input prompt.
Based on the provided information, the cutoff (or closing rank) for IIT Bhubaneswar Civil Engineering in 2023 for the EWS category female is **3056**.



The opening and closing ranks are the same, indicating that the seat was filled at that rank.


In [64]:
input_prompt2 = "my rank is 220. general category, male. where can I get cse branch?"
input_prompt_embedding2 = ollama.embed(
  model="nomic-embed-text:latest",
  input=input_prompt2
)
results2 = collection.query(
  query_embeddings=input_prompt_embedding2["embeddings"],
  n_results=1
)

In [65]:
data2 = results2['documents'][0][0]

In [66]:

output2 = ollama.generate(
  model="gemma3:12b",
  prompt=f"You are given below information about closing rank in a college: {data2}. \
    Using above information, respond to this prompt: {input_prompt2}"
)

print(output2['response'])

Okay, let's break down your situation and see about getting into a CSE (Computer Science and Engineering) branch.

**Understanding the IIT Kharagpur 2024 Data**

The provided information shows the closing rank for SC (PwD) - Gender Neutral students in ECE at IIT Kharagpur in 2024 was 9. This is *extremely* low, reflecting a very specific category and disability status.  **This does not give us much information about your chances.**

**Your Situation (General Category, Male, Rank 220)**

You're in a significantly different position. You're a General Category, Male candidate with a rank of 220. This means you're competing with the largest and most competitive pool of applicants.

**Can you get CSE at IIT Kharagpur with a Rank of 220?**

*   **Highly Unlikely, but not Impossible:** Based on past trends, getting CSE at IIT Kharagpur with a General Category, Male rank of 220 would be a significant challenge. Closing ranks for CSE at IIT Kharagpur for General Category, Male students are cons