In [4]:
from pymilvus import connections, db, utility, FieldSchema, CollectionSchema, DataType, Collection
import ollama

In [14]:
def get_embedding(model:str, prompt: str) -> list:
    response = ollama.embeddings(model=model, prompt=prompt)
    return response.get('embedding', [])


em = get_embedding(model="nomic-embed-text:v1.5", prompt="Hello world")
len(em)

768

In [3]:
conn = connections.connect(host='localhost', port='19530')

In [4]:
db.list_database()

['default', 'JobOrder']

In [5]:
db.create_database('JobOrder')

2025-06-21 23:10:01,705 [ERROR][handler]: RPC error: [create_database], <MilvusException: (code=65535, message=database already exist: JobOrder)>, <Time:{'RPC start': '2025-06-21 23:10:01.701086', 'RPC error': '2025-06-21 23:10:01.705883'}> (decorators.py:140)


MilvusException: <MilvusException: (code=65535, message=database already exist: JobOrder)>

In [6]:
db.using_database('JobOrder')

In [7]:
utility.list_collections()

[]

In [15]:
id_feild = FieldSchema(name='id', dtype=DataType.INT64, is_primary=True, auto_id=True)
clinet_name_feild = FieldSchema(name='client_name', dtype=DataType.VARCHAR, max_length=100)
job_title_feild = FieldSchema(name='job_title', dtype=DataType.VARCHAR, max_length=100)
job_desc_feild = FieldSchema(name='job_desc', dtype=DataType.VARCHAR, max_length=10000)
job_desc_embedding_feild = FieldSchema(name='job_desc_embedding', dtype=DataType.FLOAT_VECTOR, dim=768)

job_oreder_schema = CollectionSchema([id_feild, clinet_name_feild, job_title_feild, job_desc_feild, job_desc_embedding_feild])

In [16]:
collection = Collection(name='job_order', schema=job_oreder_schema)

In [17]:
index_params = {
    'metric_type': 'L2',
    'index_type': 'HNSW',
    'params': {
        'M': 16,
        'efConstruction': 200
    }
}

collection.create_index(field_name='job_desc_embedding', index_params=index_params)

Status(code=0, message=)

In [20]:
collection.load()

In [30]:
sample_job_orders = {
    'client_name': [
        'Sammy',
        'Bob',
        'Charlie',
        'Diana'
    ],
    'job_title': [
        'Software Engineer',
        'Data Scientist',
        'Backend Developer',
        'Frontend Developer'
    ],
    'job_desc': [
        '''
        As a Software Engineer, your responsibilities will include writing clean and maintainable code, collaborating with team members to define software requirements, participating in code reviews, troubleshooting and upgrading existing software, and ensuring the performance and responsiveness of applications. You will also be expected to stay current with emerging technologies and industry trends.
        ''',
        '''
        As a Data Scientist, you will analyze large amounts of raw information to find patterns that will help improve our company. We will rely on you to build data products to extract valuable business insights.
        ''',
        '''
        As a Backend Developer, you will design and implement server-side logic, maintain databases, and ensure high performance and responsiveness to requests from the front-end.
        ''',
        '''
        As a Frontend Developer, you will be responsible for implementing visual elements that users see and interact with in a web application, ensuring a seamless user experience.
        '''
    ]
}

# Example: To add embeddings for each job_desc
sample_job_orders['job_desc_embedding'] = [
    get_embedding(model='nomic-embed-text:v1.5', prompt=desc)
    for desc in sample_job_orders['job_desc']
]


In [31]:
collection.insert([sample_job_orders['client_name'], sample_job_orders['job_title'], sample_job_orders['job_desc'], sample_job_orders['job_desc_embedding']])

(insert count: 4, delete count: 0, upsert count: 0, timestamp: 458890654672224259, success count: 4, err count: 0

In [36]:
result = collection.query(expr="id >= 0", output_fields=["id", "client_name", "job_title", "job_desc", "job_desc_embedding"])
for item in result:
    print(item.get('id'))

458886263163361655
458886263163361656
458886263163361657
458886263163361658


In [35]:
ids_to_delete = [458886263163361653]
expr = f"id in {ids_to_delete}"
collection.delete(expr=expr)

(insert count: 0, delete count: 1, upsert count: 0, timestamp: 458890677751644162, success count: 0, err count: 0

In [39]:
sample_job_order = {
    'client_name': ['Alice'],
    'job_title': ['DevOps Engineer'],
    'job_desc': ['''As a DevOps Engineer, you will be responsible for automating and streamlining operations and processes, building and maintaining tools for deployment, monitoring, and operations, and troubleshooting and resolving issues in our development, test, and production environments.
''']
}
sample_job_order['job_desc_embedding'] = [get_embedding(model='nomic-embed-text:v1.5', prompt=sample_job_order['job_desc'][0])]
collection.insert([[458886263163361653], sample_job_order['client_name'], sample_job_order['job_title'], sample_job_order['job_desc'], sample_job_order['job_desc_embedding']])

DataNotMatchException: <DataNotMatchException: (code=1, message=The data doesn't match with schema fields, expect 4 list, got 5)>

In [12]:
%pip install --quiet --upgrade langgraph


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.2.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
pip install -qU "langchain[google-genai]"

Note: you may need to restart the kernel to use updated packages.


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
pymilvus 2.5.11 requires grpcio<=1.67.1,>=1.49.1, but you have grpcio 1.73.1 which is incompatible.

[notice] A new release of pip is available: 23.2.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [5]:
pip install -qU langchain-ollama

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.2.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
# pip install bs4

In [2]:
pip install -qU langchain-milvus

Note: you may need to restart the kernel to use updated packages.


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
grpcio-status 1.73.1 requires grpcio>=1.73.1, but you have grpcio 1.67.1 which is incompatible.

[notice] A new release of pip is available: 23.2.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import getpass
import os

if os.environ.get("GOOGLE_API_KEY") == "":
  os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter API key for Google Gemini: ")

from langchain.chat_models import init_chat_model

llm = init_chat_model("gemini-2.0-flash", model_provider="google_genai")

In [6]:
from langchain_ollama import OllamaEmbeddings
embedding = OllamaEmbeddings(model="nomic-embed-text:v1.5")

In [7]:
from langchain_milvus import Milvus

vector_store = Milvus(
    embedding_function=embedding,
    collection_name="candidates",
    connection_args={
        "host": os.environ.get("milvus_db_host"),
        "port": os.environ.get("milvus_db_port"),
        "db_name": "ResumeMatcher"
    },
    index_params={"index_type": "FLAT", "metric_type": "L2"},
)

2025-06-28 15:00:34,459 [DEBUG][_create_connection]: Created new connection using: fe00b69d2899436ea47932da9ea04b5d (async_milvus_client.py:599)


In [None]:
import bs4
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader

# Load and chunk contents of the blog
loader = PyPDFLoader(r"D:\Work\LLM task\ResumeMatcher\Sample Resume\Dhinesh_Resume.pdf")
docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
all_splits = text_splitter.split_documents(docs)

def clean_metadata(metadata):
    return {k.replace('.', '_'): v for k, v in metadata.items() if k.replace('.', '_').isalnum() or '_' in k}

for doc in all_splits:
    doc.metadata = clean_metadata(doc.metadata)

# Index chunks
_ = vector_store.add_documents(documents=all_splits)


# Define prompt for question-answering
# N.B. for non-US LangSmith endpoints, you may need to specify
# api_url="https://api.smith.langchain.com" in hub.pull.


2025-06-27 16:00:12,999 [ERROR][handler]: RPC error: [insert_rows], <DataNotMatchException: (code=1, message=Attempt to insert an unexpected field `text` to collection without enabling dynamic field)>, <Time:{'RPC start': '2025-06-27 16:00:12.994913', 'RPC error': '2025-06-27 16:00:12.999424'}> (decorators.py:140)
Failed to insert batch starting at entity: 1/3. First entity data: {'text': 'DHINESH V\n+91 88078 77786 ⋄ Kadpadi, Vellore\ndhineshvkumar2003@gmail.com ⋄ LinkedIN ⋄ GitHub\nOBJECTIVE\nI am passionated in the IT field where I can leverage my technical skills, gain valuable experience, and contribute to\nthe company’s success. My goal is to be part of a dynamic team, drive innovative solutions, and make a significant\nimpact on the organization’s growth and development.\nEDUCATION\nMaster of Computer Application , Vellore Institute of Technology 2023 - 2025\nCumulative GPA: 9.16/10.0\nRelevant Coursework:\nProgramming in Java(advanced), Python Programming, Machine Learning, Mob

DataNotMatchException: <DataNotMatchException: (code=1, message=Attempt to insert an unexpected field `text` to collection without enabling dynamic field)>

In [47]:
from langchain import hub
from typing_extensions import List, TypedDict
from langchain_core.documents import Document
from langgraph.graph import START, StateGraph

prompt = hub.pull("rlm/rag-prompt")


# Define state for application
class State(TypedDict):
    question: str
    context: List[Document]
    answer: str


# Define application steps
def retrieve(state: State):
    retrieved_docs = vector_store.similarity_search(state["question"], k=5)
    # print(retrieved_docs)
    return {"context": retrieved_docs}


def generate(state: State):
    docs_content = "\n\n".join(f'chunk: {doc.page_content}\nMetadata: {doc.metadata}'for doc in state["context"])
    messages = prompt.invoke({"question": state["question"], "context": docs_content})
    response = llm.invoke(messages)
    return {"answer": response.content}


# Compile application and test
graph_builder = StateGraph(State).add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()



In [53]:
response = graph.invoke({"question": "I want candidate names who has AWS certification?"})
print(response["answer"])

Candidate 4 has an AWS Certified Machine Learning – Specialty (2020) certification. Candidate 2 has an AWS Certified Developer - Associate Certificate. I don't have information about other candidates.


langchain_core.prompts.chat.ChatPromptTemplate

In [27]:
print(all_splits)

[Document(metadata={'producer': 'pdfTeX-1.40.26', 'creator': 'LaTeX with hyperref', 'creationdate': '2025-06-12T11:47:32+00:00', 'author': '', 'keywords': '', 'moddate': '2025-06-12T11:47:32+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.26 (TeX Live 2024) kpathsea version 6.4.0', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'D:\\Work\\LLM task\\ResumeMatcher\\Sample Resume\\Dhinesh_Resume.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}, page_content='DHINESH V\n+91 88078 77786 ⋄ Kadpadi, Vellore\ndhineshvkumar2003@gmail.com ⋄ LinkedIN ⋄ GitHub\nOBJECTIVE\nI am passionated in the IT field where I can leverage my technical skills, gain valuable experience, and contribute to\nthe company’s success. My goal is to be part of a dynamic team, drive innovative solutions, and make a significant\nimpact on the organization’s growth and development.\nEDUCATION\nMaster of Computer Application , Vellore Institute of Technology 2023 - 2025\nCumulative G

In [52]:
print(prompt)

input_variables=['context', 'question'] input_types={} partial_variables={} metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'} messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})]
