In [1]:
import boto3

In [10]:
session = boto3.session.Session(profile_name='aditya')
s3_vector_client = session.client("s3vectors", region_name="us-east-1")

In [45]:
# bedrock client 
bedrock = session.client("bedrock-runtime", region_name="us-east-1")


In [46]:
# load the resume text from a file
with open(r"../data/resume.txt", "r", encoding="utf-8") as f:
    resume_text = f.read()

In [47]:
# chunking the resume text to smaller text


'''
    Heer we are defining a function to chunk the resume on the basis of chunk size and overlap.
    The chunk_size and overlap has signinficant impact on the performance of the RAG system.
    If the chunk size is too small, then the system will not be able to capture the context of the resume and will not be able to answer the questions accurately.
    If the chunk size is too large, then the system will not be able to process the resume efficiently and will take more time to answer the questions.
    The overlap is used to capture the context of the resume and to ensure that the system can answer the questions accurately.
'''
def chunk_text(text, chunk_size = 500, overlap = 50):
    chunks = []
    start = 0

    while start < len(text):
        end = start + chunk_size
        chunks.append(text[start:end])
        start += chunk_size - overlap
        
    return chunks

resume_chunks = chunk_text(resume_text)

In [None]:
import json

def get_embbeddings(text):
    response = bedrock.invoke_model(
        modelId = "amazon.titan-embed-text-v2:0", 
        body = json.dumps({"inputText": text})
    )
    # print(response)
    result = json.loads(response['body'].read())

    return result['embedding']

In [None]:
# create the vector bucket 
s3_vector_client.create_vector_bucket(
    vectorBucketName="resume-rag-bucket"
)

In [None]:
s3_vector_client.delete_index(
    vectorBucketName = "resume-rag-bucket",
    indexName = "resume-index"
)

In [None]:
s3_vector_client.create_index(
    vectorBucketName = "resume-rag-bucket",
    indexName = "resume-index",
    dimension = 1024,
    distanceMetric = "cosine",
    dataType = "float32"
)

In [None]:
vectors = []

for i, chunk in enumerate(resume_chunks):
    embedding = get_embbeddings(chunk)

    vectors.append({
        "key": f"chunk-{i}",
        "data": {
            "float32": embedding
        },
        "metadata": {
            "text": chunk
        }
    })


s3_vector_client.put_vectors(
    vectorBucketName="resume-rag-bucket",
    indexName="resume-index",
    vectors=vectors
)


In [116]:
def search_similar(query, top_k=3):

    query_embedding = get_embbeddings(query)

    response = s3_vector_client.query_vectors(
        vectorBucketName="resume-rag-bucket",
        indexName="resume-index",
        queryVector={
            "float32": query_embedding
        },
        topK=top_k,
        returnMetadata=True   # IMPORTANT
    )


    print(response["vectors"])  # keep during debugging

    contexts = []

    for item in response["vectors"]:
        meta = item.get("metadata") or item.get("attributes")

        if meta and "text" in meta:
            contexts.append(meta["text"])
    print(contexts)  # keep during debugging
    return "\n".join(contexts)


In [117]:
def ask_llm(question, context):

    prompt = f"""
You are an AI assistant answering questions based on resume data.

Context:
{context}

Question:
{question}

Answer clearly:
"""

    response = bedrock.invoke_model(
        modelId="amazon.nova-pro-v1:0",
        body=json.dumps({
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "text": prompt
                        }
                    ]
                }
            ]
        })
    )

    result = json.loads(response['body'].read())

    return result["output"]["message"]["content"][0]["text"]


In [118]:
def ask_resume(question):
    context = search_similar(question)
    answer = ask_llm(question, context)
    return answer


In [120]:
ask_resume("what are the key skills in the resume and who is there ?")


{'ResponseMetadata': {'RequestId': 'b595f121-e190-4c3d-8dd9-a7f463beeb44', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Tue, 17 Feb 2026 17:46:36 GMT', 'content-type': 'application/json', 'content-length': '43371', 'connection': 'keep-alive', 'x-amzn-requestid': 'b595f121-e190-4c3d-8dd9-a7f463beeb44', 'x-amzn-bedrock-invocation-latency': '94', 'x-amzn-bedrock-input-token-count': '14'}, 'RetryAttempts': 0}, 'contentType': 'application/json', 'body': <botocore.response.StreamingBody object at 0x000001514F7A8CD0>}
[{'key': 'chunk-14', 'metadata': {'text': 'DO NOT INCLUDE IN 1 PAGE RESUME, CAN BE TAILORED FOR JOB SPECIFIC ROLES]\n\nKaafila.org \t   \t    \t\t\t\t\t\t\t\t\t\t    Sep 2021- Oct 2021\n●\tDeveloped and designed more than 20 components in the ReactJs. \n●\tExecuted the components using the Bootstrap library for frontend.\n\n\nYouTube Transcript Summarizer, Team Leader \t\t\t\t\t\t\t\tNov 2021 — Dec 2021\n●\tApplied the YouTube Transcript API to fetch the transcripts from the 

'The key skills and certifications listed in the resume are as follows:\n\n**Key Skills:**\n1. **Programming Languages and Frameworks:**\n   - ReactJs\n   - Python\n   - Bootstrap library\n\n2. **Libraries and Tools:**\n   - spaCy library\n   - Docker\n   - Postman\n   - Git\n   - GitHub\n   - Ollama\n   - Microservices\n   - Restful APIs\n   - Prompt Engineering\n   - Gen AI\n   - CI/CD\n   - AIops\n   - LLMops\n\n3. **Cloud Platforms:**\n   - AWS\n   - Azure\n\n4. **Problem-Solving:**\n   - Solved more than 800 problems on LeetCode, GeeksforGeeks (GFG), and CodeChef.\n\n5. **Operating Systems and Databases:**\n   - Linux\n   - DBMS\n   - OS\n   - Computer Networks\n\n**Certifications:**\n1. AWS Solution Architect Associate (SAA-C02)\n2. AWS Developer Associate (Note: The certification name seems to be incomplete in the provided text)\n\n**Education:**\n- Bachelor of Technology in Computer Science Engineering (CSE) from Acropolis Institute of Technology and Research, Indore, with a GP