In [1]:
import boto3
import json
from concurrent.futures import ThreadPoolExecutor, as_completed
from requests_aws4auth import AWS4Auth
from opensearchpy import OpenSearch, RequestsHttpConnection
from dotenv import load_dotenv
import os

In [2]:
# Load variables from a local .env file first, so that any AWS settings it
# defines are already in the environment when the client below is created.
load_dotenv()

# Initialise the Bedrock Runtime client and select the embedding model.
bedrock = boto3.client("bedrock-runtime", region_name="ap-southeast-2")
model_id = "amazon.titan-embed-text-v2:0"

True

In [3]:
# Sample parsed CV document used as the input for the embedding cells below.
# Missing values are represented by the literal string "Not Available".
# NOTE(review): this looks like real personal contact data (name, email,
# phone) — consider redacting it before sharing or committing the notebook.
cv_json = {
    "full_name": "Truong Quoc Huan",
    "email_address": "truonghuan0709@gmail.com",
    "phone_number": "+84 335597676",
    "location": "Ho Chi Minh City, Viet Nam",
    "summary": "Over 2 years of experience in programming with good communication and quick learning skills. Strengths include back-end web application development, with solid expertise in backend technologies and architecture design. Experienced in working in Agile environments using Scrum methodology.",
    "skills": [
        "JavaScript",
        "TypeScript",
        "Go",
        "Node (ExpressJS, NestJS)",
        "Go (Gin)",
        "MySQL",
        "PostgreSQL",
        "NoSQL (MongoDB)",
        "API & DevOps: RESTful API, gRPC, Swagger/OpenAPI, Postman, Docker, CI/CD (GitHub Actions, Jenkins)",
        "Infrastructure: Kubernetes (K8s), Apache, Nginx, Redis, Memcached, Queue systems, Logging systems, Cronjobs",
        "Monitoring & Automation: Zabbix, Selenium"
    ],
    "work_experience": [
        {
            "job_title": "DevOps Engineer",
            "company": "Personal Project",
            "duration": "04/2025 - Present",
            "description": "Learn and practice devops project. System implementation and maintenance."
        },
        {
            "job_title": "Developer",
            "company": "Practice and final project for graduate scheme",
            "duration": "01/2025 - 04/2025",
            "description": "Developer for a series of practical laboratory exercises focused on distributed systems concepts and implementations."
        },
        {
            "job_title": "Backend developer",
            "company": "Personal Project",
            "duration": "12/2024 - 04/2025",
            "description": "Project to learn and practice developing backend applications with Go language with the content of building a train ticket sales website."
        },
        {
            "job_title": "Backend developer",
            "company": "Personal Project",
            "duration": "09/2024 - 03/2025",
            "description": "Modular backend system using NestJS, MongoDB, JWT Auth and Swagger. The project is suitable for recruitment, user management, and enterprise platforms."
        },
        {
            "job_title": "Backend developer",
            "company": "College graduation project",
            "duration": "02/2024 - 06/2024",
            "description": "Student management website and uses AI to generate SQL questions for database subjects."
        }
    ],
    "education": [
        {
            "degree": "Major - Information System",
            "institution": "VNU - Ho Chi Minh University of Science",
            "graduation_year": "2024/09"
        }
    ],
    "certifications": [
        "Not Available"
    ],
    "languages": [
        "English"
    ],
    "projects": [
        {
            "name": "ElroyDevops",
            "description": "Learn and practice devops project.",
            "duration": "04/2025 - Present",
            "technologies_used": [
                "ReactJS",
                "Java Spring Boot",
                "MariaDB",
                "Docker",
                "Kubernetes",
                "Zabbix"
            ]
        },
        {
            "name": "Distributed System",
            "description": "This repository contains a series of practical laboratory exercises focused on distributed systems concepts and implementations.",
            "duration": "01/2025 - 04/2025",
            "technologies_used": [
                "Go (Golang)",
                "TCP/IP sockets",
                "Remote Procedure Call (RPC)",
                "Message Queue patterns",
                "Consistent Hashing algorithm",
                "Two-Phase Commit (2PC) protocol"
            ]
        },
        {
            "name": "Go to Work",
            "description": "Project to learn and practice developing backend applications with Go language with the content of building a train ticket sales website.",
            "duration": "12/2024 - 04/2025",
            "technologies_used": [
                "Golang",
                "Gin framework",
                "MySQL",
                "GORM",
                "Redis",
                "Kafka",
                "Docker",
                "Docker Compose",
                "Makefile"
            ]
        },
        {
            "name": "NestProject",
            "description": "Modular backend system using NestJS, MongoDB, JWT Auth and Swagger. The project is suitable for recruitment, user management, and enterprise platforms.",
            "duration": "09/2024 - 03/2025",
            "technologies_used": [
                "NestJS",
                "TypeScript",
                "MongoDB (Mongoose)",
                "JWT Auth",
                "Swagger",
                "EJS"
            ]
        },
        {
            "name": "SIEU-DAO-CHICH GAME",
            "description": "Student management website and uses AI to generate SQL questions for database subjects.",
            "duration": "02/2024 - 06/2024",
            "technologies_used": [
                "Node.js",
                "Express.js",
                "Sequelize",
                "Passport.js",
                "JWT",
                "bcrypt",
                "dotenv",
                "Nodemailer",
                "EJS",
                "PostgreSQL/MySQL"
            ]
        }
    ],
    "references": [
        "Not Available"
    ],
    "linkedin_profile": "Not Available",
    "github_profile": "Not Available",
    "portfolio_website": "Not Available"
}

In [4]:
# Keys of a CV document whose contents are turned into embeddings.
# NOTE(review): the sample `cv_json` in this notebook has no "role" key, so
# that entry presumably targets CV documents from another source — confirm.
cv_fields = [
    "role",
    "work_experience",
    "skills",
    "summary",
    "projects",
]

# Presumably the matching keys for a job-description document; this list is
# not referenced by the cells visible in this notebook.
jd_fields = [
    "job_title",
    "responsibilities",
    "requirements",
]

In [5]:
def get_bedrock_emb(text: str):
    """Return the embedding vector for ``text`` from the Bedrock model.

    Sends ``text`` as ``inputText`` through the module-level ``bedrock``
    client, using the module-level ``model_id``, and returns the
    ``"embedding"`` entry of the JSON response.

    Args:
        text: The text to embed.

    Returns:
        The value stored under ``"embedding"`` in the model response, or
        ``None`` when ``text`` is ``None``, empty, or whitespace-only (no
        request is made in that case).
    """
    # Skip the API call for missing or blank input. ``None`` is tolerated so
    # that a caller holding an absent value does not crash on ``.strip()``.
    if text is None or not text.strip():
        return None

    payload = json.dumps({"inputText": text})
    response = bedrock.invoke_model(
        modelId=model_id,
        body=payload,
        contentType="application/json",
        accept="application/json",
    )
    # The response body is a file-like object: read it fully, then parse JSON.
    result = json.loads(response["body"].read().decode())
    return result["embedding"]

In [6]:
def emb_json_fields(json_data: dict, fields: list, max_workers: int = 6):
    """Embed selected fields of a JSON-like dict concurrently.

    Each requested field is flattened to a single string and passed to
    ``get_bedrock_emb`` on a thread pool.

    Args:
        json_data: The document to read field values from.
        fields: Names of the keys in ``json_data`` to embed. Keys that are
            missing (or whose value is ``None``) are treated as empty text.
        max_workers: Size of the thread pool used for the embedding calls.

    Returns:
        A dict mapping ``"Embed_<field>"`` to the embedding returned for that
        field, in the same order as ``fields``. The value is ``None`` when the
        field text is blank or when embedding the field raised an exception
        (the error is printed, not re-raised).
    """
    def field_to_text(field):
        """Flatten the value stored under ``field`` into one string."""
        value = json_data.get(field, "")
        if value is None:
            # Avoid embedding the literal text "None" for an explicit null.
            return ""
        if isinstance(value, list):
            # Dict items keep their structure as JSON; everything else is str().
            return " ".join(
                json.dumps(item) if isinstance(item, dict) else str(item)
                for item in value
            )
        if isinstance(value, dict):
            return json.dumps(value)
        return str(value)

    def process_field(field):
        """Return the embedding for one field (runs on a worker thread)."""
        return get_bedrock_emb(field_to_text(field))

    embeddings = {}
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # All calls are submitted up front so they run concurrently; results
        # are then collected in ``fields`` order to keep the output
        # deterministic regardless of which request finishes first.
        pending = [(field, executor.submit(process_field, field)) for field in fields]
        for field, future in pending:
            key = f"Embed_{field}"
            try:
                embeddings[key] = future.result()
            except Exception as e:
                print(f"Error processing field {field}: {e}")
                # Record the failure under the embedding key, never under the
                # raw field name: callers merge this dict back into the source
                # document, and a bare ``field`` key would overwrite the
                # original data with None.
                embeddings[key] = None
    return embeddings


In [7]:
# Embed the configured CV fields, then merge the returned entries into the
# CV document itself (this updates `cv_json` in place).
cv_embeddings = emb_json_fields(json_data=cv_json, fields=cv_fields)
cv_json.update(cv_embeddings)

In [8]:
# Persist the CV document (including the merged embedding entries) as JSON.
# The encoding is given explicitly so the file does not depend on the
# platform's default text encoding.
with open("cv_data_with_embeddings.json", "w", encoding="utf-8") as f:
    json.dump(cv_json, f, indent=4)

In [9]:
# Build the SigV4 request signer for the OpenSearch domain from the default
# boto3 credential chain.
region = "ap-southeast-2"
service = "es"
credentials = boto3.Session().get_credentials()
if credentials is None:
    # get_credentials() returns None when no credential source is configured;
    # fail here with a clear message instead of an AttributeError below.
    raise RuntimeError(
        "No AWS credentials found; configure them (environment, .env, or "
        "~/.aws) before creating the OpenSearch signer."
    )
# Read access key, secret key and token from one frozen snapshot so the three
# values always belong to the same credential set.
frozen_credentials = credentials.get_frozen_credentials()
awsauth = AWS4Auth(frozen_credentials.access_key,
                   frozen_credentials.secret_key,
                   region, service,
                   session_token=frozen_credentials.token)

In [10]:
# Read the OpenSearch endpoint from the environment and reduce it to a bare
# host name, which is what the client's `hosts` entry expects.
open_search_domain = os.getenv("OPENSEARCH_DOMAIN_ENDPOINT")
if not open_search_domain:
    # Fail with a clear message instead of an AttributeError on None.
    raise RuntimeError(
        "OPENSEARCH_DOMAIN_ENDPOINT is not set; define it in the environment "
        "or in the .env file."
    )
open_search_domain = open_search_domain.strip()
# Drop the scheme only when it is a prefix, and any trailing slash.
if open_search_domain.startswith("https://"):
    open_search_domain = open_search_domain[len("https://"):]
open_search_domain = open_search_domain.rstrip("/")
print("OpenSearch Domain:", open_search_domain)

OpenSearch Domain: search-cv-job-database-sw3qxa7bamcbgr7cu3uafl2cny.aos.ap-southeast-2.on.aws


In [11]:
# OpenSearch client for the domain on port 443, with TLS and certificate
# verification enabled; requests are authenticated with `awsauth`.
client = OpenSearch(
    hosts=[dict(host=open_search_domain, port=443)],
    connection_class=RequestsHttpConnection,
    http_auth=awsauth,
    verify_certs=True,
    use_ssl=True,
)

In [15]:
# Smoke-test the connection by fetching the cluster metadata.
info = client.info()
print(f"✅ Connected to: {info['cluster_name']}")

✅ Connected to: 477476326438:cv-job-database
