### Load API Key

In [2]:
from dotenv import load_dotenv
import os

# Load variables from a local .env file, then read the OpenAI key from the environment.
# `api_key` is None when OPENAI_API_KEY is not set.
load_dotenv()
api_key = os.environ.get("OPENAI_API_KEY")

### Data Preprocessing

In [None]:
import os
import pandas as pd

JD_csv = (
    pd.read_csv('./data/USA_jobs_total.csv')
    # Columns removed before any further processing.
    # (Author's note: it is unclear what `num_urgent_words` represents.)
    .drop(
        columns=[
            'interval', 'min_amount', 'max_amount', 'currency',
            'num_urgent_words', 'benefits', 'emails',
        ]
    )
    # Keep only rows that have both a description and an is_remote value.
    # NOTE(review): the original comment mentioned only `description`; confirm that
    # dropping rows with a missing `is_remote` is intended.
    .dropna(subset=['description', 'is_remote'])
    # Rows without a job_type are labelled 'fulltime'.
    .assign(job_type=lambda frame: frame['job_type'].fillna('fulltime'))
)

JD_csv.info()

<class 'pandas.core.frame.DataFrame'>
Index: 20 entries, 0 to 39
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   job_url      20 non-null     object
 1   site         20 non-null     object
 2   title        20 non-null     object
 3   company      20 non-null     object
 4   company_url  20 non-null     object
 5   location     20 non-null     object
 6   job_type     20 non-null     object
 7   date_posted  20 non-null     object
 8   is_remote    20 non-null     object
 9   description  20 non-null     object
dtypes: object(10)
memory usage: 1.7+ KB


### null 값 채우기 - LLM을 이용하여 판단하기

In [None]:
# Disabled experiment: collect the descriptions of rows whose job_type is missing,
# so they can be classified by the (also disabled) LLM chain in the next cell.
# NOTE(review): this only selects rows if it runs before the preprocessing cell fills
# missing job_type values with 'fulltime'; after that step the selection is empty.
# job_type_null= JD_csv[JD_csv['job_type'].isnull()]['description'].to_list()
# job_type_null

["Riverbed. Empower the Experience:\n\nRiverbed, the leader in AI observability, helps organizations optimize their\nuser's experiences by leveraging AI automation for the prevention,\nidentification, and resolution of IT issues. With over 20 years of experience\nin data collection and AI and machine learning, Riverbed's open and **AI-\npowered observability platform** and solutions optimize digital experiences\nand greatly improves IT efficiency. Riverbed also offers industry-leading\n**Acceleration solutions** that provide fast, agile, secure acceleration of\nany app, over any network, to users anywhere. Together with our thousands of\nmarket-leading customers globally - including 95% of the FORTUNE 100 - we are\nempowering next-generation digital experiences.\n\nPosition:\n\n**Title:** Solutions Engineer\n\n **Location:** Remote/Home Office - greater Chicago, IL or Michigan.\n\n **Technology Specialization:** Unified Observability\n\n  \n\nWe are seeking a Solutions Engineer for imm

In [None]:
# Disabled experiment: send each description in `job_type_null` to gpt-3.5-turbo and ask
# for a one-word job type (contract / full-time / intern / part-time), collecting one
# answer per description in `results`.
# Requires `job_type_null` from the previous (disabled) cell.
# from langchain_openai import ChatOpenAI
# from langchain_core.output_parsers import StrOutputParser
# from langchain_core.runnables import RunnablePassthrough
# from langchain_core.prompts import PromptTemplate

# prompt = PromptTemplate.from_template('''
# {context}

# Can you tell what type of job this is by reading this article? What type do you think it is: contract, full-time, intern, or part-time? 
# Answer with only one word out of the four. And if you don't know, just say you don't know.
# ''')

# llm = ChatOpenAI(model='gpt-3.5-turbo')

# chain = (
#     {"context": RunnablePassthrough()}
#     | prompt
#     | llm
#     | StrOutputParser()
# )

# results = []
# for context in job_type_null:
#     response = chain.invoke(context)
#     results.append(response)
# print(results)

['Full-time', 'Full-time', 'Full-time', 'Full-time', 'Full-time']


In [2]:
from langchain_text_splitters import CharacterTextSplitter

# Splitter settings, gathered in one place so they are easy to tune.
splitter_options = {
    # Separator used to split the text; left at the default ("\n\n").
    # "separator": " ",
    # Maximum size of each chunk.
    "chunk_size": 4000,
    # Number of characters shared between consecutive chunks.
    "chunk_overlap": 0,
    # Function used to measure text length.
    "length_function": len,
    # Whether the separator is a regular expression.
    "is_separator_regex": False,
}

text_splitter = CharacterTextSplitter(**splitter_options)


In [None]:
# Read every job-description file and split it into chunks for the vector store.
# NOTE(review): `folder_path` and `file_paths` are not defined in any visible cell of this
# notebook; they must already exist in the kernel, so define them in an earlier cell to
# make the notebook survive Restart & Run All.
total_chunks = []
for f_path in file_paths:
    # Explicit UTF-8: the documents contain non-ASCII characters (e.g. curly quotes), and
    # without `encoding` the platform default is used, which is not UTF-8 on every system.
    # NOTE(review): assumes the source files are UTF-8 encoded; confirm.
    with open(os.path.join(folder_path, f_path), encoding="utf-8") as f:
        file_text = f.read()
    # One file may yield several Documents (chunks); collect them all in a flat list.
    total_chunks.extend(text_splitter.create_documents([file_text]))

In [13]:
# Prints the repr of every chunk in the list.
# NOTE(review): for a large corpus, prefer len(total_chunks) and total_chunks[:3] to keep the output short.
print(total_chunks)

[Document(metadata={}, page_content='**Sample full stack developer job description**\n\nAt [Company X], we rely on a dynamic team of engineers to solve the many challenges and puzzles of our rapidly evolving technical stack. We’re seeking a full stack developer who is ready to work with new technologies and architectures in a forward-thinking organization that’s always pushing boundaries. This person will have complete, end-to-end ownership of projects. The ideal candidate has experience building products across the stack and a firm understanding of web frameworks, APIs, databases, and multiple back-end languages. The full stack developer will join a small team that uses new technology to solve challenges for both the front-end and back-end architecture, ultimately delivering amazing experiences for global users.\n\n**Objectives of this role**\n\n- Work across the full stack, building highly scalable distributed solutions that enable positive user experiences and measurable business gr

In [4]:
# embedding
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Embedding model instance.
# NOTE(review): the vector-store cells below construct their own OpenAIEmbeddings()
# rather than using this variable.
embedding = OpenAIEmbeddings()

In [6]:
# vector store
from langchain_chroma import Chroma

DB_PATH = "./chromaDB"

# Open (or create) the on-disk collection under DB_PATH. The embedding model is
# supplied here so that added documents are embedded with OpenAIEmbeddings.
db = Chroma(
    persist_directory=DB_PATH,
    embedding_function=OpenAIEmbeddings(),
    collection_name="my_db",
)

# Populate the collection only when it is still empty. The chunks are meant to be stored
# once; adding them again on every run would append duplicates to the persisted
# collection and skew retrieval. Delete DB_PATH to rebuild from scratch.
if not db.get(limit=1)["ids"]:
    db.add_documents(total_chunks)


In [7]:
# Re-open the collection stored on disk under DB_PATH, using the same collection name
# and embedding model that were used when it was written.
persist_db = Chroma(
    collection_name="my_db",
    persist_directory=DB_PATH,
    embedding_function=OpenAIEmbeddings(),
)


In [8]:
# Wrap the persisted store as a retriever; no search options are passed, so the library defaults apply.
retriever = persist_db.as_retriever()

In [17]:
from langchain_core.prompts import PromptTemplate

# Free-form comparison prompt: {context} receives the JD text, {question} the resume.
# NOTE: the next cell rebinds `prompt` to the scoring prompt, so this template is only
# in effect when that cell is not run.
# A stray double quote at the end of the instructions was removed.
prompt = PromptTemplate.from_template(""" Here is a job description (JD) and a resume. \n
Please evaluate how the resume aligns with the qualifications, skills, and experiences required by the JD. \n
Compare each requirement in the JD with the corresponding sections in the resume, explain where they match, and identify any areas that may be lacking.

JD:
{context}

Resume:
{question}
""")

In [18]:
from langchain_core.prompts import PromptTemplate

# Scoring prompt: {context} receives the JD text, {question} the resume.
# Two wording fixes so the instructions agree with the template itself:
#   * the inputs are described in the order they actually appear (JD first, resume second);
#   * the output requirement asks for one total out of 10, matching the 5 + 3 + 2 rubric.
prompt = PromptTemplate.from_template("""Based on the following two input data, evaluate how appealing the user’s resume would be to the company posting this JD. \n
The first text is the Job Description (JD), and the second text is the user’s resume.\n
Identify the key competencies required in the JD and assess whether these are included in the resume.\n

Then, rate the resume on a scale of 10 according to the criteria below:\n
Core Competency Match (1-5 points): Extract and evaluate the primary keywords and core competencies mentioned in the JD to see if they are included in the resume.\n
Additional Skills Match (1-3 points): Assess whether additional competencies or preferred skills from the JD are present in the resume.\n
Overall Assessment (1-2 points): Provide an overall assessment of how well the resume aligns with the JD and how appealing it may be for this position.\n

Output Requirements:\n
Report a single total score out of 10, computed as the sum of the three criteria above.\n
Explain in detail the reasoning behind this score.\n

Input data:
JD:\n
{context}

Resume:\n
{question}

Output format:
Score: 
Reason:
""")

In [9]:

# Sample resume in Markdown. It is passed both to the retriever (as the search query)
# and to the prompt's {question} slot.
question = """
**Comon Kim, Software Engineer**

**Contacts**

010-1234-5678 · youtube@codingmonster.tv · [https://codingmonster.net](https://codingmonster.net/)

**Position Applied**

Junior Backend Engineer @ Chat Server Team

**Summary of Skills**

**Backend Engineering**

- Experience in developing 6 individual and collaborative projects
- Proficient in developing simple Backend API servers using Spring Framework, JPA or Query
- Transitioned from manual deployment using SFTP to automated deployment through Git Branch integration with Jenkins
- Experience in deploying services in private IDC, NCP, AWS (EC2)
- Experience in managing Docker images via Docker Registry and deploying servers using Docker and Docker-Compose

**Software Development**

- Experience performing tests using Junit, Mockito, Jmeter
- Experience in setting up and executing integration test environments using TestContainers
- Experience documenting APIs using Swagger (OAS 3) and Rest Docs

**Machine Learning Engineering**

- Completed a 6-month ABC bootcamp and acquired basic understanding of machine learning engineering through team projects [Link]
- Experience in developing a machine learning inference server using FastAPI + RabbitMQ in a toy project [GitHub Link]
- Re-implemented ResNet using Tensorflow based on a reference post [Link]

---

**Careers**

**ComonSoft / Junior Backend Engineer @ AIPlatform Team**

(August 2021 - Present)

- Led a pilot project (3 weeks)
- Conducted performance tests of paging APIs using 11.5 million test data records
- Created and tuned indexes to reduce query time and applied Redis-based count caching
- Carried out Unit Tests, Integration Tests, Performance Tests (Jmeter)
- Developed a Messenger Chatbot
- Implemented a custom authentication server complying with OAuth 2.0 Password Grant-type
- Supported QA, deployment, and operations of messenger and monitoring servers
- Experience with build automation via GitLab-CI, deployment using Docker and Docker-Compose
- Collected and stored metrics using Prometheus, configured Grafana Dashboard, and performed provisioning
- Developed a TTV (Text to Face) service
- Improved TTS API response times by caching sentences converted within the last hour and only generating speech for modified parts

**ComonTelecom / Junior Software Engineer @ Core Dev Team**

(June 2020 - August 2021)

- Developed an AI-based object detection solution
- Developed a C++/C# SDK and WPF-based Windows App
- Reproduced vision algorithms from academic papers (CAM, Attention, etc.) using Tensorflow and OpenCV and integrated them into products
- Performed numerous code-level optimizations and refactoring

---

**Experiences**

**Open-source Contribution**

- Submitted a PR for ARM 64-bit processor instruction indexing to Ghidra, a project by the NSA [Link]
- Corrected a typo in the stackTwoBitmaps() method in Google Tensorflow's example code [Link]

**Personal Project / Short Term (April 29, 2021 - May 22, 2021)** [Link]

- URL Shortener project that provides shortened URLs
- Generated Short URLs using Base62 Encoder and XOR Cipher
- Mapped Short URLs to target URLs using Redis caching, and returned values to the client
- Configured integration testing using TestContainers to closely mimic the production environment (Documented the learning process)
- Learned and utilized Docker, TestContainers, Redis

**Personal Project / Somaeja (November 20, 2020 - March 1, 2021)** [Link]

- Developed a RESTful API server for a local information and goods trading platform
- Designed URIs and developed APIs compliant with RESTful conventions (HTTP Methods, Status Codes)
- Protected sensitive configuration data by encrypting configuration files using Jasypt Library
- Upgraded from local session synchronization, which caused network traffic, to a stateless JWT-based approach
- Automated the manual build-copy-paste-execute deployment process using Git Branch and Jenkins integration
- Used Spring Boot, Mybatis, NCP, AWS EC2, Github, Jenkins

---

**Education**

**Comon University / Master's Degree in Computer Science (February 2017 - March 2019)**

- Research Area: Lightweight Deep Learning Model Architecture (Advisor: Dr. Kim Coding)
- GPA: 4.3/4.5 (Graduated 2nd in class)

**Comon University / Bachelor's Degree in Computer Science (February 2012 - February 2017)**

**SW Maestro 9th Batch (June 2018 - December 2018)**

- Developed a real-time English study assistant chatbot module
- Collected, preprocessed, and automated training for tens of thousands of translation sentence pairs
- Developed REST APIs using Spring Framework

---

**ETC**

**Awards**

- ACM-ICPC Korea Regional Contest: 11th (2016), 13th (2013), 17th (2012)
- National College Programming Contest: Silver (2015), Bronze (2013)

**Lectures & Publications**

- Published lectures on "Junior Developer Employment Guide" on Class101 [Link]
- Published a 10-week algorithm coding test lecture series on GooroomEDU [Link]

Want more information on document writing?

Subscribe to CodingMonsterTV on YouTube!
"""

In [10]:
# Inspect which stored documents the retriever returns when queried with the resume text.
retriever.invoke(question)

[Document(metadata={}, page_content='TextNow is looking for a curious and motivated Software Developer who is eager to learn, is passionate about our mission and excited to help drive the future development of the TextNow product for our customers.\n\nTextNow\'s Backend Platform team uses modern technologies like AWS, Golang, gRPC, a micro-services architecture, high performing databases, streaming and queueing. We manage hundreds of millions of user interactions and terabytes of raw data in real-time. As a member of the Backend Platform Team you will design, develop, and maintain scalable backend services, ensuring high performance and reliability across our service landscape. You will also contribute to key projects and initiatives, helping to drive technical excellence and innovation.\n\nWhat You\'ll Do\n\nDevelop and Maintain Core Backend Services helping to design and implement microservices \nMaintain a High Bar of Quality in producing readable code, writing testable code, refact

In [20]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

def format_docs(docs):
    """Concatenate the text of the given documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values joined by a blank line, in input order.
        An empty input yields an empty string.
    """
    contents = [doc.page_content for doc in docs]
    separator = "\n\n"
    return separator.join(contents)

llm = ChatOpenAI(model="gpt-3.5-turbo")

# Input mapping for the prompt: the chain input (the resume) is used both as the
# retriever query, whose hits are merged by format_docs into {context}, and unchanged
# as {question}.
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Prompt -> chat model -> plain string.
rag_chain = chain_inputs | prompt | llm | StrOutputParser()

rag_chain.invoke(question)

'Score: 6\n\nReason: \n\nCore Competency Match (1-5 points): The resume matches some of the core competencies mentioned in the JD, such as backend development experience, Docker skills, and experience in writing clean and efficient code. However, there is no specific mention of FastAPI, which is a key requirement in the JD. \n\nAdditional Skills Match (1-3 points): The resume does show additional skills in machine learning engineering and software development, which could be beneficial for the role. However, there is no mention of Kubernetes experience, which is a preferred skill in the JD.\n\nOverall Assessment (1-2 points): The resume aligns well with some of the core competencies required for the position, but it lacks specific experience in FastAPI and Kubernetes, which are mentioned in the JD. The resume showcases relevant experience and skills, but it could be more appealing if it included a direct match to all the key requirements mentioned in the JD.'