## Setup Environment

In [None]:
from dotenv import load_dotenv
import os

# LangChain dependencies (TODO: find an alternative ASAP)

from langchain_community.document_loaders import PyPDFLoader
from langchain_ollama.chat_models import ChatOllama
from langchain_community.embeddings import OllamaEmbeddings

In [2]:
# Provider API endpoints and keys would be read from .env; the call stays
# commented out for now.
# load_dotenv()

# TODO: Ollama is used for now -- the model for each pipeline stage is set below.

FAST_LLM_MODEL_NAME = "llama3.1:8b"  # keyword extraction
CONTEXT_LLM_MODEL_NAME = "gemma3:12b"  # job description extraction from url
EMBED_LLM_MODEL_NAME = "embeddinggemma:latest"  # embeddings
SMART_LLM_MODEL_NAME = "gemma3:12b"  # final cv generation

## Get Inputs

In [3]:
# jobPostingUrl="https://www.indeed.com/viewjob?jk=10a05154d8eed5da"

In [4]:
# Locations of the user-supplied inputs, relative to the working directory.
cvPath = "examples/cv.pdf"
jobPostingPath = "examples/jobPostingText.txt"
additionalInfoPath = "examples/additionalInfo.txt"

# jobPostingUrl is optional: it only exists if the cell that assigns it was run.
# Catch NameError specifically -- a bare `except:` would also swallow
# KeyboardInterrupt and any unrelated failure raised while printing.
try:
    print(jobPostingUrl)
except NameError:
    # No URL supplied; an empty string makes later truthiness checks fall back
    # to the text file at jobPostingPath.
    jobPostingUrl = ""


## Static inputs
Prompts & Template

In [5]:
def load_config_file(filename):
    """Return the stripped text content of a file in the ``config`` directory.

    The path is built as ``config/<filename>`` (relative to the current
    working directory), the file is decoded as UTF-8, and leading/trailing
    whitespace is removed from the returned string.
    """
    location = os.path.join('config', filename)
    with open(location, mode='r', encoding='utf-8') as handle:
        raw_text = handle.read()
    return raw_text.strip()

# Load the static template and the two keyword-extraction prompt templates,
# in this order, from the config directory.
cvTemplate, cvKeywordsPrompt, jobKeywordsPrompt = map(
    load_config_file,
    ('cvTemplate.txt', 'cvKeywordsPrompt.txt', 'jobKeywordsPrompt.txt'),
)

print("Everything loaded successfully!")

Everything loaded successfully!


## Process Inputs

Handle CV and Additional Info

In [6]:
# Load the CV PDF and flatten its pages into one newline-separated string.
loader = PyPDFLoader(cvPath)
pages = list(loader.load())
cvRawText = "\n".join(page.page_content for page in pages)

# Read the free-form additional info, ignoring surrounding whitespace.
with open(additionalInfoPath, mode='r', encoding='utf-8') as extra_file:
    additionalInfo = extra_file.read().strip()

# Default to the raw CV text; when additional info is present, wrap both
# parts in labelled sections instead.
cvText = cvRawText
if additionalInfo:
    cvText = f"""
------------
CV Raw Text:
------------
{cvRawText}
---------------
Additional Info:
---------------
{additionalInfo}
    """
print(cvText)


------------
CV Raw Text:
------------
Ahmed Taha
Fresh Software Engineer
 ahmedtaha1234@gmail.com  +201557528856  Cairo, Egypt  creative-geek.tech  github.com/Creative-Geek
 linkedin.com/in/ahmed-taha-thecg  Exempted
PROFILE
Freshly graduated Software Engineer with hands-on experience in web development, AI integrations & Automation, and
multimedia production. Skilled in React, Nodejs, Flask, and Python, aspires to create dynamic, user-friendly applications. Has
delivered projects from web solutions to AI-driven tools‚Äîincluding an Arabic Handwriting E2E OCR system. Strong in UI/UX
design and committed to crafting efficient, engaging digital experiences.
PROJECTS
Tasky,AI-Powered Todo List 04/2025 ‚Äì 05/2025
Developed a fullstack todo list app with React, Node.js, and Prisma, focusing on user-friendly design and smooth
animations.
Deployed the client, server, and Postgres database, while enforcing security best practices.
Integrated an AI that turns pasted coworker messages into ta

Handle Job Posting

In [7]:
# define a function for url job posting extraction (probably the longest part of this program)
def fetchUrl(url):
    """Fetch the job posting page at ``url`` (not implemented yet).

    Raises:
        NotImplementedError: always. The previous ``pass`` body returned
            ``None`` silently, which downstream code would have formatted
            into the prompt as the literal text "None".
    """
    raise NotImplementedError(
        "fetchUrl is not implemented yet; leave jobPostingUrl empty to read the job posting from a text file."
    )
def doRAG(url):
    """Extract the job posting text from fetched page content (not implemented yet).

    NOTE: despite its name, ``url`` receives the value returned by ``fetchUrl``
    when called from ``extract_job_posting_from_url``; the parameter name is
    kept unchanged so existing callers are unaffected.

    Raises:
        NotImplementedError: always. The previous ``pass`` body returned
            ``None`` silently instead of signalling the missing feature.
    """
    raise NotImplementedError(
        "doRAG is not implemented yet; leave jobPostingUrl empty to read the job posting from a text file."
    )


def extract_job_posting_from_url(url):
    """Return the result of passing ``fetchUrl(url)`` through ``doRAG``."""
    return doRAG(fetchUrl(url))


In [8]:
# Pick the job posting source: the local text file when no URL was given,
# otherwise the URL extraction pipeline.

if not jobPostingUrl:
    with open(jobPostingPath, mode='r', encoding='utf-8') as posting_file:
        jobPostingText = posting_file.read().strip()
else:
    jobPostingText = extract_job_posting_from_url(jobPostingUrl)


## Make Some AI Calls 🤙

First, prepare prompts

In [9]:
# Build each prompt from a freshly loaded template rather than from the current
# value of the variable. This cell rebinds cvKeywordsPrompt / jobKeywordsPrompt,
# so formatting the existing value would re-format already-formatted text when
# the cell is run a second time: escaped braces get collapsed again and any
# literal braces in the inserted text make str.format raise.

# format cv keyword extraction prompt
cvKeywordsPrompt = load_config_file('cvKeywordsPrompt.txt').format(cvText=cvText)
# format job posting keyword extraction prompt
jobKeywordsPrompt = load_config_file('jobKeywordsPrompt.txt').format(jobPostingText=jobPostingText)

In [10]:
# Print the CV keyword-extraction prompt so it can be inspected before it is sent to the model.
print(cvKeywordsPrompt)

You are an expert keyword extraction system specializing in resume and CV analysis. Your task is to extract relevant professional keywords from the provided CV text.

**Instructions:**
1. Extract keywords that are professionally relevant and valuable for job matching, skill assessment, or career analysis
2. Focus on the following categories:
   - Technical skills (programming languages, software, tools, frameworks)
   - Professional skills (project management, leadership, analysis, etc.)
   - Industry terms and domain expertise
   - Job titles and roles
   - Certifications and qualifications
   - Relevant technologies and methodologies
   - Important action verbs that demonstrate capabilities

**Guidelines:**
- Extract single words or short phrases (2-3 words maximum)
- Include both explicit skills mentioned and implied competencies
- Normalize variations (e.g., "JavaScript" and "JS" should be "JavaScript")
- Include both technical and soft skills
- Avoid generic words like "the", "and

012 😄

...I mean, hit the api endpoints

In [15]:
# Chat client for the fast model on the local Ollama server, with JSON output
# requested.
FAST_LLM = ChatOllama(
    model=FAST_LLM_MODEL_NAME,
    base_url="http://localhost:11434",
    format="json",
)

# Send the CV keyword prompt as a single human message and print the reply.
cvKeywordsMessages = [("human", cvKeywordsPrompt)]
cvKeywords = FAST_LLM.invoke(cvKeywordsMessages)
print(cvKeywords.content)


{"Python":"", "React":"", "Nodejs":"", "Flask":"", "JavaScript":"", "Typescript":"", "Vue":"", "Django":"", "Next.js":"", "Godot Engine":"", "QT5":"", "TensorFlow":"", "Vertex AI Platform":"", "LLMs":"", "Agent AIs":"", "LangChain":"", "Stable Diffusion":"", "Flux":"", "Google Cloud":"", "Azure":"", "Docker":"", "GitHub":"", "Git":"", "Jira":"", "Linux":"", "Prisma":"", "SQLite":"", "PostgreSQL":"", "MongoDB":"", "UI/UX Design":"", "Graphic Design":"", "Video Editing":"", "Motion Graphics":"", "Adobe Creative Suite":"", "Technical Writing":"", "Content Creation":"", "Communication":"", "Teamwork":"", "Problem-Solving":"", "Adaptability":"", "Creativity":"", "Time Management":"", "Detail-Oriented":"", "Agile":"", "Data Analysis":"", "AWS":"", "Automation":"", "OCR":"", "E2E":"", "Machine Learning":"", "Artificial Intelligence":"", "AI":"", "Software Development":"", "Cloud Computing":"", "Containerization":"", "File Server Management":"", "Scripting":"", "Excel Automation":"", "Autocad 