In [None]:
# Shell escape: ask the ollama CLI to pull the deepseek-r1:1.5b model used by MODEL_DEEPSEEK later on.
!ollama pull deepseek-r1:1.5b

In [None]:
# imports

import os
import requests
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI
from dotenv import load_dotenv


In [None]:
# Base address of the Ollama server; the OpenAI client below is pointed at its /v1 path.
HOST = "http://localhost:11434"
llm = OpenAI(api_key="ollama", base_url=HOST + "/v1")
# JSON content-type header (not referenced by the other cells visible in this notebook).
HEADERS = {"Content-Type": "application/json"}

# Model identifiers passed as `model=` to chat.completions.create
MODEL_DEEPSEEK = "deepseek-r1:1.5b"
MODEL_LLAMA = "llama3.2"
MODEL_GPT4O = "gpt-4o"
MODEL_GPT4O_MINI = "gpt-4o-mini"

In [None]:
# A class to represent a Webpage
# If you're not familiar with Classes, check out the "Intermediate Python" notebook

# Some websites need you to use proper headers when fetching them, so every
# request made by the Website class is sent with a desktop Chrome User-Agent.
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:
    """
    Snapshot of a statically fetched web page.

    Attributes:
        url: The address that was fetched.
        title: The page's <title> text, or "No title found" when absent/empty.
        text: Visible text of <body> with script/style/img/input removed;
            an empty string when the document has no <body>.
    """

    def __init__(self, url, timeout=30):
        """
        Create this Website object from the given url using the BeautifulSoup library

        Args:
            url: Page address to download.
            timeout: Seconds to wait for the server before requests gives up.
                Without a timeout a stalled server would block the kernel
                indefinitely.
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')

        # soup.title.string is None for an empty or nested <title>; fall back
        # to the placeholder so `title` is always a usable string.
        title_text = soup.title.string if soup.title else None
        self.title = title_text if title_text else "No title found"

        # Non-HTML or fragment responses may have no <body>; avoid calling
        # methods on None in that case.
        body = soup.body
        if body is None:
            self.text = ""
            return

        for irrelevant in body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = body.get_text(separator="\n", strip=True)

In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager

class JSWebsite:
    """
    Snapshot of a page rendered in headless Chrome via Selenium, for sites
    whose content is produced by JavaScript.

    Attributes:
        url: The address that was loaded.
        title: The page's <title> text, or "No title found" when absent/empty.
        text: Text of the rendered document with script, style, img, input,
            nav, footer and header elements removed.
    """
    url: str
    title: str
    text: str

    def __init__(self, url):
        """
        Load `url` in headless Chrome, wait for <body>, and extract title/text.

        Args:
            url: Page address to open in the browser.

        Raises:
            Whatever Selenium raises when the page cannot be loaded or <body>
            does not appear within 10 seconds; the browser is closed first.
        """
        self.url = url

        options = Options()
        options.add_argument("--headless")  # Run in headless mode
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")

        service = Service(ChromeDriverManager().install())
        driver = webdriver.Chrome(service=service, options=options)
        try:
            driver.get(url)
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.TAG_NAME, "body"))
            )
            page_source = driver.page_source
        finally:
            # Always shut the browser down, even when navigation or the wait
            # fails, so failed runs do not leave Chrome processes behind.
            driver.quit()

        soup = BeautifulSoup(page_source, 'html.parser')

        # soup.title.string is None for an empty or nested <title>; fall back
        # to the placeholder so `title` honours its `str` annotation.
        title_text = soup.title.string if soup.title else None
        self.title = title_text if title_text else "No title found"

        for irrelevant in soup(["script", "style", "img", "input", "nav", "footer", "header"]):
            irrelevant.decompose()
        self.text = soup.get_text(separator="\n", strip=True)

In [None]:
# Define our system prompt - you can experiment with this later, e.g. by appending an instruction such as "Respond in markdown in Spanish."
# The prompt asks for strengths, gaps, a 0-100 fit score and resume suggestions, and ends with the Markdown template to follow.
system_prompt = """You are an AI assistant designed to evaluate an applicant's resume against a given job posting to determine their fit for the role. Your response should:
1. Provide a structured analysis
   - List 3 reasons why the applicant is a good fit for the job.
   - List 3 reasons why the applicant may not be a good fit for the job.
2. Ignore irrelevant content
   - Exclude navigation-related or non-informative text from both the job posting and the resume.
3. Score the applicant's fit
   - Assign a fit score between 0 and 100, where:
     - 100 = Perfect fit
     - 0 = No match
   - The score should be based on skills, experience, and qualifications relative to the job description.
   - Consider years of experience as a factor but not as a strict requirement (e.g., someone slightly below the experience requirement may still be a strong candidate).
4. Provide Resume Improvement Suggestions
   - Offer 3 specific and actionable suggestions to improve the applicant’s resume.
   - Focus on aligning their resume better with the job posting by:
     - Highlighting relevant skills or projects.
     - Adjusting wording to match industry terms.
     - Emphasizing transferable experience.
5. Format the response in Markdown
   - Ensure readability with headers, bullet points, and clear separation of sections.
The Example Response Format should be in Markdown, using the default template below:

## Applicant Fit Analysis

Score: 75/100

### ✅ Strengths (Good Fit)
- Bullet point 1
- Bullet point 2
- Bullet point 3

### ❌ Weaknesses (Possible Gaps)
- Bullet point 1
- Bullet point 2
- Bullet point 3

### ✍️ Resume Improvement Suggestions
1. Suggestion 1
2. Suggestion 2
3. Suggestion 3
"""

In [None]:
def user_prompt_for(resume, job_post_url):
    """
    Build the user message sent to the model.

    Loads the posting at `job_post_url` through JSWebsite and returns a single
    string holding a lead-in line, the posting text, and the resume text.
    """
    posting = JSWebsite(job_post_url)
    sections = (
        "Given the applicant profile and job post; \n",
        f"Job Post: {posting.text}\n",
        f"Resume: {resume}\n",
    )
    return "".join(sections)

In [None]:
# Helper for reading the text content of a PDF file from disk
from PyPDF2 import PdfReader

def read_pdf(path):
    """
    Return the concatenated text of every page in the PDF at `path`.

    Args:
        path: Filesystem location of the PDF to read.

    Returns:
        The page texts joined in page order with no separator. Pages for
        which the reader yields no text contribute an empty string.
    """
    with open(path, 'rb') as file:
        reader = PdfReader(file)
        # `or ""` keeps a page without extractable text from breaking the
        # concatenation; join avoids repeated string re-allocation.
        return "".join(page.extract_text() or "" for page in reader.pages)

In [None]:
# Location of the resume PDF. Set the RESUME_PDF_PATH environment variable to
# run this notebook on another machine; the original local path is the default.
resume_pdf_path = os.getenv(
    'RESUME_PDF_PATH',
    '/Users/yualin/Desktop/Dropbox/資料/Resume/Resume Jan 30 2025.pdf',
)
yuan_resume = read_pdf(resume_pdf_path)
# Let's try one out
genstudio_job_post_url = "https://adobe.wd5.myworkdayjobs.com/external_experienced/job/San-Jose/Software-Development-Engineer_R152283"
yuan_genstudio_prompt = user_prompt_for(yuan_resume, genstudio_job_post_url)

In [None]:
# Show the assembled user prompt (job posting text + resume text) as the cell output.
yuan_genstudio_prompt

In [None]:
# Chat payload: the evaluation instructions first, then the posting + resume prompt.
messages = [
    dict(role="system", content=system_prompt),
    dict(role="user", content=yuan_genstudio_prompt),
]

In [None]:
# Re-create the Ollama-backed client from the shared HOST constant so the
# server address is configured in exactly one place.
llm = OpenAI(base_url=f'{HOST}/v1', api_key='ollama')

In [None]:
# Run the evaluation with the DeepSeek model through the local client and render the reply.
response = llm.chat.completions.create(messages=messages, model=MODEL_DEEPSEEK)
display(Markdown(response.choices[0].message.content))

In [None]:
# Same evaluation, this time with the Llama model through the local client.
response = llm.chat.completions.create(messages=messages, model=MODEL_LLAMA)
display(Markdown(response.choices[0].message.content))

In [None]:
# Load variables from a .env file; override=True lets them replace values
# that are already set in the process environment.
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Surface a missing key here rather than as an opaque client error later on.
if not api_key:
    print("OPENAI_API_KEY is not set - add it to your .env file before running the OpenAI cells.")

In [None]:
# Client for the hosted OpenAI API. No key is passed explicitly, so it is
# expected to be picked up from the environment (OPENAI_API_KEY) - TODO confirm.
openai = OpenAI()

In [None]:
# Run the evaluation with gpt-4o-mini through the OpenAI client and render the reply.
response = openai.chat.completions.create(messages=messages, model=MODEL_GPT4O_MINI)
display(Markdown(response.choices[0].message.content))

In [None]:
# UI for upload file
# The job posting and the CV are required to define the user prompt
# The user will enter the job posting URL in a text box here
# The user will upload the CV in PDF format, from which the text will be extracted

# You might need to install PyPDF2 via pip if it's not already installed
# !pip install PyPDF2

import io
import time
import requests
import PyPDF2
from bs4 import BeautifulSoup
from ipywidgets import Text, FileUpload, Button, VBox, HTML

# Text box that receives the job posting URL
job_posting_url = Text(
    description='Job URL:',
    placeholder='Enter the job posting URL here...',
    layout={'width': '600px'},
    disabled=False,
)

# Single-file picker for the CV, restricted to PDF
cv_upload = FileUpload(
    description='Upload CV (PDF)',
    accept='.pdf',
    multiple=False,
)

# Status line that the submit handler rewrites as it progresses
status = HTML(value="<b>Status:</b> Waiting for inputs...")

# Button that triggers the submit handler
submit_button = Button(button_style='success', description='Submit')

# Shared state written by the submit handler and read when the prompt is built
for_user_prompt = dict(job_posting='', cv_text='')

# Function to process PDF and extract text
def extract_cv_text(uploaded_file):
    """
    Extract the text of an uploaded PDF.

    Args:
        uploaded_file: Mapping whose 'content' entry holds the raw PDF bytes.

    Returns:
        The text of all non-empty pages joined by newlines. On failure no
        exception escapes; instead a message starting with "xxxxError" is
        returned, which callers use as the error marker.
    """
    print("Extracting CV text...")
    try:
        content = io.BytesIO(uploaded_file['content'])
        print("Reading PDF content...")
        pdf_reader = PyPDF2.PdfReader(content)
        print("Extracting text from PDF...")
        # Extract each page exactly once; the generator feeds the filter below
        # so empty pages are dropped without running extraction a second time.
        page_texts = (page.extract_text() for page in pdf_reader.pages)
        cv_text = "\n".join(text for text in page_texts if text)
        print("Text extraction complete.")
        return cv_text if cv_text else "xxxxError extracting text from CV."
    except Exception as e:
        return f"xxxxError processing PDF: {str(e)}"

# Function to handle submission
def submit_action(b):
    """
    Handle a click on the Submit button.

    Reads the job URL and the uploaded CV from the widgets, stores them in
    `for_user_prompt`, and reports progress or problems through `status`.

    Args:
        b: Argument supplied by the button's click callback (unused).
    """
    print("Submit button clicked.")
    status.value = "<b>Status:</b> Processing..."

    # Process job posting
    job_url = job_posting_url.value.strip()
    for_user_prompt['job_posting'] = job_url
    if not job_url:
        # Without a URL the later prompt-building step has nothing to fetch.
        status.value = "<b>Status:</b> Please enter a job posting URL before submitting."
        return

    # Process CV
    uploads = cv_upload.value
    if not uploads:
        status.value = "<b>Status:</b> Please upload a CV before submitting."
        return

    # FileUpload.value is a dict keyed by file name in ipywidgets 7 and a
    # tuple of file records in ipywidgets 8; accept both shapes.
    if isinstance(uploads, dict):
        uploaded_file = next(iter(uploads.values()))
    else:
        uploaded_file = uploads[0]

    status.value = "<b>Status:</b> Extracting CV..."
    cv_text = extract_cv_text(uploaded_file)
    # extract_cv_text signals failure with an "xxxxError" prefix; matching the
    # prefix (not a substring) avoids misreading a CV that contains that word.
    if cv_text.startswith("xxxxError"):
        # Do not leave the error message behind as if it were resume text.
        for_user_prompt['cv_text'] = ''
        status.value = f"<b>Status:</b> {cv_text}"
        return
    for_user_prompt['cv_text'] = cv_text

    # Final status update
    status.value = "<b>Status:</b> Job posting and CV submitted successfully!"

    # Print extracted data (for debugging)
    print("Job Posting URL:", for_user_prompt['job_posting'][:500])
    print("Extracted CV Text:", for_user_prompt['cv_text'][:500])  # Show first 500 chars


# Register the click handler on the submit button
submit_button.on_click(submit_action)

# Stack the heading, the two inputs, the button and the status line vertically
layout = VBox(children=[
    HTML(value="<h3>Input Job Posting URL and Upload CV</h3>"),
    job_posting_url,
    cv_upload,
    submit_button,
    status,
])

# Render the form in the cell output
display(layout)


In [None]:
# Send to LLM openai
# https://adobe.wd5.myworkdayjobs.com/external_experienced/job/San-Jose/Software-Development-Engineer_R152283

# This cell depends on the form having been submitted successfully: refuse to
# launch a browser / call the API with an empty URL, an empty CV, or the
# "xxxxError" marker that extract_cv_text returns on failure.
if (
    not for_user_prompt['job_posting']
    or not for_user_prompt['cv_text']
    or for_user_prompt['cv_text'].startswith("xxxxError")
):
    raise ValueError(
        "Enter a job posting URL, upload a CV and click Submit in the form before running this cell."
    )

messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": user_prompt_for(for_user_prompt['cv_text'], for_user_prompt['job_posting'])}
]
response = openai.chat.completions.create(
    model=MODEL_GPT4O_MINI,
    messages=messages
)
display(Markdown(response.choices[0].message.content))

In [23]:
# Repeat the evaluation of the current `messages` with gpt-4o and render the reply.
response = openai.chat.completions.create(messages=messages, model=MODEL_GPT4O)
display(Markdown(response.choices[0].message.content))

KeyboardInterrupt: 