In [1]:
from langchain.document_loaders import SeleniumURLLoader
import os
from dotenv import load_dotenv
import openai
import logging
import shutil


# Read variables from the local .env file, then hand the OpenAI key to the client.
load_dotenv(".env")
openai.api_key = os.getenv("OPENAI_API_KEY")

# Route INFO-and-above log records to example.log and create this notebook's logger.
logging.basicConfig(level=logging.INFO, filename='example.log')
logger = logging.getLogger(__name__)

In [2]:
def chatbot_completition(system_message, user_message, temp=None):
    """Send a system/user message pair to the 'gpt-4' chat model and return the reply text.

    The token counts reported in the response, and a cost estimate derived
    from them, are written to the module-level ``logger`` at INFO level.

    Args:
        system_message: Content of the ``system`` role message.
        user_message: Content of the ``user`` role message.
        temp: Optional sampling temperature. When ``None`` (the default) no
            ``temperature`` argument is included in the request at all.

    Returns:
        The ``content`` field of the first choice in the response.
    """
    # Build the request once; the only thing that varies is the optional temperature.
    request = {
        "model": 'gpt-4',
        "messages": [
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_message},
        ],
    }
    # `is not None` (rather than truthiness) so an explicit temperature of 0 is forwarded.
    if temp is not None:
        request["temperature"] = temp

    response = openai.ChatCompletion.create(**request)

    usage = response['usage']
    prompt, reply = usage['prompt_tokens'], usage['completion_tokens']
    # Hard-coded rates per 1000 tokens (prompt vs. completion).
    # NOTE(review): presumably GPT-4 list pricing at the time of writing -- confirm before relying on it.
    cost = (0.03/1000)*prompt + (0.06/1000)*reply
    logger.info(f"Prompt tokens: {prompt}, Completion tokens: {reply}, Cost: {cost}")

    return response["choices"][0]["message"]["content"]

In [3]:
# def extract_key_words(text):
#     """
#     Extracts key words from a text
#     """
#     SYS = """You are an expert career coach and hiring manager. You will be given a job position and you must extract the key responsibilities, skills and words from the posting without adding any further detail.
#             You do not extract details such as:
            
#             "Having a current valid driver's license and a reliable vehicle
#             Potential for a part of the global company
#             Global blue-chip client
#             3+ years sales experience within the international freight industry
#             Australian Citizen/Permanent Resident
#             Super (Superannuation)
#             American Express
#             Driver's license 
#             Full time maximum term contract 
#             Regional/interstate travel
#             Tools of the trade (tablet and phone) provided
#             Uncapped commission 
#             Weekly bonuses and commissions
#             Career change
#             Entry level sales
#             Development program
#             Job training
#             Mentorship
#             Personalised coaching
#             High achievers
#             Future leaders
#             Melbourne.
#             "
#             i.e. things that are not intangible or technical skills or competencies.
            
#                 Respond using markdown"""
#     USER = "Extract the key responsibilities, skills/competencies and words/phrases from the following job posting: \n\n"
#     words = chatbot_completition(SYS, USER + text)
    
    
#     return words

In [4]:
def extract_key_words_and_resp(text):
    """
    Ask the chat model for the key responsibilities, skills and keywords of a job posting.

    The system prompt lists categories (location, perks, application process, ...)
    that must be left out of the extraction; the user prompt is followed directly
    by ``text``. Returns whatever ``chatbot_completition`` returns for that pair.
    """
    system_prompt = """You are an expert career coach and hiring manager, responsible for extracting key aspects from a job posting. Your task includes identifying essential responsibilities, skills, and keywords. Do not introduce any additional information.

    The following examples should be excluded from your extraction as they do not represent intangible or technical skills, competencies, or job responsibilities:

    Location specific information (e.g., "Melbourne")
    Citizenship requirements (e.g., "Australian Citizen/Permanent Resident")
    Job perks and incentives (e.g., "Uncapped commission", "Weekly bonuses and commissions")
    Specific company details or affiliations (e.g., "Potential for a part of the global company", "Global blue-chip client", "American Express")
    Physical mobility and travel needs (e.g., "Having a current valid driver's license and a reliable vehicle", "Regional/interstate travel")
    Provided equipment (e.g., "Tools of the trade (tablet and phone) provided")
    Specific experience in certain industry other than sales or account management  (e.g., "3+ years sales experience within the international freight industry")
    Employment type (e.g., "Full time maximum term contract")
    Personal and career development opportunities (e.g., "Career change", "Entry level sales", "Development program", "Job training", "Mentorship", "Personalised coaching", "High achievers", "Future leaders")
    Superannuation details (e.g., "Super")
    Specific initiatives or programs (e.g., "Microcredentials initiatives", "Learning and Leadership Enterprise (LLE)")
    Instructions to review documentation (e.g., "Review the Position Description")
    Application process details (e.g., "Responses to the Selection Criteria and Core Competencies", "Online responses to pre-screening employment questions", "Application process understanding")
    Job application preparation (e.g., "Resume creation", "Cover letter writing")
    Support offered to candidates (e.g., "Support for candidates requiring adjustments", "Business School support")
    Privacy concerns (e.g., "Privacy of personal information")
    
    ie DONT NOT EXTRACT things that are not intagible skills/competencies or techincal skills or competencies.Your task is to focus on the technical and intangible skills, competencies, and job responsibilities that are described in the job posting. 
            
    Respond using markdown"""

    user_prompt = """Given the following job posting, extract keywords that would be relevant to include in the experiences of a resume. Think this through step by step. Look for the relevant detail and keywords from the job posting:
                \n\n"""

    # The posting text is appended straight after the user prompt's trailing blank lines.
    return chatbot_completition(system_prompt, user_prompt + text)



In [5]:
def extract_key_words(text):
    """
    Ask the chat model for a single list of skill/competency/responsibility keywords.

    The system prompt lists categories (location, perks, application process, ...)
    that must be left out of the extraction; the user prompt is followed directly
    by ``text``. Returns whatever ``chatbot_completition`` returns for that pair.
    """
    system_prompt = """You are an expert career coach and hiring manager, responsible for extracting key aspects from a job posting. Your task is to identify keywords relating to skills, competencies and responsibilites. Do not introduce any additional information.

    The following examples should be excluded from your extraction as they do not represent intangible or technical skills, competencies, or job responsibilities:

    Location specific information (e.g., "Melbourne")
    Citizenship requirements (e.g., "Australian Citizen/Permanent Resident")
    Job perks and incentives (e.g., "Uncapped commission", "Weekly bonuses and commissions")
    Specific company details or affiliations (e.g., "Potential for a part of the global company", "Global blue-chip client", "American Express")
    Physical mobility and travel needs (e.g., "Having a current valid driver's license and a reliable vehicle", "Regional/interstate travel")
    Provided equipment (e.g., "Tools of the trade (tablet and phone) provided")
    Specific experience in certain industry other than sales or account management  (e.g., "3+ years sales experience within the international freight industry")
    Employment type (e.g., "Full time maximum term contract")
    Personal and career development opportunities (e.g., "Career change", "Entry level sales", "Development program", "Job training", "Mentorship", "Personalised coaching", "High achievers", "Future leaders")
    Superannuation details (e.g., "Super")
    Specific initiatives or programs (e.g., "Microcredentials initiatives", "Learning and Leadership Enterprise (LLE)")
    Instructions to review documentation (e.g., "Review the Position Description")
    Application process details (e.g., "Responses to the Selection Criteria and Core Competencies", "Online responses to pre-screening employment questions", "Application process understanding")
    Job application preparation (e.g., "Resume creation", "Cover letter writing")
    Support offered to candidates (e.g., "Support for candidates requiring adjustments", "Business School support")
    Privacy concerns (e.g., "Privacy of personal information")
    
    ie DONT NOT EXTRACT things that are not intagible skills/competencies or techincal skills or competencies.Your task is to focus on the technical and intangible skills, competencies, and job responsibilities that are described in the job posting. 
            
    Format as a single list of key words. Respond using markdown"""

    user_prompt = """Given the following job posting, extract keywords that would be relevant to include in the experiences of a resume. Think this through step by step. Look for the relevant detail and keywords from the job posting:
                \n\n"""

    # The posting text is appended straight after the user prompt's trailing blank lines.
    return chatbot_completition(system_prompt, user_prompt + text)

In [6]:
# job_urls = [
#     "https://www.seek.com.au/job/68801342?type=standard#sol=e17d1421904e325347205c9167400bdd5c7a5d27",
#     "https://www.seek.com.au/job/68982589?type=standout#sol=a9f3fa61762231385bd75a9da80a43d0600ea29c",
#     "https://www.seek.com.au/job/68954499?type=standard#sol=107f2b3f1e1d99f08406205d42f254516cd97019",
#     "https://www.seek.com.au/job/68766133?type=standout#sol=fcae9827342ce28d57852f76458bc407d827f490",
#     "https://www.seek.com.au/job/68912013?type=standout#sol=f52041b683b7c944c9878364ef8e73962c495328",
#     "https://www.seek.com.au/job/68937311?type=standout#sol=19b248d348659b4a305783437236a4a6420f82ab"
# ]



# Job postings to process, one URL per posting.
job_urls = [
    "https://www.seek.com.au/job/68922342?type=standout#sol=3947ab745533e8751f51dfd6055c802d86c49572",
    "https://www.seek.com.au/job/68897730?type=standout#sol=61e7dab73a859f8e22d73fe8f1e4d244858a1925",
]

# Load every URL through SeleniumURLLoader and keep only each document's page text.
loader = SeleniumURLLoader(urls=job_urls)
data = loader.load()
texts = [document.page_content for document in data]

In [7]:
output_dir = "Eleni"

# Make sure the output directory exists. exist_ok=True keeps re-running this
# cell harmless and removes the gap between a separate os.path.exists() check
# and the creation call. If the path exists but is not a directory this now
# fails here, rather than later when a file is opened inside it.
os.makedirs(output_dir, exist_ok=True)
# Specify the desired save location by joining the output directory with the filename.

In [8]:
# Extract keywords only from each posting's text, for resume building.
# One output file per posting, suffixed with the posting's index in `texts`.
for i, text in enumerate(texts):
    key_words = extract_key_words(text)
    file_path_resume = os.path.join(output_dir, f'extracted_resume_key_words{i}')
    # Explicit UTF-8: the platform default encoding may be unable to encode
    # characters in the model's reply, which would raise UnicodeEncodeError.
    with open(file_path_resume, 'w', encoding='utf-8') as f:
        f.write(key_words)
    # Use the notebook's own logger, as chatbot_completition does, instead of the root logger.
    logger.info(f'Extracted resume key words {i}th text')

In [9]:
# Extract keywords and responsibilities from each job description, for the cover letter.
# One output file per posting, suffixed with the posting's index in `texts`.
for i, text in enumerate(texts):
    key_words = extract_key_words_and_resp(text)
    file_path_cover_letter = os.path.join(output_dir, f'extracted_cover_letter_key_words{i}')
    # Explicit UTF-8: the platform default encoding may be unable to encode
    # characters in the model's reply, which would raise UnicodeEncodeError.
    with open(file_path_cover_letter, 'w', encoding='utf-8') as f:
        f.write(key_words)
    # Use the notebook's own logger, as chatbot_completition does, instead of the root logger.
    logger.info(f'Extracted cover letter key words {i}th text')