In [None]:
# Imports
from openai import OpenAI
import requests
from bs4 import BeautifulSoup
from pydantic import BaseModel
import json
import datetime
import your_information
from IPython.display import JSON

In [None]:
# Get job description from URL

# Send a GET request to the URL; the timeout keeps the cell from hanging
# indefinitely when the server does not answer
url_response = requests.get(your_information.job_description_url, timeout=30)

# Fail loudly on HTTP errors (404, 500, ...) instead of passing an error page
# on to the extraction step
url_response.raise_for_status()

# Extract the HTML content from the response
html_content = url_response.text

# Create a BeautifulSoup object to parse the HTML
soup = BeautifulSoup(html_content, "html.parser")

# Extract the text from the webpage; the separator prevents text from adjacent
# elements (e.g. consecutive list items) from being glued into a single word
page_text = soup.get_text(separator=" ")

# Collapse every run of whitespace (spaces, tabs, newlines) into a single space
job_description = " ".join(page_text.split())

print(job_description)

In [None]:
# Initialize OpenAI client to connect to the ChatGPT API.
# The API key is read from the local `your_information` module imported at the top.
# `client` is the module-level object that get_completion() sends its requests through.
client = OpenAI(api_key=your_information.OPENAI_API_KEY)

# Define json output format
# This pydantic model is what get_completion() passes as `response_format`, i.e. the
# structure the extraction answer is requested in.
# NOTE(review): documented with # comments rather than a docstring on purpose --
# pydantic may copy a class docstring into the generated schema; confirm before adding one.
class JobDescriptionExtraction(BaseModel):
    employer: str  # single string
    job_title: str  # single string
    requirements: list[str]  # one list entry per requirement
    tasks: list[str]  # one list entry per task
    contact_person: str  # single string
    address: str  # single string

# Define system prompt for data extraction of job descriptions.
# It is sent as the "system" message and tells the model to convert the unstructured
# job-description text it receives into the given structure.
extraction_system_prompt = """
You are an expert at structured data extraction. You will be given unstructured text from a job description 
and should convert it into the given structure.
"""

# Sentinel for "the caller did not choose a response format". It lets an explicit
# `response_format=None` be told apart from the default and used to ask for plain text.
_DEFAULT_RESPONSE_FORMAT = object()


# Helper function: Get ChatGPT response from text prompt using API
def get_completion(prompt: str, system_prompt: str = "", model: str = "gpt-4o-mini",
                   temperature: float = 0, response_format=_DEFAULT_RESPONSE_FORMAT):
    """Send a system + user message to the chat API and return the raw response.

    Args:
        prompt: Text sent as the "user" message.
        system_prompt: Text sent as the "system" message (empty by default).
        model: Name of the chat model to use.
        temperature: Degree of randomness of the ChatGPT response.
        response_format: Pydantic model class the answer must conform to.
            When omitted, JobDescriptionExtraction is used (the previous,
            hard-coded behaviour). Pass None to get an unconstrained
            plain-text answer instead of structured output.

    Returns:
        The response object of the API call; the generated text is available
        as `response.choices[0].message.content`.
    """
    if response_format is _DEFAULT_RESPONSE_FORMAT:
        # Looked up at call time so the default keeps following the schema class
        # defined in this notebook
        response_format = JobDescriptionExtraction

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]

    if response_format is None:
        # Plain chat completion: no schema is imposed on the answer
        return client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=temperature,
        )

    # Structured output: the answer is constrained to the given schema
    return client.beta.chat.completions.parse(
        model=model,
        messages=messages,
        response_format=response_format,
        temperature=temperature,
    )

In [None]:
# Run the extraction on the scraped job description (using the extraction system
# prompt) and display the untouched API response as the cell output
extraction_response_raw = get_completion(
    system_prompt=extraction_system_prompt,
    prompt=job_description,
)
extraction_response_raw

In [None]:
# Extract information from job description

# Delimiter used to mark the start and end of the job description
delimiter = "####"

# Prompt for extracting information from job description in English and German language
extraction_prompt_dict = {
    "en": f"""
    Your task is to extract information about the employer, job title, requirements, tasks, 
    contact person, and address from the job description marked with {delimiter} characters.
    
    Format your answer as a Python dictionary with "employer", "job title", "requirements", 
    "tasks", "contact person", and "address" as keys.
    
    Format the "requirements" and "tasks" as lists.
    
    If the information is missing in the job description, use "unknown" as the value.
    
    Respond as concisely as possible.
    
    Job description: {delimiter}{job_description}{delimiter}
    """,
    "de": f"""
    Deine Aufgabe ist es, Informationen über Arbeitgeber, Stellenbezeichnung, Anforderungen, Aufgaben,
    Kontaktperson und Adresse aus der Stellenbeschreibung, die mit {delimiter} Zeichen markiert ist, 
    zu extrahieren.
    
    Formattiere deine Antwort als Python Dictionary mit "Arbeitgeber", "Stellenbezeichnung", 
    "Anforderungen", "Aufgaben", "Kontaktperson", und "Adresse" als Schlüssel. 
    
    Formattiere die "Anforderungen" und "Aufgaben" jeweils als Liste. 
    
    Wenn die Information in der Stellenbeschreibung fehlt, verwende "unbekannt" als Wert. 
    
    Antworte so kurz wie möglich. 
    
    Stellenbeschreibung: {delimiter}{job_description}{delimiter}
    """
}

# get_completion() requests the answer in the JobDescriptionExtraction schema, so the
# returned keys are that schema's field names. This table renames them to the German
# keys that the German prompt asks for and that the cover-letter cell reads.
schema_to_german_key = {
    "employer": "Arbeitgeber",
    "job_title": "Stellenbezeichnung",
    "requirements": "Anforderungen",
    "tasks": "Aufgaben",
    "contact_person": "Kontaktperson",
    "address": "Adresse",
}

# Get the raw extraction response
extraction_response_raw = get_completion(extraction_prompt_dict["de"])  # use "en" for English

# Get extraction response text and cut it down to the JSON object itself.
# Slicing from the first "{" to the last "}" drops any surrounding code fence
# (```python, ```json, ...). The previous str.strip('```python') removed a *set of
# characters*, not that prefix, and left e.g. "json" in front of the payload.
extraction_text = extraction_response_raw.choices[0].message.content
extraction_text = extraction_text[extraction_text.find("{"):extraction_text.rfind("}") + 1]

# Convert extraction response to a dict; keys that are already German stay untouched
extraction_response = {
    schema_to_german_key.get(key, key): value
    for key, value in json.loads(extraction_text).items()
}

# Show extracted information
JSON(extraction_response, indent=2)

In [None]:
# Create cover letter

# Number of alternative cover letter suggestions to generate
n_cover_letters = 3

# Today's date in German notation (DD.MM.YYYY), as the prompt announces it;
# interpolating datetime.date directly would yield the ISO form YYYY-MM-DD
today_german = datetime.date.today().strftime("%d.%m.%Y")

# Cover letter prompt
cover_letter_prompt = f"""
Deine Aufgabe ist es, ein professionelles Bewerbungsanschreiben zu erstellen.

Adressiere das Anschreiben an folgenden Arbeitgeber, Adresse, Stelle und Kontaktperson:
Arbeitgeber: {extraction_response["Arbeitgeber"]}
Adresse: {extraction_response["Adresse"]}
Stelle: {extraction_response["Stellenbezeichnung"]}
Kontaktperson: {extraction_response["Kontaktperson"]}

Verwende folgenden Absender:
Name: {your_information.name}
Adresse: {your_information.address}
Telefonnummer: {your_information.phone}
E-Mail: {your_information.email}

Verwende im Briefkopf des Anschreibens Ort und Datum. 
Verwende den Ort aus: {your_information.address}
Verwende das folgende, aktuelle Datum im deutschen Datumsformat: {today_german}

Beschreibe inwiefern die Anforderungen und Aufgaben erfüllt werden durch die 
Ausbildung, Arbeitserfahrung, Kompetenzen und Motivation. 
Verwende hierfür die folgenden Informationen:
Anforderungen: {extraction_response["Anforderungen"]}
Aufgaben: {extraction_response["Aufgaben"]}
Ausbildung: {your_information.education}
Arbeitserfahrung: {your_information.work_experience}
Kompetenzen: {your_information.skills}
Motivation: {your_information.motivation}

Nenne folgende Gehaltsvorstellung und mögliches Eintrittsdatum:
Gehaltsvorstellung : {your_information.salary_expectations}
Mögliches Eintrittsdatum: {your_information.possible_start_date}

Schreibe in einem professionellen, präzisen und kompakten Ton.

Unterschreibe das Anschreiben als {your_information.name}.
"""

# Create the cover letter suggestions
cover_letter_ls = []
for _ in range(n_cover_letters):
    # Generate a single cover letter via a plain chat completion. get_completion()
    # is built for structured extraction (it requests the JobDescriptionExtraction
    # schema), which would return extraction-style JSON instead of a letter.
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": cover_letter_prompt}],
        temperature=0.7,  # some randomness so that the suggestions differ
    )
    cover_letter_response = completion.choices[0].message.content
    # Remove ** symbols (markdown bold markers)
    cover_letter_response = cover_letter_response.replace('**', '')
    # Append cover letter to list
    cover_letter_ls.append(cover_letter_response)

# Print cover letter suggestions
for i, cover_letter in enumerate(cover_letter_ls, start=1):
    print(f"Anschreiben {i}")
    print("=" * 5)
    print(cover_letter)
    print("=" * 30)