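# Install all required libraries and packages.
# NOTE: the TF* classes imported from transformers below (TFGPT2LMHeadModel) also need TensorFlow, which this cell does not install.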

In [None]:
!pip install torch
!pip install transformers
!pip install spacy
!python -m spacy download en_core_web_sm
!pip install scikit-learn


# Importing all libraries 

In [None]:
import csv
import re
import torch
from transformers import TFGPT2LMHeadModel, GPT2Tokenizer

import spacy

# Function to clean job titles

In [None]:
def clean_job_title(job_title):
    """Normalize a raw job title into lowercase, space-separated words.

    Args:
        job_title: The raw job title string.

    Returns:
        The title lowercased, with every character that is not an ASCII
        letter, digit or whitespace turned into a space, runs of whitespace
        collapsed to a single space, and no leading or trailing whitespace.
    """
    lowered = job_title.lower()

    # Punctuation becomes a space (not nothing) so "data-analyst" stays two words.
    spaced = re.sub(r'[^a-z0-9\s]', ' ', lowered)

    # Trim and collapse AFTER the substitution: punctuation at the edges or
    # next to a space would otherwise leave stray or doubled spaces behind.
    return ' '.join(spaced.split())

# Original data

In [None]:
# Original data: [job title, category] pairs. The titles are cleaned below and
# the categories serve as the labels for the classification step.
# NOTE(review): "Software Engineer" and "Natural Language Processing Engineer"
# each appear twice, and "Product Manager" appears with two different
# categories ("Management roles" and "Data-related roles") - confirm which
# label is intended before training on this list.
data = [
    ["Data Analyst", "Data-related roles"],
    ["Software Engineer", "Engineering roles"],
    ["Product Manager", "Management roles"],
    ["Data Scientist", "Data-related roles"],
    ["Machine Learning Engineer", "Data-related roles"],
    ["Natural Language Processing Engineer", "Data-related roles"],
    ["Artificial Intelligence Engineer", "Data-related roles"],
    ["Data Architect", "Data-related roles"],
    ["Data Engineer", "Data-related roles"],
    ["Data Visualization Specialist", "Data-related roles"],
    ["Software Engineer", "Engineering roles"],
    ["Research Scientist", "Research roles"],
    ["Professor", "Education roles"],
    ["Consultant", "Business roles"],
    ["Entrepreneur", "Business roles"],
    ["Data Storyteller", "Data-related roles"],
    ["Data Ethics Engineer", "Data-related roles"],
    ["AI Ethics Engineer", "Data-related roles"],
    ["MLOps Engineer", "Data-related roles"],
    ["Cloud Data Engineer", "Data-related roles"],
    ["Cloud Data Scientist", "Data-related roles"],
    ["Cloud Machine Learning Engineer", "Data-related roles"],
    ["Blockchain Engineer", "Data-related roles"],
    ["Cryptocurrency Engineer", "Data-related roles"],
    ["Natural Language Processing Engineer", "Data-related roles"],
    ["Computer Vision Engineer", "Data-related roles"],
    ["Robotics Engineer", "Data-related roles"],
    ["Speech Recognition Engineer", "Data-related roles"],
    ["Text Analytics Engineer", "Data-related roles"],
    ["Fraud Detection Engineer", "Data-related roles"],
    ["Risk Management Engineer", "Data-related roles"],
    ["Compliance Engineer", "Data-related roles"],
    ["Security Engineer", "Data-related roles"],
    ["Quality Assurance Engineer", "Data-related roles"],
    ["Test Engineer", "Data-related roles"],
    ["DevOps Engineer", "Data-related roles"],
    ["Site Reliability Engineer", "Data-related roles"],
    ["Systems Engineer", "Data-related roles"],
    ["Network Engineer", "Data-related roles"],
    ["Database Administrator", "Data-related roles"],
    ["Security Analyst", "Data-related roles"],
    ["Business Analyst", "Data-related roles"],
    ["Project Manager", "Data-related roles"],
    ["Product Manager", "Data-related roles"],
    ["Marketing Manager", "Data-related roles"],
    ["Sales Manager", "Data-related roles"],
    ["Customer Success Manager", "Data-related roles"],
    ["Human Resources Manager", "Data-related roles"],
    ["Finance Manager", "Data-related roles"],
    ["Operations Manager", "Data-related roles"],
    ["Legal Counsel", "Data-related roles"],
    ["Compliance Officer", "Data-related roles"],
    ["Risk Manager", "Data-related roles"],
    ["Data Wrangler", "Data-related roles"],
    ["Data Scientist Intern", "Data-related roles"],
    ["Machine Learning Engineer Intern", "Data-related roles"],
    ["Natural Language Processing Engineer Intern", "Data-related roles"],
    ["Artificial Intelligence Engineer Intern", "Data-related roles"],
    ["Data Architect Intern", "Data-related roles"],
    ["Data Engineer Intern", "Data-related roles"],
    ["Data Visualization Specialist Intern", "Data-related roles"],
    ["Software Engineer Intern", "Engineering roles"],
    ["Research Scientist Intern", "Research roles"],
    ["Professor Intern", "Education roles"],
    ["Consultant Intern", "Business roles"],
    ["Entrepreneur Intern", "Business roles"],
    ["Data Storyteller Intern", "Data-related roles"],
    ["Data Ethics Engineer Intern", "Data-related roles"],
    ["AI Ethics Engineer Intern", "Data-related roles"],
    ["MLOps Engineer Intern", "Data-related roles"],
    ["Cloud Data Engineer Intern", "Data-related roles"],
    ["Cloud Data Scientist Intern", "Data-related roles"],
    ["Cloud Machine Learning Engineer Intern", "Data-related roles"],
    ["Business Intelligence Analyst Intern", "Data-related roles"],
    ["Customer Data Analyst Intern", "Data-related roles"],
    ["Financial Analyst Intern", "Data-related roles"],
    ["Healthcare Data Analyst Intern", "Data-related roles"],
    ["Marketing Data Analyst Intern", "Data-related roles"],
    ["Operations Data Analyst Intern", "Data-related roles"]]

# Cleaned and processed data

In [None]:
# Build [normalized title, category] pairs: each title goes through
# clean_job_title() while its category label is carried over unchanged.
cleaned_data = [[clean_job_title(job[0]), job[1]] for job in data]


# Load the pre-trained NER model

In [None]:

# Load spaCy's pre-trained "en_core_web_sm" pipeline once at module level;
# extract_user_info() runs it over the CV text and reads the resulting
# named entities (doc.ents).
nlp = spacy.load("en_core_web_sm")

# Preparing the data and loading the gpt model for training

In [None]:
# Preparing the data, then fine-tuning GPT-2 as a job-title classifier.
#
# Imports are kept local to this cell: the original code called
# train_test_split / classification_report without importing them, and a
# language-model head cannot be trained on category labels, so the
# sequence-classification head is used instead.
import numpy as np
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from transformers import TFGPT2ForSequenceClassification, create_optimizer

# Checkpoint to fine-tune. NOTE(review): "gpt2-xl" is a multi-gigabyte
# download; fine-tuning it presumably needs a large accelerator - consider
# the smaller "gpt2" checkpoint if memory is limited.
GPT2_CHECKPOINT = "gpt2-xl"
NUM_EPOCHS = 3
BATCH_SIZE = 8
LEARNING_RATE = 3e-5

job_titles = [job[0] for job in cleaned_data]
categories = [job[1] for job in cleaned_data]

# The classifier is trained on integer class ids, not category strings.
label_names = sorted(set(categories))
label_to_id = {name: index for index, name in enumerate(label_names)}
labels = np.array([label_to_id[category] for category in categories])

# Load the GPT-2 tokenizer and model
tokenizer = GPT2Tokenizer.from_pretrained(GPT2_CHECKPOINT)
# GPT-2 ships without a padding token, so padding=True fails unless one is
# assigned; reusing the end-of-sequence token is the usual choice.
tokenizer.pad_token = tokenizer.eos_token

model = TFGPT2ForSequenceClassification.from_pretrained(
    GPT2_CHECKPOINT,
    num_labels=len(label_names),
    use_cache=False,  # no generation here, so skip returning cached key/values
)
# The classification head picks the last non-padding token of each row, so
# the model must know which id is padding.
model.config.pad_token_id = tokenizer.pad_token_id

# Vectorize the input features (NumPy arrays so scikit-learn can slice them)
encoded_inputs = tokenizer(job_titles, padding=True, truncation=True, return_tensors="np")

# Split the data into training and testing sets; the attention mask is split
# alongside the ids so padded positions stay masked.
(train_ids, test_ids, train_mask, test_mask, y_train, y_test) = train_test_split(
    encoded_inputs["input_ids"],
    encoded_inputs["attention_mask"],
    labels,
    test_size=0.2,
    random_state=42,
)
X_train = {"input_ids": train_ids, "attention_mask": train_mask}
X_test = {"input_ids": test_ids, "attention_mask": test_mask}

# Train the model. No loss is passed to compile(): the model then falls back
# to its built-in classification loss.
steps_per_epoch = -(-len(y_train) // BATCH_SIZE)  # ceiling division
optimizer, _ = create_optimizer(
    init_lr=LEARNING_RATE,
    num_train_steps=steps_per_epoch * NUM_EPOCHS,
    num_warmup_steps=0,
)
model.compile(optimizer=optimizer)
model.fit(X_train, y_train, epochs=NUM_EPOCHS, batch_size=BATCH_SIZE)

# Evaluate the model: predict() yields logits, so take the arg-max class id.
logits = model.predict(X_test, batch_size=BATCH_SIZE)["logits"]
y_pred = np.argmax(logits, axis=-1)
print(
    classification_report(
        y_test,
        y_pred,
        # Pass every class id explicitly: rare categories may be absent from
        # the small test split, which would otherwise mismatch target_names.
        labels=list(range(len(label_names))),
        target_names=label_names,
        zero_division=0,
    )
)

# Save the trained model. save_pretrained() writes a directory (config plus
# weights) at this path; the name is kept as-is for compatibility even though
# the result is not a pickle file.
model_filename = "job_title_classification_model.pkl"
model.save_pretrained(model_filename)


print(f"Trained model saved as '{model_filename}'")


# Preprocess the text (job description or company information text)

In [None]:
# Normalize free text (job description or company information text)
def preprocess_text(text):
    """Lowercase *text* and drop every character that is not an ASCII letter, digit or whitespace."""
    lowered = text.lower()
    return re.sub(r'[^a-zA-Z0-9\s]', '', lowered)


# User's resume

In [None]:
# Getting the user's CV or resume for further processing
def get_user_cv(filename, encoding=None, errors="strict"):
    """Read the user's CV or resume from disk.

    Args:
        filename: Path of the file to read.
        encoding: When None (the default) the raw bytes are returned, as
            before. When given (e.g. "utf-8") the content is decoded and
            returned as text, which is the form the spaCy pipeline used by
            extract_user_info() needs.
        errors: Decoding error policy passed to bytes.decode(); only used
            when *encoding* is given.

    Returns:
        bytes when *encoding* is None, otherwise str.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If decoding fails under the "strict" policy.
    """
    with open(filename, "rb") as f:
        raw = f.read()
    if encoding is None:
        return raw
    return raw.decode(encoding, errors)

# Extracting the user's skills and experience from resume 

In [None]:
# Pull skills and experience out of the CV text via named-entity recognition
def extract_user_info(cv):
    """Run the module-level spaCy pipeline over *cv* and bucket its entities.

    Entities labelled "SKILL" are collected as skills and entities labelled
    "ORG" as experience; all other entities are ignored.

    NOTE(review): the stock "en_core_web_sm" label set does not appear to
    include "SKILL" - confirm a custom pipeline component supplies it,
    otherwise the skills list will always be empty.

    Returns:
        A (skills, experience) tuple of lists of entity texts, each in
        document order.
    """
    entities = nlp(cv).ents
    skills = [ent.text for ent in entities if ent.label_ == "SKILL"]
    experience = [ent.text for ent in entities if ent.label_ == "ORG"]
    return skills, experience

# Finally, generate the tailored cover letter

In [None]:

def generate_cover_letter(job_title, company_name, recipient_name, skills, experience):
    """
    Generates a cover letter for the given job title, company name, recipient name, skills, and experience.

    Bracketed placeholders in the template are filled as follows:
    [Recipient's Name], [Job Title] and [Company Name] come from the matching
    arguments; [Current Job Title] is skills[0], [Previous Company] is
    experience[0] and [Domain/Industry] is skills[1]. A placeholder whose
    value is missing or blank, and every placeholder with no mapping
    (e.g. [Your Name]), is left in the text so the user can fill it in by hand.

    Args:
        job_title: The job title for the cover letter.
        company_name: The name of the company the cover letter is being sent to.
        recipient_name: The name of the person the cover letter is being addressed to.
        skills: The user's skills (a sequence of strings; may be short or empty).
        experience: The user's experience (a sequence of strings; may be empty).

    Returns:
        The generated cover letter.
    """

    # Create the input text for the cover letter
    template = """
Dear [Recipient's Name],

I am writing to express my interest in the [Job Title] position at [Company Name]. As a [Current Job Title] with [Previous Company], I have [Experience/Interest/Expertise] in [Domain/Industry]. I am confident that my skills and experience would be a valuable asset to your team.

In my previous role, I was responsible for [list of responsibilities]. I have a proven track record of success in [list of accomplishments]. I am also a highly motivated and results-oriented individual.

I am eager to learn more about the [Job Title] position and how I can contribute to your company's success. I am available for an interview at your earliest convenience.

Thank you for your time and consideration.

Sincerely,
[Your Name]
"""

    def pick(items, index):
        """Return the stripped entry at *index*, or None if it is absent or blank."""
        if index < len(items):
            candidate = str(items[index]).strip()
            if candidate:
                return candidate
        return None

    replacements = {
        "Recipient's Name": recipient_name,
        "Job Title": job_title,
        "Company Name": company_name,
        "Current Job Title": pick(skills, 0),
        "Previous Company": pick(experience, 0),
        "Domain/Industry": pick(skills, 1),
    }

    def substitute(match):
        """Swap one whole [placeholder] for its value, or keep it when no value exists."""
        value = replacements.get(match.group(1))
        return match.group(0) if value is None else value

    # One regex pass over whole bracketed tokens: the brackets are consumed,
    # "Job Title" can no longer clobber "[Current Job Title]", and inserted
    # values are never re-scanned for placeholders.
    cover_letter = re.sub(r"\[([^\[\]]+)\]", substitute, template)

    # Return the cover letter
    return cover_letter


# Main entry point

In [None]:


def _parse_comma_list(raw):
    """Split a comma-separated answer into stripped, non-empty items."""
    return [item.strip() for item in raw.split(",") if item.strip()]


def _find_missing_fields(fields):
    """Return the labels of *fields* (label -> value) whose value is empty."""
    return [label for label, value in fields.items() if not value]


def main():
    """Prompt for the application details, then print and save the cover letter.

    The logic lives in a function because the early exits below need
    `return`, which is a syntax error at module level.
    """
    # Get the user's information
    job_title = input("Please enter the job title you are applying for: ").strip()
    company_name = input("Please enter the name of the company you are applying to: ").strip()
    recipient_name = input("Please enter the name of the person you are addressing the cover letter to: ").strip()
    skills = _parse_comma_list(input("Please enter your skills, separated by commas: "))
    experience = _parse_comma_list(input("Please enter your experience, separated by commas: "))

    # Check if the user has entered all of the required information
    missing = _find_missing_fields({
        "job title": job_title,
        "company name": company_name,
        "recipient name": recipient_name,
        "skills": skills,
        "experience": experience,
    })
    if missing:
        print("Please enter valid information.")
        print("Missing: " + ", ".join(missing))
        return

    # Generate the cover letter
    try:
        cover_letter = generate_cover_letter(job_title, company_name, recipient_name, skills, experience)
    except IndexError:
        # The letter template indexes into the skills/experience lists, so
        # too few entries must not crash the session.
        print("Please enter more skills and experience entries to complete the cover letter.")
        return

    # Print the cover letter
    print(cover_letter)

    # Save the cover letter to a file (explicit encoding so non-ASCII names
    # are written the same way on every platform)
    with open("cover_letter.txt", "w", encoding="utf-8") as f:
        f.write(cover_letter)


if __name__ == "__main__":
    main()


Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/689 [00:00<?, ?B/s]

Downloading tf_model.h5:   0%|          | 0.00/6.23G [00:00<?, ?B/s]