In [None]:
import csv
import re
import pdfplumber
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFacePipeline
from transformers import pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [44]:
# Free-text job title typed by the user, with surrounding whitespace removed.
# The job-search cell compares it case-insensitively as a substring of each
# job title derived from the CSV.
job_search = input("Enter the job title you're looking for: ").strip()

In [None]:
def clean_job_title(job_title_from_url):
    """Convert a hyphen-separated URL slug into a title-cased job title.

    Every hyphen becomes a single space, then ``str.title()`` capitalises
    each word (e.g. ``"data-scientist"`` -> ``"Data Scientist"``).
    """
    spaced_title = " ".join(job_title_from_url.split("-"))
    return spaced_title.title()

# Result of the search; both stay None when nothing matches.
matching_job = None
matching_skills = None

# Lower-case the query once instead of on every row.
search_term = job_search.lower()

try:
    with open('job_skills.csv', mode='r', newline='', encoding='utf-8') as file:
        csv_reader = csv.reader(file)

        for row in csv_reader:
            # csv.reader yields [] for blank lines, and malformed rows can
            # have any column count; unpacking those would raise ValueError.
            if len(row) != 2:
                continue

            job_url, required_skills = row

            # The title is the slug between "view/" and the first "-<digits>".
            job_title_from_url = re.search(r'view/(.*?)-\d+', job_url)
            if not job_title_from_url:
                continue

            job_title = clean_job_title(job_title_from_url.group(1))

            # An empty query is a substring of every title, so without the
            # truthiness check it would silently select the first row.
            if search_term and search_term in job_title.lower():
                matching_job = job_title
                matching_skills = required_skills
                break

except FileNotFoundError:
    print("The file 'job_skills.csv' was not found.")
    # Re-raise instead of calling exit(): in a notebook exit() shuts the
    # kernel down, whereas an exception just stops this run.
    raise

if matching_job:
    print(f"Matching job found: {matching_job}")
    print(f"Associated skills: {matching_skills}")
else:
    print("No matching job found.")

# Enter Skills Manually

In [None]:
# Compare skills typed by the user against the skills of the matched job.
user_skills = input("Enter your skills (comma separated): ").strip()
new_skills = matching_skills

if new_skills is None:
    # No job was matched, so there is nothing to compare against; calling
    # .split on None would raise AttributeError.
    print("No job skills to compare against; find a matching job first.")
else:
    # Drop empty entries produced by stray or trailing commas.
    user_skills_list = [skill.strip() for skill in user_skills.split(",") if skill.strip()]
    new_skills_list = [skill.strip() for skill in new_skills.split(",") if skill.strip()]

    # Case-insensitive comparison.
    user_skills_set = {skill.lower() for skill in user_skills_list}
    new_skills_set = {skill.lower() for skill in new_skills_list}

    missing_skills = new_skills_set - user_skills_set

    if missing_skills:
        # Keep the job's original spelling and order, listing each skill once
        # even if the job repeats it.
        missing_skills_list = []
        already_listed = set()
        for skill in new_skills_list:
            key = skill.lower()
            if key in missing_skills and key not in already_listed:
                already_listed.add(key)
                missing_skills_list.append(skill)

        # Print three skills per line.
        grouped_skills = [", ".join(missing_skills_list[i:i+3]) for i in range(0, len(missing_skills_list), 3)]
        print("Other Skills Required:")
        print("\n".join(grouped_skills))
    else:
        print("The user has all the required skills.")


# PDF

In [None]:
# NOTE(review): the job-search cell already defines a function with this same
# name and body; this later definition shadows it.
def clean_job_title(job_title_from_url):
    """Return the URL slug with hyphens turned into spaces, title-cased."""
    return job_title_from_url.replace("-", " ").title()

def extract_resume_text(pdf_path):
    """Return the text of every page of the PDF at ``pdf_path`` as one string.

    Pages are joined with a newline so the last word of one page is not glued
    to the first word of the next. A page for which ``extract_text()`` yields
    no value (e.g. an image-only page) contributes an empty string instead of
    raising ``TypeError`` during concatenation.

    Errors raised by ``pdfplumber.open`` (such as a missing file) propagate
    to the caller.
    """
    with pdfplumber.open(pdf_path) as pdf:
        page_texts = [page.extract_text() or "" for page in pdf.pages]
    return "\n".join(page_texts)

def clean_resume_text(resume_text):
    """Strip URLs and date-like tokens, then normalise whitespace.

    Removes anything starting with ``http`` or ``www`` up to the next
    whitespace, plus dates shaped like ``YYYY-MM-DD``, ``YYYY-MM`` or
    ``DD/DD/YYYY``. Every run of whitespace (newlines included) is then
    collapsed to a single space and the ends are trimmed.
    """
    noise_pattern = r'http\S+|www\S+|[\d]{4}-[\d]{2}-[\d]{2}|[\d]{4}-[\d]{2}|[\d]{2}/\d{2}/\d{4}'
    without_noise = re.sub(noise_pattern, '', resume_text)
    single_spaced = re.sub(r'\s+', ' ', without_noise)
    return single_spaced.strip()

def split_into_sections(resume_text):
    """Split text into a list of sections at blank lines.

    A separator is a newline, optional whitespace, then another newline.
    Text without such a separator comes back as a one-element list.
    """
    blank_line = r'\n\s*\n'
    return re.split(blank_line, resume_text)

def extract_skills_from_section(section):
    """Break one section of text into a list of lower-cased skill strings.

    The text is lower-cased, and every character that is not a word
    character, whitespace, comma or bullet (``•``) is removed. The remainder
    is split on commas, newlines and bullets; each piece is trimmed and empty
    pieces are dropped.
    """
    normalised = re.sub(r'[^\w\s,•]', '', section.lower())

    skill_list = []
    for piece in re.split(r'[,\n•]', normalised):
        trimmed = piece.strip()
        if trimmed:
            skill_list.append(trimmed)
    return skill_list

def identify_skill_sections(sections, threshold=0.2):
    """Return the sections whose best TF-IDF cosine similarity exceeds ``threshold``.

    Args:
        sections: list of section strings.
        threshold: minimum value the largest entry of a section's similarity
            row must exceed for the section to be kept. Defaults to 0.2.

    Returns:
        The kept sections, in their original order. An empty list is returned
        when ``sections`` is empty or when no section contains a token the
        vectorizer can use.

    NOTE(review): the similarity matrix compares every section with every
    section *including itself*, so a section containing at least one
    vocabulary term scores 1.0 against itself and passes any threshold below
    1. As written, this therefore behaves as a "has usable text" filter and
    not as a skill detector - confirm the intended heuristic.
    """
    if not sections:
        # fit_transform cannot build a vocabulary from zero documents.
        return []

    vectorizer = TfidfVectorizer()
    try:
        section_vectors = vectorizer.fit_transform(sections)
    except ValueError as exc:
        # Raised when no document yields a token; treat that as "nothing
        # found" and let any other ValueError surface.
        if "empty vocabulary" not in str(exc):
            raise
        return []

    similarity_matrix = cosine_similarity(section_vectors)

    return [
        section
        for section, similarities in zip(sections, similarity_matrix)
        if similarities.max() > threshold
    ]

def compare_skills(extracted_skills, job_skills_required):
    """Report which required job skills are absent from the extracted skills.

    Both arguments are comma-separated strings. Matching is case-insensitive
    and ignores whitespace around each entry.

    Args:
        extracted_skills: comma-separated skills found for the candidate.
        job_skills_required: comma-separated skills the job asks for.

    Returns:
        ``"The resume has all the required skills."`` when nothing is missing;
        otherwise ``"Other Skills Required:"`` followed by the missing skills
        (lower-cased, alphabetically sorted) three per line.
    """
    extracted_skills_set = {skill.strip() for skill in extracted_skills.lower().split(",")}
    job_skills_set = {skill.strip() for skill in job_skills_required.lower().split(",")}

    # Stray or trailing commas (or an empty requirement string) produce an
    # empty entry, which is not a skill and must not be reported as missing.
    job_skills_set.discard("")

    missing_skills = job_skills_set - extracted_skills_set

    if not missing_skills:
        return "The resume has all the required skills."

    # Sets have no stable order; sort so the report is reproducible.
    missing_skills_list = sorted(missing_skills)
    grouped_skills = [", ".join(missing_skills_list[i:i+3]) for i in range(0, len(missing_skills_list), 3)]
    return "Other Skills Required:\n" + "\n".join(grouped_skills)

# Extract skills from a resume PDF and compare them with the matched job.
if __name__ == "__main__":
    pdf_path = r"" # PUT CV/RESUME path here

    # Defined up front so the follow-up cell can still reference it when
    # extraction is skipped below.
    extracted_skills = ""

    if not pdf_path:
        # An empty path would otherwise be handed straight to pdfplumber.
        print("No resume path set; assign the PDF location to pdf_path and re-run this cell.")
    elif matching_skills is None:
        # compare_skills calls .lower() on its argument and would fail on None.
        print("No job skills to compare against; find a matching job first.")
    else:
        resume_text = extract_resume_text(pdf_path)

        # Split on blank lines BEFORE cleaning: clean_resume_text collapses
        # every newline to a space, after which no section boundary is left
        # for split_into_sections to find.
        sections = [clean_resume_text(section) for section in split_into_sections(resume_text)]
        # Cleaning can empty a section entirely (e.g. one holding only a URL).
        sections = [section for section in sections if section]

        skill_sections = identify_skill_sections(sections) if sections else []

        all_skills = []
        for section in skill_sections:
            all_skills.extend(extract_skills_from_section(section))

        extracted_skills = ", ".join(all_skills)

        result = compare_skills(extracted_skills, matching_skills)
        print(result)

# Add other skills (not mentioned in the resume)

In [None]:
# Let the user append skills the resume did not mention and re-run the comparison.
add_more = input("\nDo you have more skills to add? (yes/no): ").strip().lower()
# Accept the common short answer as well as the full word.
if add_more in ("yes", "y"):
    if matching_skills is None:
        # compare_skills calls .lower() on its argument and would fail on None.
        print("No job skills to compare against; find a matching job first.")
    else:
        additional_skills = input("Enter the additional skills you have (comma separated): ").strip()
        # compare_skills parses a comma-separated string, so append in that form.
        updated_extracted_skills = extracted_skills + ", " + additional_skills.lower()

        updated_result = compare_skills(updated_extracted_skills, matching_skills)
        print("\nUpdated Missing Skills:")
        print(updated_result)
else:
    print("No additional skills added.")