In [5]:
import os
import csv
import PyPDF2
import re  # Import the regex module
from docx import Document
import re
import zipfile
import uuid



def get_job_description():
    job_description = input("Enter the job description: ")
    return job_description

def extract_text_from_pdf(pdf_path):
    with open(pdf_path, "rb") as pdf_file:
        reader = PyPDF2.PdfReader(pdf_file)
        text = ""
        for page in reader.pages:
            text += page.extract_text()
        return text

def extract_text_from_docx(docx_path):
    doc = Document(docx_path)
    text = ""
    for paragraph in doc.paragraphs:
        text += paragraph.text + "\n"
    return text

def remove_empty_lines(text):
    return text.replace("\n", " ")

def extract_emails(text):
    email_pattern = r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,7}\b"
    emails = re.findall(email_pattern, text)
    return emails

def process_resumes(folder_path):
    resumes_data = []
    for filename in os.listdir(folder_path):
        file_path = os.path.join(folder_path, filename)

        candidate_id = str(uuid.uuid4())
        
        if filename.lower().endswith(".pdf"):
            candidate_name = os.path.splitext(filename)[0]  # Extract candidate name without extension
            resume_text = extract_text_from_pdf(file_path)
        elif filename.lower().endswith(".docx"):
            candidate_name = os.path.splitext(filename)[0]
            resume_text = extract_text_from_docx(file_path)
        elif filename.lower().endswith(".txt"):
            candidate_name = os.path.splitext(filename)[0]
            with open(file_path, "r") as txt_file:
                resume_text = txt_file.read()
        elif filename.lower().endswith(".zip"):
            candidate_name = os.path.splitext(filename)[0]
            zip_folder = os.path.join(folder_path, candidate_name)
            with zipfile.ZipFile(file_path, "r") as zip_ref:
                zip_ref.extractall(zip_folder)
            resume_texts = []
            for nested_filename in os.listdir(zip_folder):
                nested_file_path = os.path.join(zip_folder, nested_filename)
                if nested_filename.lower().endswith((".pdf", ".docx", ".txt")):
                    nested_resume_text = ""
                    if nested_filename.lower().endswith(".pdf"):
                        nested_resume_text = extract_text_from_pdf(nested_file_path)
                    elif nested_filename.lower().endswith(".docx"):
                        nested_resume_text = extract_text_from_docx(nested_file_path)
                    elif nested_filename.lower().endswith(".txt"):
                        with open(nested_file_path, "r") as txt_file:
                            nested_resume_text = txt_file.read()
                    resume_texts.append(nested_resume_text)
            combined_resume_text = "\n".join(resume_texts)
            cleaned_resume_text = remove_empty_lines(combined_resume_text)
            emails = extract_emails(cleaned_resume_text)
            resumes_data.append((candidate_id, candidate_name, cleaned_resume_text, ", ".join(emails)))
            # Clean up extracted zip folder
            os.remove(file_path)
            os.rmdir(zip_folder)
        else:
            print(f"Unsupported format for file: {filename}")
            continue
        
        cleaned_resume_text = remove_empty_lines(resume_text)
        emails = extract_emails(cleaned_resume_text)
        resumes_data.append((candidate_id,candidate_name, cleaned_resume_text, ", ".join(emails)))

    
    return resumes_data



def main():
    folder_path = input("Enter the path to the folder containing resumes: ")
    resumes_data = process_resumes(folder_path)
    
    # job_description = get_job_description()
    
    # print("\nJob Description:")
    # print(job_description)

    csv_filename = "resumes_data.csv"
    with open(csv_filename, "w", newline="", encoding="utf-8") as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(["Candidate ID","Candidate Name", "Resume Text","Emails"])
        csv_writer.writerows(resumes_data)

    print(f"Data saved to {csv_filename}")

if __name__ == "__main__":
    main()


Data saved to resumes_data.csv


In [20]:
import openai
from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv()

# Retrieve the API key from the environment variables
openai.api_key = os.getenv("ACCESS_TOKEN")



def score_job_description(job_title, job_description):
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "user",
                "content": f"Rate this job description out of 10: {job_title} - {job_description}. Do not be lenient while rating and output only a single number out of 10 in first line and followed by explanation."
            }
        ],
        temperature=1,
        max_tokens=150,  # Increase max tokens to accommodate the explanation
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )

    response_content = response.choices[0].message['content']
    score_job_description = float(response_content.split('\n')[0])
    explanation = '\n'.join(response_content.split('\n')[1:])  # Extract the explanation

    data_jd = {
        "job_title": job_title,
        "old_job_description": job_description,
        "job_description_score": score_job_description,
        "explanation": explanation
    }

    return data_jd

# Load job title and description from a text file
with open("job_description.txt", "r") as file:
    lines = file.readlines()
    job_title = lines[0].strip().split(": ")[1]
    job_description = lines[1].strip().split(": ")[1]

# Read candidate information from the CSV file
candidates = []
with open("resumes_data.csv", "r", newline="", encoding="utf-8") as file:
    reader = csv.DictReader(file)
    for row in reader:
        candidate_id = row["Candidate ID"]  # Replace with actual candidate ID column name
        candidate_data_jd = score_job_description(job_title, job_description)
        candidates.append((candidate_id, candidate_data_jd))

# Print the list of tuples containing candidate ID, data dictionary
for candidate_id, data in candidates:
    print(f"Candidate ID: {candidate_id}")
    print(data)

print("Resumes rated and scores stored in the list of tuples.")








Candidate ID: 6294b12b-48f7-416a-ae23-d948655b18eb
{'job_title': 'Software Engineer', 'old_job_description': 'We are looking for a software engineer with experience in Python and web development. The ideal candidate should have a strong understanding of software architecture, be proficient in modern web frameworks, and have a track record of delivering high-quality code. The role involves collaborating with cross-functional teams to develop and deploy software solutions that meet customer needs.', 'job_description_score': 8.5, 'explanation': '\nThe job description is comprehensive and clearly outlines the required skills and responsibilities. It includes specific qualifications such as experience in Python and web development, along with a strong understanding of software architecture. The mention of collaborating with cross-functional teams and delivering high-quality code indicates that teamwork and quality are valued in the role. However, a slight deduction is made due to the lack o

In [18]:
data


{'job_title': 'Software Engineer',
 'old_job_description': 'We are looking for a software engineer with experience in Python and web development. The ideal candidate should have a strong understanding of software architecture, be proficient in modern web frameworks, and have a track record of delivering high-quality code. The role involves collaborating with cross-functional teams to develop and deploy software solutions that meet customer needs.',
 'job_description_score': 7.0,
 'explanation': '\nThe job description is clear about the required skills and experience, specifying Python and web development. It also emphasizes the importance of understanding software architecture and delivering high-quality code. The mention of collaborating with cross-functional teams demonstrates the need for effective teamwork. While the description covers the basics, it could provide more details about specific frameworks or technologies the candidate should be familiar with.'}