In [5]:
import os
import csv
import PyPDF2
import re  # Import the regex module
from docx import Document
import re
import zipfile
import uuid



def get_job_description():
    job_description = input("Enter the job description: ")
    return job_description

def extract_text_from_pdf(pdf_path):
    with open(pdf_path, "rb") as pdf_file:
        reader = PyPDF2.PdfReader(pdf_file)
        text = ""
        for page in reader.pages:
            text += page.extract_text()
        return text

def extract_text_from_docx(docx_path):
    doc = Document(docx_path)
    text = ""
    for paragraph in doc.paragraphs:
        text += paragraph.text + "\n"
    return text

def remove_empty_lines(text):
    return text.replace("\n", " ")

def extract_emails(text):
    email_pattern = r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,7}\b"
    emails = re.findall(email_pattern, text)
    return emails

def process_resumes(folder_path):
    resumes_data = []
    for filename in os.listdir(folder_path):
        file_path = os.path.join(folder_path, filename)

        candidate_id = str(uuid.uuid4())
        
        if filename.lower().endswith(".pdf"):
            candidate_name = os.path.splitext(filename)[0]  # Extract candidate name without extension
            resume_text = extract_text_from_pdf(file_path)
        elif filename.lower().endswith(".docx"):
            candidate_name = os.path.splitext(filename)[0]
            resume_text = extract_text_from_docx(file_path)
        elif filename.lower().endswith(".txt"):
            candidate_name = os.path.splitext(filename)[0]
            with open(file_path, "r") as txt_file:
                resume_text = txt_file.read()
        elif filename.lower().endswith(".zip"):
            candidate_name = os.path.splitext(filename)[0]
            zip_folder = os.path.join(folder_path, candidate_name)
            with zipfile.ZipFile(file_path, "r") as zip_ref:
                zip_ref.extractall(zip_folder)
            resume_texts = []
            for nested_filename in os.listdir(zip_folder):
                nested_file_path = os.path.join(zip_folder, nested_filename)
                if nested_filename.lower().endswith((".pdf", ".docx", ".txt")):
                    nested_resume_text = ""
                    if nested_filename.lower().endswith(".pdf"):
                        nested_resume_text = extract_text_from_pdf(nested_file_path)
                    elif nested_filename.lower().endswith(".docx"):
                        nested_resume_text = extract_text_from_docx(nested_file_path)
                    elif nested_filename.lower().endswith(".txt"):
                        with open(nested_file_path, "r") as txt_file:
                            nested_resume_text = txt_file.read()
                    resume_texts.append(nested_resume_text)
            combined_resume_text = "\n".join(resume_texts)
            cleaned_resume_text = remove_empty_lines(combined_resume_text)
            emails = extract_emails(cleaned_resume_text)
            resumes_data.append((candidate_id, candidate_name, cleaned_resume_text, ", ".join(emails)))
            # Clean up extracted zip folder
            os.remove(file_path)
            os.rmdir(zip_folder)
        else:
            print(f"Unsupported format for file: {filename}")
            continue
        
        cleaned_resume_text = remove_empty_lines(resume_text)
        emails = extract_emails(cleaned_resume_text)
        resumes_data.append((candidate_id,candidate_name, cleaned_resume_text, ", ".join(emails)))

    
    return resumes_data



def main():
    folder_path = input("Enter the path to the folder containing resumes: ")
    resumes_data = process_resumes(folder_path)
    
    # job_description = get_job_description()
    
    # print("\nJob Description:")
    # print(job_description)

    csv_filename = "resumes_data.csv"
    with open(csv_filename, "w", newline="", encoding="utf-8") as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(["Candidate ID","Candidate Name", "Resume Text","Emails"])
        csv_writer.writerows(resumes_data)

    print(f"Data saved to {csv_filename}")

if __name__ == "__main__":
    main()


Data saved to resumes_data.csv


In [1]:
!pip install transformers


Collecting transformers
  Using cached transformers-4.31.0-py3-none-any.whl (7.4 MB)
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.3-cp39-cp39-win_amd64.whl (3.5 MB)
     ---------------------------------------- 3.5/3.5 MB 2.7 MB/s eta 0:00:00
Collecting safetensors>=0.3.1
  Downloading safetensors-0.3.2-cp39-cp39-win_amd64.whl (266 kB)
     -------------------------------------- 266.6/266.6 kB 3.3 MB/s eta 0:00:00
Collecting huggingface-hub<1.0,>=0.14.1
  Using cached huggingface_hub-0.16.4-py3-none-any.whl (268 kB)
Installing collected packages: tokenizers, safetensors, huggingface-hub, transformers
Successfully installed huggingface-hub-0.16.4 safetensors-0.3.2 tokenizers-0.13.3 transformers-4.31.0


In [None]:
from transformers import T5ForConditionalGeneration, T5Tokenizer

def improve_description(job_description):
    # Load the pre-trained T5 model and tokenizer
    model_name = "t5-large"  # You can change this to a different T5 model if needed
    model = T5ForConditionalGeneration.from_pretrained(model_name)
    tokenizer = T5Tokenizer.from_pretrained(model_name)

    # Preprocess the input for T5
    input_text = "improve: " + job_description
    input_ids = tokenizer.encode(input_text, return_tensors="pt")

    # Generate improved description
    output_ids = model.generate(input_ids, max_length=150, num_return_sequences=1, no_repeat_ngram_size=2)

    # Decode the generated output
    improved_description = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return improved_description

def main():
    job_description = input("Enter the job description: ")
    improved_description = improve_description(job_description)

    print("\nOriginal Job Description:")
    print(job_description)

    print("\nImproved Job Description:")
    print(improved_description)

if __name__ == "__main__":
    main()

Downloading (…)lve/main/config.json: 100%|██████████| 1.21k/1.21k [00:00<00:00, 1.21MB/s]
Downloading model.safetensors:   3%|▎         | 83.9M/2.95G [00:14<08:28, 5.64MB/s]

: 

: 

In [1]:
import torch
from transformers import XLNetTokenizer, XLNetForSequenceClassification,XLNetLMHeadModel

# Load fine-tuned XLNet model and tokenizer
model_name = "xlnet-base-cased"
tokenizer = XLNetTokenizer.from_pretrained(model_name)
model = XLNetLMHeadModel.from_pretrained(model_name)

# Example job title
job_title = "Software Engineer"

# Generate improved job description
input_text = f"Job Title: {job_title}\nImprove the following job description: software engineer c, c++, java, uml, xamp, agile. defence/communications our client is a successful and expanding company developing air defence systems, information systems, targeting systems, communication systems. they now require an additional software engineer to support the design evolution from requirements into software code. identification of test requirements and the development of component test harnesses. supporting design and code review activities to derisk the design evolution qualifications for the software engineer. degree in software engineering, computer science, maths, physics or equivalent good degree pref 1st or ****:1, meng or equiv. essential requirements: software engineer experience in one or more of the following: object oriented analysis and design (preferably  uml)  unix /linux or windows, c, c++, java. html, php candidates from a defence background experience of agile development an advantage. experience of xamp technologies useful. desirable requirements:  software engineer  experience in one or more of the following: realtime design. mathematical or algorithm coding  network protocols  oo case tools  structured design. methods  data and voice networks  computer telephony integration (cti) an appreciation of the following: testing techniques and strategies  configuration management personal attributes flexibility, adaptability, team player, good communication skills, discipline the company provide excellent career prospects and career development recent software graduates will also be considered for these roles. keywords software engineer, c, c++, java, realtime, embedded, uml, linux, unix, oo, agile, xamp. defence, communication systems. salary ****k****k location fleet, hampshire "
input_ids = tokenizer.encode(input_text, return_tensors="pt")

# Generate improved job description using the model
with torch.no_grad():
    output_ids = model.generate(input_ids,max_new_tokens = 300,max_length=300)

# Decode and print the improved job description
improved_description = tokenizer.decode(output_ids[0], skip_special_tokens=True,max_new_tokens = 500)
print(improved_description)

  from .autonotebook import tqdm as notebook_tqdm
Both `max_new_tokens` (=300) and `max_length`(=300) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
This is a friendly reminder - the current text generation call will exceed the model's predefined maximum length (-1). Depending on the model, you may observe exceptions, performance degradation, or nothing at all.


Job Title: Software Engineer Improve the following job description: software engineer c, c++, java, uml, xamp, agile. defence/communications our client is a successful and expanding company developing air defence systems, information systems, targeting systems, communication systems. they now require an additional software engineer to support the design evolution from requirements into software code. identification of test requirements and the development of component test harnesses. supporting design and code review activities to derisk the design evolution qualifications for the software engineer. degree in software engineering, computer science, maths, physics or equivalent good degree pref 1st or ****:1, meng or equiv. essential requirements: software engineer experience in one or more of the following: object oriented analysis and design (preferably uml) unix /linux or windows, c, c++, java. html, php candidates from a defence background experience of agile development an advantag

In [16]:
from transformers import T5ForConditionalGeneration, T5Tokenizer

def paraphrase_text(original_text, model, tokenizer, max_length=500, num_return_sequences=1):
    input_text = "paraphrase: " + original_text
    input_ids = tokenizer.encode(input_text, return_tensors="pt")

    # Generate paraphrases
    output_ids = model.generate(input_ids, max_length=max_length, num_return_sequences=num_return_sequences, no_repeat_ngram_size=2, top_k=50, top_p=0.95)

    paraphrases = [tokenizer.decode(output_id, skip_special_tokens=True) for output_id in output_ids]
    return paraphrases

def main():
    # Load the pre-trained T5 model and tokenizer
    model_name = "t5-small"  # You can change this to a different T5 model if needed
    model = T5ForConditionalGeneration.from_pretrained(model_name)
    tokenizer = T5Tokenizer.from_pretrained(model_name)

    original_text = input("Enter the original text: ")
    paraphrases = paraphrase_text(original_text, model, tokenizer)

    print("\nOriginal Text:")
    print(original_text)

    print("\nParaphrases:")
    for i, paraphrase in enumerate(paraphrases, start=1):
        print(f"Paraphrase {i}: {paraphrase}")

if __name__ == "__main__":
    main()



Original Text:
user experience developer our client requires a user experience developer to help develop and maintain new and existing projects. the successful candidate will report directly to the technical lead. the post will be based in the farnham, surrey area. applicants applicants will have 1**** years flex development experience and should be able to provide a portfolio of their work. successful applicants will be those seeking a challenging opportunity with an exciting and expanding organisation and who are enthusiastic, flexible and personable, able to work well on both an individual basis and as part of a development team. the positions require good communication skills, both oral and written. specific technical skills/knowledge actionscript 3 flex adobe air flashbuilder basic technical skills/knowledge oo programming and/or mvc framework (highly desirable) software design using uml (desirable) test driven development (highly desirable) version control (desirable) other skil

In [2]:
!pip install torch


Collecting torch
  Downloading torch-2.0.1-cp311-cp311-win_amd64.whl (172.3 MB)
                                              0.0/172.3 MB ? eta -:--:--
                                              0.0/172.3 MB ? eta -:--:--
                                              0.0/172.3 MB ? eta -:--:--
                                              0.0/172.3 MB ? eta -:--:--
                                              0.0/172.3 MB ? eta -:--:--
                                              0.0/172.3 MB ? eta -:--:--
                                            0.0/172.3 MB 115.9 kB/s eta 0:24:47
                                            0.0/172.3 MB 115.9 kB/s eta 0:24:47
                                            0.0/172.3 MB 115.9 kB/s eta 0:24:47
                                            0.0/172.3 MB 115.9 kB/s eta 0:24:47
                                            0.0/172.3 MB 115.9 kB/s eta 0:24:47
                                            0.0/172.3 MB 115.9 kB/s eta 0:24:47
  


[notice] A new release of pip is available: 23.1.2 -> 23.2.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [17]:
!pip install openai


Collecting openai
  Downloading openai-0.27.8-py3-none-any.whl (73 kB)
                                              0.0/73.6 kB ? eta -:--:--
     ----------------                         30.7/73.6 kB ? eta -:--:--
     -------------------------------------  71.7/73.6 kB 787.7 kB/s eta 0:00:01
     -------------------------------------- 73.6/73.6 kB 673.5 kB/s eta 0:00:00
Collecting aiohttp (from openai)
  Downloading aiohttp-3.8.5-cp311-cp311-win_amd64.whl (320 kB)
                                              0.0/320.6 kB ? eta -:--:--
     ----                                    41.0/320.6 kB 2.0 MB/s eta 0:00:01
     -----------                             92.2/320.6 kB 1.3 MB/s eta 0:00:01
     ----------------                       143.4/320.6 kB 1.4 MB/s eta 0:00:01
     -----------------------                194.6/320.6 kB 1.3 MB/s eta 0:00:01
     ------------------------------         256.0/320.6 kB 1.3 MB/s eta 0:00:01
     ------------------------------------   307.2/320.6


[notice] A new release of pip is available: 23.1.2 -> 23.2.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [18]:
import openai

# Replace 'your_api_key' with your actual GPT-3 API key
api_key = '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'
openai.api_key = api_key

# Prompt for generating text
prompt = "Once upon a time"

# Generate text using GPT-3
response = openai.Completion.create(
    engine="text-davinci-003",  # Specify the engine you want to use
    prompt=prompt,
    max_tokens=50  # Set the maximum number of tokens in the generated output
)

# Print the generated text
print(response.choices[0].text.strip())


RateLimitError: You exceeded your current quota, please check your plan and billing details.