In [1]:
import os
import fitz  # PyMuPDF
import docx
import re
import json
import requests
import time
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# ---- CONFIGURATION ----
# Template text file whose bracketed placeholders are filled in by customise_resume().
ey_template_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\EY_sample_resume_template.txt"
# Directory that receives the generated resume files; created when this cell runs if missing.
output_dir = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes"
os.makedirs(output_dir, exist_ok=True)

# spaCy English pipeline, loaded once here and used by keyword_coverage().
nlp = spacy.load("en_core_web_sm")

# ---- LM Studio CALL ----
def call_lmstudio(prompt, model="zephyr-7b-beta.Q4_K_M.gguf", retries=3):
    """Send a single-turn chat prompt to the local LM Studio server.

    Args:
        prompt: User message content for the chat completion request.
        model: Model identifier placed in the request payload.
        retries: Maximum number of attempts; values below 1 make no request.

    Returns:
        The content of the first completion choice, or "" when every attempt
        fails (the last error is printed) or no attempt is made.
    """
    url = "http://localhost:1234/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "stream": False
    }
    for attempt in range(retries):
        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=180)
            # Report HTTP errors as such instead of as a KeyError on the error body.
            response.raise_for_status()
            return response.json()['choices'][0]['message']['content']
        except (requests.RequestException, KeyError, IndexError, TypeError, ValueError) as e:
            if attempt == retries - 1:
                print("LLM call failed:", e)
                return ""
            # Brief pause before the next attempt.
            time.sleep(2)
    # Reached only when retries < 1: keep the "" contract rather than returning None.
    return ""

# ---- READ RESUME ----
def read_resume_text(file_path):
    """Extract plain text from a resume file.

    Args:
        file_path: Path string to a .pdf, .docx or plain-text file. The
            extension is matched case-insensitively; anything that is not
            .pdf or .docx is read as UTF-8 text.

    Returns:
        PDF page texts or non-blank DOCX paragraphs joined with newlines,
        or the raw contents of a text file.
    """
    lowered_path = file_path.lower()
    if lowered_path.endswith(".pdf"):
        # Release the PDF handle as soon as the page text has been collected.
        with fitz.open(file_path) as doc:
            return "\n".join(page.get_text() for page in doc)
    if lowered_path.endswith(".docx"):
        doc = docx.Document(file_path)
        return "\n".join(p.text for p in doc.paragraphs if p.text.strip())
    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()

# ---- METRICS ----
def compute_tfidf_similarity(jd, resume):
    """Return the cosine similarity between the TF-IDF vectors of two texts.

    The vectorizer is fitted on just these two documents, so term weights
    are relative to this pair only.
    """
    matrix = TfidfVectorizer().fit_transform([jd, resume])
    jd_row, resume_row = matrix[0:1], matrix[1:2]
    similarity = cosine_similarity(jd_row, resume_row)
    return similarity[0][0]

def keyword_coverage(jd, resume):
    """Measure how many JD keywords appear as whole words in the resume.

    Keywords are the distinct lower-cased alphabetic, non-stop-word tokens
    that spaCy finds in the job description.

    Args:
        jd: Job description text.
        resume: Resume text checked against the keywords.

    Returns:
        A ``(ratio, matched)`` tuple: the fraction of JD keywords found in
        the resume (0 when the JD yields no keywords) and the sorted list
        of matched keywords.
    """
    jd_keywords = {token.text.lower() for token in nlp(jd) if token.is_alpha and not token.is_stop}
    # Compare token to token: a substring test would count "sas" as present
    # in "disaster". make_doc() only tokenises, which is all is_alpha needs.
    resume_tokens = {token.text.lower() for token in nlp.make_doc(resume) if token.is_alpha}
    # Sorted so repeated runs report the matches in a stable order.
    matched = sorted(jd_keywords & resume_tokens)
    ratio = len(matched) / len(jd_keywords) if jd_keywords else 0
    return ratio, matched

def qualitative_relevance(jd, resume):
    """Return the share of distinct JD words that also occur in the resume.

    Both texts are lower-cased and split on whitespace, so punctuation stays
    attached to words. Returns 0 when the JD contains no words.
    """
    jd_vocab = set(jd.lower().split())
    resume_vocab = set(resume.lower().split())
    if not jd_vocab:
        return 0
    shared = jd_vocab & resume_vocab
    return len(shared) / len(jd_vocab)

def section_audit(text):
    """List which expected section keywords occur anywhere in *text*.

    Matching is a case-insensitive substring test; the result keeps the
    fixed keyword order.
    """
    lowered = text.lower()
    found = []
    for keyword in ("summary", "qualification", "experience", "technical", "skill"):
        if keyword in lowered:
            found.append(keyword)
    return found

# ---- MAIN FUNCTION ----
def customise_resume(prompt_text, resume_path):
    """Generate a JD from a prompt, tailor a resume to it, and save the result.

    Steps: ask the local LLM for a job description, read the resume text,
    ask the LLM to rewrite four labelled sections, substitute them into the
    template at ``ey_template_path``, write the result under ``output_dir``
    and print similarity metrics against the generated JD.

    Args:
        prompt_text: Free-text role request; sent to the LLM and also
            substituted for ``[ROLE]`` in the template.
        resume_path: Path to the resume (.pdf, .docx or text). The file
            stem is used as the candidate name.

    Returns:
        Path of the written resume file, or ``None`` when an LLM call
        returns nothing or no text could be extracted from the resume.
    """
    candidate_name = os.path.splitext(os.path.basename(resume_path))[0]

    print(f"Step 1: Generating JD with only Responsibilities, Requirements, Preferred Skills for prompt → {prompt_text}")
    jd_prompt = f"""Create a structured job description based only on the following three sections:
1. Responsibilities
2. Requirements
3. Preferred Skills

Prompt:
{prompt_text}
"""
    generated_jd = call_lmstudio(jd_prompt)
    if not generated_jd:
        print("JD generation failed.")
        return None

    print("\nGenerated JD:\n", generated_jd)

    print(f"\nStep 2: Reading resume → {resume_path}")
    resume_text = read_resume_text(resume_path)
    if not resume_text.strip():
        # Without resume text the LLM has nothing to ground the rewrite on,
        # which contradicts the "DO NOT INVENT DETAILS" instruction below.
        print("Resume text extraction returned no content.")
        return None

    print("\nStep 3: Customizing resume using resume + generated JD")
    custom_prompt = f"""
You are a resume writer. Given the candidate's resume and the job description, rewrite the following sections to make the resume aligned with the JD. DO NOT INVENT DETAILS. Use only the content in the resume.

Resume:
{resume_text}

Job Description:
{generated_jd}

Provide the output in this format (with labels):
SUMMARY:
...

QUALIFICATIONS:
...

EXPERIENCE:
...

TECHNICAL_SKILLS:
...
"""

    result = call_lmstudio(custom_prompt)
    if not result:
        print("Customization failed.")
        return None

    print("\n=== Raw Output from LLM ===\n")
    print(result)

    # Each section runs from its own label up to the next label; the last
    # one runs to the end of the response. Keys are the template placeholders.
    section_patterns = {
        "[SUMMARY]": r"SUMMARY:(.*?)QUALIFICATIONS:",
        "[QUALIFICATIONS]": r"QUALIFICATIONS:(.*?)EXPERIENCE:",
        "[EXPERIENCE_PLACEHOLDER]": r"EXPERIENCE:(.*?)TECHNICAL_SKILLS:",
        "[TECHNICAL_SKILLS]": r"TECHNICAL_SKILLS:(.*)",
    }
    sections = {}
    for placeholder, pattern in section_patterns.items():
        match = re.search(pattern, result, re.DOTALL | re.IGNORECASE)
        if match is None:
            # Make a format deviation visible instead of silently blanking the section.
            print(f"Warning: no content found for {placeholder} in LLM output; leaving it blank.")
        sections[placeholder] = match.group(1).strip() if match else ""

    # Read EY template
    with open(ey_template_path, "r", encoding="utf-8") as f:
        template = f.read()

    # Fill template
    final_resume = template.replace("[CANDIDATE_NAME]", candidate_name)
    final_resume = final_resume.replace("[ROLE]", prompt_text)
    for placeholder, content in sections.items():
        final_resume = final_resume.replace(placeholder, content)

    # Save customized resume
    output_path = os.path.join(output_dir, f"{candidate_name}_customised_resumerajviifrs9.txt")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_resume)

    print("✅ Resume customized and saved to:", output_path)

    # ---- ACCURACY METRICS ----
    # All metrics compare the generated JD with the filled-in template text.
    tfidf_score = compute_tfidf_similarity(generated_jd, final_resume)
    keyword_match_ratio, matched_keywords = keyword_coverage(generated_jd, final_resume)
    qualitative_score = qualitative_relevance(generated_jd, final_resume)
    section_flags = section_audit(final_resume)

    print("\nAccuracy Metrics:")
    print(f"TF-IDF Similarity:            {tfidf_score:.2f}")
    print(f"Keyword Coverage Ratio:       {keyword_match_ratio:.2f}")
    print(f"Qualitative Relevance:        {qualitative_score:.2f}")
    print(f"Matched Keywords:             {matched_keywords}")
    print(f"Section Presence Flags:       {section_flags}\n")

    return output_path

# ---- RUN EXAMPLE ----
# Role request passed to the LLM to generate the job description.
prompt = "Create a JD for a retail IFRS9 model validation requirement. Mention that SAS is a must."
# Source resume; its file stem becomes the candidate name in the output file.
resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\Rajvi Doshi.pdf"

# Run the full pipeline; the returned output path is the cell's result.
customise_resume(prompt, resume_path)


Step 1: Generating JD with only Responsibilities, Requirements, Preferred Skills for prompt → Create a JD for a retail IFRS9 model validation requirement. Mention that SAS is a must.

Generated JD:
  "Job Title: Model Validation Specialist - IFRS9

Section 1: Responsibilities
• Conduct regular reviews of IFRS9 models to ensure compliance with regulatory requirements and internal policies
• Identify and document any issues or anomalies found during validation process and work with relevant teams to rectify
• Maintain documentation of all validation activities and results for auditing purposes
• Collaborate with other functions to ensure accurate and timely data input for model inputs and parameters

Section 2: Requirements
• Bachelor's or Master's degree in Finance, Accounting, Mathematics, or a related field
• Familiarity with IFRS9 accounting standards and their application to financial institutions
• Strong analytical skills and attention to detail required
• Proficiency in SAS is re

'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\Rajvi Doshi_customised_resumerajviifrs9.txt'

In [2]:
import os
import fitz  # PyMuPDF
import docx
import re
import json
import requests
import time
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# ---- CONFIGURATION ----
# Template text file whose bracketed placeholders are filled in by customise_resume().
ey_template_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\EY_sample_resume_template.txt"
# Directory that receives the generated resume files; created when this cell runs if missing.
output_dir = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes"
os.makedirs(output_dir, exist_ok=True)

# spaCy English pipeline, loaded once here and used by keyword_coverage().
nlp = spacy.load("en_core_web_sm")

# ---- LM Studio CALL ----
def call_lmstudio(prompt, model="zephyr-7b-beta.Q4_K_M.gguf", retries=3):
    """Send a single-turn chat prompt to the local LM Studio server.

    Args:
        prompt: User message content for the chat completion request.
        model: Model identifier placed in the request payload.
        retries: Maximum number of attempts; values below 1 make no request.

    Returns:
        The content of the first completion choice, or "" when every attempt
        fails (the last error is printed) or no attempt is made.
    """
    url = "http://localhost:1234/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "stream": False
    }
    for attempt in range(retries):
        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=180)
            # Report HTTP errors as such instead of as a KeyError on the error body.
            response.raise_for_status()
            return response.json()['choices'][0]['message']['content']
        except (requests.RequestException, KeyError, IndexError, TypeError, ValueError) as e:
            if attempt == retries - 1:
                print("LLM call failed:", e)
                return ""
            # Brief pause before the next attempt.
            time.sleep(2)
    # Reached only when retries < 1: keep the "" contract rather than returning None.
    return ""

# ---- READ RESUME ----
def read_resume_text(file_path):
    """Extract plain text from a resume file.

    Args:
        file_path: Path string to a .pdf, .docx or plain-text file. The
            extension is matched case-insensitively; anything that is not
            .pdf or .docx is read as UTF-8 text.

    Returns:
        PDF page texts or non-blank DOCX paragraphs joined with newlines,
        or the raw contents of a text file.
    """
    lowered_path = file_path.lower()
    if lowered_path.endswith(".pdf"):
        # Release the PDF handle as soon as the page text has been collected.
        with fitz.open(file_path) as doc:
            return "\n".join(page.get_text() for page in doc)
    if lowered_path.endswith(".docx"):
        doc = docx.Document(file_path)
        return "\n".join(p.text for p in doc.paragraphs if p.text.strip())
    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()

# ---- METRICS ----
def compute_tfidf_similarity(jd, resume):
    """Return the cosine similarity between the TF-IDF vectors of two texts.

    The vectorizer is fitted on just these two documents, so term weights
    are relative to this pair only.
    """
    matrix = TfidfVectorizer().fit_transform([jd, resume])
    jd_row, resume_row = matrix[0:1], matrix[1:2]
    similarity = cosine_similarity(jd_row, resume_row)
    return similarity[0][0]

def keyword_coverage(jd, resume):
    """Measure how many JD keywords appear as whole words in the resume.

    Keywords are the distinct lower-cased alphabetic, non-stop-word tokens
    that spaCy finds in the job description.

    Args:
        jd: Job description text.
        resume: Resume text checked against the keywords.

    Returns:
        A ``(ratio, matched)`` tuple: the fraction of JD keywords found in
        the resume (0 when the JD yields no keywords) and the sorted list
        of matched keywords.
    """
    jd_keywords = {token.text.lower() for token in nlp(jd) if token.is_alpha and not token.is_stop}
    # Compare token to token: a substring test would count "sas" as present
    # in "disaster". make_doc() only tokenises, which is all is_alpha needs.
    resume_tokens = {token.text.lower() for token in nlp.make_doc(resume) if token.is_alpha}
    # Sorted so repeated runs report the matches in a stable order.
    matched = sorted(jd_keywords & resume_tokens)
    ratio = len(matched) / len(jd_keywords) if jd_keywords else 0
    return ratio, matched

def qualitative_relevance(jd, resume):
    """Return the share of distinct JD words that also occur in the resume.

    Both texts are lower-cased and split on whitespace, so punctuation stays
    attached to words. Returns 0 when the JD contains no words.
    """
    jd_vocab = set(jd.lower().split())
    resume_vocab = set(resume.lower().split())
    if not jd_vocab:
        return 0
    shared = jd_vocab & resume_vocab
    return len(shared) / len(jd_vocab)

def section_audit(text):
    """List which expected section keywords occur anywhere in *text*.

    Matching is a case-insensitive substring test; the result keeps the
    fixed keyword order.
    """
    lowered = text.lower()
    found = []
    for keyword in ("summary", "qualification", "experience", "technical", "skill"):
        if keyword in lowered:
            found.append(keyword)
    return found

# ---- MAIN FUNCTION ----
def customise_resume(prompt_text, resume_path):
    """Generate a JD from a prompt, tailor a resume to it, and save the result.

    Steps: ask the local LLM for a job description, read the resume text,
    ask the LLM to rewrite four labelled sections, substitute them into the
    template at ``ey_template_path``, write the result under ``output_dir``
    and print similarity metrics against the generated JD.

    Args:
        prompt_text: Free-text role request; sent to the LLM and also
            substituted for ``[ROLE]`` in the template.
        resume_path: Path to the resume (.pdf, .docx or text). The file
            stem is used as the candidate name.

    Returns:
        Path of the written resume file, or ``None`` when an LLM call
        returns nothing or no text could be extracted from the resume.
    """
    candidate_name = os.path.splitext(os.path.basename(resume_path))[0]

    print(f"Step 1: Generating JD with only Responsibilities, Requirements, Preferred Skills for prompt → {prompt_text}")
    jd_prompt = f"""Create a structured job description based only on the following three sections:
1. Responsibilities
2. Requirements
3. Preferred Skills

Prompt:
{prompt_text}
"""
    generated_jd = call_lmstudio(jd_prompt)
    if not generated_jd:
        print("JD generation failed.")
        return None

    print("\nGenerated JD:\n", generated_jd)

    print(f"\nStep 2: Reading resume → {resume_path}")
    resume_text = read_resume_text(resume_path)
    if not resume_text.strip():
        # Without resume text the LLM has nothing to ground the rewrite on,
        # which contradicts the "DO NOT INVENT DETAILS" instruction below.
        print("Resume text extraction returned no content.")
        return None

    print("\nStep 3: Customizing resume using resume + generated JD")
    custom_prompt = f"""
You are a resume writer. Given the candidate's resume and the job description, rewrite the following sections to make the resume aligned with the JD. DO NOT INVENT DETAILS. Use only the content in the resume.

Resume:
{resume_text}

Job Description:
{generated_jd}

Provide the output in this format (with labels):
SUMMARY:
...

QUALIFICATIONS:
...

EXPERIENCE:
...

TECHNICAL_SKILLS:
...
"""

    result = call_lmstudio(custom_prompt)
    if not result:
        print("Customization failed.")
        return None

    print("\n=== Raw Output from LLM ===\n")
    print(result)

    # Each section runs from its own label up to the next label; the last
    # one runs to the end of the response. Keys are the template placeholders.
    section_patterns = {
        "[SUMMARY]": r"SUMMARY:(.*?)QUALIFICATIONS:",
        "[QUALIFICATIONS]": r"QUALIFICATIONS:(.*?)EXPERIENCE:",
        "[EXPERIENCE_PLACEHOLDER]": r"EXPERIENCE:(.*?)TECHNICAL_SKILLS:",
        "[TECHNICAL_SKILLS]": r"TECHNICAL_SKILLS:(.*)",
    }
    sections = {}
    for placeholder, pattern in section_patterns.items():
        match = re.search(pattern, result, re.DOTALL | re.IGNORECASE)
        if match is None:
            # Make a format deviation visible instead of silently blanking the section.
            print(f"Warning: no content found for {placeholder} in LLM output; leaving it blank.")
        sections[placeholder] = match.group(1).strip() if match else ""

    # Read EY template
    with open(ey_template_path, "r", encoding="utf-8") as f:
        template = f.read()

    # Fill template
    final_resume = template.replace("[CANDIDATE_NAME]", candidate_name)
    final_resume = final_resume.replace("[ROLE]", prompt_text)
    for placeholder, content in sections.items():
        final_resume = final_resume.replace(placeholder, content)

    # Save customized resume
    output_path = os.path.join(output_dir, f"{candidate_name}_customised_resume.txt")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_resume)

    print("✅ Resume customized and saved to:", output_path)

    # ---- ACCURACY METRICS ----
    # All metrics compare the generated JD with the filled-in template text.
    tfidf_score = compute_tfidf_similarity(generated_jd, final_resume)
    keyword_match_ratio, matched_keywords = keyword_coverage(generated_jd, final_resume)
    qualitative_score = qualitative_relevance(generated_jd, final_resume)
    section_flags = section_audit(final_resume)

    print("\nAccuracy Metrics:")
    print(f"TF-IDF Similarity:            {tfidf_score:.2f}")
    print(f"Keyword Coverage Ratio:       {keyword_match_ratio:.2f}")
    print(f"Qualitative Relevance:        {qualitative_score:.2f}")
    print(f"Matched Keywords:             {matched_keywords}")
    print(f"Section Presence Flags:       {section_flags}\n")

    return output_path

# ---- RUN EXAMPLE ----
# Role request passed to the LLM to generate the job description.
prompt = "Create a JD for a retail Scorecards model development requirement. Mention that SQL is a must."
# Source resume; its file stem becomes the candidate name in the output file.
resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\Nidhika-Tomar.pdf"

# Run the full pipeline; the returned output path is the cell's result.
customise_resume(prompt, resume_path)


Step 1: Generating JD with only Responsibilities, Requirements, Preferred Skills for prompt → Create a JD for a retail Scorecards model development requirement. Mention that SQL is a must.

Generated JD:
  "Job Description:

Title: Retail Scorecards Model Development

Section 1: Responsibilities
- Develop and implement models to analyze sales data and generate insights for our retail stores
- Collaborate with cross-functional teams to ensure models are aligned with business objectives and strategies
- Monitor model performance and make necessary adjustments for optimal results

Section 2: Requirements
- Bachelor's or Master's degree in Mathematics, Statistics, Computer Science, or a related field
- Strong proficiency in SQL database management
- Experience working with big data analytics tools such as Hadoop, Spark, or Cassandra
- Ability to communicate complex technical information to non-technical stakeholders

Section 3: Preferred Skills
- Knowledge of Python or R for data science a

'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\Nidhika-Tomar_customised_resume.txt'

In [3]:
import os
import fitz  # PyMuPDF
import docx
import re
import json
import requests
import time
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# ---- CONFIGURATION ----
# Template text file whose bracketed placeholders are filled in by customise_resume().
ey_template_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\EY_sample_resume_template.txt"
# Directory that receives the generated resume files; created when this cell runs if missing.
output_dir = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes"
os.makedirs(output_dir, exist_ok=True)

# spaCy English pipeline, loaded once here and used by keyword_coverage().
nlp = spacy.load("en_core_web_sm")

# ---- LM Studio CALL ----
def call_lmstudio(prompt, model="zephyr-7b-beta.Q4_K_M.gguf", retries=3):
    """Send a single-turn chat prompt to the local LM Studio server.

    Args:
        prompt: User message content for the chat completion request.
        model: Model identifier placed in the request payload.
        retries: Maximum number of attempts; values below 1 make no request.

    Returns:
        The content of the first completion choice, or "" when every attempt
        fails (the last error is printed) or no attempt is made.
    """
    url = "http://localhost:1234/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "stream": False
    }
    for attempt in range(retries):
        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=180)
            # Report HTTP errors as such instead of as a KeyError on the error body.
            response.raise_for_status()
            return response.json()['choices'][0]['message']['content']
        except (requests.RequestException, KeyError, IndexError, TypeError, ValueError) as e:
            if attempt == retries - 1:
                print("LLM call failed:", e)
                return ""
            # Brief pause before the next attempt.
            time.sleep(2)
    # Reached only when retries < 1: keep the "" contract rather than returning None.
    return ""

# ---- READ RESUME ----
def read_resume_text(file_path):
    """Extract plain text from a resume file.

    Args:
        file_path: Path string to a .pdf, .docx or plain-text file. The
            extension is matched case-insensitively; anything that is not
            .pdf or .docx is read as UTF-8 text.

    Returns:
        PDF page texts or non-blank DOCX paragraphs joined with newlines,
        or the raw contents of a text file.
    """
    lowered_path = file_path.lower()
    if lowered_path.endswith(".pdf"):
        # Release the PDF handle as soon as the page text has been collected.
        with fitz.open(file_path) as doc:
            return "\n".join(page.get_text() for page in doc)
    if lowered_path.endswith(".docx"):
        doc = docx.Document(file_path)
        return "\n".join(p.text for p in doc.paragraphs if p.text.strip())
    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()

# ---- METRICS ----
def compute_tfidf_similarity(jd, resume):
    """Return the cosine similarity between the TF-IDF vectors of two texts.

    The vectorizer is fitted on just these two documents, so term weights
    are relative to this pair only.
    """
    matrix = TfidfVectorizer().fit_transform([jd, resume])
    jd_row, resume_row = matrix[0:1], matrix[1:2]
    similarity = cosine_similarity(jd_row, resume_row)
    return similarity[0][0]

def keyword_coverage(jd, resume):
    """Measure how many JD keywords appear as whole words in the resume.

    Keywords are the distinct lower-cased alphabetic, non-stop-word tokens
    that spaCy finds in the job description.

    Args:
        jd: Job description text.
        resume: Resume text checked against the keywords.

    Returns:
        A ``(ratio, matched)`` tuple: the fraction of JD keywords found in
        the resume (0 when the JD yields no keywords) and the sorted list
        of matched keywords.
    """
    jd_keywords = {token.text.lower() for token in nlp(jd) if token.is_alpha and not token.is_stop}
    # Compare token to token: a substring test would count "sas" as present
    # in "disaster". make_doc() only tokenises, which is all is_alpha needs.
    resume_tokens = {token.text.lower() for token in nlp.make_doc(resume) if token.is_alpha}
    # Sorted so repeated runs report the matches in a stable order.
    matched = sorted(jd_keywords & resume_tokens)
    ratio = len(matched) / len(jd_keywords) if jd_keywords else 0
    return ratio, matched

def qualitative_relevance(jd, resume):
    """Return the share of distinct JD words that also occur in the resume.

    Both texts are lower-cased and split on whitespace, so punctuation stays
    attached to words. Returns 0 when the JD contains no words.
    """
    jd_vocab = set(jd.lower().split())
    resume_vocab = set(resume.lower().split())
    if not jd_vocab:
        return 0
    shared = jd_vocab & resume_vocab
    return len(shared) / len(jd_vocab)

def section_audit(text):
    """List which expected section keywords occur anywhere in *text*.

    Matching is a case-insensitive substring test; the result keeps the
    fixed keyword order.
    """
    lowered = text.lower()
    found = []
    for keyword in ("summary", "qualification", "experience", "technical", "skill"):
        if keyword in lowered:
            found.append(keyword)
    return found

# ---- MAIN FUNCTION ----
def customise_resume(prompt_text, resume_path):
    """Generate a JD from a prompt, tailor a resume to it, and save the result.

    Steps: ask the local LLM for a job description, read the resume text,
    ask the LLM to rewrite four labelled sections, substitute them into the
    template at ``ey_template_path``, write the result under ``output_dir``
    and print similarity metrics against the generated JD.

    Args:
        prompt_text: Free-text role request; sent to the LLM and also
            substituted for ``[ROLE]`` in the template.
        resume_path: Path to the resume (.pdf, .docx or text). The file
            stem is used as the candidate name.

    Returns:
        Path of the written resume file, or ``None`` when an LLM call
        returns nothing or no text could be extracted from the resume.
    """
    candidate_name = os.path.splitext(os.path.basename(resume_path))[0]

    print(f"Step 1: Generating JD with only Responsibilities, Requirements, Preferred Skills for prompt → {prompt_text}")
    jd_prompt = f"""Create a structured job description based only on the following three sections:
1. Responsibilities
2. Requirements
3. Preferred Skills

Prompt:
{prompt_text}
"""
    generated_jd = call_lmstudio(jd_prompt)
    if not generated_jd:
        print("JD generation failed.")
        return None

    print("\nGenerated JD:\n", generated_jd)

    print(f"\nStep 2: Reading resume → {resume_path}")
    resume_text = read_resume_text(resume_path)
    if not resume_text.strip():
        # Without resume text the LLM has nothing to ground the rewrite on,
        # which contradicts the "DO NOT INVENT DETAILS" instruction below.
        print("Resume text extraction returned no content.")
        return None

    print("\nStep 3: Customizing resume using resume + generated JD")
    custom_prompt = f"""
You are a resume writer. Given the candidate's resume and the job description, rewrite the following sections to make the resume aligned with the JD. DO NOT INVENT DETAILS. Use only the content in the resume.

Resume:
{resume_text}

Job Description:
{generated_jd}

Provide the output in this format (with labels):
SUMMARY:
...

QUALIFICATIONS:
...

EXPERIENCE:
...

TECHNICAL_SKILLS:
...
"""

    result = call_lmstudio(custom_prompt)
    if not result:
        print("Customization failed.")
        return None

    print("\n=== Raw Output from LLM ===\n")
    print(result)

    # Each section runs from its own label up to the next label; the last
    # one runs to the end of the response. Keys are the template placeholders.
    section_patterns = {
        "[SUMMARY]": r"SUMMARY:(.*?)QUALIFICATIONS:",
        "[QUALIFICATIONS]": r"QUALIFICATIONS:(.*?)EXPERIENCE:",
        "[EXPERIENCE_PLACEHOLDER]": r"EXPERIENCE:(.*?)TECHNICAL_SKILLS:",
        "[TECHNICAL_SKILLS]": r"TECHNICAL_SKILLS:(.*)",
    }
    sections = {}
    for placeholder, pattern in section_patterns.items():
        match = re.search(pattern, result, re.DOTALL | re.IGNORECASE)
        if match is None:
            # Make a format deviation visible instead of silently blanking the section.
            print(f"Warning: no content found for {placeholder} in LLM output; leaving it blank.")
        sections[placeholder] = match.group(1).strip() if match else ""

    # Read EY template
    with open(ey_template_path, "r", encoding="utf-8") as f:
        template = f.read()

    # Fill template
    final_resume = template.replace("[CANDIDATE_NAME]", candidate_name)
    final_resume = final_resume.replace("[ROLE]", prompt_text)
    for placeholder, content in sections.items():
        final_resume = final_resume.replace(placeholder, content)

    # Save customized resume
    output_path = os.path.join(output_dir, f"{candidate_name}_customised_resumearnab.txt")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_resume)

    print("✅ Resume customized and saved to:", output_path)

    # ---- ACCURACY METRICS ----
    # All metrics compare the generated JD with the filled-in template text.
    tfidf_score = compute_tfidf_similarity(generated_jd, final_resume)
    keyword_match_ratio, matched_keywords = keyword_coverage(generated_jd, final_resume)
    qualitative_score = qualitative_relevance(generated_jd, final_resume)
    section_flags = section_audit(final_resume)

    print("\nAccuracy Metrics:")
    print(f"TF-IDF Similarity:            {tfidf_score:.2f}")
    print(f"Keyword Coverage Ratio:       {keyword_match_ratio:.2f}")
    print(f"Qualitative Relevance:        {qualitative_score:.2f}")
    print(f"Matched Keywords:             {matched_keywords}")
    print(f"Section Presence Flags:       {section_flags}\n")

    return output_path

# ---- RUN EXAMPLE ----
# Role request passed to the LLM to generate the job description.
prompt = "Create a JD for a retail IFRS9 model validation requirement. Mention that SQL is a must."
# Source resume; its file stem becomes the candidate name in the output file.
resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\CV_Arnab Roy_Model_Validation.pdf"

# Run the full pipeline; the returned output path is the cell's result.
customise_resume(prompt, resume_path)


Step 1: Generating JD with only Responsibilities, Requirements, Preferred Skills for prompt → Create a JD for a retail IFRS9 model validation requirement. Mention that SQL is a must.

Generated JD:
 
Job Title: IFRS9 Model Validation Specialist

Responsibilities:
1. Review and validate financial reports prepared using the IFRS9 standard to ensure accuracy and compliance with regulatory requirements.
2. Identify any errors, inconsistencies or anomalies in financial data and analyze their root causes.
3. Develop and implement effective solutions to address identified issues and improve model validation processes.
4. Collaborate with cross-functional teams, including finance, risk management, and compliance, to ensure alignment with regulatory requirements.

Requirements:
1. Bachelor's or Master's degree in Finance, Accounting, Mathematics or a related field.
2. At least 3 years of experience in IFRS9 model validation or financial reporting.
3. Strong analytical skills and attention to de

'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\CV_Arnab Roy_Model_Validation_customised_resumearnab.txt'

In [4]:
import os
import fitz  # PyMuPDF
import docx
import re
import json
import requests
import time
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# ---- CONFIGURATION ----
# Template text file whose bracketed placeholders are filled in by customise_resume().
ey_template_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\EY_sample_resume_template.txt"
# Directory that receives the generated resume files; created when this cell runs if missing.
output_dir = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes"
os.makedirs(output_dir, exist_ok=True)

# spaCy English pipeline, loaded once here and used by keyword_coverage().
nlp = spacy.load("en_core_web_sm")

# ---- LM Studio CALL ----
def call_lmstudio(prompt, model="zephyr-7b-beta.Q4_K_M.gguf", retries=3):
    """Send a single-turn chat prompt to the local LM Studio server.

    Args:
        prompt: User message content for the chat completion request.
        model: Model identifier placed in the request payload.
        retries: Maximum number of attempts; values below 1 make no request.

    Returns:
        The content of the first completion choice, or "" when every attempt
        fails (the last error is printed) or no attempt is made.
    """
    url = "http://localhost:1234/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "stream": False
    }
    for attempt in range(retries):
        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=180)
            # Report HTTP errors as such instead of as a KeyError on the error body.
            response.raise_for_status()
            return response.json()['choices'][0]['message']['content']
        except (requests.RequestException, KeyError, IndexError, TypeError, ValueError) as e:
            if attempt == retries - 1:
                print("LLM call failed:", e)
                return ""
            # Brief pause before the next attempt.
            time.sleep(2)
    # Reached only when retries < 1: keep the "" contract rather than returning None.
    return ""

# ---- READ RESUME ----
def read_resume_text(file_path):
    """Extract plain text from a resume file.

    Args:
        file_path: Path string to a .pdf, .docx or plain-text file. The
            extension is matched case-insensitively; anything that is not
            .pdf or .docx is read as UTF-8 text.

    Returns:
        PDF page texts or non-blank DOCX paragraphs joined with newlines,
        or the raw contents of a text file.
    """
    lowered_path = file_path.lower()
    if lowered_path.endswith(".pdf"):
        # Release the PDF handle as soon as the page text has been collected.
        with fitz.open(file_path) as doc:
            return "\n".join(page.get_text() for page in doc)
    if lowered_path.endswith(".docx"):
        doc = docx.Document(file_path)
        return "\n".join(p.text for p in doc.paragraphs if p.text.strip())
    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()

# ---- METRICS ----
def compute_tfidf_similarity(jd, resume):
    """Return the cosine similarity between the TF-IDF vectors of two texts.

    The vectorizer is fitted on just these two documents, so term weights
    are relative to this pair only.
    """
    matrix = TfidfVectorizer().fit_transform([jd, resume])
    jd_row, resume_row = matrix[0:1], matrix[1:2]
    similarity = cosine_similarity(jd_row, resume_row)
    return similarity[0][0]

def keyword_coverage(jd, resume):
    """Measure how many JD keywords appear as whole words in the resume.

    Keywords are the distinct lower-cased alphabetic, non-stop-word tokens
    that spaCy finds in the job description.

    Args:
        jd: Job description text.
        resume: Resume text checked against the keywords.

    Returns:
        A ``(ratio, matched)`` tuple: the fraction of JD keywords found in
        the resume (0 when the JD yields no keywords) and the sorted list
        of matched keywords.
    """
    jd_keywords = {token.text.lower() for token in nlp(jd) if token.is_alpha and not token.is_stop}
    # Compare token to token: a substring test would count "sas" as present
    # in "disaster". make_doc() only tokenises, which is all is_alpha needs.
    resume_tokens = {token.text.lower() for token in nlp.make_doc(resume) if token.is_alpha}
    # Sorted so repeated runs report the matches in a stable order.
    matched = sorted(jd_keywords & resume_tokens)
    ratio = len(matched) / len(jd_keywords) if jd_keywords else 0
    return ratio, matched

def qualitative_relevance(jd, resume):
    """Return the share of distinct JD words that also occur in the resume.

    Both texts are lower-cased and split on whitespace, so punctuation stays
    attached to words. Returns 0 when the JD contains no words.
    """
    jd_vocab = set(jd.lower().split())
    resume_vocab = set(resume.lower().split())
    if not jd_vocab:
        return 0
    shared = jd_vocab & resume_vocab
    return len(shared) / len(jd_vocab)

def section_audit(text):
    """List which expected section keywords occur anywhere in *text*.

    Matching is a case-insensitive substring test; the result keeps the
    fixed keyword order.
    """
    lowered = text.lower()
    found = []
    for keyword in ("summary", "qualification", "experience", "technical", "skill"):
        if keyword in lowered:
            found.append(keyword)
    return found

# ---- MAIN FUNCTION ----
def customise_resume(prompt_text, resume_path):
    """Generate a JD from a prompt, tailor a resume to it, and save the result.

    Steps: ask the local LLM for a job description, read the resume text,
    ask the LLM to rewrite four labelled sections, substitute them into the
    template at ``ey_template_path``, write the result under ``output_dir``
    and print similarity metrics against the generated JD.

    Args:
        prompt_text: Free-text role request; sent to the LLM and also
            substituted for ``[ROLE]`` in the template.
        resume_path: Path to the resume (.pdf, .docx or text). The file
            stem is used as the candidate name.

    Returns:
        Path of the written resume file, or ``None`` when an LLM call
        returns nothing or no text could be extracted from the resume.
    """
    candidate_name = os.path.splitext(os.path.basename(resume_path))[0]

    print(f"Step 1: Generating JD with only Responsibilities, Requirements, Preferred Skills for prompt → {prompt_text}")
    jd_prompt = f"""Create a structured job description based only on the following three sections:
1. Responsibilities
2. Requirements
3. Preferred Skills

Prompt:
{prompt_text}
"""
    generated_jd = call_lmstudio(jd_prompt)
    if not generated_jd:
        print("JD generation failed.")
        return None

    print("\nGenerated JD:\n", generated_jd)

    print(f"\nStep 2: Reading resume → {resume_path}")
    resume_text = read_resume_text(resume_path)
    if not resume_text.strip():
        # Without resume text the LLM has nothing to ground the rewrite on,
        # which contradicts the "DO NOT INVENT DETAILS" instruction below.
        print("Resume text extraction returned no content.")
        return None

    print("\nStep 3: Customizing resume using resume + generated JD")
    custom_prompt = f"""
You are a resume writer. Given the candidate's resume and the job description, rewrite the following sections to make the resume aligned with the JD. DO NOT INVENT DETAILS. Use only the content in the resume.

Resume:
{resume_text}

Job Description:
{generated_jd}

Provide the output in this format (with labels):
SUMMARY:
...

QUALIFICATIONS:
...

EXPERIENCE:
...

TECHNICAL_SKILLS:
...
"""

    result = call_lmstudio(custom_prompt)
    if not result:
        print("Customization failed.")
        return None

    print("\n=== Raw Output from LLM ===\n")
    print(result)

    # Each section runs from its own label up to the next label; the last
    # one runs to the end of the response. Keys are the template placeholders.
    section_patterns = {
        "[SUMMARY]": r"SUMMARY:(.*?)QUALIFICATIONS:",
        "[QUALIFICATIONS]": r"QUALIFICATIONS:(.*?)EXPERIENCE:",
        "[EXPERIENCE_PLACEHOLDER]": r"EXPERIENCE:(.*?)TECHNICAL_SKILLS:",
        "[TECHNICAL_SKILLS]": r"TECHNICAL_SKILLS:(.*)",
    }
    sections = {}
    for placeholder, pattern in section_patterns.items():
        match = re.search(pattern, result, re.DOTALL | re.IGNORECASE)
        if match is None:
            # Make a format deviation visible instead of silently blanking the section.
            print(f"Warning: no content found for {placeholder} in LLM output; leaving it blank.")
        sections[placeholder] = match.group(1).strip() if match else ""

    # Read EY template
    with open(ey_template_path, "r", encoding="utf-8") as f:
        template = f.read()

    # Fill template
    final_resume = template.replace("[CANDIDATE_NAME]", candidate_name)
    final_resume = final_resume.replace("[ROLE]", prompt_text)
    for placeholder, content in sections.items():
        final_resume = final_resume.replace(placeholder, content)

    # Save customized resume
    output_path = os.path.join(output_dir, f"{candidate_name}_customised_resumearnabifrs91.txt")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_resume)

    print("✅ Resume customized and saved to:", output_path)

    # ---- ACCURACY METRICS ----
    # All metrics compare the generated JD with the filled-in template text.
    tfidf_score = compute_tfidf_similarity(generated_jd, final_resume)
    keyword_match_ratio, matched_keywords = keyword_coverage(generated_jd, final_resume)
    qualitative_score = qualitative_relevance(generated_jd, final_resume)
    section_flags = section_audit(final_resume)

    print("\nAccuracy Metrics:")
    print(f"TF-IDF Similarity:            {tfidf_score:.2f}")
    print(f"Keyword Coverage Ratio:       {keyword_match_ratio:.2f}")
    print(f"Qualitative Relevance:        {qualitative_score:.2f}")
    print(f"Matched Keywords:             {matched_keywords}")
    print(f"Section Presence Flags:       {section_flags}\n")

    return output_path

# ---- RUN EXAMPLE ----
# Role request passed to the LLM to generate the job description.
prompt = "Create a JD for a retail IFRS9 model validation requirement. Mention that SAS is a must."
# Source resume; its file stem becomes the candidate name in the output file.
resume_path =  r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\CV_Arnab Roy_Model_Validation.pdf"

# Run the full pipeline; the returned output path is the cell's result.
customise_resume(prompt, resume_path)


Step 1: Generating JD with only Responsibilities, Requirements, Preferred Skills for prompt → Create a JD for a retail IFRS9 model validation requirement. Mention that SAS is a must.

Generated JD:
 
Job Title: Model Validation Specialist - IFRS9

Job Summary:
We are looking for an experienced professional to join our team as a Model Validation Specialist focused on IFRS9. This individual will be responsible for ensuring that the models used in our financial reporting processes meet regulatory requirements and are accurate and reliable. The candidate must have a strong background in financial analysis, SAS programming skills, and a deep understanding of IFRS9 standards.

Responsibilities:
• Review models used in IFRS9 reporting, including impairment calculations and provisioning
• Validate models for accuracy and reliability based on regulatory requirements
• Identify and document any issues or inconsistencies with models and work with model owners to address them
• Collaborate with fi

'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\CV_Arnab Roy_Model_Validation_customised_resumearnabifrs91.txt'

In [5]:
import os
import fitz  # PyMuPDF
import docx
import re
import json
import requests
import time
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# ---- CONFIGURATION ----
# Template text file whose bracketed placeholders are filled in by customise_resume().
ey_template_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\EY_sample_resume_template.txt"
# Directory that receives the generated resume files; created when this cell runs if missing.
output_dir = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes"
os.makedirs(output_dir, exist_ok=True)

# spaCy English pipeline, loaded once here and used by keyword_coverage().
nlp = spacy.load("en_core_web_sm")

# ---- LM Studio CALL ----
def call_lmstudio(prompt, model="zephyr-7b-beta.Q4_K_M.gguf", retries=3):
    """Send a single-turn chat prompt to the local LM Studio server.

    Args:
        prompt: User message content for the chat completion request.
        model: Model identifier placed in the request payload.
        retries: Maximum number of attempts; values below 1 make no request.

    Returns:
        The content of the first completion choice, or "" when every attempt
        fails (the last error is printed) or no attempt is made.
    """
    url = "http://localhost:1234/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "stream": False
    }
    for attempt in range(retries):
        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=180)
            # Report HTTP errors as such instead of as a KeyError on the error body.
            response.raise_for_status()
            return response.json()['choices'][0]['message']['content']
        except (requests.RequestException, KeyError, IndexError, TypeError, ValueError) as e:
            if attempt == retries - 1:
                print("LLM call failed:", e)
                return ""
            # Brief pause before the next attempt.
            time.sleep(2)
    # Reached only when retries < 1: keep the "" contract rather than returning None.
    return ""

# ---- READ RESUME ----
def read_resume_text(file_path):
    """Extract plain text from a resume file.

    Args:
        file_path: Path string to a .pdf, .docx or plain-text file. The
            extension is matched case-insensitively; anything that is not
            .pdf or .docx is read as UTF-8 text.

    Returns:
        PDF page texts or non-blank DOCX paragraphs joined with newlines,
        or the raw contents of a text file.
    """
    lowered_path = file_path.lower()
    if lowered_path.endswith(".pdf"):
        # Release the PDF handle as soon as the page text has been collected.
        with fitz.open(file_path) as doc:
            return "\n".join(page.get_text() for page in doc)
    if lowered_path.endswith(".docx"):
        doc = docx.Document(file_path)
        return "\n".join(p.text for p in doc.paragraphs if p.text.strip())
    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()

# ---- METRICS ----
def compute_tfidf_similarity(jd, resume):
    """Return the cosine similarity of the TF-IDF vectors of ``jd`` and ``resume``.

    Both texts are vectorised together so they share one vocabulary.
    """
    vectorizer = TfidfVectorizer()
    matrix = vectorizer.fit_transform([jd, resume])
    jd_row = matrix[0:1]
    resume_row = matrix[1:2]
    scores = cosine_similarity(jd_row, resume_row)
    return scores[0][0]

def keyword_coverage(jd, resume):
    """Measure how many JD keywords occur as whole words in the resume.

    Keywords are the lower-cased alphabetic, non-stop-word tokens that the
    module-level spaCy pipeline ``nlp`` finds in ``jd``.

    Args:
        jd: Job description text.
        resume: Resume text to search.

    Returns:
        ``(ratio, matched)``: the share of JD keywords present in the resume
        (0 when the JD yields no keywords) and the sorted list of those
        keywords.
    """
    jd_keywords = {token.text.lower() for token in nlp(jd) if token.is_alpha and not token.is_stop}
    # Compare against whole resume words. A substring test on the raw text
    # counts "sas" as present in "disaster" and inflates the ratio.
    resume_words = set(re.findall(r"[^\W\d_]+", resume.lower()))
    # Sorted so the printed keyword list is stable from run to run.
    matched = sorted(jd_keywords & resume_words)
    ratio = len(matched) / len(jd_keywords) if jd_keywords else 0
    return ratio, matched

def qualitative_relevance(jd, resume):
    """Return the fraction of distinct JD words that also occur in the resume.

    Words are lower-cased, whitespace-delimited chunks, so punctuation stays
    attached to them. The result is 0 when the JD contains no words.
    """
    jd_vocab = set(jd.lower().split())
    resume_vocab = set(resume.lower().split())
    if not jd_vocab:
        return 0
    shared = jd_vocab & resume_vocab
    return len(shared) / len(jd_vocab)

def section_audit(text):
    """List the expected section keywords that occur anywhere in ``text``.

    The check is a case-insensitive substring search; the result keeps the
    order of the keyword list.
    """
    lowered = text.lower()
    found = []
    for keyword in ("summary", "qualification", "experience", "technical", "skill"):
        if keyword in lowered:
            found.append(keyword)
    return found

# ---- MAIN FUNCTION ----
def _extract_resume_sections(llm_output):
    """Split the labelled LLM answer into the four template sections.

    Returns a dict keyed by template placeholder; a section whose labels are
    not found maps to "".
    """
    flags = re.DOTALL | re.IGNORECASE
    # "TECHNICAL SKILLS" (space) is accepted alongside the requested
    # "TECHNICAL_SKILLS" in case the model rewrites the label.
    patterns = {
        "[SUMMARY]": r"SUMMARY:(.*?)QUALIFICATIONS:",
        "[QUALIFICATIONS]": r"QUALIFICATIONS:(.*?)EXPERIENCE:",
        "[EXPERIENCE_PLACEHOLDER]": r"EXPERIENCE:(.*?)TECHNICAL[_ ]SKILLS:",
        "[TECHNICAL_SKILLS]": r"TECHNICAL[_ ]SKILLS:(.*)",
    }
    sections = {}
    for placeholder, pattern in patterns.items():
        match = re.search(pattern, llm_output, flags)
        sections[placeholder] = match.group(1).strip() if match else ""
    return sections


def customise_resume(prompt_text, resume_path, output_suffix="arnabirb1"):
    """Generate a JD from a prompt, tailor a resume to it and save the result.

    Steps: ask the LLM for a job description, read the resume, ask the LLM to
    rewrite the resume sections against that JD, fill the template read from
    ``ey_template_path``, write the result into ``output_dir`` and print
    similarity metrics between the JD and the filled template.

    Args:
        prompt_text: Instruction describing the role; it is also substituted
            for the template's ``[ROLE]`` placeholder.
        resume_path: Path of the candidate resume; its base name without the
            extension is used as the candidate name.
        output_suffix: Text placed after "_customised_resume" in the output
            file name, so separate runs can write to separate files.

    Returns:
        Path of the written file, or None when JD generation, resume reading
        or customisation produces nothing.
    """
    candidate_name = os.path.splitext(os.path.basename(resume_path))[0]

    print(f"Step 1: Generating JD with only Responsibilities, Requirements, Preferred Skills for prompt → {prompt_text}")
    jd_prompt = f"""Create a structured job description based only on the following three sections:
1. Responsibilities
2. Requirements
3. Preferred Skills

Prompt:
{prompt_text}
"""
    generated_jd = call_lmstudio(jd_prompt)
    if not generated_jd:
        print("JD generation failed.")
        return

    print("\nGenerated JD:\n", generated_jd)

    print(f"\nStep 2: Reading resume → {resume_path}")
    resume_text = read_resume_text(resume_path)
    if not resume_text.strip():
        # With no source text the model has nothing to rewrite and could only
        # invent content, which the prompt below forbids.
        print("Resume text is empty; nothing to customise.")
        return

    print("\nStep 3: Customizing resume using resume + generated JD")
    custom_prompt = f"""
You are a resume writer. Given the candidate's resume and the job description, rewrite the following sections to make the resume aligned with the JD. DO NOT INVENT DETAILS. Use only the content in the resume.

Resume:
{resume_text}

Job Description:
{generated_jd}

Provide the output in this format (with labels):
SUMMARY:
...

QUALIFICATIONS:
...

EXPERIENCE:
...

TECHNICAL_SKILLS:
...
"""

    result = call_lmstudio(custom_prompt)
    if not result:
        print("Customization failed.")
        return

    print("\n=== Raw Output from LLM ===\n")
    print(result)

    sections = _extract_resume_sections(result)
    missing = [placeholder for placeholder, body in sections.items() if not body]
    if missing:
        # Make blank template sections visible instead of failing silently.
        print("Warning: no content extracted for:", ", ".join(missing))

    # Read EY template
    with open(ey_template_path, "r", encoding="utf-8") as f:
        template = f.read()

    # Fill template; name and role first, then the extracted sections.
    replacements = {"[CANDIDATE_NAME]": candidate_name, "[ROLE]": prompt_text}
    replacements.update(sections)
    final_resume = template
    for placeholder, value in replacements.items():
        final_resume = final_resume.replace(placeholder, value)

    # Save customized resume
    output_path = os.path.join(output_dir, f"{candidate_name}_customised_resume{output_suffix}.txt")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_resume)

    print("✅ Resume customized and saved to:", output_path)

    # ---- ACCURACY METRICS ----
    tfidf_score = compute_tfidf_similarity(generated_jd, final_resume)
    keyword_match_ratio, matched_keywords = keyword_coverage(generated_jd, final_resume)
    qualitative_score = qualitative_relevance(generated_jd, final_resume)
    section_flags = section_audit(final_resume)

    print("\nAccuracy Metrics:")
    print(f"TF-IDF Similarity:            {tfidf_score:.2f}")
    print(f"Keyword Coverage Ratio:       {keyword_match_ratio:.2f}")
    print(f"Qualitative Relevance:        {qualitative_score:.2f}")
    print(f"Matched Keywords:             {matched_keywords}")
    print(f"Section Presence Flags:       {section_flags}\n")

    return output_path

# ---- RUN EXAMPLE ----
# Role brief handed to the LLM for JD generation.
prompt = "Create a JD for a retail IRB model validation requirement. Mention that SAS is a must."
# Source resume to be tailored.
resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\CV_Arnab Roy_Model_Validation.pdf"

# Kept as the cell's final expression so the notebook echoes the returned path.
customise_resume(prompt_text=prompt, resume_path=resume_path)


Step 1: Generating JD with only Responsibilities, Requirements, Preferred Skills for prompt → Create a JD for a retail IRB model validation requirement. Mention that SAS is a must.

Generated JD:
 
Job Title: Model Validation Specialist - Retail Industry

Responsibilities:
- Ensure the accuracy, reliability and compliance of models used in the retail industry by conducting regular model validations.
- Collaborate with data scientists, business analysts and other stakeholders to develop validation plans and strategies.
- Identify potential risks and issues associated with models and develop mitigation strategies.
- Monitor model performance over time and recommend improvements as needed.
- Generate comprehensive reports and documentation on model validation results for regulatory and compliance purposes.

Requirements:
- Bachelor's degree in a related field such as Mathematics, Statistics, Computer Science or Business Administration.
- At least 3 years of experience in model validation,

'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\CV_Arnab Roy_Model_Validation_customised_resumearnabirb1.txt'

In [6]:
import os
import fitz  # PyMuPDF
import docx
import re
import json
import requests
import time
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# ---- CONFIGURATION ----
# Absolute, machine-specific Windows paths; edit them before running elsewhere.
# Template text file whose [PLACEHOLDER] tokens customise_resume() fills in.
ey_template_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\EY_sample_resume_template.txt"
# Folder that receives the generated resumes; created here if it is missing.
output_dir = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes"
os.makedirs(output_dir, exist_ok=True)

# spaCy English pipeline, loaded once and used by keyword_coverage().
nlp = spacy.load("en_core_web_sm")

# ---- LM Studio CALL ----
def call_lmstudio(prompt, model="zephyr-7b-beta.Q4_K_M.gguf", retries=3):
    """Send a single-turn chat prompt to the local LM Studio server.

    Args:
        prompt: Text sent as the only user message.
        model: Model identifier placed in the request payload.
        retries: Maximum number of attempts before giving up.

    Returns:
        The content of the first choice's message, or "" when every attempt
        fails (including when ``retries`` is zero or negative).
    """
    url = "http://localhost:1234/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "stream": False,
    }
    for attempt in range(retries):
        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=180)
            # Report HTTP 4xx/5xx as such instead of failing later with a
            # misleading KeyError on the missing 'choices' key.
            response.raise_for_status()
            return response.json()['choices'][0]['message']['content']
        except Exception as e:
            # Deliberately broad: this is a best-effort call and callers only
            # test the result for emptiness.
            if attempt == retries - 1:
                print("LLM call failed:", e)
                return ""
            time.sleep(2)
    # Reached only when the loop never ran (retries <= 0); keep the "" contract
    # instead of implicitly returning None.
    return ""

# ---- READ RESUME ----
def read_resume_text(file_path):
    """Return the plain text of a resume stored as PDF, DOCX or UTF-8 text.

    Args:
        file_path: Path to the resume (``str`` or ``os.PathLike``). The
            extension, compared case-insensitively, selects the reader.

    Returns:
        For ``.pdf``: the text of every page joined by newlines.
        For ``.docx``: the non-blank paragraphs joined by newlines.
        Otherwise: the file content decoded as UTF-8.
    """
    path = os.fspath(file_path)
    lowered = path.lower()
    if lowered.endswith(".pdf"):
        # The context manager closes the document so the file handle is
        # released as soon as the text has been extracted.
        with fitz.open(path) as pdf:
            return "\n".join(page.get_text() for page in pdf)
    if lowered.endswith(".docx"):
        document = docx.Document(path)
        return "\n".join(p.text for p in document.paragraphs if p.text.strip())
    with open(path, "r", encoding="utf-8") as f:
        return f.read()

# ---- METRICS ----
def compute_tfidf_similarity(jd, resume):
    """Return the cosine similarity of the TF-IDF vectors of ``jd`` and ``resume``.

    Both texts are vectorised together so they share one vocabulary.
    """
    vectorizer = TfidfVectorizer()
    matrix = vectorizer.fit_transform([jd, resume])
    jd_row = matrix[0:1]
    resume_row = matrix[1:2]
    scores = cosine_similarity(jd_row, resume_row)
    return scores[0][0]

def keyword_coverage(jd, resume):
    """Measure how many JD keywords occur as whole words in the resume.

    Keywords are the lower-cased alphabetic, non-stop-word tokens that the
    module-level spaCy pipeline ``nlp`` finds in ``jd``.

    Args:
        jd: Job description text.
        resume: Resume text to search.

    Returns:
        ``(ratio, matched)``: the share of JD keywords present in the resume
        (0 when the JD yields no keywords) and the sorted list of those
        keywords.
    """
    jd_keywords = {token.text.lower() for token in nlp(jd) if token.is_alpha and not token.is_stop}
    # Compare against whole resume words. A substring test on the raw text
    # counts "sas" as present in "disaster" and inflates the ratio.
    resume_words = set(re.findall(r"[^\W\d_]+", resume.lower()))
    # Sorted so the printed keyword list is stable from run to run.
    matched = sorted(jd_keywords & resume_words)
    ratio = len(matched) / len(jd_keywords) if jd_keywords else 0
    return ratio, matched

def qualitative_relevance(jd, resume):
    """Return the fraction of distinct JD words that also occur in the resume.

    Words are lower-cased, whitespace-delimited chunks, so punctuation stays
    attached to them. The result is 0 when the JD contains no words.
    """
    jd_vocab = set(jd.lower().split())
    resume_vocab = set(resume.lower().split())
    if not jd_vocab:
        return 0
    shared = jd_vocab & resume_vocab
    return len(shared) / len(jd_vocab)

def section_audit(text):
    """List the expected section keywords that occur anywhere in ``text``.

    The check is a case-insensitive substring search; the result keeps the
    order of the keyword list.
    """
    lowered = text.lower()
    found = []
    for keyword in ("summary", "qualification", "experience", "technical", "skill"):
        if keyword in lowered:
            found.append(keyword)
    return found

# ---- MAIN FUNCTION ----
def _extract_resume_sections(llm_output):
    """Split the labelled LLM answer into the four template sections.

    Returns a dict keyed by template placeholder; a section whose labels are
    not found maps to "".
    """
    flags = re.DOTALL | re.IGNORECASE
    # "TECHNICAL SKILLS" (space) is accepted alongside the requested
    # "TECHNICAL_SKILLS" in case the model rewrites the label.
    patterns = {
        "[SUMMARY]": r"SUMMARY:(.*?)QUALIFICATIONS:",
        "[QUALIFICATIONS]": r"QUALIFICATIONS:(.*?)EXPERIENCE:",
        "[EXPERIENCE_PLACEHOLDER]": r"EXPERIENCE:(.*?)TECHNICAL[_ ]SKILLS:",
        "[TECHNICAL_SKILLS]": r"TECHNICAL[_ ]SKILLS:(.*)",
    }
    sections = {}
    for placeholder, pattern in patterns.items():
        match = re.search(pattern, llm_output, flags)
        sections[placeholder] = match.group(1).strip() if match else ""
    return sections


def customise_resume(prompt_text, resume_path, output_suffix="arnabirb"):
    """Generate a JD from a prompt, tailor a resume to it and save the result.

    Steps: ask the LLM for a job description, read the resume, ask the LLM to
    rewrite the resume sections against that JD, fill the template read from
    ``ey_template_path``, write the result into ``output_dir`` and print
    similarity metrics between the JD and the filled template.

    Args:
        prompt_text: Instruction describing the role; it is also substituted
            for the template's ``[ROLE]`` placeholder.
        resume_path: Path of the candidate resume; its base name without the
            extension is used as the candidate name.
        output_suffix: Text placed after "_customised_resume" in the output
            file name, so separate runs can write to separate files.

    Returns:
        Path of the written file, or None when JD generation, resume reading
        or customisation produces nothing.
    """
    candidate_name = os.path.splitext(os.path.basename(resume_path))[0]

    print(f"Step 1: Generating JD with only Responsibilities, Requirements, Preferred Skills for prompt → {prompt_text}")
    jd_prompt = f"""Create a structured job description based only on the following three sections:
1. Responsibilities
2. Requirements
3. Preferred Skills

Prompt:
{prompt_text}
"""
    generated_jd = call_lmstudio(jd_prompt)
    if not generated_jd:
        print("JD generation failed.")
        return

    print("\nGenerated JD:\n", generated_jd)

    print(f"\nStep 2: Reading resume → {resume_path}")
    resume_text = read_resume_text(resume_path)
    if not resume_text.strip():
        # With no source text the model has nothing to rewrite and could only
        # invent content, which the prompt below forbids.
        print("Resume text is empty; nothing to customise.")
        return

    print("\nStep 3: Customizing resume using resume + generated JD")
    custom_prompt = f"""
You are a resume writer. Given the candidate's resume and the job description, rewrite the following sections to make the resume aligned with the JD. DO NOT INVENT DETAILS. Use only the content in the resume.

Resume:
{resume_text}

Job Description:
{generated_jd}

Provide the output in this format (with labels):
SUMMARY:
...

QUALIFICATIONS:
...

EXPERIENCE:
...

TECHNICAL_SKILLS:
...
"""

    result = call_lmstudio(custom_prompt)
    if not result:
        print("Customization failed.")
        return

    print("\n=== Raw Output from LLM ===\n")
    print(result)

    sections = _extract_resume_sections(result)
    missing = [placeholder for placeholder, body in sections.items() if not body]
    if missing:
        # Make blank template sections visible instead of failing silently.
        print("Warning: no content extracted for:", ", ".join(missing))

    # Read EY template
    with open(ey_template_path, "r", encoding="utf-8") as f:
        template = f.read()

    # Fill template; name and role first, then the extracted sections.
    replacements = {"[CANDIDATE_NAME]": candidate_name, "[ROLE]": prompt_text}
    replacements.update(sections)
    final_resume = template
    for placeholder, value in replacements.items():
        final_resume = final_resume.replace(placeholder, value)

    # Save customized resume
    output_path = os.path.join(output_dir, f"{candidate_name}_customised_resume{output_suffix}.txt")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_resume)

    print("✅ Resume customized and saved to:", output_path)

    # ---- ACCURACY METRICS ----
    tfidf_score = compute_tfidf_similarity(generated_jd, final_resume)
    keyword_match_ratio, matched_keywords = keyword_coverage(generated_jd, final_resume)
    qualitative_score = qualitative_relevance(generated_jd, final_resume)
    section_flags = section_audit(final_resume)

    print("\nAccuracy Metrics:")
    print(f"TF-IDF Similarity:            {tfidf_score:.2f}")
    print(f"Keyword Coverage Ratio:       {keyword_match_ratio:.2f}")
    print(f"Qualitative Relevance:        {qualitative_score:.2f}")
    print(f"Matched Keywords:             {matched_keywords}")
    print(f"Section Presence Flags:       {section_flags}\n")

    return output_path

# ---- RUN EXAMPLE ----
# Role brief handed to the LLM for JD generation.
prompt = "Create a JD for a retail IRB model validation requirement. Mention that SQL is a must."
# Source resume to be tailored.
resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\CV_Arnab Roy_Model_Validation.pdf"

# Kept as the cell's final expression so the notebook echoes the returned path.
customise_resume(prompt_text=prompt, resume_path=resume_path)


Step 1: Generating JD with only Responsibilities, Requirements, Preferred Skills for prompt → Create a JD for a retail IRB model validation requirement. Mention that SQL is a must.

Generated JD:
 
Job Title: Senior Validation Specialist - Retail Model Validation

Job Summary:
We are seeking an experienced Senior Validation Specialist to join our team and lead the validation of our retail models. As a Senior Validation Specialist, you will play a pivotal role in ensuring that our models are accurate, reliable, and compliant with regulatory requirements. This is a fast-paced environment where you will work closely with cross-functional teams to deliver high-quality models. SQL proficiency is a must for this position.

Responsibilities:
• Lead the validation of retail models for our organization
• Collaborate with cross-functional teams, including data scientists, developers, and subject matter experts, to ensure that models are accurate, reliable, and compliant with regulatory requireme

'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\CV_Arnab Roy_Model_Validation_customised_resumearnabirb.txt'

In [7]:
import os
import fitz  # PyMuPDF
import docx
import re
import json
import requests
import time
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# ---- CONFIGURATION ----
# Absolute, machine-specific Windows paths; edit them before running elsewhere.
# Template text file whose [PLACEHOLDER] tokens customise_resume() fills in.
ey_template_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\EY_sample_resume_template.txt"
# Folder that receives the generated resumes; created here if it is missing.
output_dir = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes"
os.makedirs(output_dir, exist_ok=True)

# spaCy English pipeline, loaded once and used by keyword_coverage().
nlp = spacy.load("en_core_web_sm")

# ---- LM Studio CALL ----
def call_lmstudio(prompt, model="zephyr-7b-beta.Q4_K_M.gguf", retries=3):
    """Send a single-turn chat prompt to the local LM Studio server.

    Args:
        prompt: Text sent as the only user message.
        model: Model identifier placed in the request payload.
        retries: Maximum number of attempts before giving up.

    Returns:
        The content of the first choice's message, or "" when every attempt
        fails (including when ``retries`` is zero or negative).
    """
    url = "http://localhost:1234/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "stream": False,
    }
    for attempt in range(retries):
        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=180)
            # Report HTTP 4xx/5xx as such instead of failing later with a
            # misleading KeyError on the missing 'choices' key.
            response.raise_for_status()
            return response.json()['choices'][0]['message']['content']
        except Exception as e:
            # Deliberately broad: this is a best-effort call and callers only
            # test the result for emptiness.
            if attempt == retries - 1:
                print("LLM call failed:", e)
                return ""
            time.sleep(2)
    # Reached only when the loop never ran (retries <= 0); keep the "" contract
    # instead of implicitly returning None.
    return ""

# ---- READ RESUME ----
def read_resume_text(file_path):
    """Return the plain text of a resume stored as PDF, DOCX or UTF-8 text.

    Args:
        file_path: Path to the resume (``str`` or ``os.PathLike``). The
            extension, compared case-insensitively, selects the reader.

    Returns:
        For ``.pdf``: the text of every page joined by newlines.
        For ``.docx``: the non-blank paragraphs joined by newlines.
        Otherwise: the file content decoded as UTF-8.
    """
    path = os.fspath(file_path)
    lowered = path.lower()
    if lowered.endswith(".pdf"):
        # The context manager closes the document so the file handle is
        # released as soon as the text has been extracted.
        with fitz.open(path) as pdf:
            return "\n".join(page.get_text() for page in pdf)
    if lowered.endswith(".docx"):
        document = docx.Document(path)
        return "\n".join(p.text for p in document.paragraphs if p.text.strip())
    with open(path, "r", encoding="utf-8") as f:
        return f.read()

# ---- METRICS ----
def compute_tfidf_similarity(jd, resume):
    """Return the cosine similarity of the TF-IDF vectors of ``jd`` and ``resume``.

    Both texts are vectorised together so they share one vocabulary.
    """
    vectorizer = TfidfVectorizer()
    matrix = vectorizer.fit_transform([jd, resume])
    jd_row = matrix[0:1]
    resume_row = matrix[1:2]
    scores = cosine_similarity(jd_row, resume_row)
    return scores[0][0]

def keyword_coverage(jd, resume):
    """Measure how many JD keywords occur as whole words in the resume.

    Keywords are the lower-cased alphabetic, non-stop-word tokens that the
    module-level spaCy pipeline ``nlp`` finds in ``jd``.

    Args:
        jd: Job description text.
        resume: Resume text to search.

    Returns:
        ``(ratio, matched)``: the share of JD keywords present in the resume
        (0 when the JD yields no keywords) and the sorted list of those
        keywords.
    """
    jd_keywords = {token.text.lower() for token in nlp(jd) if token.is_alpha and not token.is_stop}
    # Compare against whole resume words. A substring test on the raw text
    # counts "sas" as present in "disaster" and inflates the ratio.
    resume_words = set(re.findall(r"[^\W\d_]+", resume.lower()))
    # Sorted so the printed keyword list is stable from run to run.
    matched = sorted(jd_keywords & resume_words)
    ratio = len(matched) / len(jd_keywords) if jd_keywords else 0
    return ratio, matched

def qualitative_relevance(jd, resume):
    """Return the fraction of distinct JD words that also occur in the resume.

    Words are lower-cased, whitespace-delimited chunks, so punctuation stays
    attached to them. The result is 0 when the JD contains no words.
    """
    jd_vocab = set(jd.lower().split())
    resume_vocab = set(resume.lower().split())
    if not jd_vocab:
        return 0
    shared = jd_vocab & resume_vocab
    return len(shared) / len(jd_vocab)

def section_audit(text):
    """List the expected section keywords that occur anywhere in ``text``.

    The check is a case-insensitive substring search; the result keeps the
    order of the keyword list.
    """
    lowered = text.lower()
    found = []
    for keyword in ("summary", "qualification", "experience", "technical", "skill"):
        if keyword in lowered:
            found.append(keyword)
    return found

# ---- MAIN FUNCTION ----
def _extract_resume_sections(llm_output):
    """Split the labelled LLM answer into the four template sections.

    Returns a dict keyed by template placeholder; a section whose labels are
    not found maps to "".
    """
    flags = re.DOTALL | re.IGNORECASE
    # "TECHNICAL SKILLS" (space) is accepted alongside the requested
    # "TECHNICAL_SKILLS" in case the model rewrites the label.
    patterns = {
        "[SUMMARY]": r"SUMMARY:(.*?)QUALIFICATIONS:",
        "[QUALIFICATIONS]": r"QUALIFICATIONS:(.*?)EXPERIENCE:",
        "[EXPERIENCE_PLACEHOLDER]": r"EXPERIENCE:(.*?)TECHNICAL[_ ]SKILLS:",
        "[TECHNICAL_SKILLS]": r"TECHNICAL[_ ]SKILLS:(.*)",
    }
    sections = {}
    for placeholder, pattern in patterns.items():
        match = re.search(pattern, llm_output, flags)
        sections[placeholder] = match.group(1).strip() if match else ""
    return sections


def customise_resume(prompt_text, resume_path, output_suffix="kundanifrs91"):
    """Generate a JD from a prompt, tailor a resume to it and save the result.

    Steps: ask the LLM for a job description, read the resume, ask the LLM to
    rewrite the resume sections against that JD, fill the template read from
    ``ey_template_path``, write the result into ``output_dir`` and print
    similarity metrics between the JD and the filled template.

    Args:
        prompt_text: Instruction describing the role; it is also substituted
            for the template's ``[ROLE]`` placeholder.
        resume_path: Path of the candidate resume; its base name without the
            extension is used as the candidate name.
        output_suffix: Text placed after "_customised_resume" in the output
            file name, so separate runs can write to separate files.

    Returns:
        Path of the written file, or None when JD generation, resume reading
        or customisation produces nothing.
    """
    candidate_name = os.path.splitext(os.path.basename(resume_path))[0]

    print(f"Step 1: Generating JD with only Responsibilities, Requirements, Preferred Skills for prompt → {prompt_text}")
    jd_prompt = f"""Create a structured job description based only on the following three sections:
1. Responsibilities
2. Requirements
3. Preferred Skills

Prompt:
{prompt_text}
"""
    generated_jd = call_lmstudio(jd_prompt)
    if not generated_jd:
        print("JD generation failed.")
        return

    print("\nGenerated JD:\n", generated_jd)

    print(f"\nStep 2: Reading resume → {resume_path}")
    resume_text = read_resume_text(resume_path)
    if not resume_text.strip():
        # With no source text the model has nothing to rewrite and could only
        # invent content, which the prompt below forbids.
        print("Resume text is empty; nothing to customise.")
        return

    print("\nStep 3: Customizing resume using resume + generated JD")
    custom_prompt = f"""
You are a resume writer. Given the candidate's resume and the job description, rewrite the following sections to make the resume aligned with the JD. DO NOT INVENT DETAILS. Use only the content in the resume.

Resume:
{resume_text}

Job Description:
{generated_jd}

Provide the output in this format (with labels):
SUMMARY:
...

QUALIFICATIONS:
...

EXPERIENCE:
...

TECHNICAL_SKILLS:
...
"""

    result = call_lmstudio(custom_prompt)
    if not result:
        print("Customization failed.")
        return

    print("\n=== Raw Output from LLM ===\n")
    print(result)

    sections = _extract_resume_sections(result)
    missing = [placeholder for placeholder, body in sections.items() if not body]
    if missing:
        # Make blank template sections visible instead of failing silently.
        print("Warning: no content extracted for:", ", ".join(missing))

    # Read EY template
    with open(ey_template_path, "r", encoding="utf-8") as f:
        template = f.read()

    # Fill template; name and role first, then the extracted sections.
    replacements = {"[CANDIDATE_NAME]": candidate_name, "[ROLE]": prompt_text}
    replacements.update(sections)
    final_resume = template
    for placeholder, value in replacements.items():
        final_resume = final_resume.replace(placeholder, value)

    # Save customized resume
    output_path = os.path.join(output_dir, f"{candidate_name}_customised_resume{output_suffix}.txt")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_resume)

    print("✅ Resume customized and saved to:", output_path)

    # ---- ACCURACY METRICS ----
    tfidf_score = compute_tfidf_similarity(generated_jd, final_resume)
    keyword_match_ratio, matched_keywords = keyword_coverage(generated_jd, final_resume)
    qualitative_score = qualitative_relevance(generated_jd, final_resume)
    section_flags = section_audit(final_resume)

    print("\nAccuracy Metrics:")
    print(f"TF-IDF Similarity:            {tfidf_score:.2f}")
    print(f"Keyword Coverage Ratio:       {keyword_match_ratio:.2f}")
    print(f"Qualitative Relevance:        {qualitative_score:.2f}")
    print(f"Matched Keywords:             {matched_keywords}")
    print(f"Section Presence Flags:       {section_flags}\n")

    return output_path

# ---- RUN EXAMPLE ----
# Role brief handed to the LLM for JD generation.
prompt = "Create a JD for a retail IFRS9 model validation requirement. Mention that SQL is a must."
# Source resume to be tailored.
resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\Kundan_Kumar.pdf"

# Kept as the cell's final expression so the notebook echoes the returned path.
customise_resume(prompt_text=prompt, resume_path=resume_path)


Step 1: Generating JD with only Responsibilities, Requirements, Preferred Skills for prompt → Create a JD for a retail IFRS9 model validation requirement. Mention that SQL is a must.

Generated JD:
 
Job Title: IFRS9 Model Validation Specialist

Section 1: Responsibilities
- Review and validate financial models based on IFRS9 accounting standards to ensure compliance with regulatory guidelines
- Analyze complex financial data using SQL databases and generate reports for management review
- Collaborate with cross-functional teams, including finance, risk management, and operations, to ensure accurate and timely reporting of financial results
- Develop and implement model validation frameworks to improve process efficiency and reduce operational risks
- Conduct regular testing and audits to verify the accuracy and reliability of financial models and data

Section 2: Requirements
- Bachelor's or Master's degree in Finance, Accounting, or a related field
- Minimum of 5 years of experience 

In [13]:
import os
import fitz  # PyMuPDF
import docx
import re
import json
import requests
import time
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# ---- CONFIGURATION ----
# Absolute, machine-specific Windows paths; edit them before running elsewhere.
# Template text file whose [PLACEHOLDER] tokens customise_resume() fills in.
ey_template_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\EY_sample_resume_template.txt"
# Folder that receives the generated resumes; created here if it is missing.
output_dir = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes"
os.makedirs(output_dir, exist_ok=True)

# spaCy English pipeline, loaded once and used by keyword_coverage().
nlp = spacy.load("en_core_web_sm")

# ---- LM Studio CALL ----
def call_lmstudio(prompt, model="zephyr-7b-beta.Q4_K_M.gguf", retries=3):
    """Send a single-turn chat prompt to the local LM Studio server.

    Args:
        prompt: Text sent as the only user message.
        model: Model identifier placed in the request payload.
        retries: Maximum number of attempts before giving up.

    Returns:
        The content of the first choice's message, or "" when every attempt
        fails (including when ``retries`` is zero or negative).
    """
    url = "http://localhost:1234/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "stream": False,
    }
    for attempt in range(retries):
        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=180)
            # Report HTTP 4xx/5xx as such instead of failing later with a
            # misleading KeyError on the missing 'choices' key.
            response.raise_for_status()
            return response.json()['choices'][0]['message']['content']
        except Exception as e:
            # Deliberately broad: this is a best-effort call and callers only
            # test the result for emptiness.
            if attempt == retries - 1:
                print("LLM call failed:", e)
                return ""
            time.sleep(2)
    # Reached only when the loop never ran (retries <= 0); keep the "" contract
    # instead of implicitly returning None.
    return ""

# ---- READ RESUME ----
def read_resume_text(file_path):
    """Return the plain text of a resume stored as PDF, DOCX or UTF-8 text.

    Args:
        file_path: Path to the resume (``str`` or ``os.PathLike``). The
            extension, compared case-insensitively, selects the reader.

    Returns:
        For ``.pdf``: the text of every page joined by newlines.
        For ``.docx``: the non-blank paragraphs joined by newlines.
        Otherwise: the file content decoded as UTF-8.
    """
    path = os.fspath(file_path)
    lowered = path.lower()
    if lowered.endswith(".pdf"):
        # The context manager closes the document so the file handle is
        # released as soon as the text has been extracted.
        with fitz.open(path) as pdf:
            return "\n".join(page.get_text() for page in pdf)
    if lowered.endswith(".docx"):
        document = docx.Document(path)
        return "\n".join(p.text for p in document.paragraphs if p.text.strip())
    with open(path, "r", encoding="utf-8") as f:
        return f.read()

# ---- METRICS ----
def compute_tfidf_similarity(jd, resume):
    """Return the cosine similarity of the TF-IDF vectors of ``jd`` and ``resume``.

    Both texts are vectorised together so they share one vocabulary.
    """
    vectorizer = TfidfVectorizer()
    matrix = vectorizer.fit_transform([jd, resume])
    jd_row = matrix[0:1]
    resume_row = matrix[1:2]
    scores = cosine_similarity(jd_row, resume_row)
    return scores[0][0]

def keyword_coverage(jd, resume):
    """Measure how many JD keywords occur as whole words in the resume.

    Keywords are the lower-cased alphabetic, non-stop-word tokens that the
    module-level spaCy pipeline ``nlp`` finds in ``jd``.

    Args:
        jd: Job description text.
        resume: Resume text to search.

    Returns:
        ``(ratio, matched)``: the share of JD keywords present in the resume
        (0 when the JD yields no keywords) and the sorted list of those
        keywords.
    """
    jd_keywords = {token.text.lower() for token in nlp(jd) if token.is_alpha and not token.is_stop}
    # Compare against whole resume words. A substring test on the raw text
    # counts "sas" as present in "disaster" and inflates the ratio.
    resume_words = set(re.findall(r"[^\W\d_]+", resume.lower()))
    # Sorted so the printed keyword list is stable from run to run.
    matched = sorted(jd_keywords & resume_words)
    ratio = len(matched) / len(jd_keywords) if jd_keywords else 0
    return ratio, matched

def qualitative_relevance(jd, resume):
    """Return the fraction of distinct JD words that also occur in the resume.

    Words are lower-cased, whitespace-delimited chunks, so punctuation stays
    attached to them. The result is 0 when the JD contains no words.
    """
    jd_vocab = set(jd.lower().split())
    resume_vocab = set(resume.lower().split())
    if not jd_vocab:
        return 0
    shared = jd_vocab & resume_vocab
    return len(shared) / len(jd_vocab)

def section_audit(text):
    """List the expected section keywords that occur anywhere in ``text``.

    The check is a case-insensitive substring search; the result keeps the
    order of the keyword list.
    """
    lowered = text.lower()
    found = []
    for keyword in ("summary", "qualification", "experience", "technical", "skill"):
        if keyword in lowered:
            found.append(keyword)
    return found

# ---- MAIN FUNCTION ----
def _extract_resume_sections(llm_output):
    """Split the labelled LLM answer into the four template sections.

    Returns a dict keyed by template placeholder; a section whose labels are
    not found maps to "".
    """
    flags = re.DOTALL | re.IGNORECASE
    # "TECHNICAL SKILLS" (space) is accepted alongside the requested
    # "TECHNICAL_SKILLS" in case the model rewrites the label.
    patterns = {
        "[SUMMARY]": r"SUMMARY:(.*?)QUALIFICATIONS:",
        "[QUALIFICATIONS]": r"QUALIFICATIONS:(.*?)EXPERIENCE:",
        "[EXPERIENCE_PLACEHOLDER]": r"EXPERIENCE:(.*?)TECHNICAL[_ ]SKILLS:",
        "[TECHNICAL_SKILLS]": r"TECHNICAL[_ ]SKILLS:(.*)",
    }
    sections = {}
    for placeholder, pattern in patterns.items():
        match = re.search(pattern, llm_output, flags)
        sections[placeholder] = match.group(1).strip() if match else ""
    return sections


def customise_resume(prompt_text, resume_path, output_suffix="kundanifrs9"):
    """Generate a JD from a prompt, tailor a resume to it and save the result.

    Steps: ask the LLM for a job description, read the resume, ask the LLM to
    rewrite the resume sections against that JD, fill the template read from
    ``ey_template_path``, write the result into ``output_dir`` and print
    similarity metrics between the JD and the filled template.

    Args:
        prompt_text: Instruction describing the role; it is also substituted
            for the template's ``[ROLE]`` placeholder.
        resume_path: Path of the candidate resume; its base name without the
            extension is used as the candidate name.
        output_suffix: Text placed after "_customised_resume" in the output
            file name, so separate runs can write to separate files.

    Returns:
        Path of the written file, or None when JD generation, resume reading
        or customisation produces nothing.
    """
    candidate_name = os.path.splitext(os.path.basename(resume_path))[0]

    print(f"Step 1: Generating JD with only Responsibilities, Requirements, Preferred Skills for prompt → {prompt_text}")
    jd_prompt = f"""Create a structured job description based only on the following three sections:
1. Responsibilities
2. Requirements
3. Preferred Skills

Prompt:
{prompt_text}
"""
    generated_jd = call_lmstudio(jd_prompt)
    if not generated_jd:
        print("JD generation failed.")
        return

    print("\nGenerated JD:\n", generated_jd)

    print(f"\nStep 2: Reading resume → {resume_path}")
    resume_text = read_resume_text(resume_path)
    if not resume_text.strip():
        # With no source text the model has nothing to rewrite and could only
        # invent content, which the prompt below forbids.
        print("Resume text is empty; nothing to customise.")
        return

    print("\nStep 3: Customizing resume using resume + generated JD")
    custom_prompt = f"""
You are a resume writer. Given the candidate's resume and the job description, rewrite the following sections to make the resume aligned with the JD. DO NOT INVENT DETAILS. Use only the content in the resume.

Resume:
{resume_text}

Job Description:
{generated_jd}

Provide the output in this format (with labels):
SUMMARY:
...

QUALIFICATIONS:
...

EXPERIENCE:
...

TECHNICAL_SKILLS:
...
"""

    result = call_lmstudio(custom_prompt)
    if not result:
        print("Customization failed.")
        return

    print("\n=== Raw Output from LLM ===\n")
    print(result)

    sections = _extract_resume_sections(result)
    missing = [placeholder for placeholder, body in sections.items() if not body]
    if missing:
        # Make blank template sections visible instead of failing silently.
        print("Warning: no content extracted for:", ", ".join(missing))

    # Read EY template
    with open(ey_template_path, "r", encoding="utf-8") as f:
        template = f.read()

    # Fill template; name and role first, then the extracted sections.
    replacements = {"[CANDIDATE_NAME]": candidate_name, "[ROLE]": prompt_text}
    replacements.update(sections)
    final_resume = template
    for placeholder, value in replacements.items():
        final_resume = final_resume.replace(placeholder, value)

    # Save customized resume
    output_path = os.path.join(output_dir, f"{candidate_name}_customised_resume{output_suffix}.txt")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_resume)

    print("✅ Resume customized and saved to:", output_path)

    # ---- ACCURACY METRICS ----
    tfidf_score = compute_tfidf_similarity(generated_jd, final_resume)
    keyword_match_ratio, matched_keywords = keyword_coverage(generated_jd, final_resume)
    qualitative_score = qualitative_relevance(generated_jd, final_resume)
    section_flags = section_audit(final_resume)

    print("\nAccuracy Metrics:")
    print(f"TF-IDF Similarity:            {tfidf_score:.2f}")
    print(f"Keyword Coverage Ratio:       {keyword_match_ratio:.2f}")
    print(f"Qualitative Relevance:        {qualitative_score:.2f}")
    print(f"Matched Keywords:             {matched_keywords}")
    print(f"Section Presence Flags:       {section_flags}\n")

    return output_path

# ---- RUN EXAMPLE ----
# Role brief handed to the LLM for JD generation.
prompt = "Create a JD for a retail IFRS9 model validation requirement. Mention that SAS is a must."
# Source resume to be tailored.
resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\Kundan_Kumar.pdf"

# Kept as the cell's final expression so the notebook echoes the returned path.
customise_resume(prompt_text=prompt, resume_path=resume_path)


Step 1: Generating JD with only Responsibilities, Requirements, Preferred Skills for prompt → Create a JD for a retail IFRS9 model validation requirement. Mention that SAS is a must.

Generated JD:
 
Job Title: IFRS9 Model Validation Specialist

Job Summary: We are seeking an experienced and skilled specialist to join our team of financial experts to validate the effectiveness of IFRS9 models used in our retail business. The ideal candidate will have a strong background in finance, including expertise in SAS programming, and be able to work closely with stakeholders across various departments to ensure accurate and compliant financial reporting.

Key Responsibilities:
• Validate the accuracy and completeness of IFRS9 models used in the retail business
• Identify any deficiencies or gaps in model performance and suggest improvements
• Collaborate with stakeholders to develop corrective actions and implement them effectively
• Ensure compliance with regulatory requirements regarding fina

'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\Kundan_Kumar_customised_resumekundanifrs9.txt'

In [9]:
import os
import fitz  # PyMuPDF
import docx
import re
import json
import requests
import time
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# ---- CONFIGURATION ----
# Absolute, machine-specific Windows paths; edit them before running elsewhere.
# Template text file whose [PLACEHOLDER] tokens customise_resume() fills in.
ey_template_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\EY_sample_resume_template.txt"
# Folder that receives the generated resumes; created here if it is missing.
output_dir = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes"
os.makedirs(output_dir, exist_ok=True)

# spaCy English pipeline, loaded once and used by keyword_coverage().
nlp = spacy.load("en_core_web_sm")

# ---- LM Studio CALL ----
def call_lmstudio(prompt, model="zephyr-7b-beta.Q4_K_M.gguf", retries=3):
    """Send a single-turn chat prompt to the local LM Studio server.

    Args:
        prompt: Text sent as the only user message.
        model: Model identifier placed in the request payload.
        retries: Maximum number of attempts before giving up.

    Returns:
        The content of the first choice's message, or "" when every attempt
        fails (including when ``retries`` is zero or negative).
    """
    url = "http://localhost:1234/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "stream": False,
    }
    for attempt in range(retries):
        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=180)
            # Report HTTP 4xx/5xx as such instead of failing later with a
            # misleading KeyError on the missing 'choices' key.
            response.raise_for_status()
            return response.json()['choices'][0]['message']['content']
        except Exception as e:
            # Deliberately broad: this is a best-effort call and callers only
            # test the result for emptiness.
            if attempt == retries - 1:
                print("LLM call failed:", e)
                return ""
            time.sleep(2)
    # Reached only when the loop never ran (retries <= 0); keep the "" contract
    # instead of implicitly returning None.
    return ""

# ---- READ RESUME ----
def read_resume_text(file_path):
    """Return the plain text of a resume stored as PDF, DOCX or UTF-8 text.

    Args:
        file_path: Path to the resume (``str`` or ``os.PathLike``). The
            extension, compared case-insensitively, selects the reader.

    Returns:
        For ``.pdf``: the text of every page joined by newlines.
        For ``.docx``: the non-blank paragraphs joined by newlines.
        Otherwise: the file content decoded as UTF-8.
    """
    path = os.fspath(file_path)
    lowered = path.lower()
    if lowered.endswith(".pdf"):
        # The context manager closes the document so the file handle is
        # released as soon as the text has been extracted.
        with fitz.open(path) as pdf:
            return "\n".join(page.get_text() for page in pdf)
    if lowered.endswith(".docx"):
        document = docx.Document(path)
        return "\n".join(p.text for p in document.paragraphs if p.text.strip())
    with open(path, "r", encoding="utf-8") as f:
        return f.read()

# ---- METRICS ----
def compute_tfidf_similarity(jd, resume):
    """Return the cosine similarity of the TF-IDF vectors of ``jd`` and ``resume``.

    Both texts are vectorised together so they share one vocabulary.
    """
    vectorizer = TfidfVectorizer()
    matrix = vectorizer.fit_transform([jd, resume])
    jd_row = matrix[0:1]
    resume_row = matrix[1:2]
    scores = cosine_similarity(jd_row, resume_row)
    return scores[0][0]

def keyword_coverage(jd, resume):
    """Measure how many JD keywords occur as whole words in the resume.

    Keywords are the lower-cased alphabetic, non-stop-word tokens that the
    module-level spaCy pipeline ``nlp`` finds in ``jd``.

    Args:
        jd: Job description text.
        resume: Resume text to search.

    Returns:
        ``(ratio, matched)``: the share of JD keywords present in the resume
        (0 when the JD yields no keywords) and the sorted list of those
        keywords.
    """
    jd_keywords = {token.text.lower() for token in nlp(jd) if token.is_alpha and not token.is_stop}
    # Compare against whole resume words. A substring test on the raw text
    # counts "sas" as present in "disaster" and inflates the ratio.
    resume_words = set(re.findall(r"[^\W\d_]+", resume.lower()))
    # Sorted so the printed keyword list is stable from run to run.
    matched = sorted(jd_keywords & resume_words)
    ratio = len(matched) / len(jd_keywords) if jd_keywords else 0
    return ratio, matched

def qualitative_relevance(jd, resume):
    """Return the fraction of distinct JD words that also occur in the resume.

    Words are lower-cased, whitespace-delimited chunks, so punctuation stays
    attached to them. The result is 0 when the JD contains no words.
    """
    jd_vocab = set(jd.lower().split())
    resume_vocab = set(resume.lower().split())
    if not jd_vocab:
        return 0
    shared = jd_vocab & resume_vocab
    return len(shared) / len(jd_vocab)

def section_audit(text):
    """List the expected section keywords that occur anywhere in ``text``.

    The check is a case-insensitive substring search; the result keeps the
    order of the keyword list.
    """
    lowered = text.lower()
    found = []
    for keyword in ("summary", "qualification", "experience", "technical", "skill"):
        if keyword in lowered:
            found.append(keyword)
    return found

# ---- MAIN FUNCTION ----
def _extract_resume_sections(llm_output):
    """Pull the four labelled sections out of the LLM reply.

    Labels are matched case-insensitively. The skills label is accepted with
    either an underscore or a space, because the model does not always echo
    "TECHNICAL_SKILLS" verbatim.

    Returns:
        A dict mapping "SUMMARY", "QUALIFICATIONS", "EXPERIENCE" and
        "TECHNICAL_SKILLS" to the stripped section text ("" when not found).
    """
    flags = re.DOTALL | re.IGNORECASE
    patterns = {
        "SUMMARY": r"SUMMARY:(.*?)QUALIFICATIONS:",
        "QUALIFICATIONS": r"QUALIFICATIONS:(.*?)EXPERIENCE:",
        "EXPERIENCE": r"EXPERIENCE:(.*?)TECHNICAL[_ ]SKILLS:",
        "TECHNICAL_SKILLS": r"TECHNICAL[_ ]SKILLS:(.*)",
    }
    sections = {}
    for label, pattern in patterns.items():
        found = re.search(pattern, llm_output, flags)
        sections[label] = found.group(1).strip() if found else ""
    return sections


def customise_resume(prompt_text, resume_path):
    """Generate a JD from a prompt and tailor a resume to it.

    Steps: ask the LLM for a job description, read the resume, ask the LLM to
    rewrite the resume sections against that JD, fill the EY template, save
    the result under ``output_dir`` and print similarity metrics.

    Args:
        prompt_text: Free-text request describing the job description.
        resume_path: Path of the candidate's resume (PDF, DOCX or text). The
            file name without extension is used as the candidate name.

    Returns:
        The path of the written resume, or None when JD generation, resume
        extraction or customisation fails.
    """
    candidate_name = os.path.splitext(os.path.basename(resume_path))[0]

    print(f"Step 1: Generating JD with only Responsibilities, Requirements, Preferred Skills for prompt → {prompt_text}")
    jd_prompt = f"""Create a structured job description based only on the following three sections:
1. Responsibilities
2. Requirements
3. Preferred Skills

Prompt:
{prompt_text}
"""
    generated_jd = call_lmstudio(jd_prompt)
    if not generated_jd:
        print("JD generation failed.")
        return

    print("\nGenerated JD:\n", generated_jd)

    print(f"\nStep 2: Reading resume → {resume_path}")
    resume_text = read_resume_text(resume_path)
    # An image-only PDF yields no text; sending an empty resume would leave
    # the model nothing to work from except invention.
    if not resume_text.strip():
        print("Resume text is empty; nothing to customise.")
        return

    print("\nStep 3: Customizing resume using resume + generated JD")
    custom_prompt = f"""
You are a resume writer. Given the candidate's resume and the job description, rewrite the following sections to make the resume aligned with the JD. DO NOT INVENT DETAILS. Use only the content in the resume.

Resume:
{resume_text}

Job Description:
{generated_jd}

Provide the output in this format (with labels):
SUMMARY:
...

QUALIFICATIONS:
...

EXPERIENCE:
...

TECHNICAL_SKILLS:
...
"""

    result = call_lmstudio(custom_prompt)
    if not result:
        print("Customization failed.")
        return

    print("\n=== Raw Output from LLM ===\n")
    print(result)

    sections = _extract_resume_sections(result)
    # Make parsing gaps visible instead of silently writing blank sections.
    missing = [label for label, body in sections.items() if not body]
    if missing:
        print("Warning: no content found for section(s):", ", ".join(missing))

    # Read EY template
    with open(ey_template_path, "r", encoding="utf-8") as f:
        template = f.read()

    # Fill template; placeholders are substituted in this fixed order.
    # NOTE(review): [ROLE] receives the whole prompt text, not a role title.
    replacements = [
        ("[CANDIDATE_NAME]", candidate_name),
        ("[ROLE]", prompt_text),
        ("[SUMMARY]", sections["SUMMARY"]),
        ("[QUALIFICATIONS]", sections["QUALIFICATIONS"]),
        ("[EXPERIENCE_PLACEHOLDER]", sections["EXPERIENCE"]),
        ("[TECHNICAL_SKILLS]", sections["TECHNICAL_SKILLS"]),
    ]
    final_resume = template
    for placeholder, value in replacements:
        final_resume = final_resume.replace(placeholder, value)

    # Save customized resume
    # NOTE(review): the file-name suffix is hard-coded for this run.
    output_path = os.path.join(output_dir, f"{candidate_name}_customised_resumekundanIRB1.txt")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_resume)

    print("✅ Resume customized and saved to:", output_path)

    # ---- ACCURACY METRICS ----
    tfidf_score = compute_tfidf_similarity(generated_jd, final_resume)
    keyword_match_ratio, matched_keywords = keyword_coverage(generated_jd, final_resume)
    qualitative_score = qualitative_relevance(generated_jd, final_resume)
    section_flags = section_audit(final_resume)

    print("\nAccuracy Metrics:")
    print(f"TF-IDF Similarity:            {tfidf_score:.2f}")
    print(f"Keyword Coverage Ratio:       {keyword_match_ratio:.2f}")
    print(f"Qualitative Relevance:        {qualitative_score:.2f}")
    print(f"Matched Keywords:             {matched_keywords}")
    print(f"Section Presence Flags:       {section_flags}\n")

    return output_path

# ---- RUN EXAMPLE ----
# Free-text request; customise_resume embeds it in the JD-generation prompt.
prompt = "Create a JD for a retail IRB model validation requirement. Mention that SQL is a must."
# Source resume; its file name without the extension becomes the candidate name.
resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\Kundan_Kumar.pdf"

# Runs the whole pipeline; returns the saved file path, or None on failure.
customise_resume(prompt, resume_path)


Step 1: Generating JD with only Responsibilities, Requirements, Preferred Skills for prompt → Create a JD for a retail IRB model validation requirement. Mention that SQL is a must.

Generated JD:
 
Job Description:

Job Title: Retail Model Validation Specialist - SQL Required

Company Overview:
We are seeking a talented Retail Model Validation Specialist to join our team and help us achieve excellence in our model validation efforts. As a member of the retail industry, you will play an integral role in ensuring that our models adhere to regulatory requirements while providing reliable and accurate insights for our business partners.

Responsibilities:
• Conduct regular model validation reviews to ensure compliance with regulatory requirements and best practices.
• Identify any issues or inconsistencies and work with stakeholders to address them promptly and effectively.
• Collaborate with data scientists, analysts, and developers to ensure that models are designed and developed in acco

'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\Kundan_Kumar_customised_resumekundanIRB1.txt'

In [10]:
import os
import fitz  # PyMuPDF
import docx
import re
import json
import requests
import time
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# ---- CONFIGURATION ----
# Text template whose bracketed placeholders are filled in by customise_resume.
ey_template_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\EY_sample_resume_template.txt"
# Folder that receives the generated resumes; created up front if missing.
output_dir = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes"
os.makedirs(output_dir, exist_ok=True)

# spaCy pipeline loaded once at module level; keyword_coverage uses it to tokenise text.
nlp = spacy.load("en_core_web_sm")

# ---- LM Studio CALL ----
def call_lmstudio(prompt, model="zephyr-7b-beta.Q4_K_M.gguf", retries=3):
    """Send a single-turn chat prompt to the local LM Studio server.

    Args:
        prompt: Text sent as the only user message.
        model: Model identifier forwarded in the request payload.
        retries: Maximum number of attempts before giving up.

    Returns:
        The content of the first choice's message, or "" when every attempt
        fails or ``retries`` is not positive.
    """
    url = "http://localhost:1234/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "stream": False,
    }
    for attempt in range(retries):
        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=180)
            # Report HTTP 4xx/5xx as such instead of failing later with an
            # opaque KeyError on the error body.
            response.raise_for_status()
            return response.json()['choices'][0]['message']['content']
        except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as e:
            # Network/HTTP failure, undecodable JSON, or an unexpected reply shape.
            if attempt == retries - 1:
                print("LLM call failed:", e)
                return ""
            time.sleep(2)
    # Only reachable when retries <= 0: keep the "" failure contract rather
    # than falling through and returning None.
    return ""

# ---- READ RESUME ----
def read_resume_text(file_path):
    """Return the plain text of a resume stored as PDF, DOCX or UTF-8 text.

    Args:
        file_path: Path string of the resume. Files ending in ".pdf" are read
            with PyMuPDF, ".docx" with python-docx; anything else is read as
            UTF-8 text.

    Returns:
        The extracted text. PDF pages are joined with newlines; for DOCX only
        non-blank paragraphs are kept, joined with newlines.
    """
    # Compare the extension case-insensitively so "CV.PDF" or "CV.Docx" are
    # not mistaken for plain text and fed to the UTF-8 reader.
    lowered_path = file_path.lower()
    if lowered_path.endswith(".pdf"):
        # The context manager closes the PDF handle even if extraction fails.
        with fitz.open(file_path) as pdf:
            return "\n".join(page.get_text() for page in pdf)
    if lowered_path.endswith(".docx"):
        document = docx.Document(file_path)
        return "\n".join(p.text for p in document.paragraphs if p.text.strip())
    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()

# ---- METRICS ----
def compute_tfidf_similarity(jd, resume):
    """Return the cosine similarity of the TF-IDF vectors of two texts.

    Args:
        jd: Job-description text.
        resume: Resume text.

    Returns:
        A similarity score; 0.0 when either text is blank.
    """
    # A blank text has nothing to compare, and two blank texts would make
    # TfidfVectorizer raise ValueError ("empty vocabulary").
    if not jd.strip() or not resume.strip():
        return 0.0
    # Fit on both documents together so they share one vocabulary.
    tfidf = TfidfVectorizer().fit_transform([jd, resume])
    return cosine_similarity(tfidf[0:1], tfidf[1:2])[0][0]

def keyword_coverage(jd, resume):
    """Measure how many job-description keywords appear in the resume.

    Keywords are the distinct lower-cased alphabetic, non-stop-word tokens of
    ``jd`` as produced by the module-level spaCy pipeline ``nlp``.

    Args:
        jd: Job-description text.
        resume: Resume text.

    Returns:
        A ``(ratio, matched)`` tuple: the fraction of keywords found in the
        resume (0 when the JD yields no keywords) and the sorted list of the
        keywords that were found.
    """
    # Map each keyword's surface form to its lemma so "models" can still
    # match "model" without resorting to substring tests.
    jd_keywords = {
        token.text.lower(): token.lemma_.lower()
        for token in nlp(jd)
        if token.is_alpha and not token.is_stop
    }

    # Match against whole resume tokens: a substring test on the raw text
    # would count "sas" as present in "disaster" and inflate the ratio.
    resume_forms = set()
    for token in nlp(resume):
        if token.is_alpha:
            resume_forms.add(token.text.lower())
            resume_forms.add(token.lemma_.lower())
    # An empty lemma (lemmatizer unavailable) must never count as a match.
    resume_forms.discard("")

    matched = sorted(
        keyword
        for keyword, lemma in jd_keywords.items()
        if keyword in resume_forms or lemma in resume_forms
    )
    ratio = len(matched) / len(jd_keywords) if jd_keywords else 0
    return ratio, matched

def qualitative_relevance(jd, resume):
    """Return the fraction of distinct job-description words found in the resume.

    Words are compared case-insensitively. Surrounding punctuation and bullet
    characters are ignored, so "SQL," in the JD matches "sql" in the resume.

    Args:
        jd: Job-description text.
        resume: Resume text.

    Returns:
        A value between 0 and 1; 0 when the JD contains no words.
    """
    # A word starts with a word character and may continue with "+" or "#"
    # so that terms such as "c++" and "c#" stay distinct from "c".
    word_pattern = r"\w[\w+#]*"
    jd_words = set(re.findall(word_pattern, jd.lower()))
    resume_words = set(re.findall(word_pattern, resume.lower()))
    overlap = jd_words & resume_words
    return len(overlap) / len(jd_words) if jd_words else 0

def section_audit(text):
    """List the expected section keywords that occur anywhere in ``text``.

    The check is a case-insensitive substring search, and the result keeps
    the fixed keyword order used below.
    """
    lowered = text.lower()
    found = []
    for keyword in ("summary", "qualification", "experience", "technical", "skill"):
        if keyword in lowered:
            found.append(keyword)
    return found

# ---- MAIN FUNCTION ----
def _extract_resume_sections(llm_output):
    """Pull the four labelled sections out of the LLM reply.

    Labels are matched case-insensitively. The skills label is accepted with
    either an underscore or a space, because the model does not always echo
    "TECHNICAL_SKILLS" verbatim.

    Returns:
        A dict mapping "SUMMARY", "QUALIFICATIONS", "EXPERIENCE" and
        "TECHNICAL_SKILLS" to the stripped section text ("" when not found).
    """
    flags = re.DOTALL | re.IGNORECASE
    patterns = {
        "SUMMARY": r"SUMMARY:(.*?)QUALIFICATIONS:",
        "QUALIFICATIONS": r"QUALIFICATIONS:(.*?)EXPERIENCE:",
        "EXPERIENCE": r"EXPERIENCE:(.*?)TECHNICAL[_ ]SKILLS:",
        "TECHNICAL_SKILLS": r"TECHNICAL[_ ]SKILLS:(.*)",
    }
    sections = {}
    for label, pattern in patterns.items():
        found = re.search(pattern, llm_output, flags)
        sections[label] = found.group(1).strip() if found else ""
    return sections


def customise_resume(prompt_text, resume_path):
    """Generate a JD from a prompt and tailor a resume to it.

    Steps: ask the LLM for a job description, read the resume, ask the LLM to
    rewrite the resume sections against that JD, fill the EY template, save
    the result under ``output_dir`` and print similarity metrics.

    Args:
        prompt_text: Free-text request describing the job description.
        resume_path: Path of the candidate's resume (PDF, DOCX or text). The
            file name without extension is used as the candidate name.

    Returns:
        The path of the written resume, or None when JD generation, resume
        extraction or customisation fails.
    """
    candidate_name = os.path.splitext(os.path.basename(resume_path))[0]

    print(f"Step 1: Generating JD with only Responsibilities, Requirements, Preferred Skills for prompt → {prompt_text}")
    jd_prompt = f"""Create a structured job description based only on the following three sections:
1. Responsibilities
2. Requirements
3. Preferred Skills

Prompt:
{prompt_text}
"""
    generated_jd = call_lmstudio(jd_prompt)
    if not generated_jd:
        print("JD generation failed.")
        return

    print("\nGenerated JD:\n", generated_jd)

    print(f"\nStep 2: Reading resume → {resume_path}")
    resume_text = read_resume_text(resume_path)
    # An image-only PDF yields no text; sending an empty resume would leave
    # the model nothing to work from except invention.
    if not resume_text.strip():
        print("Resume text is empty; nothing to customise.")
        return

    print("\nStep 3: Customizing resume using resume + generated JD")
    custom_prompt = f"""
You are a resume writer. Given the candidate's resume and the job description, rewrite the following sections to make the resume aligned with the JD. DO NOT INVENT DETAILS. Use only the content in the resume.

Resume:
{resume_text}

Job Description:
{generated_jd}

Provide the output in this format (with labels):
SUMMARY:
...

QUALIFICATIONS:
...

EXPERIENCE:
...

TECHNICAL_SKILLS:
...
"""

    result = call_lmstudio(custom_prompt)
    if not result:
        print("Customization failed.")
        return

    print("\n=== Raw Output from LLM ===\n")
    print(result)

    sections = _extract_resume_sections(result)
    # Make parsing gaps visible instead of silently writing blank sections.
    missing = [label for label, body in sections.items() if not body]
    if missing:
        print("Warning: no content found for section(s):", ", ".join(missing))

    # Read EY template
    with open(ey_template_path, "r", encoding="utf-8") as f:
        template = f.read()

    # Fill template; placeholders are substituted in this fixed order.
    # NOTE(review): [ROLE] receives the whole prompt text, not a role title.
    replacements = [
        ("[CANDIDATE_NAME]", candidate_name),
        ("[ROLE]", prompt_text),
        ("[SUMMARY]", sections["SUMMARY"]),
        ("[QUALIFICATIONS]", sections["QUALIFICATIONS"]),
        ("[EXPERIENCE_PLACEHOLDER]", sections["EXPERIENCE"]),
        ("[TECHNICAL_SKILLS]", sections["TECHNICAL_SKILLS"]),
    ]
    final_resume = template
    for placeholder, value in replacements:
        final_resume = final_resume.replace(placeholder, value)

    # Save customized resume
    # NOTE(review): the file-name suffix is hard-coded for this run.
    output_path = os.path.join(output_dir, f"{candidate_name}_customised_resumekundanIRB.txt")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_resume)

    print("✅ Resume customized and saved to:", output_path)

    # ---- ACCURACY METRICS ----
    tfidf_score = compute_tfidf_similarity(generated_jd, final_resume)
    keyword_match_ratio, matched_keywords = keyword_coverage(generated_jd, final_resume)
    qualitative_score = qualitative_relevance(generated_jd, final_resume)
    section_flags = section_audit(final_resume)

    print("\nAccuracy Metrics:")
    print(f"TF-IDF Similarity:            {tfidf_score:.2f}")
    print(f"Keyword Coverage Ratio:       {keyword_match_ratio:.2f}")
    print(f"Qualitative Relevance:        {qualitative_score:.2f}")
    print(f"Matched Keywords:             {matched_keywords}")
    print(f"Section Presence Flags:       {section_flags}\n")

    return output_path

# ---- RUN EXAMPLE ----
# Free-text request; customise_resume embeds it in the JD-generation prompt.
prompt = "Create a JD for a retail IRB model validation requirement. Mention that SAS is a must."
# Source resume; its file name without the extension becomes the candidate name.
resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\Kundan_Kumar.pdf"

# Runs the whole pipeline; returns the saved file path, or None on failure.
customise_resume(prompt, resume_path)


Step 1: Generating JD with only Responsibilities, Requirements, Preferred Skills for prompt → Create a JD for a retail IRB model validation requirement. Mention that SAS is a must.

Generated JD:
  Job Title: Model Validation Specialist - Retail IRB

Job Summary:
The Model Validation Specialist will be responsible for reviewing and validating models utilized in the retail industry, ensuring that they are accurate and compliant with regulatory standards. This position requires a strong background in statistical analysis and SAS programming skills. The ideal candidate will have experience working in a high-pressure environment and thrive under tight deadlines.

Responsibilities:
• Review and validate models utilized in the retail industry to ensure compliance with regulatory standards
• Identify any potential issues or errors and work to resolve them in a timely manner
• Collaborate with cross-functional teams to ensure that model output is accurate and reliable
• Maintain detailed recor

'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\Kundan_Kumar_customised_resumekundanIRB.txt'

In [11]:
import os
import fitz  # PyMuPDF
import docx
import re
import json
import requests
import time
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# ---- CONFIGURATION ----
# Text template whose bracketed placeholders are filled in by customise_resume.
ey_template_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\EY_sample_resume_template.txt"
# Folder that receives the generated resumes; created up front if missing.
output_dir = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes"
os.makedirs(output_dir, exist_ok=True)

# spaCy pipeline loaded once at module level; keyword_coverage uses it to tokenise text.
nlp = spacy.load("en_core_web_sm")

# ---- LM Studio CALL ----
def call_lmstudio(prompt, model="zephyr-7b-beta.Q4_K_M.gguf", retries=3):
    """Send a single-turn chat prompt to the local LM Studio server.

    Args:
        prompt: Text sent as the only user message.
        model: Model identifier forwarded in the request payload.
        retries: Maximum number of attempts before giving up.

    Returns:
        The content of the first choice's message, or "" when every attempt
        fails or ``retries`` is not positive.
    """
    url = "http://localhost:1234/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "stream": False,
    }
    for attempt in range(retries):
        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=180)
            # Report HTTP 4xx/5xx as such instead of failing later with an
            # opaque KeyError on the error body.
            response.raise_for_status()
            return response.json()['choices'][0]['message']['content']
        except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as e:
            # Network/HTTP failure, undecodable JSON, or an unexpected reply shape.
            if attempt == retries - 1:
                print("LLM call failed:", e)
                return ""
            time.sleep(2)
    # Only reachable when retries <= 0: keep the "" failure contract rather
    # than falling through and returning None.
    return ""

# ---- READ RESUME ----
def read_resume_text(file_path):
    """Return the plain text of a resume stored as PDF, DOCX or UTF-8 text.

    Args:
        file_path: Path string of the resume. Files ending in ".pdf" are read
            with PyMuPDF, ".docx" with python-docx; anything else is read as
            UTF-8 text.

    Returns:
        The extracted text. PDF pages are joined with newlines; for DOCX only
        non-blank paragraphs are kept, joined with newlines.
    """
    # Compare the extension case-insensitively so "CV.PDF" or "CV.Docx" are
    # not mistaken for plain text and fed to the UTF-8 reader.
    lowered_path = file_path.lower()
    if lowered_path.endswith(".pdf"):
        # The context manager closes the PDF handle even if extraction fails.
        with fitz.open(file_path) as pdf:
            return "\n".join(page.get_text() for page in pdf)
    if lowered_path.endswith(".docx"):
        document = docx.Document(file_path)
        return "\n".join(p.text for p in document.paragraphs if p.text.strip())
    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()

# ---- METRICS ----
def compute_tfidf_similarity(jd, resume):
    """Return the cosine similarity of the TF-IDF vectors of two texts.

    Args:
        jd: Job-description text.
        resume: Resume text.

    Returns:
        A similarity score; 0.0 when either text is blank.
    """
    # A blank text has nothing to compare, and two blank texts would make
    # TfidfVectorizer raise ValueError ("empty vocabulary").
    if not jd.strip() or not resume.strip():
        return 0.0
    # Fit on both documents together so they share one vocabulary.
    tfidf = TfidfVectorizer().fit_transform([jd, resume])
    return cosine_similarity(tfidf[0:1], tfidf[1:2])[0][0]

def keyword_coverage(jd, resume):
    """Measure how many job-description keywords appear in the resume.

    Keywords are the distinct lower-cased alphabetic, non-stop-word tokens of
    ``jd`` as produced by the module-level spaCy pipeline ``nlp``.

    Args:
        jd: Job-description text.
        resume: Resume text.

    Returns:
        A ``(ratio, matched)`` tuple: the fraction of keywords found in the
        resume (0 when the JD yields no keywords) and the sorted list of the
        keywords that were found.
    """
    # Map each keyword's surface form to its lemma so "models" can still
    # match "model" without resorting to substring tests.
    jd_keywords = {
        token.text.lower(): token.lemma_.lower()
        for token in nlp(jd)
        if token.is_alpha and not token.is_stop
    }

    # Match against whole resume tokens: a substring test on the raw text
    # would count "sas" as present in "disaster" and inflate the ratio.
    resume_forms = set()
    for token in nlp(resume):
        if token.is_alpha:
            resume_forms.add(token.text.lower())
            resume_forms.add(token.lemma_.lower())
    # An empty lemma (lemmatizer unavailable) must never count as a match.
    resume_forms.discard("")

    matched = sorted(
        keyword
        for keyword, lemma in jd_keywords.items()
        if keyword in resume_forms or lemma in resume_forms
    )
    ratio = len(matched) / len(jd_keywords) if jd_keywords else 0
    return ratio, matched

def qualitative_relevance(jd, resume):
    """Return the fraction of distinct job-description words found in the resume.

    Words are compared case-insensitively. Surrounding punctuation and bullet
    characters are ignored, so "SQL," in the JD matches "sql" in the resume.

    Args:
        jd: Job-description text.
        resume: Resume text.

    Returns:
        A value between 0 and 1; 0 when the JD contains no words.
    """
    # A word starts with a word character and may continue with "+" or "#"
    # so that terms such as "c++" and "c#" stay distinct from "c".
    word_pattern = r"\w[\w+#]*"
    jd_words = set(re.findall(word_pattern, jd.lower()))
    resume_words = set(re.findall(word_pattern, resume.lower()))
    overlap = jd_words & resume_words
    return len(overlap) / len(jd_words) if jd_words else 0

def section_audit(text):
    """List the expected section keywords that occur anywhere in ``text``.

    The check is a case-insensitive substring search, and the result keeps
    the fixed keyword order used below.
    """
    lowered = text.lower()
    found = []
    for keyword in ("summary", "qualification", "experience", "technical", "skill"):
        if keyword in lowered:
            found.append(keyword)
    return found

# ---- MAIN FUNCTION ----
def _extract_resume_sections(llm_output):
    """Pull the four labelled sections out of the LLM reply.

    Labels are matched case-insensitively. The skills label is accepted with
    either an underscore or a space, because the model does not always echo
    "TECHNICAL_SKILLS" verbatim.

    Returns:
        A dict mapping "SUMMARY", "QUALIFICATIONS", "EXPERIENCE" and
        "TECHNICAL_SKILLS" to the stripped section text ("" when not found).
    """
    flags = re.DOTALL | re.IGNORECASE
    patterns = {
        "SUMMARY": r"SUMMARY:(.*?)QUALIFICATIONS:",
        "QUALIFICATIONS": r"QUALIFICATIONS:(.*?)EXPERIENCE:",
        "EXPERIENCE": r"EXPERIENCE:(.*?)TECHNICAL[_ ]SKILLS:",
        "TECHNICAL_SKILLS": r"TECHNICAL[_ ]SKILLS:(.*)",
    }
    sections = {}
    for label, pattern in patterns.items():
        found = re.search(pattern, llm_output, flags)
        sections[label] = found.group(1).strip() if found else ""
    return sections


def customise_resume(prompt_text, resume_path):
    """Generate a JD from a prompt and tailor a resume to it.

    Steps: ask the LLM for a job description, read the resume, ask the LLM to
    rewrite the resume sections against that JD, fill the EY template, save
    the result under ``output_dir`` and print similarity metrics.

    Args:
        prompt_text: Free-text request describing the job description.
        resume_path: Path of the candidate's resume (PDF, DOCX or text). The
            file name without extension is used as the candidate name.

    Returns:
        The path of the written resume, or None when JD generation, resume
        extraction or customisation fails.
    """
    candidate_name = os.path.splitext(os.path.basename(resume_path))[0]

    print(f"Step 1: Generating JD with only Responsibilities, Requirements, Preferred Skills for prompt → {prompt_text}")
    jd_prompt = f"""Create a structured job description based only on the following three sections:
1. Responsibilities
2. Requirements
3. Preferred Skills

Prompt:
{prompt_text}
"""
    generated_jd = call_lmstudio(jd_prompt)
    if not generated_jd:
        print("JD generation failed.")
        return

    print("\nGenerated JD:\n", generated_jd)

    print(f"\nStep 2: Reading resume → {resume_path}")
    resume_text = read_resume_text(resume_path)
    # An image-only PDF yields no text; sending an empty resume would leave
    # the model nothing to work from except invention.
    if not resume_text.strip():
        print("Resume text is empty; nothing to customise.")
        return

    print("\nStep 3: Customizing resume using resume + generated JD")
    custom_prompt = f"""
You are a resume writer. Given the candidate's resume and the job description, rewrite the following sections to make the resume aligned with the JD. DO NOT INVENT DETAILS. Use only the content in the resume.

Resume:
{resume_text}

Job Description:
{generated_jd}

Provide the output in this format (with labels):
SUMMARY:
...

QUALIFICATIONS:
...

EXPERIENCE:
...

TECHNICAL_SKILLS:
...
"""

    result = call_lmstudio(custom_prompt)
    if not result:
        print("Customization failed.")
        return

    print("\n=== Raw Output from LLM ===\n")
    print(result)

    sections = _extract_resume_sections(result)
    # Make parsing gaps visible instead of silently writing blank sections.
    missing = [label for label, body in sections.items() if not body]
    if missing:
        print("Warning: no content found for section(s):", ", ".join(missing))

    # Read EY template
    with open(ey_template_path, "r", encoding="utf-8") as f:
        template = f.read()

    # Fill template; placeholders are substituted in this fixed order.
    # NOTE(review): [ROLE] receives the whole prompt text, not a role title.
    replacements = [
        ("[CANDIDATE_NAME]", candidate_name),
        ("[ROLE]", prompt_text),
        ("[SUMMARY]", sections["SUMMARY"]),
        ("[QUALIFICATIONS]", sections["QUALIFICATIONS"]),
        ("[EXPERIENCE_PLACEHOLDER]", sections["EXPERIENCE"]),
        ("[TECHNICAL_SKILLS]", sections["TECHNICAL_SKILLS"]),
    ]
    final_resume = template
    for placeholder, value in replacements:
        final_resume = final_resume.replace(placeholder, value)

    # Save customized resume
    # NOTE(review): the file-name suffix is hard-coded for this run.
    output_path = os.path.join(output_dir, f"{candidate_name}_customised_resumereemairb.txt")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_resume)

    print("✅ Resume customized and saved to:", output_path)

    # ---- ACCURACY METRICS ----
    tfidf_score = compute_tfidf_similarity(generated_jd, final_resume)
    keyword_match_ratio, matched_keywords = keyword_coverage(generated_jd, final_resume)
    qualitative_score = qualitative_relevance(generated_jd, final_resume)
    section_flags = section_audit(final_resume)

    print("\nAccuracy Metrics:")
    print(f"TF-IDF Similarity:            {tfidf_score:.2f}")
    print(f"Keyword Coverage Ratio:       {keyword_match_ratio:.2f}")
    print(f"Qualitative Relevance:        {qualitative_score:.2f}")
    print(f"Matched Keywords:             {matched_keywords}")
    print(f"Section Presence Flags:       {section_flags}\n")

    return output_path

# ---- RUN EXAMPLE ----
# Free-text request; customise_resume embeds it in the JD-generation prompt.
prompt =  "Create a JD for a wholesale IRB model development requirement. Mention that SQL is a must."
# Source resume; its file name without the extension becomes the candidate name.
resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\Resume - Reema Panday.pdf"

# Runs the whole pipeline; returns the saved file path, or None on failure.
customise_resume(prompt, resume_path)


Step 1: Generating JD with only Responsibilities, Requirements, Preferred Skills for prompt → Create a JD for a wholesale IRB model development requirement. Mention that SQL is a must.

Generated JD:
 
Job Title: Wholesale IRB Model Development Specialist

Job Summary:
We are seeking an experienced and self-motored Wholesale IRB Model Development Specialist to join our team. The candidate will be responsible for developing and implementing wholesale IRB model solutions, enhancing existing models as needed, and collaborating with cross-functional teams to ensure compliance with regulatory requirements. To succeed in this role, the candidate must have strong SQL skills, as well as a solid understanding of clinical research processes and best practices.

Responsibilities:
• Develop and implement wholesale IRB model solutions for clinical trials
• Enhance existing models as needed to ensure efficiency and compliance
• Collaborate with cross-functional teams, including operations, medical, 

In [12]:
import os
import fitz  # PyMuPDF
import docx
import re
import json
import requests
import time
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# ---- CONFIGURATION ----
# Text template whose bracketed placeholders are filled in by customise_resume.
ey_template_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\EY_sample_resume_template.txt"
# Folder that receives the generated resumes; created up front if missing.
output_dir = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes"
os.makedirs(output_dir, exist_ok=True)

# spaCy pipeline loaded once at module level; keyword_coverage uses it to tokenise text.
nlp = spacy.load("en_core_web_sm")

# ---- LM Studio CALL ----
def call_lmstudio(prompt, model="zephyr-7b-beta.Q4_K_M.gguf", retries=3):
    """Send a single-turn chat prompt to the local LM Studio server.

    Args:
        prompt: Text sent as the only user message.
        model: Model identifier forwarded in the request payload.
        retries: Maximum number of attempts before giving up.

    Returns:
        The content of the first choice's message, or "" when every attempt
        fails or ``retries`` is not positive.
    """
    url = "http://localhost:1234/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "stream": False,
    }
    for attempt in range(retries):
        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=180)
            # Report HTTP 4xx/5xx as such instead of failing later with an
            # opaque KeyError on the error body.
            response.raise_for_status()
            return response.json()['choices'][0]['message']['content']
        except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as e:
            # Network/HTTP failure, undecodable JSON, or an unexpected reply shape.
            if attempt == retries - 1:
                print("LLM call failed:", e)
                return ""
            time.sleep(2)
    # Only reachable when retries <= 0: keep the "" failure contract rather
    # than falling through and returning None.
    return ""

# ---- READ RESUME ----
def read_resume_text(file_path):
    """Return the plain text of a resume stored as PDF, DOCX or UTF-8 text.

    Args:
        file_path: Path string of the resume. Files ending in ".pdf" are read
            with PyMuPDF, ".docx" with python-docx; anything else is read as
            UTF-8 text.

    Returns:
        The extracted text. PDF pages are joined with newlines; for DOCX only
        non-blank paragraphs are kept, joined with newlines.
    """
    # Compare the extension case-insensitively so "CV.PDF" or "CV.Docx" are
    # not mistaken for plain text and fed to the UTF-8 reader.
    lowered_path = file_path.lower()
    if lowered_path.endswith(".pdf"):
        # The context manager closes the PDF handle even if extraction fails.
        with fitz.open(file_path) as pdf:
            return "\n".join(page.get_text() for page in pdf)
    if lowered_path.endswith(".docx"):
        document = docx.Document(file_path)
        return "\n".join(p.text for p in document.paragraphs if p.text.strip())
    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()

# ---- METRICS ----
def compute_tfidf_similarity(jd, resume):
    """Return the cosine similarity of the TF-IDF vectors of two texts.

    Args:
        jd: Job-description text.
        resume: Resume text.

    Returns:
        A similarity score; 0.0 when either text is blank.
    """
    # A blank text has nothing to compare, and two blank texts would make
    # TfidfVectorizer raise ValueError ("empty vocabulary").
    if not jd.strip() or not resume.strip():
        return 0.0
    # Fit on both documents together so they share one vocabulary.
    tfidf = TfidfVectorizer().fit_transform([jd, resume])
    return cosine_similarity(tfidf[0:1], tfidf[1:2])[0][0]

def keyword_coverage(jd, resume):
    """Measure how many job-description keywords appear in the resume.

    Keywords are the distinct lower-cased alphabetic, non-stop-word tokens of
    ``jd`` as produced by the module-level spaCy pipeline ``nlp``.

    Args:
        jd: Job-description text.
        resume: Resume text.

    Returns:
        A ``(ratio, matched)`` tuple: the fraction of keywords found in the
        resume (0 when the JD yields no keywords) and the sorted list of the
        keywords that were found.
    """
    # Map each keyword's surface form to its lemma so "models" can still
    # match "model" without resorting to substring tests.
    jd_keywords = {
        token.text.lower(): token.lemma_.lower()
        for token in nlp(jd)
        if token.is_alpha and not token.is_stop
    }

    # Match against whole resume tokens: a substring test on the raw text
    # would count "sas" as present in "disaster" and inflate the ratio.
    resume_forms = set()
    for token in nlp(resume):
        if token.is_alpha:
            resume_forms.add(token.text.lower())
            resume_forms.add(token.lemma_.lower())
    # An empty lemma (lemmatizer unavailable) must never count as a match.
    resume_forms.discard("")

    matched = sorted(
        keyword
        for keyword, lemma in jd_keywords.items()
        if keyword in resume_forms or lemma in resume_forms
    )
    ratio = len(matched) / len(jd_keywords) if jd_keywords else 0
    return ratio, matched

def qualitative_relevance(jd, resume):
    """Return the fraction of distinct job-description words found in the resume.

    Words are compared case-insensitively. Surrounding punctuation and bullet
    characters are ignored, so "SQL," in the JD matches "sql" in the resume.

    Args:
        jd: Job-description text.
        resume: Resume text.

    Returns:
        A value between 0 and 1; 0 when the JD contains no words.
    """
    # A word starts with a word character and may continue with "+" or "#"
    # so that terms such as "c++" and "c#" stay distinct from "c".
    word_pattern = r"\w[\w+#]*"
    jd_words = set(re.findall(word_pattern, jd.lower()))
    resume_words = set(re.findall(word_pattern, resume.lower()))
    overlap = jd_words & resume_words
    return len(overlap) / len(jd_words) if jd_words else 0

def section_audit(text):
    """List the expected section keywords that occur anywhere in ``text``.

    The check is a case-insensitive substring search, and the result keeps
    the fixed keyword order used below.
    """
    lowered = text.lower()
    found = []
    for keyword in ("summary", "qualification", "experience", "technical", "skill"):
        if keyword in lowered:
            found.append(keyword)
    return found

# ---- MAIN FUNCTION ----
def _extract_resume_sections(llm_output):
    """Pull the four labelled sections out of the LLM reply.

    Labels are matched case-insensitively. The skills label is accepted with
    either an underscore or a space, because the model does not always echo
    "TECHNICAL_SKILLS" verbatim.

    Returns:
        A dict mapping "SUMMARY", "QUALIFICATIONS", "EXPERIENCE" and
        "TECHNICAL_SKILLS" to the stripped section text ("" when not found).
    """
    flags = re.DOTALL | re.IGNORECASE
    patterns = {
        "SUMMARY": r"SUMMARY:(.*?)QUALIFICATIONS:",
        "QUALIFICATIONS": r"QUALIFICATIONS:(.*?)EXPERIENCE:",
        "EXPERIENCE": r"EXPERIENCE:(.*?)TECHNICAL[_ ]SKILLS:",
        "TECHNICAL_SKILLS": r"TECHNICAL[_ ]SKILLS:(.*)",
    }
    sections = {}
    for label, pattern in patterns.items():
        found = re.search(pattern, llm_output, flags)
        sections[label] = found.group(1).strip() if found else ""
    return sections


def customise_resume(prompt_text, resume_path):
    """Generate a JD from a prompt and tailor a resume to it.

    Steps: ask the LLM for a job description, read the resume, ask the LLM to
    rewrite the resume sections against that JD, fill the EY template, save
    the result under ``output_dir`` and print similarity metrics.

    Args:
        prompt_text: Free-text request describing the job description.
        resume_path: Path of the candidate's resume (PDF, DOCX or text). The
            file name without extension is used as the candidate name.

    Returns:
        The path of the written resume, or None when JD generation, resume
        extraction or customisation fails.
    """
    candidate_name = os.path.splitext(os.path.basename(resume_path))[0]

    print(f"Step 1: Generating JD with only Responsibilities, Requirements, Preferred Skills for prompt → {prompt_text}")
    jd_prompt = f"""Create a structured job description based only on the following three sections:
1. Responsibilities
2. Requirements
3. Preferred Skills

Prompt:
{prompt_text}
"""
    generated_jd = call_lmstudio(jd_prompt)
    if not generated_jd:
        print("JD generation failed.")
        return

    print("\nGenerated JD:\n", generated_jd)

    print(f"\nStep 2: Reading resume → {resume_path}")
    resume_text = read_resume_text(resume_path)
    # An image-only PDF yields no text; sending an empty resume would leave
    # the model nothing to work from except invention.
    if not resume_text.strip():
        print("Resume text is empty; nothing to customise.")
        return

    print("\nStep 3: Customizing resume using resume + generated JD")
    custom_prompt = f"""
You are a resume writer. Given the candidate's resume and the job description, rewrite the following sections to make the resume aligned with the JD. DO NOT INVENT DETAILS. Use only the content in the resume.

Resume:
{resume_text}

Job Description:
{generated_jd}

Provide the output in this format (with labels):
SUMMARY:
...

QUALIFICATIONS:
...

EXPERIENCE:
...

TECHNICAL_SKILLS:
...
"""

    result = call_lmstudio(custom_prompt)
    if not result:
        print("Customization failed.")
        return

    print("\n=== Raw Output from LLM ===\n")
    print(result)

    sections = _extract_resume_sections(result)
    # Make parsing gaps visible instead of silently writing blank sections.
    missing = [label for label, body in sections.items() if not body]
    if missing:
        print("Warning: no content found for section(s):", ", ".join(missing))

    # Read EY template
    with open(ey_template_path, "r", encoding="utf-8") as f:
        template = f.read()

    # Fill template; placeholders are substituted in this fixed order.
    # NOTE(review): [ROLE] receives the whole prompt text, not a role title.
    replacements = [
        ("[CANDIDATE_NAME]", candidate_name),
        ("[ROLE]", prompt_text),
        ("[SUMMARY]", sections["SUMMARY"]),
        ("[QUALIFICATIONS]", sections["QUALIFICATIONS"]),
        ("[EXPERIENCE_PLACEHOLDER]", sections["EXPERIENCE"]),
        ("[TECHNICAL_SKILLS]", sections["TECHNICAL_SKILLS"]),
    ]
    final_resume = template
    for placeholder, value in replacements:
        final_resume = final_resume.replace(placeholder, value)

    # Save customized resume
    # NOTE(review): the file-name suffix is hard-coded for this run.
    output_path = os.path.join(output_dir, f"{candidate_name}_customised_resumereemairb1.txt")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_resume)

    print("✅ Resume customized and saved to:", output_path)

    # ---- ACCURACY METRICS ----
    tfidf_score = compute_tfidf_similarity(generated_jd, final_resume)
    keyword_match_ratio, matched_keywords = keyword_coverage(generated_jd, final_resume)
    qualitative_score = qualitative_relevance(generated_jd, final_resume)
    section_flags = section_audit(final_resume)

    print("\nAccuracy Metrics:")
    print(f"TF-IDF Similarity:            {tfidf_score:.2f}")
    print(f"Keyword Coverage Ratio:       {keyword_match_ratio:.2f}")
    print(f"Qualitative Relevance:        {qualitative_score:.2f}")
    print(f"Matched Keywords:             {matched_keywords}")
    print(f"Section Presence Flags:       {section_flags}\n")

    return output_path

# ---- RUN EXAMPLE ----
# Free-text request; customise_resume embeds it in the JD-generation prompt.
prompt =  "Create a JD for a wholesale IRB model development requirement. Mention that SAS is a must."
# Source resume; its file name without the extension becomes the candidate name.
resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\Resume - Reema Panday.pdf"

# Runs the whole pipeline; returns the saved file path, or None on failure.
customise_resume(prompt, resume_path)


Step 1: Generating JD with only Responsibilities, Requirements, Preferred Skills for prompt → Create a JD for a wholesale IRB model development requirement. Mention that SAS is a must.

Generated JD:
 
Job Title: Wholesale IRB Model Development Specialist

1. Responsibilities:
- Develop and implement new wholesale IRB model to streamline the review process of research proposals for external partners
- Collaborate with cross-functional teams, including researchers, physicians, and administrators, to ensure compliance with regulatory requirements and institutional policies
- Conduct thorough evaluation of proposed studies to assess potential risks and benefits
- Prepare detailed reports on IRB decisions and outcomes
- Facilitate communication with external partners throughout the review process

2. Requirements:
- Bachelor's or master's degree in life sciences, healthcare, or a related field
- Strong analytical skills with attention to detail
- Proficient in SAS programming language for 

'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\Resume - Reema Panday_customised_resumereemairb1.txt'