In [1]:
import os
import re
import docx
import fitz  # PyMuPDF
import requests
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# --------------- CONFIG -----------------
# Folder scanned for raw resumes; the pipeline picks up .pdf, .docx and .txt files from it.
RAW_FOLDER = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes"
# Folder scanned for job descriptions; the pipeline reads .txt and .docx files from it.
JD_FOLDER = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\JDs"
# Destination for the generated "<resume name>_formatted.txt" files.
OUTPUT_FOLDER = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\LLM_Customised_Resumes"
# Plain-text sample resume whose structure and tone the LLM is asked to follow.
SAMPLE_TEXT_PATH = r"C:\Users\HN338QQ\OneDrive - EY\Documents\Kartik\EY_ClientFacing_Resume.txt"
# Local HTTP endpoint that call_llm posts chat-completion requests to.
# NOTE(review): all paths above are absolute and machine-specific; edit them before running elsewhere.
LLM_URL = "http://127.0.0.1:1234/v1/chat/completions"
# Runs at cell execution time: create the output folder if it does not exist yet.
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# --------------- HELPERS -----------------
def read_txt(path):
    """Read a UTF-8 text file and return its content without surrounding whitespace.

    Args:
        path: Location of the text file.

    Returns:
        The decoded file content with leading/trailing whitespace stripped.
    """
    with open(path, mode="r", encoding="utf-8") as source:
        raw_content = source.read()
    return raw_content.strip()

def read_pdf(path):
    """Extract the text of every page of a PDF, joined by newlines.

    The document is opened in a ``with`` block so the underlying file handle
    is released as soon as extraction finishes; the previous version never
    closed it, which leaks one open handle per resume in a batch run.

    Args:
        path: Location of the PDF file.

    Returns:
        The text of all pages, in page order, separated by "\\n".
    """
    with fitz.open(path) as doc:
        return "\n".join(page.get_text() for page in doc)

def read_docx(path):
    """Return the non-blank paragraphs of a .docx file joined by newlines.

    Args:
        path: Location of the Word document.

    Returns:
        Paragraph texts (unmodified) separated by "\\n"; paragraphs that are
        empty or whitespace-only are dropped.
    """
    # NOTE(review): only `Document.paragraphs` is read here, so text held in
    # tables or headers is presumably not included; confirm if that matters.
    kept_lines = []
    for paragraph in docx.Document(path).paragraphs:
        paragraph_text = paragraph.text
        if paragraph_text.strip():
            kept_lines.append(paragraph_text)
    return "\n".join(kept_lines)

def read_any_resume(path):
    """Read a resume file, choosing the reader from the file extension.

    Args:
        path: Location of the resume; the extension is compared
            case-insensitively.

    Returns:
        The extracted text for .pdf, .docx and .txt files, or an empty string
        for any other extension.
    """
    suffix = os.path.splitext(path)[1].lower()

    # Unsupported formats yield an empty string so callers can skip them.
    if suffix not in (".pdf", ".docx", ".txt"):
        return ""

    if suffix == ".pdf":
        return read_pdf(path)
    if suffix == ".docx":
        return read_docx(path)
    return read_txt(path)

def scrub_pii(text):
    """Remove e-mail addresses, URLs and 10-digit phone numbers from ``text``.

    Fixes over the previous patterns:
      * URLs must now start with ``http://`` / ``https://`` or ``www.`` (with a
        literal dot), so ordinary words such as "httpd" or "wwwroot" are no
        longer deleted. Matching is case-insensitive.
      * E-mails are removed before phone numbers, so an address whose local
        part is a phone number (``9876543210@x.com``) is removed whole instead
        of leaving ``@x.com`` behind.
      * A ``+<country code>`` prefix directly in front of a 10-digit number is
        removed together with the number.

    Args:
        text: Raw resume text.

    Returns:
        The text with every match replaced by an empty string; surrounding
        whitespace is left untouched.
    """
    # E-mail addresses: any run of non-space characters around an "@".
    text = re.sub(r'\S+@\S+', '', text)
    # URLs: explicit scheme, or a "www." host at a word boundary.
    text = re.sub(r'https?://\S+|\bwww\.\S+', '', text, flags=re.IGNORECASE)
    # Phones: optional "+CC" prefix (with optional space/hyphen) then exactly 10 digits.
    text = re.sub(r'\+\d{1,3}[\s-]?\d{10}\b|\b\d{10}\b', '', text)
    return text

def call_llm(prompt, timeout=(10, 600)):
    """Send ``prompt`` to the local chat-completions endpoint and return the reply.

    Args:
        prompt: Full prompt text, sent as a single user message.
        timeout: ``(connect, read)`` timeout in seconds passed to
            ``requests.post``. Without a timeout a stalled server would block
            the whole batch indefinitely; the read limit is generous because
            local generation can be slow.

    Returns:
        The stripped message content of the first choice, or an empty string
        if the request fails, the server answers with an HTTP error, or the
        response does not have the expected shape.
    """
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": "mistral",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.4,
        "max_tokens": 1800,
        "stream": False
    }
    try:
        res = requests.post(LLM_URL, headers=headers, json=payload, timeout=timeout)
        # Surface HTTP 4xx/5xx as a clear error instead of a KeyError on "choices".
        res.raise_for_status()
        content = res.json()["choices"][0]["message"]["content"]
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as e:
        print(f"[ERROR] LLM Call Failed: {e}")
        return ""
    # The content field may be null in the JSON; treat that as "no output".
    return (content or "").strip()

def compute_similarity(a, b):
    """Return the TF-IDF cosine similarity of two texts as a percentage.

    Args:
        a: First text.
        b: Second text.

    Returns:
        A float rounded to two decimals (cosine similarity multiplied by 100).
        Returns 0.0 when either text is empty/whitespace-only, or when neither
        text contains a token the vectorizer can use; the previous version
        raised ``ValueError`` (empty vocabulary) in that last case.
    """
    # A blank document has an all-zero vector, so its similarity is 0 anyway.
    if not a or not a.strip() or not b or not b.strip():
        return 0.0
    try:
        vecs = TfidfVectorizer().fit_transform([a, b])
    except ValueError:
        # Raised when no usable tokens are found in either text.
        return 0.0
    return round(float(cosine_similarity(vecs[0:1], vecs[1:2])[0][0]) * 100, 2)

def match_best_jd(resume_text, jd_texts):
    """Pick the job description most similar to a resume.

    Args:
        resume_text: Text of the resume.
        jd_texts: Iterable of ``(name, jd_text)`` pairs.

    Returns:
        The ``(name, jd_text)`` pair with the highest ``compute_similarity``
        score; on ties the earliest pair wins. When ``jd_texts`` is empty the
        result is ``("", "")`` so callers can always unpack two values (the
        previous fallback was a bare ``""``, which broke tuple unpacking).
    """
    best_match = ("", "")
    best_score = -1
    for name, jd in jd_texts:
        sim = compute_similarity(resume_text, jd)
        # Strict ">" keeps the first pair when several share the top score.
        if sim > best_score:
            best_score = sim
            best_match = (name, jd)
    return best_match

def build_prompt(resume_text, jd_text, sample_text):
    """Assemble the LLM prompt from the sample, the job description and the resume.

    Each input is stripped of surrounding whitespace and placed under its own
    bracketed heading, in the order sample, job description, raw resume.

    Args:
        resume_text: Raw resume text.
        jd_text: Job description text.
        sample_text: Sample resume that defines the target structure and tone.

    Returns:
        The prompt string; it starts and ends with a newline.
    """
    prompt_lines = [
        "",  # leading newline
        "You are a resume writing expert.",
        "",
        "Format this resume to align with the job description and exactly follow the structure and tone of the sample. Be concise and role-aligned.",
        "",
        "[SAMPLE FORMAT]",
        sample_text.strip(),
        "",
        "[JOB DESCRIPTION]",
        jd_text.strip(),
        "",
        "[RAW RESUME]",
        resume_text.strip(),
        "",
        "[FORMATTED OUTPUT]",
        "",  # trailing newline
    ]
    return "\n".join(prompt_lines)

# --------------- MAIN PIPELINE -----------------
def run_customization_pipeline():
    """Reformat every resume in RAW_FOLDER against its best-matching job description.

    Steps:
      1. Load the sample resume and all .txt/.docx job descriptions.
      2. For each .pdf/.docx/.txt resume: extract text, scrub PII, pick the
         most similar JD, ask the LLM for a reformatted resume and write it to
         OUTPUT_FOLDER as "<resume name>_formatted.txt".
      3. Print the TF-IDF similarity between each output and the sample,
         followed by a summary.

    Changes from the previous version:
      * JD extensions are matched case-insensitively, like resume extensions.
      * Folder listings are sorted so processing order and JD tie-breaking
        are deterministic.
      * Word lock files ("~$name.docx") are ignored.
      * A file that cannot be read is reported and skipped instead of
        aborting the whole batch.

    Returns:
        None. Progress and results are printed.
    """
    print("[INFO] Starting resume customization for batch of resumes...")

    sample_text = read_txt(SAMPLE_TEXT_PATH)

    jd_texts = []
    for file in sorted(os.listdir(JD_FOLDER)):
        # "~$..." files are temporary lock files created while a document is open in Word.
        if file.startswith("~$"):
            continue
        path = os.path.join(JD_FOLDER, file)
        lowered = file.lower()
        try:
            if lowered.endswith(".txt"):
                jd_texts.append((file, read_txt(path)))
            elif lowered.endswith(".docx"):
                jd_texts.append((file, read_docx(path)))
        except Exception as e:
            # One unreadable JD should not stop the run.
            print(f"[ERROR] Could not read JD {file}: {e}")

    if not jd_texts:
        print("[ERROR] No job descriptions found.")
        return

    results = []

    for resume_file in sorted(os.listdir(RAW_FOLDER)):
        if resume_file.startswith("~$"):
            continue
        path = os.path.join(RAW_FOLDER, resume_file)
        if not path.lower().endswith((".pdf", ".docx", ".txt")):
            continue

        try:
            resume_text = read_any_resume(path)
        except Exception as e:
            # Corrupt or locked files are reported and skipped.
            print(f"[ERROR] Could not read resume {resume_file}: {e}")
            continue
        resume_text = scrub_pii(resume_text)

        if not resume_text.strip():
            print(f"[SKIPPED] Empty resume: {resume_file}")
            continue

        best_jd_name, best_jd_text = match_best_jd(resume_text, jd_texts)
        if not best_jd_text.strip():
            print(f"[SKIPPED] No matching JD found: {resume_file}")
            continue

        prompt = build_prompt(resume_text, best_jd_text, sample_text)
        llm_output = call_llm(prompt)

        if not llm_output:
            print(f"[ERROR] LLM returned nothing: {resume_file}")
            continue

        output_path = os.path.join(OUTPUT_FOLDER, f"{os.path.splitext(resume_file)[0]}_formatted.txt")
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(llm_output)

        # Score measures resemblance to the sample, not to the job description.
        score = compute_similarity(sample_text, llm_output)
        print(f"[DONE] {resume_file} | Match with Sample: {score}%")
        results.append((resume_file, score))

    print("\n[SUMMARY]")
    for name, score in results:
        print(f"{name}: {score}%")

# ------------------ TRIGGER ---------------------
if __name__ == "__main__":
    # Entry point: runs the whole batch when this cell/script is executed directly
    # (the guard keeps it from running if the code is imported as a module).
    run_customization_pipeline()


[INFO] Starting resume customization for batch of resumes...
[DONE] Anuska Das.pdf | Match with Sample: 51.75%
[DONE] CV_Aarsh.pdf | Match with Sample: 82.83%
[DONE] CV_Arnab Roy_Model_Validation.pdf | Match with Sample: 45.24%
[DONE] G N Sindhur_resume_2025.pdf | Match with Sample: 60.44%
[DONE] Isha Porwal Resume.pdf | Match with Sample: 80.58%
[DONE] KartikMohan.pdf | Match with Sample: 77.63%
[DONE] Kshitij_Sahdev_CV (002).pdf | Match with Sample: 76.28%
[DONE] Kundan_Kumar.pdf | Match with Sample: 73.53%
[DONE] Munna Choudhary.pdf | Match with Sample: 78.97%
[DONE] Narendra_Sahu_CreditRisk.pdf | Match with Sample: 52.97%
[DONE] Naukri_AbhimanyuSingh[3y_7m].pdf | Match with Sample: 54.71%
[DONE] Naukri_AnandKumar[10y_0m].pdf | Match with Sample: 71.89%
[DONE] Naukri_DEBSUBHRAGHOSH[7y_0m].pdf | Match with Sample: 52.08%
[DONE] Naukri_GirishKhandelwal[11y_0m].docx | Match with Sample: 60.79%
[DONE] Naukri_SAHILPATIL[2y_11m].pdf | Match with Sample: 71.93%
[DONE] Naukri_SusmitaMisra[3

In [None]:
# import os
# import re
# import fitz
# import docx
# import openpyxl
# import requests
# from sklearn.feature_extraction.text import TfidfVectorizer
# from sklearn.metrics.pairwise import cosine_similarity

# # --------- PATH CONFIGURATION ----------
# RAW_RESUME_DIR = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes"
# MANUAL_RESUME_DIR = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\ManualResumes"
# OUTPUT_DIR = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\GeneratedResumes"
# EY_SAMPLE_PATH = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\EY_sample_format1.txt"
# PROMPT_EXCEL = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\JD_Sentence_Variants.xlsx"
# LLM_URL = "http://127.0.0.1:1234/v1/chat/completions"

# os.makedirs(OUTPUT_DIR, exist_ok=True)

# # --------- UTILITY FUNCTIONS ----------
# def read_txt(path):
#     with open(path, "r", encoding="utf-8") as f:
#         return f.read().strip()

# def read_pdf(path):
#     return "\n".join([page.get_text() for page in fitz.open(path)])

# def read_docx(path):
#     return "\n".join([p.text for p in docx.Document(path).paragraphs if p.text.strip()])

# def read_resume(path):
#     ext = os.path.splitext(path)[1].lower()
#     if ext == ".pdf":
#         return read_pdf(path)
#     elif ext == ".docx":
#         return read_docx(path)
#     elif ext == ".txt":
#         return read_txt(path)
#     return ""

# def scrub_pii(text):
#     text = re.sub(r'\b\d{10}\b', '', text)
#     text = re.sub(r'\S+@\S+', '', text)
#     text = re.sub(r'http\S+|www.\S+', '', text)
#     return text

# def compute_similarity(a, b):
#     vecs = TfidfVectorizer().fit_transform([a, b])
#     return round(float(cosine_similarity(vecs[0:1], vecs[1:2])[0][0]) * 100, 2)

# def call_llm(prompt):
#     headers = {"Content-Type": "application/json"}
#     payload = {
#         "model": "mistral",
#         "messages": [{"role": "user", "content": prompt}],
#         "temperature": 0.3,
#         "max_tokens": 2000,
#         "stream": False
#     }
#     try:
#         res = requests.post(LLM_URL, headers=headers, json=payload)
#         return res.json()["choices"][0]["message"]["content"].strip()
#     except Exception as e:
#         print(f"[ERROR] LLM call failed: {e}")
#         return ""

# # --------- JD CONSTRUCTION FROM EXCEL ---------
# def load_jd_sentences():
#     wb = openpyxl.load_workbook(PROMPT_EXCEL)
#     sheet = wb.active
#     jd_map = {}

#     for i, row in enumerate(sheet.iter_rows(min_row=2), start=1):
#         resume_name = row[0].value
#         jd_parts = [str(cell.value).strip() for cell in row[1:] if cell.value]
#         if resume_name and jd_parts:
#             jd_text = "\n".join(jd_parts)
#             jd_map[resume_name.strip()] = jd_text
#     return jd_map

# # --------- PROMPT BUILDER ---------
# def build_prompt(resume_text, jd_text, sample_text):
#     return f"""
# You are a resume writing expert.

# Format this resume to align with the job description and exactly follow the structure and tone of the sample. Do not include personal details or redundant education.

# [SAMPLE FORMAT]
# {sample_text}

# [JOB DESCRIPTION]
# {jd_text}

# [RAW RESUME]
# {resume_text}

# [FORMATTED OUTPUT]
# """

# # --------- MAIN PIPELINE ----------
# def run_automation():
#     sample_text = read_txt(EY_SAMPLE_PATH)
#     jd_map = load_jd_sentences()

#     for resume_file in os.listdir(RAW_RESUME_DIR):
#         resume_name = os.path.splitext(resume_file)[0]
#         resume_path = os.path.join(RAW_RESUME_DIR, resume_file)

#         if resume_name not in jd_map:
#             print(f"[SKIPPED] No JD mapped for: {resume_name}")
#             continue

#         resume_text = read_resume(resume_path)
#         resume_text = scrub_pii(resume_text)

#         if not resume_text.strip():
#             print(f"[SKIPPED] Empty resume: {resume_name}")
#             continue

#         jd_text = jd_map[resume_name]
#         prompt = build_prompt(resume_text, jd_text, sample_text)
#         llm_output = call_llm(prompt)

#         output_path = os.path.join(OUTPUT_DIR, f"{resume_name}_customised.txt")
#         with open(output_path, "w", encoding="utf-8") as f:
#             f.write(llm_output)

#         # --- Compare with manual ---
#         manual_path = os.path.join(MANUAL_RESUME_DIR, f"{resume_name}.txt")
#         if os.path.exists(manual_path):
#             manual_text = read_txt(manual_path)
#             score = compute_similarity(llm_output, manual_text)
#             print(f"[DONE] {resume_file} | Match with Manual: {score}%")
#         else:
#             print(f"[NOTE] Manual resume not found for comparison: {resume_name}")

# # --------- TRIGGER ----------
# if __name__ == "__main__":
#     run_automation()


[SKIPPED] No JD mapped for: Anuska Das
[SKIPPED] No JD mapped for: CV_Aarsh
[SKIPPED] No JD mapped for: CV_Arnab Roy_Model_Validation
[SKIPPED] No JD mapped for: G N Sindhur_resume_2025
[SKIPPED] No JD mapped for: Isha Porwal Resume
[SKIPPED] No JD mapped for: KartikMohan
[SKIPPED] No JD mapped for: Kshitij_Sahdev_CV (002)
[SKIPPED] No JD mapped for: Kundan_Kumar
[SKIPPED] No JD mapped for: Munna Choudhary
[SKIPPED] No JD mapped for: Narendra_Sahu_CreditRisk
[SKIPPED] No JD mapped for: Naukri_AbhimanyuSingh[3y_7m]
[SKIPPED] No JD mapped for: Naukri_AnandKumar[10y_0m]
[SKIPPED] No JD mapped for: Naukri_DEBSUBHRAGHOSH[7y_0m]
[SKIPPED] No JD mapped for: Naukri_GirishKhandelwal[11y_0m]
[SKIPPED] No JD mapped for: Naukri_SAHILPATIL[2y_11m]
[SKIPPED] No JD mapped for: Naukri_SusmitaMisra[3y_9m]
[SKIPPED] No JD mapped for: Naukri_Vaibhav[3y_6m]
[SKIPPED] No JD mapped for: Naukri_YashRai[4y_0m]
[SKIPPED] No JD mapped for: Nidhika-Tomar
[SKIPPED] No JD mapped for: Praveen R- Resume
[SKIPPED] No

In [None]:
# import os
# import re
# import fitz  # PyMuPDF
# import docx
# import pandas as pd
# import requests
# from sklearn.feature_extraction.text import TfidfVectorizer
# from sklearn.metrics.pairwise import cosine_similarity

# # --------------- CONFIG -----------------
# RAW_FOLDER = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes"
# OUTPUT_FOLDER = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Prompt_Customised_Resumes"
# SAMPLE_PATH = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\EY_sample_format1.txt"
# JD_PROMPT_EXCEL = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\JD_Sentence_Variants.xlsx"
# JD_PROMPT_SHEET = "Sheet1"  # update if the sheet has a different name
# JD_PROMPT_COL = "Prompt"   # update if prompt column is named differently

# os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# # --------------- HELPERS -----------------
# def normalize_name(name):
#     name = os.path.splitext(name)[0]
#     name = name.lower()
#     return re.sub(r'[^a-z0-9]', '', name)  # keep only alphanumeric

# def read_txt(path):
#     with open(path, "r", encoding="utf-8") as f:
#         return f.read().strip()

# def read_pdf(path):
#     doc = fitz.open(path)
#     return "\n".join([page.get_text() for page in doc])

# def read_docx(path):
#     doc = docx.Document(path)
#     return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])

# def read_any_resume(path):
#     ext = os.path.splitext(path)[1].lower()
#     if ext == ".pdf":
#         return read_pdf(path)
#     elif ext == ".docx":
#         return read_docx(path)
#     elif ext == ".txt":
#         return read_txt(path)
#     return ""

# def scrub_pii(text):
#     text = re.sub(r'\b\d{10}\b', '', text)
#     text = re.sub(r'\S+@\S+', '', text)
#     text = re.sub(r'http\S+|www.\S+', '', text)
#     return text

# def call_llm(prompt):
#     url = "http://127.0.0.1:1234/v1/chat/completions"
#     headers = {"Content-Type": "application/json"}
#     payload = {
#         "model": "mistral",
#         "messages": [{"role": "user", "content": prompt}],
#         "temperature": 0.4,
#         "max_tokens": 1800,
#         "stream": False
#     }
#     try:
#         res = requests.post(url, headers=headers, json=payload)
#         return res.json()["choices"][0]["message"]["content"].strip()
#     except Exception as e:
#         print(f"[ERROR] LLM Call Failed: {e}")
#         return ""

# def compute_similarity(a, b):
#     vecs = TfidfVectorizer().fit_transform([a, b])
#     return round(float(cosine_similarity(vecs[0:1], vecs[1:2])[0][0]) * 100, 2)

# def build_prompt(resume_text, jd_prompt, sample_text):
#     return f"""
# You are a resume writing expert.

# Format this resume to align with the job description and exactly follow the structure and tone of the sample. Be concise and role-aligned.

# [SAMPLE FORMAT]
# {sample_text.strip()}

# [JOB DESCRIPTION]
# {jd_prompt.strip()}

# [RAW RESUME]
# {resume_text.strip()}

# [FORMATTED OUTPUT]
# """

# # --------------- MAIN PIPELINE -----------------
# def run_customization_pipeline():
#     print("[INFO] Starting prompt-based resume customization...")

#     sample_text = read_txt(SAMPLE_PATH)

#     # Load JD prompt mapping and normalize keys
#     df = pd.read_excel(JD_PROMPT_EXCEL, sheet_name=JD_PROMPT_SHEET)
#     jd_map = {}
#     for _, row in df.iterrows():
#         raw_key = str(row[0])
#         prompt_text = str(row[JD_PROMPT_COL]) if JD_PROMPT_COL in row else ""
#         if raw_key and prompt_text:
#             normalized_key = normalize_name(raw_key)
#             jd_map[normalized_key] = prompt_text.strip()

#     results = []

#     for resume_file in os.listdir(RAW_FOLDER):
#         path = os.path.join(RAW_FOLDER, resume_file)
#         if not path.lower().endswith((".pdf", ".docx", ".txt")):
#             continue

#         resume_text = read_any_resume(path)
#         resume_text = scrub_pii(resume_text)

#         if not resume_text.strip():
#             print(f"[SKIPPED] Empty resume: {resume_file}")
#             continue

#         resume_key = normalize_name(resume_file)
#         if resume_key not in jd_map:
#             print(f"[SKIPPED] No JD mapped for: {resume_file}")
#             continue

#         jd_prompt = jd_map[resume_key]
#         prompt = build_prompt(resume_text, jd_prompt, sample_text)
#         llm_output = call_llm(prompt)

#         if not llm_output:
#             print(f"[ERROR] LLM returned nothing for: {resume_file}")
#             continue

#         output_path = os.path.join(OUTPUT_FOLDER, f"{resume_key}_customized.txt")
#         with open(output_path, "w", encoding="utf-8") as f:
#             f.write(llm_output)

#         score = compute_similarity(sample_text, llm_output)
#         print(f"[DONE] {resume_file} | Match with Sample: {score}%")
#         results.append((resume_file, score))

#     print("\n[SUMMARY]")
#     for name, score in results:
#         print(f"{name}: {score}%")

# # --------------- TRIGGER -----------------
# if __name__ == "__main__":
#     run_customization_pipeline()


[INFO] Starting prompt-based resume customization...


ValueError: Worksheet named 'Sheet1' not found

In [None]:
# import os
# import re
# import fitz  # PyMuPDF
# import docx
# import requests
# import pandas as pd
# from sklearn.feature_extraction.text import TfidfVectorizer
# from sklearn.metrics.pairwise import cosine_similarity

# # ---------------- CONFIGURATION ----------------
# RAW_RESUME_FOLDER = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes"
# OUTPUT_FOLDER = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\LLM_Customised_Resumes"
# SAMPLE_PATH = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\EY_sample_format1.txt"
# JD_PROMPT_EXCEL = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\JD_Sentence_Variants.xlsx"
# JD_PROMPT_SHEET = 'in'  # Corrected sheet name
# LLM_URL = "http://127.0.0.1:1234/v1/chat/completions"

# os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# # ---------------- HELPERS ----------------

# def read_txt(path):
#     with open(path, "r", encoding="utf-8") as f:
#         return f.read().strip()

# def read_pdf(path):
#     doc = fitz.open(path)
#     return "\n".join([page.get_text() for page in doc])

# def read_docx(path):
#     doc = docx.Document(path)
#     return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])

# def read_any_resume(path):
#     ext = os.path.splitext(path)[1].lower()
#     if ext == ".pdf":
#         return read_pdf(path)
#     elif ext == ".docx":
#         return read_docx(path)
#     elif ext == ".txt":
#         return read_txt(path)
#     return ""

# def scrub_pii(text):
#     text = re.sub(r'\b\d{10}\b', '', text)
#     text = re.sub(r'\S+@\S+', '', text)
#     text = re.sub(r'http\S+|www.\S+', '', text)
#     return text.strip()

# def call_llm(prompt):
#     headers = {"Content-Type": "application/json"}
#     payload = {
#         "model": "mistral",
#         "messages": [{"role": "user", "content": prompt}],
#         "temperature": 0.4,
#         "max_tokens": 1800,
#         "stream": False
#     }
#     try:
#         res = requests.post(LLM_URL, headers=headers, json=payload)
#         return res.json()["choices"][0]["message"]["content"].strip()
#     except Exception as e:
#         print(f"[ERROR] LLM call failed: {e}")
#         return ""

# def compute_similarity(a, b):
#     vecs = TfidfVectorizer().fit_transform([a, b])
#     return round(float(cosine_similarity(vecs[0:1], vecs[1:2])[0][0]) * 100, 2)

# def normalize_name(name):
#     return re.sub(r'[^a-zA-Z0-9]', '', name.lower())

# # ---------------- PROMPT BUILDING ----------------

# def build_prompt(resume_text, jd_prompt, sample_text):
#     return f"""
# You are a resume customization expert.

# Using the below sample format and tone, customize the raw resume strictly based on the job description and keep the formatting style of the sample.

# [SAMPLE FORMAT]
# {sample_text}

# [JOB DESCRIPTION]
# {jd_prompt}

# [RAW RESUME]
# {resume_text}

# [FORMATTED OUTPUT]
# """

# # ---------------- MAIN ----------------

# def run_customization_pipeline():
#     print("[INFO] Starting prompt-based resume customization...")

#     sample_text = read_txt(SAMPLE_PATH)

#     # Load JD prompt mapping and normalize keys
#     df = pd.read_excel(JD_PROMPT_EXCEL, sheet_name=JD_PROMPT_SHEET)
#     jd_map = {}
#     for _, row in df.iterrows():
#         raw_name = row['Resume File Name']
#         jd_prompt = row['JD Prompt']
#         norm_name = normalize_name(os.path.splitext(raw_name)[0])
#         jd_map[norm_name] = jd_prompt

#     results = []

#     for file in os.listdir(RAW_RESUME_FOLDER):
#         if not file.lower().endswith((".pdf", ".docx", ".txt")):
#             continue

#         resume_path = os.path.join(RAW_RESUME_FOLDER, file)
#         resume_text = scrub_pii(read_any_resume(resume_path))

#         norm_name = normalize_name(os.path.splitext(file)[0])
#         jd_prompt = jd_map.get(norm_name, "")

#         if not jd_prompt:
#             print(f"[SKIPPED] No JD mapped for: {file}")
#             continue

#         prompt = build_prompt(resume_text, jd_prompt, sample_text)
#         llm_output = call_llm(prompt)

#         if not llm_output.strip():
#             print(f"[ERROR] No output from LLM for: {file}")
#             continue

#         output_path = os.path.join(OUTPUT_FOLDER, f"{os.path.splitext(file)[0]}_customized.txt")
#         with open(output_path, "w", encoding="utf-8") as f:
#             f.write(llm_output)

#         match_score = compute_similarity(sample_text, llm_output)
#         print(f"[DONE] {file} | Match Score with Sample: {match_score}%")
#         results.append((file, match_score))

#     print("\n[SUMMARY]")
#     for name, score in results:
#         print(f"{name}: {score}%")

# # ---------------- EXECUTE ----------------

# if __name__ == "__main__":
#     run_customization_pipeline()


[INFO] Starting prompt-based resume customization...


KeyError: 'Resume File Name'

In [None]:
# import os
# import re
# import docx
# import fitz  # PyMuPDF
# import requests
# import pandas as pd
# from sklearn.feature_extraction.text import TfidfVectorizer
# from sklearn.metrics.pairwise import cosine_similarity

# # ---------------- CONFIG -----------------
# RAW_FOLDER = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes"
# OUTPUT_FOLDER = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\LLM_Customised_Resumes"
# SAMPLE_PATH = r"C:\Users\HN338QQ\OneDrive - EY\Documents\\raw and ey curated samples\EY_sample_format1.txt"
# JD_PROMPT_EXCEL = r"C:\Users\HN338QQ\OneDrive - EY\Documents\\raw and ey curated samples\JD_Sentence_Variants.xlsx"
# JD_PROMPT_SHEET = "in"
# LLM_URL = "http://127.0.0.1:1234/v1/chat/completions"
# os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# # ---------------- HELPERS ----------------
# def read_txt(path):
#     with open(path, "r", encoding="utf-8") as f:
#         return f.read().strip()

# def read_pdf(path):
#     doc = fitz.open(path)
#     return "\n".join([page.get_text() for page in doc])

# def read_docx(path):
#     doc = docx.Document(path)
#     return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])

# def read_any_resume(path):
#     ext = os.path.splitext(path)[1].lower()
#     if ext == ".pdf":
#         return read_pdf(path)
#     elif ext == ".docx":
#         return read_docx(path)
#     elif ext == ".txt":
#         return read_txt(path)
#     return ""

# def scrub_pii(text):
#     text = re.sub(r'\b\d{10}\b', '', text)
#     text = re.sub(r'\S+@\S+', '', text)
#     text = re.sub(r'http\S+|www.\S+', '', text)
#     return text

# def normalize_name(name):
#     return re.sub(r'\W+', '', name).lower()

# def call_llm(prompt):
#     headers = {"Content-Type": "application/json"}
#     payload = {
#         "model": "mistral",
#         "messages": [{"role": "user", "content": prompt}],
#         "temperature": 0.4,
#         "max_tokens": 1800,
#         "stream": False
#     }
#     try:
#         res = requests.post(LLM_URL, headers=headers, json=payload)
#         return res.json()["choices"][0]["message"]["content"].strip()
#     except Exception as e:
#         print(f"[ERROR] LLM call failed: {e}")
#         return ""

# def compute_similarity(a, b):
#     vecs = TfidfVectorizer().fit_transform([a, b])
#     return round(float(cosine_similarity(vecs[0:1], vecs[1:2])[0][0]) * 100, 2)

# def build_prompt(resume_text, jd_prompt, sample_text):
#     return f"""
# You are a resume writing expert.

# Format this resume to align with the job description and exactly follow the structure and tone of the sample. Be concise and role-aligned.

# [SAMPLE FORMAT]
# {sample_text.strip()}

# [JOB DESCRIPTION]
# {jd_prompt.strip()}

# [RAW RESUME]
# {resume_text.strip()}

# [FORMATTED OUTPUT]
# """

# # ---------------- PIPELINE ----------------
# def run_customization_pipeline():
#     print("[INFO] Starting resume customization for batch of resumes...\n")

#     sample_text = read_txt(SAMPLE_PATH)

#     # Load JD prompt mapping and normalize keys
#     df = pd.read_excel(JD_PROMPT_EXCEL, sheet_name=JD_PROMPT_SHEET)
#     jd_map = {}
#     for _, row in df.iterrows():
#         raw_name = str(row['Resume'])  # Excel column name
#         jd_prompt = str(row['Prompt'])  # Excel column name
#         norm_name = normalize_name(os.path.splitext(raw_name)[0])
#         jd_map[norm_name] = jd_prompt

#     results = []

#     for file in os.listdir(RAW_FOLDER):
#         resume_path = os.path.join(RAW_FOLDER, file)
#         if not resume_path.lower().endswith((".pdf", ".docx", ".txt")):
#             continue

#         raw_text = read_any_resume(resume_path)
#         scrubbed = scrub_pii(raw_text)

#         base_name = normalize_name(os.path.splitext(file)[0])
#         jd_prompt = jd_map.get(base_name)

#         if not jd_prompt:
#             print(f"[SKIPPED] No JD mapped for: {file}")
#             continue

#         prompt = build_prompt(scrubbed, jd_prompt, sample_text)
#         llm_output = call_llm(prompt)

#         if not llm_output:
#             print(f"[ERROR] LLM returned nothing: {file}")
#             continue

#         out_path = os.path.join(OUTPUT_FOLDER, f"{os.path.splitext(file)[0]}_formatted.txt")
#         with open(out_path, "w", encoding="utf-8") as f:
#             f.write(llm_output)

#         sim_score = compute_similarity(sample_text, llm_output)
#         print(f"[DONE] {file} | Match with Sample: {sim_score}%")
#         results.append((file, sim_score))

#     print("\n[SUMMARY]")
#     for name, score in results:
#         print(f"{name}: {score}%")

# # ---------------- EXECUTE ----------------
# if __name__ == "__main__":
#     run_customization_pipeline()


[INFO] Starting resume customization for batch of resumes...



KeyError: 'Resume'

In [None]:
# import os
# import re
# import docx
# import fitz  # PyMuPDF
# import requests
# import pandas as pd
# from sklearn.feature_extraction.text import TfidfVectorizer
# from sklearn.metrics.pairwise import cosine_similarity

# # ---------------- CONFIG ----------------
# RAW_FOLDER = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes"
# OUTPUT_FOLDER = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Prompt_Customised_Resumes"
# SAMPLE_PATH = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\EY_sample_format1.txt"
# JD_PROMPT_EXCEL = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\JD_Sentence_Variants.xlsx"
# JD_PROMPT_SHEET = "in"
# LLM_URL = "http://127.0.0.1:1234/v1/chat/completions"

# os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# # ---------------- HELPERS ----------------
# def read_txt(path):
#     with open(path, "r", encoding="utf-8") as f:
#         return f.read().strip()

# def read_pdf(path):
#     doc = fitz.open(path)
#     return "\n".join([page.get_text() for page in doc])

# def read_docx(path):
#     doc = docx.Document(path)
#     return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])

# def read_any_resume(path):
#     ext = os.path.splitext(path)[1].lower()
#     if ext == ".pdf":
#         return read_pdf(path)
#     elif ext == ".docx":
#         return read_docx(path)
#     elif ext == ".txt":
#         return read_txt(path)
#     return ""

# def scrub_pii(text):
#     text = re.sub(r'\b\d{10}\b', '', text)
#     text = re.sub(r'\S+@\S+', '', text)
#     text = re.sub(r'http\S+|www.\S+', '', text)
#     return text

# def call_llm(prompt):
#     headers = {"Content-Type": "application/json"}
#     payload = {
#         "model": "mistral",
#         "messages": [{"role": "user", "content": prompt}],
#         "temperature": 0.4,
#         "max_tokens": 1800,
#         "stream": False
#     }
#     try:
#         res = requests.post(LLM_URL, headers=headers, json=payload)
#         return res.json()["choices"][0]["message"]["content"].strip()
#     except Exception as e:
#         print(f"[ERROR] LLM Call Failed: {e}")
#         return ""

# def compute_similarity(a, b):
#     vecs = TfidfVectorizer().fit_transform([a, b])
#     return round(float(cosine_similarity(vecs[0:1], vecs[1:2])[0][0]) * 100, 2)

# def normalize_name(name):
#     return re.sub(r'[^a-z0-9]', '', name.lower())

# # ---------------- PROMPT CONSTRUCTION ----------------
# def build_prompt(resume_text, prompt_text, sample_text):
#     return f"""
# You are a resume writing expert.

# Format this resume to align with the following prompt and exactly follow the structure and tone of the sample. Be concise and role-aligned.

# [SAMPLE FORMAT]
# {sample_text.strip()}

# [JD PROMPT]
# {prompt_text.strip()}

# [RAW RESUME]
# {resume_text.strip()}

# [FORMATTED OUTPUT]
# """

# # ---------------- MAIN PIPELINE ----------------
# def run_customization_pipeline():
#     print("[INFO] Starting resume customization using JD prompts...")

#     sample_text = read_txt(SAMPLE_PATH)

#     # Load JD prompt mapping and normalize keys
#     df = pd.read_excel(JD_PROMPT_EXCEL, sheet_name=JD_PROMPT_SHEET)
#     jd_map = {}
#     for _, row in df.iterrows():
#         raw_name = row['JD Resume Mapping']
#         jd_prompt = row['Prompt']
#         norm_name = normalize_name(os.path.splitext(raw_name)[0])
#         jd_map[norm_name] = jd_prompt

#     results = []

#     for file in os.listdir(RAW_FOLDER):
#         path = os.path.join(RAW_FOLDER, file)
#         if not path.lower().endswith((".pdf", ".docx", ".txt")):
#             continue

#         resume_text = read_any_resume(path)
#         resume_text = scrub_pii(resume_text)

#         if not resume_text.strip():
#             print(f"[SKIPPED] Empty resume: {file}")
#             continue

#         norm_resume = normalize_name(os.path.splitext(file)[0])
#         if norm_resume not in jd_map:
#             print(f"[SKIPPED] No JD mapped for: {file}")
#             continue

#         prompt_text = jd_map[norm_resume]
#         prompt = build_prompt(resume_text, prompt_text, sample_text)
#         llm_output = call_llm(prompt)

#         if not llm_output:
#             print(f"[ERROR] LLM failed for: {file}")
#             continue

#         output_path = os.path.join(OUTPUT_FOLDER, f"{os.path.splitext(file)[0]}_customized.txt")
#         with open(output_path, "w", encoding="utf-8") as f:
#             f.write(llm_output)

#         score = compute_similarity(sample_text, llm_output)
#         print(f"[DONE] {file} | Match with Sample: {score}%")
#         results.append((file, score))

#     print("\n[SUMMARY]")
#     for name, score in results:
#         print(f"{name}: {score}%")

# # ---------------- EXECUTE ----------------
# if __name__ == "__main__":
#     run_customization_pipeline()


[INFO] Starting resume customization using JD prompts...


KeyError: 'JD Resume Mapping'

In [None]:
# import os
# import json
# import requests
# import time
# import zipfile
# import re
# import docx
# import fitz  # PyMuPDF
# import pandas as pd
# from sklearn.feature_extraction.text import TfidfVectorizer
# from sklearn.metrics.pairwise import cosine_similarity

# # === PATHS ===
# base_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples"
# jd_excel_path = os.path.join(base_path, "JD_Sentence_Variants.xlsx")
# ey_format_path = os.path.join(base_path, "EY_sample_format1.txt")
# raw_zip = os.path.join(base_path, "Rawresumes.zip")
# manual_zip = os.path.join(base_path, "manualcustomisedresumes.zip")

# jd_output_dir = os.path.join(base_path, "generated_jds")
# custom_output_dir = os.path.join(base_path, "customized_resumes")
# log_output_dir = os.path.join(base_path, "match_logs")

# os.makedirs(jd_output_dir, exist_ok=True)
# os.makedirs(custom_output_dir, exist_ok=True)
# os.makedirs(log_output_dir, exist_ok=True)

# # === JD GENERATION (LM STUDIO) ===
# def generate_jd(prompt):
#     url = "http://localhost:1234/v1/chat/completions"
#     headers = {"Content-Type": "application/json"}
#     payload = {
#         "model": "mistral-7b-instruct-v0.3-q4_k_m-gguf",
#         "messages": [{"role": "user", "content": prompt}],
#         "temperature": 0.7
#     }
#     try:
#         res = requests.post(url, headers=headers, data=json.dumps(payload), timeout=60)
#         res.raise_for_status()
#         return res.json()['choices'][0]['message']['content'].strip()
#     except Exception as e:
#         print(f"LM Studio error for prompt: {prompt[:60]}... -> {e}")
#         return ""

# def generate_all_jds():
#     df = pd.read_excel(jd_excel_path)
#     for i, row in df.iterrows():
#         prompt = str(row[0]).strip()
#         if not prompt:
#             continue
#         print(f"Generating JD_{i+1}.txt")
#         jd_text = generate_jd(prompt)
#         if jd_text:
#             with open(os.path.join(jd_output_dir, f"JD_{i+1}.txt"), "w", encoding="utf-8") as f:
#                 f.write(jd_text)
#         time.sleep(1)

# # === FILE LOADERS ===
# def extract_zip(zip_path, extract_to):
#     os.makedirs(extract_to, exist_ok=True)
#     with zipfile.ZipFile(zip_path, 'r') as zip_ref:
#         zip_ref.extractall(extract_to)

# def read_pdf(file_path):
#     doc = fitz.open(file_path)
#     return "\n".join([page.get_text() for page in doc])

# def read_docx(file_path):
#     doc = docx.Document(file_path)
#     return "\n".join([para.text for para in doc.paragraphs if para.text.strip()])

# def load_resumes(folder):
#     resumes = {}
#     for file in os.listdir(folder):
#         path = os.path.join(folder, file)
#         name = os.path.splitext(file)[0]
#         try:
#             if file.endswith(".pdf"):
#                 resumes[name] = read_pdf(path)
#             elif file.endswith(".docx"):
#                 resumes[name] = read_docx(path)
#         except Exception as e:
#             print(f"Error reading {file}: {e}")
#     return resumes

# # === TEXT MATCHING ===
# def get_most_relevant_sections(jd_text, resume_text, top_k=5):
#     sentences = [s.strip() for s in resume_text.split('\n') if len(s.strip()) > 20]
#     all_texts = [jd_text] + sentences
#     tfidf = TfidfVectorizer().fit_transform(all_texts)
#     scores = cosine_similarity(tfidf[0:1], tfidf[1:]).flatten()
#     top_indices = scores.argsort()[-top_k:][::-1]
#     return "\n".join([sentences[i] for i in top_indices]), float(scores[top_indices].mean())

# # === FORMAT OUTPUT ===
# def format_resume(ey_template_text, injected_text):
#     return ey_template_text.replace("[EXPERIENCE_PLACEHOLDER]", injected_text)

# # === MAIN PIPELINE ===
# def run_pipeline():
#     # Extract raw and manual resumes
#     raw_path = os.path.join(base_path, "raw_resume_extracted")
#     manual_path = os.path.join(base_path, "manual_resume_extracted")
#     extract_zip(raw_zip, raw_path)
#     extract_zip(manual_zip, manual_path)

#     # Load data
#     raw_resumes = load_resumes(raw_path)
#     manual_resumes = load_resumes(manual_path)

#     with open(ey_format_path, "r", encoding="utf-8") as f:
#         ey_template = f.read()

#     # Loop through each JD and match
#     for jd_file in os.listdir(jd_output_dir):
#         jd_path = os.path.join(jd_output_dir, jd_file)
#         jd_name = os.path.splitext(jd_file)[0]
#         with open(jd_path, "r", encoding="utf-8") as f:
#             jd_text = f.read()

#         for raw_name, raw_text in raw_resumes.items():
#             matched_text, match_score = get_most_relevant_sections(jd_text, raw_text)
#             customized_resume = format_resume(ey_template, matched_text)
#             out_path = os.path.join(custom_output_dir, f"{jd_name}__{raw_name}.txt")
#             with open(out_path, "w", encoding="utf-8") as f:
#                 f.write(customized_resume)

#             # Match against manual
#             manual_match_text = manual_resumes.get(raw_name, "")
#             sim_to_manual, _ = get_most_relevant_sections(customized_resume, manual_match_text)

#             # Save log
#             log_data = {
#                 "JD": jd_name,
#                 "Candidate": raw_name,
#                 "AutoGenerated_vs_Raw_Score": round(match_score, 4),
#                 "AutoGenerated_vs_Manual_Score": round(_, 4)
#             }
#             log_file = os.path.join(log_output_dir, f"{jd_name}__{raw_name}.json")
#             with open(log_file, "w") as lf:
#                 json.dump(log_data, lf, indent=2)

# # === RUN ALL ===
# if __name__ == "__main__":
#     print("STEP 1: Generating all JDs from prompts using LM Studio...")
#     generate_all_jds()

#     print("STEP 2 to 5: Running full resume customization pipeline...")
#     run_pipeline()
#     print("Done. Customized resumes and logs are saved.")


STEP 1: Generating all JDs from prompts using LM Studio...


  prompt = str(row[0]).strip()


Generating JD_1.txt
LM Studio error for prompt: Create a JD for a retail IRB model development requirement. ... -> HTTPConnectionPool(host='localhost', port=1234): Read timed out. (read timeout=60)
Generating JD_2.txt
LM Studio error for prompt: Create a JD for a retail IRB model development requirement. ... -> HTTPConnectionPool(host='localhost', port=1234): Read timed out. (read timeout=60)
Generating JD_3.txt
LM Studio error for prompt: Create a JD for a retail IRB model development requirement. ... -> HTTPConnectionPool(host='localhost', port=1234): Read timed out. (read timeout=60)
Generating JD_4.txt
LM Studio error for prompt: Create a JD for a retail IRB model development requirement. ... -> HTTPConnectionPool(host='localhost', port=1234): Read timed out. (read timeout=60)
Generating JD_5.txt
LM Studio error for prompt: Create a JD for a retail IRB model validation requirement. M... -> HTTPConnectionPool(host='localhost', port=1234): Read timed out. (read timeout=60)
Generating

In [1]:
import os
import json
import time
import re
import fitz  # PyMuPDF
import docx
import zipfile
import pandas as pd
import requests
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# --------------------- CONFIGURATION ---------------------

# Root folder holding the prompt spreadsheet, resume archives, template and all
# generated output. Defaults to the original location; set the
# RESUME_PIPELINE_BASE environment variable to run against a different folder
# without editing the notebook.
base_path = os.environ.get(
    "RESUME_PIPELINE_BASE",
    r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples",
)
jd_excel_path = os.path.join(base_path, "JD_Sentence_Variants.xlsx")  # one JD prompt per row
jd_output_dir = os.path.join(base_path, "JDs")  # generated JD_<n>.txt files
manual_resume_zip = os.path.join(base_path, "manualcustomisedresumes.zip")
manual_resume_dir = os.path.join(base_path, "manualcustomisedresumes")
raw_resume_zip = os.path.join(base_path, "Rawresumes.zip")
raw_resume_dir = os.path.join(base_path, "rawresumes")
ey_format_path = os.path.join(base_path, "EY_sample_format1.txt")  # template text
customised_output_dir = os.path.join(base_path, "customised_resumes")
logs_dir = os.path.join(base_path, "logs")

# Create every working directory up front so later writes cannot fail on a missing folder.
for folder in [jd_output_dir, manual_resume_dir, raw_resume_dir, customised_output_dir, logs_dir]:
    os.makedirs(folder, exist_ok=True)

# --------------------- FILE UTILS ---------------------

def extract_zip(zip_path, extract_to):
    """Unpack every member of the archive at ``zip_path`` into ``extract_to``."""
    archive = zipfile.ZipFile(zip_path, 'r')
    with archive:
        archive.extractall(path=extract_to)

def read_pdf_text(pdf_path):
    """Return the text of every page of a PDF, joined with newlines.

    Args:
        pdf_path: Path of the PDF to open with PyMuPDF (``fitz``).

    Returns:
        A single string: each page's ``get_text()`` result in page order,
        separated by a newline.
    """
    # Open the document as a context manager so it is closed as soon as the
    # text has been collected, instead of staying open until garbage collection.
    with fitz.open(pdf_path) as doc:
        return "\n".join(page.get_text() for page in doc)

def read_docx_text(docx_path):
    """Return the non-blank paragraphs of a .docx file, one per line."""
    document = docx.Document(docx_path)
    kept = []
    for paragraph in document.paragraphs:
        # Paragraphs that are empty or whitespace-only are dropped.
        if paragraph.text.strip():
            kept.append(paragraph.text)
    return "\n".join(kept)

def read_resume(path):
    """Extract the text of a resume, choosing the reader from the file extension.

    The extension is compared case-insensitively, so files such as ``CV.PDF``
    or ``CV.Docx`` are read instead of silently yielding an empty string.

    Args:
        path: Path to the resume file.

    Returns:
        The extracted text for ``.pdf`` and ``.docx`` files, ``""`` for any
        other extension.
    """
    lowered = path.lower()
    if lowered.endswith(".pdf"):
        return read_pdf_text(path)
    if lowered.endswith(".docx"):
        return read_docx_text(path)
    return ""

# --------------------- TEXT UTILS ---------------------

def clean_text(text):
    """Lower-case ``text``, trim it, and collapse each whitespace run to one space."""
    lowered = text.strip().lower()
    return re.sub(r'\s+', ' ', lowered)

def get_similarity(text1, text2):
    """Cosine similarity between the TF-IDF vectors of two texts.

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        The cosine similarity of the two TF-IDF rows, or ``0.0`` when no
        vocabulary can be built from the pair.
    """
    try:
        tfidf = TfidfVectorizer().fit_transform([text1, text2])
    except ValueError:
        # scikit-learn raises ValueError ("empty vocabulary") when neither text
        # contains a usable token; treat such a pair as having nothing in common.
        return 0.0
    # Compare the two rows directly; no dense copy of the matrix is needed.
    return cosine_similarity(tfidf[0:1], tfidf[1:2])[0][0]

# --------------------- JD GENERATION ---------------------

def generate_jds_from_prompts(timeout=180):
    """Generate one job description per spreadsheet row using the local LLM server.

    Reads ``jd_excel_path``, takes the first column of each row as the prompt,
    posts it to the chat-completions endpoint and writes the reply to
    ``JD_<row index + 1>.txt`` inside ``jd_output_dir``. A failure for one
    prompt is printed and the loop moves on to the next row.

    Args:
        timeout: Seconds to wait for each HTTP response before giving up.
    """
    df = pd.read_excel(jd_excel_path)
    url = "http://localhost:1234/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    model = "mistral-7b-instruct-v0.3-q4_k_m-gguf"

    for idx, row in df.iterrows():
        cell = row.iloc[0]
        # An empty spreadsheet cell is read as NaN; str(NaN) would otherwise be
        # sent to the model as the literal prompt "nan".
        prompt = "" if pd.isna(cell) else str(cell).strip()
        if not prompt:
            print(f"Skipping JD_{idx + 1}.txt: empty prompt")
            continue

        print(f"Generating JD_{idx + 1}.txt")
        try:
            payload = {
                "model": model,
                "messages": [{"role": "user", "content": prompt}],
                "temperature": 0.7,
                "stream": False
            }
            response = requests.post(url, headers=headers, json=payload, timeout=timeout)
            # Report HTTP failures as such rather than as a KeyError on 'choices'.
            response.raise_for_status()
            jd_text = response.json()['choices'][0]['message']['content']
            with open(os.path.join(jd_output_dir, f"JD_{idx+1}.txt"), "w", encoding="utf-8") as f:
                f.write(jd_text)
        except Exception as e:
            # Best effort: log the failure and carry on with the remaining prompts.
            print(f"LM Studio error for prompt: {prompt[:80]}... ->", e)
        time.sleep(2)

# --------------------- CUSTOMISATION ---------------------

def customize_resume(raw_text, jd_text, ey_template):
    """Fill the template's experience placeholder with JD-relevant resume sentences.

    The resume is split on ". "; a piece is kept when its similarity to the JD
    is above 0.15. Kept pieces are stripped, joined one per line and substituted
    for "[EXPERIENCE_PLACEHOLDER]" in the template.
    """
    relevant = [
        piece.strip()
        for piece in raw_text.split(". ")
        if get_similarity(piece, jd_text) > 0.15
    ]
    experience_block = "\n".join(relevant).strip()
    return ey_template.replace("[EXPERIENCE_PLACEHOLDER]", experience_block)

# --------------------- PIPELINE ---------------------

def full_pipeline():
    """Run JD generation, resume extraction, customisation and scoring end to end.

    Steps:
        1. Generate the JD text files from the prompt spreadsheet.
        2. Unpack the manual and raw resume archives and read every file found.
        3. For each JD file and each raw resume, write a customised resume to
           ``customised_output_dir``.
        4. When a manual resume whose name contains the raw resume's name exists,
           score the customised text against it, print the score and append it
           to ``log_JD<index>.txt`` in ``logs_dir``.
    """
    print("STEP 1: Generating JDs...")
    generate_jds_from_prompts()

    print("STEP 2: Extracting resumes...")
    extract_zip(manual_resume_zip, manual_resume_dir)
    extract_zip(raw_resume_zip, raw_resume_dir)

    # Context managers close the files promptly instead of leaking the handles.
    with open(ey_format_path, "r", encoding="utf-8") as template_file:
        ey_template = template_file.read()

    raw_resumes = {
        os.path.splitext(f)[0]: read_resume(os.path.join(root, f))
        for root, _, files in os.walk(raw_resume_dir)
        for f in files
    }

    manual_resumes = {
        os.path.splitext(f)[0]: read_resume(os.path.join(root, f))
        for root, _, files in os.walk(manual_resume_dir)
        for f in files
    }

    for jd_file in sorted(os.listdir(jd_output_dir)):
        jd_path = os.path.join(jd_output_dir, jd_file)
        name_parts = os.path.splitext(jd_file)[0].split("_")
        # Ignore sub-folders and stray files that are not named like "JD_<n>";
        # indexing [1] on such a name would raise IndexError.
        if not os.path.isfile(jd_path) or len(name_parts) < 2:
            print(f"Skipping non-JD entry: {jd_file}")
            continue
        jd_index = name_parts[1]
        with open(jd_path, "r", encoding="utf-8") as jd_handle:
            jd_text = jd_handle.read()

        for raw_name, raw_text in raw_resumes.items():
            customised = customize_resume(raw_text, jd_text, ey_template)
            output_path = os.path.join(customised_output_dir, f"{raw_name}_JD{jd_index}.txt")
            with open(output_path, "w", encoding="utf-8") as f:
                f.write(customised)

            # Validation: the first manual resume whose name contains the raw
            # resume's name (case-insensitive) is used as the reference.
            match_key = next((k for k in manual_resumes if raw_name.lower() in k.lower()), None)
            if match_key:
                manual_text = manual_resumes[match_key]
                score = get_similarity(customised, manual_text)
                log = f"{raw_name}_JD{jd_index}: Score={score:.2f}"
                print(log)
                with open(os.path.join(logs_dir, f"log_JD{jd_index}.txt"), "a", encoding="utf-8") as logf:
                    logf.write(log + "\n")

# --------------------- RUN ---------------------

# Run the whole pipeline (JD generation, extraction, customisation, scoring)
# when this code is executed directly rather than imported.
if __name__ == "__main__":
    full_pipeline()


STEP 1: Generating JDs...
Generating JD_1.txt
Generating JD_2.txt
Generating JD_3.txt
Generating JD_4.txt
Generating JD_5.txt
Generating JD_6.txt
Generating JD_7.txt
Generating JD_8.txt
LM Studio error for prompt: Create a JD for a retail IRB model validation requirement. Mention that SQL is a... -> HTTPConnectionPool(host='localhost', port=1234): Read timed out. (read timeout=180)
Generating JD_9.txt
Generating JD_10.txt
Generating JD_11.txt
Generating JD_12.txt
Generating JD_13.txt
Generating JD_14.txt
Generating JD_15.txt
Generating JD_16.txt
Generating JD_17.txt
Generating JD_18.txt
Generating JD_19.txt
Generating JD_20.txt
Generating JD_21.txt
Generating JD_22.txt
Generating JD_23.txt
Generating JD_24.txt
Generating JD_25.txt
Generating JD_26.txt
Generating JD_27.txt
Generating JD_28.txt
Generating JD_29.txt
Generating JD_30.txt
Generating JD_31.txt
Generating JD_32.txt
Generating JD_33.txt
Generating JD_34.txt
Generating JD_35.txt
Generating JD_36.txt
Generating JD_37.txt
Generat

AttributeError: module 'fitz' has no attribute 'open'

In [None]:
# import os
# import zipfile
# import fitz  # PyMuPDF
# import docx
# import re
# import pandas as pd
# from sklearn.feature_extraction.text import TfidfVectorizer
# from sklearn.metrics.pairwise import cosine_similarity

# # --------------------- CONFIGURATION ---------------------

# base_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples"
# jd_output_dir = os.path.join(base_path, "JDs")
# manual_resume_zip = os.path.join(base_path, "manualcustomisedresumes.zip")
# manual_resume_dir = os.path.join(base_path, "manualcustomisedresumes")
# raw_resume_zip = os.path.join(base_path, "Rawresumes.zip")
# raw_resume_dir = os.path.join(base_path, "rawresumes")
# ey_format_path = os.path.join(base_path, "EY_sample_format1.txt")
# customised_output_dir = os.path.join(base_path, "customised_resumes")
# logs_dir = os.path.join(base_path, "logs")

# for folder in [manual_resume_dir, raw_resume_dir, customised_output_dir, logs_dir]:
#     os.makedirs(folder, exist_ok=True)

# # --------------------- UTILS ---------------------

# def extract_zip(zip_path, extract_to):
#     with zipfile.ZipFile(zip_path, 'r') as zip_ref:
#         zip_ref.extractall(extract_to)

# def read_pdf_text(pdf_path):
#     doc = fitz.open(pdf_path)
#     return "\n".join([page.get_text() for page in doc])

# def read_docx_text(docx_path):
#     doc = docx.Document(docx_path)
#     return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])

# def read_resume(path):
#     if path.endswith(".pdf"):
#         return read_pdf_text(path)
#     elif path.endswith(".docx"):
#         return read_docx_text(path)
#     else:
#         return ""

# def get_similarity(text1, text2):
#     vectorizer = TfidfVectorizer().fit_transform([text1, text2])
#     vectors = vectorizer.toarray()
#     return cosine_similarity([vectors[0]], [vectors[1]])[0][0]

# def customize_resume(raw_text, jd_text, ey_template):
#     raw_sentences = raw_text.split(". ")
#     matched = []
#     for sentence in raw_sentences:
#         score = get_similarity(sentence, jd_text)
#         if score > 0.15:
#             matched.append(sentence.strip())
#     final_experience = "\n".join(matched).strip()
#     return ey_template.replace("[EXPERIENCE_PLACEHOLDER]", final_experience)

# # --------------------- MAIN CUSTOMIZATION PIPELINE ---------------------

# def customise_resumes_from_existing_JDs():
#     print("STEP 2: Extracting Resumes...")
#     extract_zip(raw_resume_zip, raw_resume_dir)
#     extract_zip(manual_resume_zip, manual_resume_dir)

#     ey_template = open(ey_format_path, "r", encoding="utf-8").read()

#     raw_resumes = {
#         os.path.splitext(f)[0]: read_resume(os.path.join(root, f))
#         for root, _, files in os.walk(raw_resume_dir)
#         for f in files
#     }

#     manual_resumes = {
#         os.path.splitext(f)[0]: read_resume(os.path.join(root, f))
#         for root, _, files in os.walk(manual_resume_dir)
#         for f in files
#     }

#     print("STEP 3: Customising Resumes with JDs...")
#     for jd_file in sorted(os.listdir(jd_output_dir)):
#         jd_index = os.path.splitext(jd_file)[0].split("_")[1]
#         jd_path = os.path.join(jd_output_dir, jd_file)
#         jd_text = open(jd_path, "r", encoding="utf-8").read()

#         for raw_name, raw_text in raw_resumes.items():
#             customised = customize_resume(raw_text, jd_text, ey_template)
#             output_path = os.path.join(customised_output_dir, f"{raw_name}_JD{jd_index}.txt")
#             with open(output_path, "w", encoding="utf-8") as f:
#                 f.write(customised)

#             # Validation: compare with manual resume
#             match_key = next((k for k in manual_resumes if raw_name.lower() in k.lower()), None)
#             if match_key:
#                 manual_text = manual_resumes[match_key]
#                 score = get_similarity(customised, manual_text)
#                 log = f"{raw_name}_JD{jd_index}: Score={score:.2f}"
#                 print(log)
#                 with open(os.path.join(logs_dir, f"log_JD{jd_index}.txt"), "a", encoding="utf-8") as logf:
#                     logf.write(log + "\n")

# # --------------------- RUN ---------------------

# if __name__ == "__main__":
#     customise_resumes_from_existing_JDs()


STEP 2: Extracting Resumes...
STEP 3: Customising Resumes with JDs...
Anuska Das_JD1: Score=0.00
CV_Aarsh_JD1: Score=0.63
CV_Arnab Roy_Model_Validation_JD1: Score=0.55
G N Sindhur_resume_2025_JD1: Score=0.65
Isha Porwal Resume_JD1: Score=0.62
KartikMohan_JD1: Score=0.73
Kshitij_Sahdev_CV (002)_JD1: Score=0.53
Kundan_Kumar_JD1: Score=0.70
Munna Choudhary_JD1: Score=0.00
Narendra_Sahu_CreditRisk_JD1: Score=0.68
Naukri_AbhimanyuSingh[3y_7m]_JD1: Score=0.65
Naukri_AnandKumar[10y_0m]_JD1: Score=0.58
Naukri_DEBSUBHRAGHOSH[7y_0m]_JD1: Score=0.63
Naukri_GirishKhandelwal[11y_0m]_JD1: Score=0.73
Naukri_SAHILPATIL[2y_11m]_JD1: Score=0.69
Naukri_SusmitaMisra[3y_9m]_JD1: Score=0.63
Naukri_Vaibhav[3y_6m]_JD1: Score=0.61
Naukri_YashRai[4y_0m]_JD1: Score=0.63
Nidhika-Tomar_JD1: Score=0.53
Praveen R- Resume_JD1: Score=0.72
Priyajit Bishayee resume 2025_JD1: Score=0.72
Rajvi Doshi_JD1: Score=0.65
Resume - Reema Panday_JD1: Score=0.52
Resume.SautrikGanguly_JD1: Score=0.51
Resume_Rahul_kushwaha_JD1: Score

In [1]:
import os
import zipfile
import fitz  # PyMuPDF
import docx
import re
import requests
import time

# --------------------- CONFIGURATION ---------------------

# Root folder holding the JDs, resume archives, template and generated output.
# Defaults to the original location; set the RESUME_PIPELINE_BASE environment
# variable to run against a different folder without editing the notebook.
base_path = os.environ.get(
    "RESUME_PIPELINE_BASE",
    r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples",
)
jd_dir = os.path.join(base_path, "JDs")  # JD text files, one per job description
manual_resume_zip = os.path.join(base_path, "manualcustomisedresumes.zip")
raw_resume_zip = os.path.join(base_path, "Rawresumes.zip")
ey_format_path = os.path.join(base_path, "EY_sample_format1.txt")  # template text
output_dir = os.path.join(base_path, "customised_resumes_llm")
temp_raw_dir = os.path.join(base_path, "temp_raw")  # raw resumes are unpacked here
os.makedirs(output_dir, exist_ok=True)
os.makedirs(temp_raw_dir, exist_ok=True)

# --------------------- UTILS ---------------------

def extract_zip(zip_path, extract_to):
    """Extract the full contents of the zip at ``zip_path`` into ``extract_to``."""
    with zipfile.ZipFile(zip_path, mode='r') as bundle:
        bundle.extractall(path=extract_to)

def read_pdf_text(pdf_path):
    """Return the text of every page of a PDF, joined with newlines.

    Args:
        pdf_path: Path of the PDF to open with PyMuPDF (``fitz``).

    Returns:
        A single string: each page's ``get_text()`` result in page order,
        separated by a newline.
    """
    # The context manager closes the document once the text is collected, so the
    # file handle is not held until garbage collection.
    with fitz.open(pdf_path) as doc:
        return "\n".join(page.get_text() for page in doc)

def read_docx_text(docx_path):
    """Join the text of every non-blank paragraph in a .docx file with newlines."""
    paragraph_texts = (para.text for para in docx.Document(docx_path).paragraphs)
    # Whitespace-only paragraphs are left out of the result.
    return "\n".join(content for content in paragraph_texts if content.strip())

def read_resume(path):
    """Extract the text of a resume, choosing the reader from the file extension.

    The extension is compared case-insensitively, so ``CV.PDF`` or ``notes.TXT``
    are read instead of silently yielding an empty string.

    Args:
        path: Path to the resume file.

    Returns:
        The extracted text for ``.pdf``, ``.docx`` and UTF-8 ``.txt`` files,
        ``""`` for any other extension.
    """
    lowered = path.lower()
    if lowered.endswith(".pdf"):
        return read_pdf_text(path)
    if lowered.endswith(".docx"):
        return read_docx_text(path)
    if lowered.endswith(".txt"):
        with open(path, 'r', encoding='utf-8') as f:
            return f.read()
    return ""

def split_sentences(text):
    """Split ``text`` at whitespace that directly follows a "." or ";".

    The punctuation stays attached to the preceding piece; only the whitespace
    run is consumed.
    """
    boundary = re.compile(r'(?<=[.;])\s+')
    return boundary.split(text)

def call_lmstudio_llm(jd_text, resume_bullets):
    """Ask the local LM Studio server to pick and rephrase resume bullets for a JD.

    Args:
        jd_text: Full text of the job description.
        resume_bullets: Candidate resume content, already formatted as bullets.

    Returns:
        The model's reply with surrounding whitespace stripped, or ``""`` when
        the request or the response parsing fails (the error is printed).
    """
    prompt = f"""
You are a resume customization assistant for a consulting firm. Your job is to:
1. Select only relevant resume bullet points that match the job description (JD).
2. Rephrase those points to align with the tone and keywords of the JD.
3. Output in plain, professional bullet points.

JD:
{jd_text}

Candidate Resume:
{resume_bullets}

Final Output (only bullet points in EY tone):
"""

    try:
        response = requests.post(
            # Chat requests go to the OpenAI-compatible /v1/chat/completions
            # route; the bare "/v1/chat" path used previously is not that route.
            "http://localhost:1234/v1/chat/completions",
            headers={"Content-Type": "application/json"},
            json={
                "model": "mistral",  # Your actual model name/alias from LM Studio
                "messages": [{"role": "user", "content": prompt}],
                "temperature": 0.5,
                "max_tokens": 1024
            },
            timeout=180
        )
        # Report HTTP failures as such rather than as a KeyError on 'choices'.
        response.raise_for_status()
        result = response.json()
        return result['choices'][0]['message']['content'].strip()
    except Exception as e:
        # Best effort: the caller receives "" and decides what to do with it.
        print("LM Studio error:", e)
        return ""

# --------------------- MAIN FUNCTION ---------------------

def customize_resumes_with_lmstudio():
    """Rewrite every raw resume against every JD with the local LLM and save the results.

    Steps:
        1. Unpack the raw resume archive into ``temp_raw_dir`` and read the template.
        2. Read every ``.txt`` JD in ``jd_dir`` once.
        3. For each resume found anywhere under ``temp_raw_dir``, turn its
           sentences into bullets and ask the LLM to rewrite them for each JD.
        4. Save ``<resume>_<jd>.txt`` in ``output_dir`` only when the LLM returned
           text; empty replies are reported and nothing is written for them.
    """
    print("STEP 1: Extracting resumes...")
    extract_zip(raw_resume_zip, temp_raw_dir)
    with open(ey_format_path, "r", encoding="utf-8") as f:
        ey_template = f.read()

    # JD texts do not change between resumes, so read each file a single time.
    jd_texts = {}
    for jd_file in os.listdir(jd_dir):
        if not jd_file.endswith(".txt"):
            continue
        with open(os.path.join(jd_dir, jd_file), "r", encoding="utf-8") as f:
            jd_texts[os.path.splitext(jd_file)[0]] = f.read()

    # Walk recursively: an archive that contains a top-level folder unpacks the
    # resumes one level down, where a flat listing would not see them.
    raw_paths = [
        os.path.join(root, name)
        for root, _, files in os.walk(temp_raw_dir)
        for name in files
        if name.lower().endswith((".pdf", ".docx", ".txt"))
    ]

    print("STEP 2: LLM-based customization in progress...")
    for raw_path in raw_paths:
        raw_name = os.path.splitext(os.path.basename(raw_path))[0]
        raw_text = read_resume(raw_path)
        raw_sentences = split_sentences(raw_text)
        # Very short fragments (30 characters or fewer) are left out of the prompt.
        resume_bullets = "\n".join(f"- {line.strip()}" for line in raw_sentences if len(line.strip()) > 30)

        if not resume_bullets:
            print(f"[SKIPPED] No usable text in: {raw_path}")
            continue

        for jd_name, jd_text in jd_texts.items():
            rewritten_bullets = call_lmstudio_llm(jd_text, resume_bullets)

            if rewritten_bullets:
                final_resume = ey_template.replace("[EXPERIENCE_PLACEHOLDER]", rewritten_bullets)
                output_path = os.path.join(output_dir, f"{raw_name}_{jd_name}.txt")
                with open(output_path, "w", encoding="utf-8") as out_f:
                    out_f.write(final_resume)
                print(f"✅ Saved: {output_path}")
            else:
                # An empty reply means the LLM call failed; do not write a resume
                # with a blank experience section or report it as saved.
                print(f"[ERROR] LLM returned nothing for {raw_name} / {jd_name}; not saved")

            time.sleep(1)

# --------------------- RUN ---------------------

# Run the LLM-based customisation when this code is executed directly rather
# than imported.
if __name__ == "__main__":
    customize_resumes_with_lmstudio()


STEP 1: Extracting resumes...
STEP 2: LLM-based customization in progress...


In [None]:
# import os
# import zipfile
# import fitz  # PyMuPDF
# import docx
# import requests
# import json
# import re
# import time
# from sklearn.feature_extraction.text import TfidfVectorizer
# from sklearn.metrics.pairwise import cosine_similarity

# # ---------------- CONFIGURATION ----------------

# base_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples"

# ey_format_path = os.path.join(base_path, "EY_sample_format1.txt")
# jd_dir = os.path.join(base_path, "JDs")
# raw_resume_zip = os.path.join(base_path, "Rawresumes.zip")
# manual_resume_zip = os.path.join(base_path, "manualcustomisedresumes.zip")

# raw_resume_dir = os.path.join(base_path, "raw_extracted")
# manual_resume_dir = os.path.join(base_path, "manual_extracted")
# output_dir = os.path.join(base_path, "customised_resumes_llm")
# log_dir = os.path.join(base_path, "match_logs")

# os.makedirs(output_dir, exist_ok=True)
# os.makedirs(log_dir, exist_ok=True)

# # ---------------- UTIL FUNCTIONS ----------------

# def extract_zip(zip_path, extract_to):
#     with zipfile.ZipFile(zip_path, 'r') as zip_ref:
#         zip_ref.extractall(extract_to)

# def read_pdf(path):
#     doc = fitz.open(path)
#     return "\n".join(page.get_text() for page in doc)

# def read_docx(path):
#     doc = docx.Document(path)
#     return "\n".join(p.text for p in doc.paragraphs if p.text.strip())

# def read_resume(path):
#     if path.endswith(".pdf"):
#         return read_pdf(path)
#     elif path.endswith(".docx"):
#         return read_docx(path)
#     else:
#         return ""

# def get_similarity(text1, text2):
#     vectorizer = TfidfVectorizer().fit_transform([text1, text2])
#     vectors = vectorizer.toarray()
#     return cosine_similarity([vectors[0]], [vectors[1]])[0][0]

# def call_lmstudio_llm(jd_text, bullet_points):
#     prompt = f"""You are a resume expert. Based on the job description below, select and rewrite only the most relevant bullet points from the candidate's experience to match the JD tone professionally.

# JD:
# {jd_text}

# Candidate Bullet Points:
# {bullet_points}

# Return only the final bullet points."""
    
#     headers = {"Content-Type": "application/json"}
#     payload = {
#         "model": "mistral-7b-instruct-v0.3-q4_k_m-gguf",
#         "prompt": prompt,
#         "temperature": 0.7,
#         "max_tokens": 1024,
#         "stop": ["</s>"]
#     }

#     try:
#         response = requests.post("http://localhost:1234/v1/completions", headers=headers, data=json.dumps(payload), timeout=180)
#         return response.json().get("choices", [{}])[0].get("text", "").strip()
#     except Exception as e:
#         return f"LLM error: {e}"

# # ---------------- MAIN FUNCTION ----------------

# def customize_resumes_with_lmstudio():
#     print("STEP 1: Extracting resumes...")
#     extract_zip(raw_resume_zip, raw_resume_dir)
#     extract_zip(manual_resume_zip, manual_resume_dir)

#     raw_resumes = {
#         os.path.splitext(f)[0]: read_resume(os.path.join(root, f))
#         for root, _, files in os.walk(raw_resume_dir)
#         for f in files
#     }

#     jd_texts = {
#         os.path.splitext(f)[0]: open(os.path.join(root, f), "r", encoding="utf-8").read()
#         for root, _, files in os.walk(jd_dir)
#         for f in files if f.endswith(".txt")
#     }

#     manual_resumes = {
#         os.path.splitext(f)[0]: read_resume(os.path.join(root, f))
#         for root, _, files in os.walk(manual_resume_dir)
#         for f in files
#     }

#     ey_template = open(ey_format_path, "r", encoding="utf-8").read()

#     print("STEP 2: LLM-based customization in progress...")

#     for raw_name, raw_text in raw_resumes.items():
#         # Find best JD for this resume
#         best_jd, best_score = None, 0
#         for jd_name, jd_text in jd_texts.items():
#             score = get_similarity(raw_text, jd_text)
#             if score > best_score:
#                 best_score = score
#                 best_jd = jd_name

#         if not best_jd:
#             continue

#         jd_text = jd_texts[best_jd]
#         resume_bullets = "\n".join([s.strip() for s in raw_text.split(". ") if len(s.strip()) > 30])

#         rewritten_bullets = call_lmstudio_llm(jd_text, resume_bullets)
#         final_resume = ey_template.replace("[EXPERIENCE_PLACEHOLDER]", rewritten_bullets)

#         output_path = os.path.join(output_dir, f"{raw_name}_{best_jd}.txt")
#         with open(output_path, "w", encoding="utf-8") as out_f:
#             out_f.write(final_resume)

#         print(f"✅ Saved: {output_path}")

#         # Optional: Compare with manual resume
#         matched_manual_key = next((k for k in manual_resumes if raw_name.lower() in k.lower()), None)
#         if matched_manual_key:
#             manual_text = manual_resumes[matched_manual_key]
#             sim = get_similarity(final_resume, manual_text)
#             log_line = f"{raw_name} <-> {best_jd}: {sim:.2f}\n"
#             with open(os.path.join(log_dir, "similarity_scores.txt"), "a", encoding="utf-8") as log_f:
#                 log_f.write(log_line)

#         time.sleep(1)

# # ---------------- RUN ----------------

# if __name__ == "__main__":
#     customize_resumes_with_lmstudio()



STEP 1: Extracting resumes...
STEP 2: LLM-based customization in progress...
✅ Saved: C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\customised_resumes_llm\Anuska Das_JD_52.txt
✅ Saved: C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\customised_resumes_llm\CV_Aarsh_JD_55.txt
✅ Saved: C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\customised_resumes_llm\CV_Arnab Roy_Model_Validation_JD_77.txt
✅ Saved: C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\customised_resumes_llm\G N Sindhur_resume_2025_JD_23.txt


In [None]:
import os, zipfile, fitz, docx, re, requests, json
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# ----------- CONFIG -----------
# Root folder holding the JDs, resume archive, template and generated output.
# Defaults to the original location; set the RESUME_PIPELINE_BASE environment
# variable to run against a different folder without editing the notebook.
base_path = os.environ.get(
    "RESUME_PIPELINE_BASE",
    r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples",
)
jd_dir = os.path.join(base_path, "JDs")  # JD text files, one per job description
raw_resume_zip = os.path.join(base_path, "Rawresumes.zip")
raw_resume_dir = os.path.join(base_path, "raw_extracted")  # archive is unpacked here
ey_format_path = os.path.join(base_path, "EY_sample_format1.txt")  # template text
output_dir = os.path.join(base_path, "LLM_Customised_Resumes")

os.makedirs(raw_resume_dir, exist_ok=True)
os.makedirs(output_dir, exist_ok=True)

# ----------- HELPERS -----------
def extract_zip(zip_path, extract_to):
    """Extract all members of the zip file ``zip_path`` into ``extract_to``."""
    archive = zipfile.ZipFile(zip_path, 'r')
    try:
        archive.extractall(extract_to)
    finally:
        # Always release the archive, even if extraction fails part-way.
        archive.close()

def read_pdf_text(pdf_path):
    """Return the text of every page of a PDF, joined with newlines.

    Args:
        pdf_path: Path of the PDF to open with PyMuPDF (``fitz``).

    Returns:
        A single string: each page's ``get_text()`` result in page order,
        separated by a newline.
    """
    # Closing the document via the context manager releases the file handle as
    # soon as the text is collected.
    with fitz.open(pdf_path) as doc:
        return "\n".join(page.get_text() for page in doc)

def read_docx_text(docx_path):
    """Return the text of a .docx file: its non-blank paragraphs, newline-separated."""
    document = docx.Document(docx_path)
    all_texts = [paragraph.text for paragraph in document.paragraphs]
    # Keep only paragraphs that still have content after stripping whitespace.
    non_blank = filter(lambda content: content.strip(), all_texts)
    return "\n".join(non_blank)

def read_resume(path):
    """Extract the text of a resume, choosing the reader from the file extension.

    Previously every file that did not end in ".pdf" was passed to the .docx
    reader, which fails on anything that is not a Word document. The extension
    is now checked explicitly and case-insensitively.

    Args:
        path: Path to the resume file.

    Returns:
        The extracted text for ``.pdf``, ``.docx`` and UTF-8 ``.txt`` files,
        ``""`` for any other extension.
    """
    lowered = path.lower()
    if lowered.endswith(".pdf"):
        return read_pdf_text(path)
    if lowered.endswith(".docx"):
        return read_docx_text(path)
    if lowered.endswith(".txt"):
        with open(path, "r", encoding="utf-8") as handle:
            return handle.read()
    return ""

def get_similarity(text1, text2):
    """Cosine similarity between the TF-IDF vectors of two texts.

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        The cosine similarity of the two TF-IDF rows, or ``0.0`` when no
        vocabulary can be built from the pair.
    """
    try:
        vec = TfidfVectorizer().fit_transform([text1, text2])
    except ValueError:
        # scikit-learn raises ValueError ("empty vocabulary") when neither text
        # contains a usable token, e.g. an image-only PDF scored against an
        # empty JD; treat such a pair as having nothing in common.
        return 0.0
    return cosine_similarity(vec[0:1], vec[1:2])[0][0]

def call_llm_mistral(jd_text, resume_text, timeout=300):
    """Ask the local LLM server to rewrite resume experience for a job description.

    Args:
        jd_text: Full text of the job description.
        resume_text: Resume text to be rewritten.
        timeout: Seconds to wait for the HTTP response. Without a timeout the
            request can block forever if the server stalls.

    Returns:
        The content of the first choice's message in the reply.

    Raises:
        requests.RequestException: On connection failure, timeout or an HTTP
            error status.
        KeyError, IndexError: When the reply does not have the expected shape.
    """
    url = "http://localhost:1234/v1/chat/completions"
    headers = {"Content-Type": "application/json"}

    prompt = f"""
You are an AI assistant helping customise resumes to job descriptions. Based on the job description below, rewrite the resume experience bullets to align with the required skills. Rephrase and match the tone accordingly.

Job Description:
{jd_text}

Resume Experience:
{resume_text}

Customised Resume Experience Bullets:
"""

    payload = {
        "model": "mistral-7b-instruct-v0.3-q4_k_m",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
    }

    response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=timeout)
    # Report HTTP failures as such rather than as a KeyError on "choices".
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"]

# ----------- MAIN PIPELINE -----------
def customize_all_resumes():
    """Customise every extracted resume against its best-matching JD.

    Steps:
        1. Unpack ``raw_resume_zip`` into ``raw_resume_dir`` and load the
           template at ``ey_format_path``.
        2. Load every ``.txt`` job description in ``jd_dir`` once.
        3. For each file found under ``raw_resume_dir``: read its text, pick
           the JD with the highest ``get_similarity`` score, ask
           ``call_llm_mistral`` for rewritten bullets and substitute them for
           ``[EXPERIENCE_PLACEHOLDER]`` in the template.
        4. Write the result to ``output_dir`` as ``<resume>_<jd>.txt``.

    A resume that cannot be read, has no extractable text, or whose LLM call
    fails is reported and skipped so one bad file does not abort the batch.

    Raises:
        FileNotFoundError: If ``jd_dir`` contains no ``.txt`` job descriptions.
    """
    print("STEP 1: Extracting resumes...")
    extract_zip(raw_resume_zip, raw_resume_dir)
    with open(ey_format_path, "r", encoding="utf-8") as template_file:
        ey_template = template_file.read()

    # Read the job descriptions once up front instead of once per resume.
    # Sub-folders and non-.txt entries are ignored; sorting keeps tie-breaking
    # between equally scored JDs deterministic.
    jd_texts = {}
    for jd_file in sorted(os.listdir(jd_dir)):
        jd_path = os.path.join(jd_dir, jd_file)
        if not (os.path.isfile(jd_path) and jd_file.lower().endswith(".txt")):
            continue
        with open(jd_path, "r", encoding="utf-8") as jf:
            jd_texts[os.path.splitext(jd_file)[0]] = jf.read()
    if not jd_texts:
        raise FileNotFoundError(f"No .txt job descriptions found in {jd_dir}")

    for root, _, files in os.walk(raw_resume_dir):
        for f in files:
            resume_name = os.path.splitext(f)[0]
            resume_path = os.path.join(root, f)
            try:
                resume_text = read_resume(resume_path)
            except Exception as exc:
                # Corrupt or unsupported file: report it and keep going.
                print(f"[SKIP] Could not read {resume_path}: {exc}")
                continue
            if not resume_text.strip():
                # No extractable text; nothing for the LLM to rewrite.
                print(f"[SKIP] No extractable text in {resume_path}")
                continue

            # STEP 2: Match best JD
            best_jd = None
            best_jd_name = None
            best_score = -1
            for jd_name, jd_text in jd_texts.items():
                score = get_similarity(resume_text, jd_text)
                if score > best_score:
                    best_score = score
                    best_jd = jd_text
                    best_jd_name = jd_name

            print(f"\n✅ Customizing {resume_name} with best JD match: {best_jd_name} (Score: {best_score:.2f})")

            # STEP 3: LLM Customisation
            try:
                rewritten_bullets = call_llm_mistral(best_jd, resume_text)
            except Exception as exc:
                # A timeout or bad response affects this resume only.
                print(f"[ERROR] LLM call failed for {resume_name}: {exc}")
                continue
            final_resume = ey_template.replace("[EXPERIENCE_PLACEHOLDER]", rewritten_bullets)

            # STEP 4: Save
            out_path = os.path.join(output_dir, f"{resume_name}_{best_jd_name}.txt")
            with open(out_path, "w", encoding="utf-8") as outf:
                outf.write(final_resume)
            print(f"💾 Saved: {out_path}")

# ----------- RUN -----------
# Entry point: start the full batch customisation when this code runs as the
# main module (not when its definitions are merely imported).
if __name__ == "__main__":
    customize_all_resumes()


STEP 1: Extracting resumes...

✅ Customizing Anuska Das with best JD match: JD_52 (Score: 0.49)
💾 Saved: C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\LLM_Customised_Resumes\Anuska Das_JD_52.txt

✅ Customizing CV_Aarsh with best JD match: JD_55 (Score: 0.61)
💾 Saved: C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\LLM_Customised_Resumes\CV_Aarsh_JD_55.txt

✅ Customizing CV_Arnab Roy_Model_Validation with best JD match: JD_77 (Score: 0.40)


In [None]:
# import os
# import zipfile
# import fitz  # PyMuPDF
# import docx
# import re
# from sklearn.feature_extraction.text import TfidfVectorizer
# from sklearn.metrics.pairwise import cosine_similarity

# # --------------------- CONFIGURATION ---------------------

# base_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples"
# jd_output_dir = os.path.join(base_path, "JDs")
# manual_resume_zip = os.path.join(base_path, "manualcustomisedresumes.zip")
# manual_resume_dir = os.path.join(base_path, "manualcustomisedresumes")
# raw_resume_zip = os.path.join(base_path, "Rawresumes.zip")
# raw_resume_dir = os.path.join(base_path, "rawresumes")
# ey_format_path = os.path.join(base_path, "EY_sample_format1.txt")
# customised_output_dir = os.path.join(base_path, "LLM_Customised_Resumes_Anuska")
# logs_dir = os.path.join(base_path, "logs_anuska")

# # Target candidate to process
# target_name = "Anuska Das"

# for folder in [manual_resume_dir, raw_resume_dir, customised_output_dir, logs_dir]:
#     os.makedirs(folder, exist_ok=True)

# # --------------------- UTILS ---------------------

# def extract_zip(zip_path, extract_to):
#     with zipfile.ZipFile(zip_path, 'r') as zip_ref:
#         zip_ref.extractall(extract_to)

# def read_pdf_text(pdf_path):
#     doc = fitz.open(pdf_path)
#     return "\n".join([page.get_text() for page in doc])

# def read_docx_text(docx_path):
#     doc = docx.Document(docx_path)
#     return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])

# def read_resume(path):
#     if path.endswith(".pdf"):
#         return read_pdf_text(path)
#     elif path.endswith(".docx"):
#         return read_docx_text(path)
#     else:
#         return ""

# def get_similarity(text1, text2):
#     vectorizer = TfidfVectorizer().fit_transform([text1, text2])
#     vectors = vectorizer.toarray()
#     return cosine_similarity([vectors[0]], [vectors[1]])[0][0]

# def customize_resume(raw_text, jd_text, ey_template):
#     raw_sentences = raw_text.split(". ")
#     matched = []
#     for sentence in raw_sentences:
#         score = get_similarity(sentence, jd_text)
#         if score > 0.15:
#             matched.append(sentence.strip())
#     final_experience = "\n".join(matched).strip()
#     return ey_template.replace("[EXPERIENCE_PLACEHOLDER]", final_experience)

# # --------------------- MAIN PIPELINE ---------------------

# def customise_anuska_resumes():
#     print("STEP 1: Extracting Resumes...")
#     extract_zip(raw_resume_zip, raw_resume_dir)
#     extract_zip(manual_resume_zip, manual_resume_dir)

#     ey_template = open(ey_format_path, "r", encoding="utf-8").read()

#     # Filter raw resumes for Anuska
#     raw_resumes = {
#         os.path.splitext(f)[0]: read_resume(os.path.join(root, f))
#         for root, _, files in os.walk(raw_resume_dir)
#         for f in files
#         if target_name.lower() in f.lower()
#     }

#     manual_resumes = {
#         os.path.splitext(f)[0]: read_resume(os.path.join(root, f))
#         for root, _, files in os.walk(manual_resume_dir)
#         for f in files
#         if target_name.lower() in f.lower()
#     }

#     if not raw_resumes:
#         print("No raw resume found for Anuska Das.")
#         return

#     print("STEP 2: Matching JDs to Anuska's raw resume...")
#     for jd_file in sorted(os.listdir(jd_output_dir)):
#         jd_index = os.path.splitext(jd_file)[0].split("_")[1]
#         jd_path = os.path.join(jd_output_dir, jd_file)
#         jd_text = open(jd_path, "r", encoding="utf-8").read()

#         for raw_name, raw_text in raw_resumes.items():
#             customised = customize_resume(raw_text, jd_text, ey_template)
#             output_path = os.path.join(customised_output_dir, f"{raw_name}_JD_{jd_index}.txt")
#             with open(output_path, "w", encoding="utf-8") as f:
#                 f.write(customised)

#             # Compare with Anuska's manual resume
#             match_key = next((k for k in manual_resumes if raw_name.lower() in k.lower()), None)
#             if match_key:
#                 manual_text = manual_resumes[match_key]
#                 score = get_similarity(customised, manual_text)
#                 log = f"{raw_name}_JD_{jd_index}: Similarity Score = {score:.2f}"
#                 print(log)
#                 with open(os.path.join(logs_dir, f"log_JD_{jd_index}.txt"), "a", encoding="utf-8") as logf:
#                     logf.write(log + "\n")

# # --------------------- RUN ---------------------

# if __name__ == "__main__":
#     customise_anuska_resumes()


STEP 1: Extracting Resumes...
STEP 2: Matching JDs to Anuska's raw resume...
Anuska Das_JD_1: Similarity Score = 0.00
Anuska Das_JD_10: Similarity Score = 0.00
Anuska Das_JD_100: Similarity Score = 0.00
Anuska Das_JD_101: Similarity Score = 0.00
Anuska Das_JD_102: Similarity Score = 0.00
Anuska Das_JD_103: Similarity Score = 0.00
Anuska Das_JD_104: Similarity Score = 0.00
Anuska Das_JD_105: Similarity Score = 0.00
Anuska Das_JD_106: Similarity Score = 0.00
Anuska Das_JD_107: Similarity Score = 0.00
Anuska Das_JD_108: Similarity Score = 0.00
Anuska Das_JD_109: Similarity Score = 0.00
Anuska Das_JD_11: Similarity Score = 0.00
Anuska Das_JD_110: Similarity Score = 0.00
Anuska Das_JD_111: Similarity Score = 0.00
Anuska Das_JD_112: Similarity Score = 0.00
Anuska Das_JD_113: Similarity Score = 0.00
Anuska Das_JD_114: Similarity Score = 0.00
Anuska Das_JD_115: Similarity Score = 0.00
Anuska Das_JD_116: Similarity Score = 0.00
Anuska Das_JD_117: Similarity Score = 0.00
Anuska Das_JD_118: Simil

In [None]:
# import os
# import zipfile
# import re
# import fitz  # PyMuPDF
# import docx
# import requests
# import json
# from sklearn.feature_extraction.text import TfidfVectorizer
# from sklearn.metrics.pairwise import cosine_similarity

# # ---------------- CONFIG ----------------
# base_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples"
# jd_dir = os.path.join(base_path, "JDs")
# manual_resume_zip = os.path.join(base_path, "manualcustomisedresumes.zip")
# manual_resume_dir = os.path.join(base_path, "manualcustomisedresumes")
# raw_resume_zip = os.path.join(base_path, "Rawresumes.zip")
# raw_resume_dir = os.path.join(base_path, "rawresumes")
# ey_format_path = os.path.join(base_path, "EY_sample_format1.txt")
# customised_output_dir = os.path.join(base_path, "LLM_Customised_Resumes")
# os.makedirs(customised_output_dir, exist_ok=True)

# lmstudio_url = "http://localhost:1234/v1/chat/completions"
# lmstudio_model = "mistral-7b-instruct-v0.3-q4_k_m-gguf"

# # ---------------- UTILS ----------------

# def extract_zip(zip_path, extract_to):
#     with zipfile.ZipFile(zip_path, 'r') as zip_ref:
#         zip_ref.extractall(extract_to)

# def read_pdf(path):
#     doc = fitz.open(path)
#     return "\n".join([page.get_text() for page in doc])

# def read_docx(path):
#     doc = docx.Document(path)
#     return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])

# def read_resume(path):
#     if path.endswith(".pdf"):
#         return read_pdf(path)
#     elif path.endswith(".docx"):
#         return read_docx(path)
#     return ""

# def get_similarity(text1, text2):
#     vectorizer = TfidfVectorizer().fit_transform([text1.lower(), text2.lower()])
#     vectors = vectorizer.toarray()
#     return cosine_similarity([vectors[0]], [vectors[1]])[0][0]

# def query_lmstudio(prompt):
#     headers = {"Content-Type": "application/json"}
#     payload = {
#         "model": lmstudio_model,
#         "messages": [{"role": "user", "content": prompt}],
#         "temperature": 0.5,
#         "stream": False
#     }
#     try:
#         response = requests.post(lmstudio_url, headers=headers, data=json.dumps(payload), timeout=120)
#         return response.json()['choices'][0]['message']['content'].strip()
#     except Exception as e:
#         print(f"LM Studio failed: {e}")
#         return ""

# # ---------------- CORE ----------------

# def extract_skills(text):
#     prompt = f"""Extract all technical and domain-related skills from this resume:
# {text}
# Only return the list of skills as comma-separated values."""
#     return query_lmstudio(prompt)

# def customize_resume(candidate_text, jd_text, ey_template):
#     prompt = f"""Given the following resume content:
# {candidate_text}

# And the following job description:
# {jd_text}

# Extract the relevant experiences from the resume that match the JD. Then, rephrase them professionally to fit into EY format.

# Respond only with the formatted EXPERIENCE section content."""
#     experience = query_lmstudio(prompt)
#     return ey_template.replace("[EXPERIENCE_PLACEHOLDER]", experience)

# # ---------------- PIPELINE ----------------

# def run_pipeline():
#     print("STEP 1: Extracting files...")
#     extract_zip(raw_resume_zip, raw_resume_dir)
#     extract_zip(manual_resume_zip, manual_resume_dir)

#     ey_template = open(ey_format_path, "r", encoding="utf-8").read()

#     jd_files = {os.path.splitext(f)[0]: open(os.path.join(jd_dir, f), "r", encoding="utf-8").read()
#                 for f in os.listdir(jd_dir) if f.endswith(".txt")}

#     print("STEP 2: Processing candidates...")
#     for root, _, files in os.walk(raw_resume_dir):
#         for f in files:
#             candidate_name = os.path.splitext(f)[0]
#             print(f"\nProcessing: {candidate_name}")

#             candidate_path = os.path.join(root, f)
#             raw_text = read_resume(candidate_path)
#             skills_csv = extract_skills(raw_text)
#             print(f"Extracted skills: {skills_csv}")

#             matched_jds = []
#             for jd_name, jd_text in jd_files.items():
#                 match_score = get_similarity(skills_csv, jd_text)
#                 if match_score >= 0.6:
#                     matched_jds.append((jd_name, jd_text))
#             print(f"Matched JDs: {[j[0] for j in matched_jds]}")

#             for jd_name, jd_text in matched_jds:
#                 customised_text = customize_resume(raw_text, jd_text, ey_template)
#                 out_path = os.path.join(customised_output_dir, f"{candidate_name}_JD_{jd_name}.txt")
#                 with open(out_path, "w", encoding="utf-8") as out_f:
#                     out_f.write(customised_text)

# # ---------------- RUN ----------------

# if __name__ == "__main__":
#     run_pipeline()


STEP 1: Extracting files...
STEP 2: Processing candidates...

Processing: Anuska Das
Extracted skills: Statistical Analysis, Data analysis (logistic regression, linear regression, random forest, decision trees, credit risk analysis), Regulatory Reporting, Programming languages (SAS, Python, SQL, Excel, VBA, PowerPoint), Machine learning techniques, Primary and secondary market research
Matched JDs: []

Processing: CV_Aarsh


In [None]:
# # Load model directly
# from transformers import AutoModel
# model = AutoModel.from_pretrained("TheBloke/zephyr-7B-beta-GGUF")

config.json:   0%|          | 0.00/31.0 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


OSError: TheBloke/zephyr-7B-beta-GGUF does not appear to have a file named pytorch_model.bin, model.safetensors, tf_model.h5, model.ckpt or flax_model.msgpack.

In [None]:
# import os
# import zipfile
# import fitz  # PyMuPDF
# import docx
# import re
# import json
# import time
# import requests
# from sklearn.feature_extraction.text import TfidfVectorizer
# from sklearn.metrics.pairwise import cosine_similarity

# # -------------------- CONFIG --------------------
# base_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples"

# raw_resume_zip = os.path.join(base_path, "Rawresumes.zip")
# jd_zip = os.path.join(base_path, "JDs.zip")
# ey_template_path = os.path.join(base_path, "EY_sample_resume_template.txt")

# raw_resume_dir = os.path.join(base_path, "rawresumes")
# jd_dir = os.path.join(base_path, "JDs")
# customised_dir = os.path.join(base_path, "final_customised_resumes")

# for folder in [raw_resume_dir, jd_dir, customised_dir]:
#     os.makedirs(folder, exist_ok=True)

# # -------------------- UTILITIES --------------------
# def extract_zip(zip_path, extract_to):
#     with zipfile.ZipFile(zip_path, 'r') as zip_ref:
#         zip_ref.extractall(extract_to)

# def read_pdf_text(pdf_path):
#     doc = fitz.open(pdf_path)
#     return "\n".join([page.get_text() for page in doc])

# def read_docx_text(docx_path):
#     doc = docx.Document(docx_path)
#     return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])

# def read_resume(path):
#     if path.endswith(".pdf"):
#         return read_pdf_text(path)
#     elif path.endswith(".docx"):
#         return read_docx_text(path)
#     elif path.endswith(".txt"):
#         return open(path, "r", encoding="utf-8").read()
#     else:
#         return ""

# def get_tfidf_similarity(text1, text2):
#     vectorizer = TfidfVectorizer().fit_transform([text1, text2])
#     vectors = vectorizer.toarray()
#     return cosine_similarity([vectors[0]], [vectors[1]])[0][0]

# def call_lmstudio(prompt, model="zephyr-7b-beta.Q4_K_M.gguf", retries=3):
#     url = "http://localhost:1234/v1/chat/completions"
#     headers = {"Content-Type": "application/json"}
#     payload = {
#         "model": model,
#         "messages": [{"role": "user", "content": prompt}],
#         "temperature": 0.7,
#         "stream": False
#     }
#     for attempt in range(retries):
#         try:
#             response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=180)
#             return response.json()['choices'][0]['message']['content']
#         except Exception as e:
#             if attempt == retries - 1:
#                 print("❌ LM Studio failed:", e)
#                 return ""
#             time.sleep(2)

# # -------------------- MAIN PIPELINE --------------------
# def process():
#     print("STEP 1: Extracting files...")
#     extract_zip(raw_resume_zip, raw_resume_dir)
#     extract_zip(jd_zip, jd_dir)

#     print("STEP 2: Loading resumes and JDs...")
#     ey_template = open(ey_template_path, "r", encoding="utf-8").read()

#     raw_resumes = {
#         os.path.splitext(f)[0]: read_resume(os.path.join(root, f))
#         for root, _, files in os.walk(raw_resume_dir)
#         for f in files
#     }

#     jd_texts = {
#         os.path.splitext(f)[0]: read_resume(os.path.join(root, f))
#         for root, _, files in os.walk(jd_dir)
#         for f in files
#     }

#     print("STEP 3: Matching resumes with best JD + Customizing...")
#     for candidate, resume_text in raw_resumes.items():
#         best_jd = ""
#         best_score = 0
#         for jd_name, jd_text in jd_texts.items():
#             score = get_tfidf_similarity(resume_text.lower(), jd_text.lower())
#             if score > best_score:
#                 best_score = score
#                 best_jd = jd_name

#         if best_jd == "":
#             print(f"No match found for {candidate}")
#             continue

#         print(f"→ Customizing {candidate} with JD: {best_jd} (Score: {best_score:.2f})")

#         jd_content = jd_texts[best_jd]

#         # Build the main prompt for all sections
#         prompt = f"""
# You are a resume writing assistant. Using the candidate's raw resume and the JD below, extract and rewrite the following sections:

# 1. SUMMARY: Write a 2–3 line summary tailored to the JD.
# 2. QUALIFICATIONS: Extract degrees and colleges, nicely formatted.
# 3. EXPERIENCE: Rewrite to reflect JD language, tone, and skills.
# 4. TECHNICAL_SKILLS: List relevant tools and technical expertise.

# --- RAW RESUME ---
# {resume_text}

# --- JOB DESCRIPTION ---
# {jd_content}

# Now provide the following output with exact labels:

# SUMMARY:
# ...

# QUALIFICATIONS:
# ...

# EXPERIENCE:
# ...

# TECHNICAL_SKILLS:
# ...
# """

#         result = call_lmstudio(prompt)
#         if not result:
#             continue

#         # Extract each block from the LLM output
#         summary = re.search(r"SUMMARY:(.*?)QUALIFICATIONS:", result, re.DOTALL)
#         qualifications = re.search(r"QUALIFICATIONS:(.*?)EXPERIENCE:", result, re.DOTALL)
#         experience = re.search(r"EXPERIENCE:(.*?)TECHNICAL_SKILLS:", result, re.DOTALL)
#         tech_skills = re.search(r"TECHNICAL_SKILLS:(.*)", result, re.DOTALL)

#         final_resume = ey_template
#         final_resume = final_resume.replace("[CANDIDATE_NAME]", candidate)
#         final_resume = final_resume.replace("[ROLE]", best_jd.replace("_", " "))
#         final_resume = final_resume.replace("[SUMMARY]", summary.group(1).strip() if summary else "")
#         final_resume = final_resume.replace("[QUALIFICATIONS]", qualifications.group(1).strip() if qualifications else "")
#         final_resume = final_resume.replace("[EXPERIENCE_PLACEHOLDER]", experience.group(1).strip() if experience else "")
#         final_resume = final_resume.replace("[TECHNICAL_SKILLS]", tech_skills.group(1).strip() if tech_skills else "")

#         out_path = os.path.join(customised_dir, f"{candidate}_customised_for_{best_jd}.txt")
#         with open(out_path, "w", encoding="utf-8") as f:
#             f.write(final_resume)

#     print("\n✅ Customisation completed. Check:", customised_dir)

# # -------------------- EXECUTE --------------------
# if __name__ == "__main__":
#     process()


STEP 1: Extracting files...
STEP 2: Loading resumes and JDs...
STEP 3: Matching resumes with best JD + Customizing...
→ Customizing Anuska Das with JD: JD_52 (Score: 0.49)
→ Customizing CV_Aarsh with JD: JD_55 (Score: 0.61)
→ Customizing CV_Arnab Roy_Model_Validation with JD: JD_77 (Score: 0.40)
→ Customizing G N Sindhur_resume_2025 with JD: JD_23 (Score: 0.61)
❌ LM Studio failed: HTTPConnectionPool(host='localhost', port=1234): Read timed out. (read timeout=180)
→ Customizing Isha Porwal Resume with JD: JD_58 (Score: 0.54)
→ Customizing KartikMohan with JD: JD_55 (Score: 0.61)
→ Customizing Kshitij_Sahdev_CV (002) with JD: JD_110 (Score: 0.57)
→ Customizing Kundan_Kumar with JD: JD_115 (Score: 0.66)
→ Customizing Munna Choudhary with JD: JD_100 (Score: 0.68)
→ Customizing Narendra_Sahu_CreditRisk with JD: JD_75 (Score: 0.55)
→ Customizing Naukri_AbhimanyuSingh[3y_7m] with JD: JD_111 (Score: 0.55)
→ Customizing Naukri_AnandKumar[10y_0m] with JD: JD_55 (Score: 0.47)
→ Customizing Naukri

In [None]:
# import os
# import fitz  # PyMuPDF
# import docx
# import re
# import json
# import time
# import requests
# from sklearn.feature_extraction.text import TfidfVectorizer
# from sklearn.metrics.pairwise import cosine_similarity

# # -------------------- CONFIG --------------------
# base_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples"

# raw_resume_dir = os.path.join(base_path, "Rawresumes")
# jd_dir = os.path.join(base_path, "JDs")
# ey_template_path = os.path.join(base_path, "EY_sample_resume_template.txt")
# customised_dir = os.path.join(base_path, "final_customised_resumes")

# os.makedirs(customised_dir, exist_ok=True)

# # -------------------- UTILITIES --------------------
# def read_pdf_text(pdf_path):
#     doc = fitz.open(pdf_path)
#     return "\n".join([page.get_text() for page in doc])

# def read_docx_text(docx_path):
#     doc = docx.Document(docx_path)
#     return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])

# def read_resume(path):
#     if path.endswith(".pdf"):
#         return read_pdf_text(path)
#     elif path.endswith(".docx"):
#         return read_docx_text(path)
#     elif path.endswith(".txt"):
#         return open(path, "r", encoding="utf-8").read()
#     else:
#         return ""

# def get_tfidf_similarity(text1, text2):
#     vectorizer = TfidfVectorizer().fit_transform([text1, text2])
#     vectors = vectorizer.toarray()
#     return cosine_similarity([vectors[0]], [vectors[1]])[0][0]

# def call_lmstudio(prompt, model="zephyr-7b-beta.Q4_K_M.gguf", retries=3):
#     url = "http://localhost:1234/v1/chat/completions"
#     headers = {"Content-Type": "application/json"}
#     payload = {
#         "model": model,
#         "messages": [{"role": "user", "content": prompt}],
#         "temperature": 0.7,
#         "stream": False
#     }
#     for attempt in range(retries):
#         try:
#             response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=180)
#             return response.json()['choices'][0]['message']['content']
#         except Exception as e:
#             if attempt == retries - 1:
#                 print("❌ LM Studio failed:", e)
#                 return ""
#             time.sleep(2)

# # -------------------- MAIN PIPELINE --------------------
# def process():
#     print("STEP 1: Loading resumes and JDs...")
#     ey_template = open(ey_template_path, "r", encoding="utf-8").read()

#     raw_resumes = {
#         os.path.splitext(f)[0]: read_resume(os.path.join(root, f))
#         for root, _, files in os.walk(raw_resume_dir)
#         for f in files
#     }

#     jd_texts = {
#         os.path.splitext(f)[0]: read_resume(os.path.join(root, f))
#         for root, _, files in os.walk(jd_dir)
#         for f in files
#     }

#     print("STEP 2: Matching resumes with best JD + Customizing...")
#     for candidate, resume_text in raw_resumes.items():
#         best_jd = ""
#         best_score = 0
#         for jd_name, jd_text in jd_texts.items():
#             score = get_tfidf_similarity(resume_text.lower(), jd_text.lower())
#             if score > best_score:
#                 best_score = score
#                 best_jd = jd_name

#         if best_jd == "":
#             print(f"❌ No JD match found for {candidate}")
#             continue

#         print(f"→ Customizing {candidate} with JD: {best_jd} (Score: {best_score:.2f})")
#         jd_content = jd_texts[best_jd]

#         prompt = f"""
# You are a resume writing assistant. Using the candidate's raw resume and the JD below, extract and rewrite the following sections:

# 1. SUMMARY: Write a 2–3 line summary tailored to the JD.
# 2. QUALIFICATIONS: Extract degrees and colleges, nicely formatted.
# 3. EXPERIENCE: Rewrite to reflect JD language, tone, and skills.
# 4. TECHNICAL_SKILLS: List relevant tools and technical expertise.

# --- RAW RESUME ---
# {resume_text}

# --- JOB DESCRIPTION ---
# {jd_content}

# Now provide the following output with exact labels:

# SUMMARY:
# ...

# QUALIFICATIONS:
# ...

# EXPERIENCE:
# ...

# TECHNICAL_SKILLS:
# ...
# """
#         result = call_lmstudio(prompt)
#         if not result:
#             continue

#         # Extract sections
#         summary = re.search(r"SUMMARY:(.*?)QUALIFICATIONS:", result, re.DOTALL)
#         qualifications = re.search(r"QUALIFICATIONS:(.*?)EXPERIENCE:", result, re.DOTALL)
#         experience = re.search(r"EXPERIENCE:(.*?)TECHNICAL_SKILLS:", result, re.DOTALL)
#         tech_skills = re.search(r"TECHNICAL_SKILLS:(.*)", result, re.DOTALL)

#         final_resume = ey_template
#         final_resume = final_resume.replace("[CANDIDATE_NAME]", candidate)
#         final_resume = final_resume.replace("[ROLE]", best_jd.replace("_", " "))
#         final_resume = final_resume.replace("[SUMMARY]", summary.group(1).strip() if summary else "")
#         final_resume = final_resume.replace("[QUALIFICATIONS]", qualifications.group(1).strip() if qualifications else "")
#         final_resume = final_resume.replace("[EXPERIENCE_PLACEHOLDER]", experience.group(1).strip() if experience else "")
#         final_resume = final_resume.replace("[TECHNICAL_SKILLS]", tech_skills.group(1).strip() if tech_skills else "")

#         out_path = os.path.join(customised_dir, f"{candidate}_customised_for_{best_jd}.txt")
#         with open(out_path, "w", encoding="utf-8") as f:
#             f.write(final_resume)

#     print("\n✅ Customisation completed. Check:", customised_dir)

# # -------------------- EXECUTE --------------------
# if __name__ == "__main__":
#     process()


STEP 1: Loading resumes and JDs...
STEP 2: Matching resumes with best JD + Customizing...
→ Customizing Anuska Das with JD: JD_52 (Score: 0.49)
→ Customizing CV_Aarsh with JD: JD_55 (Score: 0.61)
→ Customizing CV_Arnab Roy_Model_Validation with JD: JD_77 (Score: 0.40)
→ Customizing G N Sindhur_resume_2025 with JD: JD_23 (Score: 0.61)
→ Customizing Isha Porwal Resume with JD: JD_58 (Score: 0.54)


In [None]:
# import os
# import re
# import json
# import time
# import fitz  # PyMuPDF
# import docx
# import requests
# from sklearn.feature_extraction.text import TfidfVectorizer
# from sklearn.metrics.pairwise import cosine_similarity

# # -------------------- CONFIG --------------------
# base_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples"
# raw_resume_dir = os.path.join(base_path, "Rawresumes")
# jd_dir = os.path.join(base_path, "JDs")
# template_path = os.path.join(base_path, "EY_sample_resume_template.txt")
# customised_dir = os.path.join(base_path, "final_customised_resumes")
# os.makedirs(customised_dir, exist_ok=True)

# # -------------------- UTILITIES --------------------
# def read_pdf(path):
#     doc = fitz.open(path)
#     return "\n".join(page.get_text() for page in doc)

# def read_docx(path):
#     doc = docx.Document(path)
#     return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])

# def read_resume(path):
#     if path.endswith(".pdf"):
#         return read_pdf(path)
#     elif path.endswith(".docx"):
#         return read_docx(path)
#     elif path.endswith(".txt"):
#         return open(path, "r", encoding="utf-8").read()
#     else:
#         return ""

# def get_tfidf_similarity(text1, text2):
#     vec = TfidfVectorizer().fit_transform([text1, text2])
#     return cosine_similarity(vec[0:1], vec[1:2])[0][0]

# def call_zephyr(prompt, model="zephyr-7b-beta.Q4_K_M.gguf"):
#     url = "http://localhost:1234/v1/chat/completions"
#     headers = {"Content-Type": "application/json"}
#     payload = {
#         "model": model,
#         "messages": [{"role": "user", "content": prompt}],
#         "temperature": 0.7,
#         "stream": False
#     }
#     try:
#         res = requests.post(url, headers=headers, data=json.dumps(payload), timeout=180)
#         return res.json()["choices"][0]["message"]["content"]
#     except Exception as e:
#         print("❌ Error in Zephyr call:", e)
#         return ""

# # -------------------- MAIN FUNCTION --------------------
# def customise_resume(frontend_prompt: str, uploaded_resume_path: str):
#     print("🚀 Starting resume customization...")

#     # Step 1: Synthesize JD using Zephyr from prompt
#     synth_jd_prompt = f"""Write a clear and structured job description for the following role prompt:

# PROMPT: {frontend_prompt}

# Make sure to include responsibilities, required skills, and qualifications."""
#     generated_jd = call_zephyr(synth_jd_prompt)
#     print("✅ JD created using Zephyr")

#     # Step 2: Read uploaded resume
#     candidate_name = os.path.splitext(os.path.basename(uploaded_resume_path))[0]
#     resume_text = read_resume(uploaded_resume_path)
#     if not resume_text.strip():
#         print("❌ Resume text is empty.")
#         return

#     # Step 3: Load JD and resume corpus for similarity matching
#     jd_texts = {
#         os.path.splitext(f)[0]: read_resume(os.path.join(jd_dir, f))
#         for f in os.listdir(jd_dir)
#         if f.endswith((".pdf", ".docx", ".txt"))
#     }
#     resume_texts = {
#         os.path.splitext(f)[0]: read_resume(os.path.join(raw_resume_dir, f))
#         for f in os.listdir(raw_resume_dir)
#         if f.endswith((".pdf", ".docx", ".txt"))
#     }

#     # Step 4: Find top matching JD from JD folder (optional, enrich context)
#     best_jd = ""
#     best_score = 0
#     for name, text in jd_texts.items():
#         score = get_tfidf_similarity(generated_jd.lower(), text.lower())
#         if score > best_score:
#             best_score = score
#             best_jd = text
#     combined_jd = generated_jd + "\n\n" + (best_jd if best_jd else "")

#     # Step 5: Ask Zephyr to customize resume
#     customise_prompt = f"""
# You are a professional resume writing assistant.

# Given the raw resume and JD below, rewrite the following sections:

# 1. SUMMARY
# 2. QUALIFICATIONS
# 3. EXPERIENCE
# 4. TECHNICAL_SKILLS

# --- RAW RESUME ---
# {resume_text}

# --- JOB DESCRIPTION ---
# {combined_jd}

# Now format output like:

# SUMMARY:
# ...

# QUALIFICATIONS:
# ...

# EXPERIENCE:
# ...

# TECHNICAL_SKILLS:
# ...
# """
#     result = call_zephyr(customise_prompt)
#     print("✅ Resume customized")

#     # Step 6: Extract sections using regex
#     summary = re.search(r"SUMMARY:(.*?)QUALIFICATIONS:", result, re.DOTALL)
#     qualifications = re.search(r"QUALIFICATIONS:(.*?)EXPERIENCE:", result, re.DOTALL)
#     experience = re.search(r"EXPERIENCE:(.*?)TECHNICAL_SKILLS:", result, re.DOTALL)
#     tech_skills = re.search(r"TECHNICAL_SKILLS:(.*)", result, re.DOTALL)

#     # Step 7: Fill into EY template
#     template = open(template_path, "r", encoding="utf-8").read()
#     filled_resume = template
#     filled_resume = filled_resume.replace("[CANDIDATE_NAME]", candidate_name)
#     filled_resume = filled_resume.replace("[ROLE]", frontend_prompt)
#     filled_resume = filled_resume.replace("[SUMMARY]", summary.group(1).strip() if summary else "")
#     filled_resume = filled_resume.replace("[QUALIFICATIONS]", qualifications.group(1).strip() if qualifications else "")
#     filled_resume = filled_resume.replace("[EXPERIENCE_PLACEHOLDER]", experience.group(1).strip() if experience else "")
#     filled_resume = filled_resume.replace("[TECHNICAL_SKILLS]", tech_skills.group(1).strip() if tech_skills else "")

#     # Step 8: Save the file
#     output_path = os.path.join(customised_dir, f"{candidate_name}_customised_resume.txt")
#     with open(output_path, "w", encoding="utf-8") as f:
#         f.write(filled_resume)

#     print(f"✅ Customised resume saved to: {output_path}")


In [None]:
# # Example frontend inputs
# frontend_prompt = "Create a JD for a retail IRB model development requirement. Mention that SQL is a must."
# uploaded_resume_path = r"C:\Users\HN338QQ\Downloads\Arnab Bhattacharya.pdf"

# customise_resume(frontend_prompt, uploaded_resume_path)


🚀 Starting resume customization...
✅ JD created using Zephyr
❌ Error in Zephyr call: HTTPConnectionPool(host='localhost', port=1234): Read timed out. (read timeout=180)
✅ Resume customized
✅ Customised resume saved to: C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes\Arnab Bhattacharya_customised_resume.txt


In [None]:
# import os
# import fitz  # PyMuPDF
# import requests

# # === Constants ===
# LM_API_URL = "http://127.0.0.1:1234/v1/chat/completions"
# MODEL_NAME = "thebloke_zephyr-7b-beta:2"

# # === Utility Functions ===

# def read_pdf_text(pdf_path):
#     text = ""
#     with fitz.open(pdf_path) as doc:
#         for page in doc:
#             text += page.get_text()
#     return text.strip()

# def call_zephyr(prompt):
#     payload = {
#         "model": MODEL_NAME,
#         "messages": [
#             {"role": "user", "content": prompt}
#         ],
#         "temperature": 0.7
#     }
#     try:
#         response = requests.post(LM_API_URL, json=payload, timeout=300)
#         response.raise_for_status()
#         return response.json()["choices"][0]["message"]["content"].strip()
#     except Exception as e:
#         print(f"❌ Error in Zephyr LLM call: {e}")
#         return None

# def merge_resume_with_jd(resume_text, jd_text):
#     # A simple formatted template — can be modified
#     customised_resume = f"""
# ========================
# CUSTOMISED RESUME
# ========================

# SUMMARY:
# {jd_text.split('SUMMARY:')[1].split('QUALIFICATIONS:')[0].strip() if 'SUMMARY:' in jd_text else ''}

# QUALIFICATIONS:
# {jd_text.split('QUALIFICATIONS:')[1].split('EXPERIENCE:')[0].strip() if 'QUALIFICATIONS:' in jd_text else ''}

# EXPERIENCE:
# {jd_text.split('EXPERIENCE:')[1].split('TECHNICAL_SKILLS:')[0].strip() if 'EXPERIENCE:' in jd_text else ''}

# TECHNICAL SKILLS:
# {jd_text.split('TECHNICAL_SKILLS:')[1].strip() if 'TECHNICAL_SKILLS:' in jd_text else ''}

# ========================
# ORIGINAL RESUME TEXT (Reference)
# ========================
# {resume_text}
# """
#     return customised_resume

# def save_customised_resume(resume_text, output_path):
#     with open(output_path, "w", encoding="utf-8") as f:
#         f.write(resume_text)
#     print(f"✅ Customised resume saved to: {output_path}")

# # === Main Customisation Function ===

# def customise_resume(frontend_prompt, uploaded_resume_path):
#     # Step 1: Read resume
#     resume_text = read_pdf_text(uploaded_resume_path)

#     # Step 2: Generate JD from prompt via Zephyr
#     generated_jd = call_zephyr(frontend_prompt)
#     if not generated_jd:
#         print("❌ JD generation failed. Exiting.")
#         return

#     # Step 3: Customise resume
#     final_resume = merge_resume_with_jd(resume_text, generated_jd)

#     # Step 4: Save customised resume
#     base_name = os.path.splitext(os.path.basename(uploaded_resume_path))[0]
#     output_path = f"{base_name}_customised_resume.txt"
#     save_customised_resume(final_resume, output_path)

#     return output_path


In [None]:
# frontend_prompt = "Create a JD for a retail IRB model development requirement. Mention that SQL is a must."
# uploaded_resume_path = r"C:\Users\HN338QQ\Downloads\Arnab Bhattacharya.pdf"

# customised_path = customise_resume(frontend_prompt, uploaded_resume_path)
# print("📄 Customised Resume Path:", customised_path)


✅ Customised resume saved to: Arnab Bhattacharya_customised_resume.txt
📄 Customised Resume Path: Arnab Bhattacharya_customised_resume.txt


In [None]:
# import os
# import fitz  # PyMuPDF
# import re
# import json
# import time
# import requests
# from sklearn.feature_extraction.text import TfidfVectorizer
# from sklearn.metrics.pairwise import cosine_similarity

# # -------------------- CONFIG --------------------
# EY_TEMPLATE_PATH = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\EY_sample_resume_template.txt"
# RAW_RESUMES_DIR = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes"
# JDS_DIR = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\JDs"
# CUSTOMISED_DIR = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes"
# os.makedirs(CUSTOMISED_DIR, exist_ok=True)

# # -------------------- UTILITIES --------------------
# def read_pdf_text(pdf_path):
#     doc = fitz.open(pdf_path)
#     return "\n".join([page.get_text() for page in doc])

# def call_lmstudio(prompt, model="thebloke_zephyr-7b-beta:2", retries=3):
#     url = "http://localhost:1234/v1/chat/completions"
#     headers = {"Content-Type": "application/json"}
#     payload = {
#         "model": model,
#         "messages": [{"role": "user", "content": prompt}],
#         "temperature": 0.7,
#         "stream": False
#     }
#     for attempt in range(retries):
#         try:
#             response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=120)
#             return response.json()['choices'][0]['message']['content']
#         except Exception as e:
#             if attempt == retries - 1:
#                 print("❌ LM Studio failed:", e)
#                 return ""
#             time.sleep(2)

# # -------------------- MAIN FUNCTION --------------------
# def customise_resume(frontend_prompt, uploaded_resume_path):
#     print("STEP 1: Reading uploaded resume and sample template...")
#     resume_text = read_pdf_text(uploaded_resume_path)
#     with open(EY_TEMPLATE_PATH, "r", encoding="utf-8") as f:
#         ey_template = f.read()

#     print("STEP 2: Generating JD using prompt...")
#     jd_prompt = f"""You are a recruiter assistant. Write a detailed job description based on this prompt:

# \"\"\"{frontend_prompt}\"\"\"

# Provide a structured JD suitable for candidates."""
#     jd_generated = call_lmstudio(jd_prompt)
#     if not jd_generated.strip():
#         print("❌ JD generation failed.")
#         return

#     print("STEP 3: Customising resume with resume + JD...")
#     prompt = f"""
# You are a resume assistant. Given the candidate's resume and the JD below, rewrite and extract the following sections tailored to the JD:

# 1. SUMMARY: Write a 2–3 line summary aligned to the JD.
# 2. QUALIFICATIONS: Extract degrees, universities.
# 3. EXPERIENCE: Rewrite based on JD tone and keywords.
# 4. TECHNICAL_SKILLS: Highlight relevant tools/skills.

# --- RESUME ---
# {resume_text}

# --- JD ---
# {jd_generated}

# Respond ONLY in the following format:

# SUMMARY:
# ...

# QUALIFICATIONS:
# ...

# EXPERIENCE:
# ...

# TECHNICAL_SKILLS:
# ...
# """
#     result = call_lmstudio(prompt)
#     if not result.strip():
#         print("❌ Resume customisation failed.")
#         return

#     summary = re.search(r"SUMMARY:(.*?)QUALIFICATIONS:", result, re.DOTALL)
#     qualifications = re.search(r"QUALIFICATIONS:(.*?)EXPERIENCE:", result, re.DOTALL)
#     experience = re.search(r"EXPERIENCE:(.*?)TECHNICAL_SKILLS:", result, re.DOTALL)
#     tech_skills = re.search(r"TECHNICAL_SKILLS:(.*)", result, re.DOTALL)

#     # Fill into template
#     candidate_name = os.path.splitext(os.path.basename(uploaded_resume_path))[0]
#     final_resume = ey_template
#     final_resume = final_resume.replace("[CANDIDATE_NAME]", candidate_name)
#     final_resume = final_resume.replace("[ROLE]", frontend_prompt)
#     final_resume = final_resume.replace("[SUMMARY]", summary.group(1).strip() if summary else "")
#     final_resume = final_resume.replace("[QUALIFICATIONS]", qualifications.group(1).strip() if qualifications else "")
#     final_resume = final_resume.replace("[EXPERIENCE_PLACEHOLDER]", experience.group(1).strip() if experience else "")
#     final_resume = final_resume.replace("[TECHNICAL_SKILLS]", tech_skills.group(1).strip() if tech_skills else "")

#     # Save
#     output_filename = f"{candidate_name}_customised_resume.txt"
#     output_path = os.path.join(CUSTOMISED_DIR, output_filename)
#     with open(output_path, "w", encoding="utf-8") as f:
#         f.write(final_resume)

#     print(f"\n✅ Resume customised and saved to:\n{output_path}")


In [None]:
# frontend_prompt = "Create a JD for a retail IRB model development requirement. Mention that SQL is a must."
# uploaded_resume_path = r"C:\Users\HN338QQ\Downloads\Arnab Bhattacharya.pdf"

# customise_resume(frontend_prompt, uploaded_resume_path)


STEP 1: Reading uploaded resume and sample template...
STEP 2: Generating JD using prompt...
STEP 3: Customising resume with resume + JD...

✅ Resume customised and saved to:
C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes\Arnab Bhattacharya_customised_resume.txt


In [None]:
import os
import fitz  # PyMuPDF
import docx
import re
import json
import requests
import time

# ---- CONFIGURATION ----
# The template file and the output folder both live under one shared samples folder.
_samples_root = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples"
ey_template_path = _samples_root + r"\EY_sample_resume_template.txt"
output_dir = _samples_root + r"\final_customised_resumes"
# Create the output folder up front so later writes cannot fail on a missing directory.
os.makedirs(output_dir, exist_ok=True)

# ---- LM Studio CALL ----
def call_lmstudio(prompt, model="zephyr-7b-beta.Q4_K_M.gguf", retries=3):
    """Send a single-turn chat prompt to the local LM Studio server and return the reply.

    Args:
        prompt: Text sent as the only (user) message of the chat request.
        model: Model identifier placed in the request payload.
        retries: Maximum number of attempts; failed attempts are separated by
            a 2-second pause.

    Returns:
        The assistant message content of the first choice, or "" when every
        attempt fails (the last error is printed) or when ``retries`` allows
        no attempt at all.
    """
    url = "http://localhost:1234/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "stream": False
    }
    for attempt in range(retries):
        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=180)
            # Surface HTTP errors as such instead of a KeyError on 'choices' below.
            response.raise_for_status()
            return response.json()['choices'][0]['message']['content']
        except Exception as e:
            # Best-effort by design: any failure counts as a failed attempt.
            if attempt == retries - 1:
                print("❌ LLM call failed:", e)
                return ""
            time.sleep(2)
    # Reached only when retries <= 0; keep the "" contract callers test with `not`.
    return ""

# ---- READ RESUME ----
def read_resume_text(file_path):
    """Return the text of a résumé file, choosing the reader by file extension.

    Args:
        file_path: Path string of the résumé. ``.pdf`` is read with PyMuPDF,
            ``.docx`` with python-docx; anything else is read as UTF-8 text.
            The extension check ignores case.

    Returns:
        The extracted text: PDF pages joined by newlines, non-blank DOCX
        paragraphs joined by newlines, or the raw file content.
    """
    lowered = file_path.lower()  # so ".PDF" / ".Docx" pick the right reader
    if lowered.endswith(".pdf"):
        # Context manager closes the PDF handle once the text is extracted.
        with fitz.open(file_path) as doc:
            return "\n".join([page.get_text() for page in doc])
    elif lowered.endswith(".docx"):
        doc = docx.Document(file_path)
        return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])
    else:
        with open(file_path, "r", encoding="utf-8") as f:
            return f.read()

# ---- MAIN FUNCTION ----
def customise_resume(prompt_text, resume_path, output_suffix=""):
    """Generate a JD from a prompt, tailor a résumé to it, and save the filled template.

    Steps: ask the LLM for a job description, read the résumé text, ask the
    LLM to rewrite four labelled sections, insert them into the template read
    from the module-level ``ey_template_path``, and write the result into the
    module-level ``output_dir``.

    Args:
        prompt_text: Prompt used to generate the JD; also inserted for the
            template's ``[ROLE]`` placeholder.
        resume_path: Path of the résumé file; its base name without extension
            is used as the candidate name and in the output file name.
        output_suffix: Optional text appended to the output file name just
            before ``.txt``.

    Returns:
        The path of the written file, or None when either LLM call returns
        nothing.
    """
    candidate_name = os.path.splitext(os.path.basename(resume_path))[0]

    print(f"Step 1: Generating JD from prompt → {prompt_text}")
    generated_jd = call_lmstudio(f"Create a detailed job description based on this prompt:\n{prompt_text}")
    if not generated_jd:
        print("JD generation failed.")
        return

    print(f"Step 2: Reading resume → {resume_path}")
    resume_text = read_resume_text(resume_path)

    print("Step 3: Customizing resume using resume + generated JD")
    custom_prompt = f"""
You are a resume writer. Given the candidate's resume and the job description, rewrite the following sections to make the resume aligned with the JD. DO NOT INVENT DETAILS. Use only the content in the resume.

Resume:
{resume_text}

Job Description:
{generated_jd}

Provide the output in this format (with labels):
SUMMARY:
...

QUALIFICATIONS:
...

EXPERIENCE:
...

TECHNICAL_SKILLS:
...
"""

    result = call_lmstudio(custom_prompt)
    if not result:
        print("Customization failed.")
        return

    # Each section runs from its own label up to the next expected label.
    section_specs = (
        ("[SUMMARY]", r"SUMMARY:(.*?)QUALIFICATIONS:"),
        ("[QUALIFICATIONS]", r"QUALIFICATIONS:(.*?)EXPERIENCE:"),
        ("[EXPERIENCE_PLACEHOLDER]", r"EXPERIENCE:(.*?)TECHNICAL_SKILLS:"),
        ("[TECHNICAL_SKILLS]", r"TECHNICAL_SKILLS:(.*)"),
    )
    replacements = {"[CANDIDATE_NAME]": candidate_name, "[ROLE]": prompt_text}
    missing_sections = []
    for placeholder, pattern in section_specs:
        match = re.search(pattern, result, re.DOTALL)
        if match is None:
            missing_sections.append(placeholder)
        replacements[placeholder] = match.group(1).strip() if match else ""
    if missing_sections:
        # The file is still written; make the blank sections visible to the user.
        print("⚠️ Sections not found in LLM output (left blank):", ", ".join(missing_sections))

    # Read EY template
    with open(ey_template_path, "r", encoding="utf-8") as f:
        template = f.read()

    # Fill template in a single pass so inserted text is never re-scanned for
    # placeholders (chained .replace calls would substitute inside LLM output).
    placeholder_re = re.compile("|".join(re.escape(key) for key in replacements))
    final_resume = placeholder_re.sub(lambda m: replacements[m.group(0)], template)

    # Save customized resume
    output_path = os.path.join(output_dir, f"{candidate_name}_customised_resume{output_suffix}.txt")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_resume)

    print("✅ Resume customized and saved to:", output_path)
    return output_path



In [2]:
# Inputs for one customise_resume run: the JD-generation prompt and the résumé file to tailor.
frontend_prompt = "Create a JD for a retail IRB model development requirement. Mention that SQL is a must."
uploaded_resume_path = r"C:\Users\HN338QQ\Downloads\Arnab Bhattacharya.pdf"

# Last expression of the cell, so the value returned by customise_resume is displayed.
customise_resume(frontend_prompt, uploaded_resume_path)


Step 1: Generating JD from prompt → Create a JD for a retail IRB model development requirement. Mention that SQL is a must.
Step 2: Reading resume → C:\Users\HN338QQ\Downloads\Arnab Bhattacharya.pdf
Step 3: Customizing resume using resume + generated JD
✅ Resume customized and saved to: C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes\Arnab Bhattacharya_customised_resume.txt


'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\Arnab Bhattacharya_customised_resume.txt'

In [3]:
# Inputs for one customise_resume run: the JD-generation prompt and the résumé file to tailor.
frontend_prompt = "Create a JD for a retail IRB model development requirement. Mention that SQL is a must."
uploaded_resume_path = r"C:\Users\HN338QQ\Downloads\Archit_Resume.pdf"

# Last expression of the cell, so the value returned by customise_resume is displayed.
customise_resume(frontend_prompt, uploaded_resume_path)


Step 1: Generating JD from prompt → Create a JD for a retail IRB model development requirement. Mention that SQL is a must.
Step 2: Reading resume → C:\Users\HN338QQ\Downloads\Archit_Resume.pdf
Step 3: Customizing resume using resume + generated JD
✅ Resume customized and saved to: C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes\Archit_Resume_customised_resume.txt


'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\Archit_Resume_customised_resume.txt'

In [2]:
# Inputs for one customise_resume run: the JD-generation prompt and the résumé file to tailor.
frontend_prompt = "Create a JD for a retail IRB model development requirement. Mention that SQL is a must."
uploaded_resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\1. CV - Karishma Rathi.pdf"

# Last expression of the cell, so the value returned by customise_resume is displayed.
customise_resume(frontend_prompt, uploaded_resume_path)


Step 1: Generating JD from prompt → Create a JD for a retail IRB model development requirement. Mention that SQL is a must.
Step 2: Reading resume → C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\1. CV - Karishma Rathi.pdf
Step 3: Customizing resume using resume + generated JD
✅ Resume customized and saved to: C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes\1. CV - Karishma Rathi_customised_resume.txt


'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\1. CV - Karishma Rathi_customised_resume.txt'

In [13]:
# Inputs for one customise_resume run: the JD-generation prompt and the résumé file to tailor.
frontend_prompt = "Create a JD for a retail IRB model development requirement. Mention that SQL is a must."
uploaded_resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\20250123_Raja Saha (Manager at Citi).pdf"

# Last expression of the cell, so the value returned by customise_resume is displayed.
customise_resume(frontend_prompt, uploaded_resume_path)

Step 1: Generating JD from prompt → Create a JD for a retail IRB model development requirement. Mention that SQL is a must.
Step 2: Reading resume → C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\20250123_Raja Saha (Manager at Citi).pdf
Step 3: Customizing resume using resume + generated JD
✅ Resume customized and saved to: C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes\20250123_Raja Saha (Manager at Citi)_customised_resume.txt


'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\20250123_Raja Saha (Manager at Citi)_customised_resume.txt'

In [4]:
# Inputs for one customise_resume run: the JD-generation prompt and the résumé file to tailor.
frontend_prompt = "Create a JD for a retail IRB model development requirement. Mention that SQL is a must."
uploaded_resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Aarti Chaba.pdf"

# Last expression of the cell, so the value returned by customise_resume is displayed.
customise_resume(frontend_prompt, uploaded_resume_path)

Step 1: Generating JD from prompt → Create a JD for a retail IRB model development requirement. Mention that SQL is a must.
Step 2: Reading resume → C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Aarti Chaba.pdf
Step 3: Customizing resume using resume + generated JD
✅ Resume customized and saved to: C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes\Aarti Chaba_customised_resume.txt


'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\Aarti Chaba_customised_resume.txt'

In [1]:
import os
import fitz  # PyMuPDF
import docx
import re
import json
import requests
import time

# ---- CONFIGURATION ----
# The template file and the output folder both live under one shared samples folder.
_samples_root = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples"
ey_template_path = _samples_root + r"\EY_sample_resume_template.txt"
output_dir = _samples_root + r"\final_customised_resumes"
# Create the output folder up front so later writes cannot fail on a missing directory.
os.makedirs(output_dir, exist_ok=True)

# ---- LM Studio CALL ----
def call_lmstudio(prompt, model="zephyr-7b-beta.Q4_K_M.gguf", retries=3):
    """Send a single-turn chat prompt to the local LM Studio server and return the reply.

    Args:
        prompt: Text sent as the only (user) message of the chat request.
        model: Model identifier placed in the request payload.
        retries: Maximum number of attempts; failed attempts are separated by
            a 2-second pause.

    Returns:
        The assistant message content of the first choice, or "" when every
        attempt fails (the last error is printed) or when ``retries`` allows
        no attempt at all.
    """
    url = "http://localhost:1234/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "stream": False
    }
    for attempt in range(retries):
        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=180)
            # Surface HTTP errors as such instead of a KeyError on 'choices' below.
            response.raise_for_status()
            return response.json()['choices'][0]['message']['content']
        except Exception as e:
            # Best-effort by design: any failure counts as a failed attempt.
            if attempt == retries - 1:
                print("❌ LLM call failed:", e)
                return ""
            time.sleep(2)
    # Reached only when retries <= 0; keep the "" contract callers test with `not`.
    return ""

# ---- READ RESUME ----
def read_resume_text(file_path):
    """Return the text of a résumé file, choosing the reader by file extension.

    Args:
        file_path: Path string of the résumé. ``.pdf`` is read with PyMuPDF,
            ``.docx`` with python-docx; anything else is read as UTF-8 text.
            The extension check ignores case.

    Returns:
        The extracted text: PDF pages joined by newlines, non-blank DOCX
        paragraphs joined by newlines, or the raw file content.
    """
    lowered = file_path.lower()  # so ".PDF" / ".Docx" pick the right reader
    if lowered.endswith(".pdf"):
        # Context manager closes the PDF handle once the text is extracted.
        with fitz.open(file_path) as doc:
            return "\n".join([page.get_text() for page in doc])
    elif lowered.endswith(".docx"):
        doc = docx.Document(file_path)
        return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])
    else:
        with open(file_path, "r", encoding="utf-8") as f:
            return f.read()

# ---- MAIN FUNCTION ----
def customise_resume(prompt_text, resume_path, output_suffix="aarti1"):
    """Generate a JD from a prompt, tailor a résumé to it, and save the filled template.

    Steps: ask the LLM for a job description, read the résumé text, ask the
    LLM to rewrite four labelled sections, insert them into the template read
    from the module-level ``ey_template_path``, and write the result into the
    module-level ``output_dir``.

    Args:
        prompt_text: Prompt used to generate the JD; also inserted for the
            template's ``[ROLE]`` placeholder.
        resume_path: Path of the résumé file; its base name without extension
            is used as the candidate name and in the output file name.
        output_suffix: Text appended to the output file name just before
            ``.txt``. The default keeps the file name this cell has always
            produced; pass "" for an untagged name.

    Returns:
        The path of the written file, or None when either LLM call returns
        nothing.
    """
    candidate_name = os.path.splitext(os.path.basename(resume_path))[0]

    print(f"Step 1: Generating JD from prompt → {prompt_text}")
    generated_jd = call_lmstudio(f"Create a detailed job description based on this prompt:\n{prompt_text}")
    if not generated_jd:
        print("JD generation failed.")
        return

    print(f"Step 2: Reading resume → {resume_path}")
    resume_text = read_resume_text(resume_path)

    print("Step 3: Customizing resume using resume + generated JD")
    custom_prompt = f"""
You are a resume writer. Given the candidate's resume and the job description, rewrite the following sections to make the resume aligned with the JD. DO NOT INVENT DETAILS. Use only the content in the resume.

Resume:
{resume_text}

Job Description:
{generated_jd}

Provide the output in this format (with labels):
SUMMARY:
...

QUALIFICATIONS:
...

EXPERIENCE:
...

TECHNICAL_SKILLS:
...
"""

    result = call_lmstudio(custom_prompt)
    if not result:
        print("Customization failed.")
        return

    # Each section runs from its own label up to the next expected label.
    section_specs = (
        ("[SUMMARY]", r"SUMMARY:(.*?)QUALIFICATIONS:"),
        ("[QUALIFICATIONS]", r"QUALIFICATIONS:(.*?)EXPERIENCE:"),
        ("[EXPERIENCE_PLACEHOLDER]", r"EXPERIENCE:(.*?)TECHNICAL_SKILLS:"),
        ("[TECHNICAL_SKILLS]", r"TECHNICAL_SKILLS:(.*)"),
    )
    replacements = {"[CANDIDATE_NAME]": candidate_name, "[ROLE]": prompt_text}
    missing_sections = []
    for placeholder, pattern in section_specs:
        match = re.search(pattern, result, re.DOTALL)
        if match is None:
            missing_sections.append(placeholder)
        replacements[placeholder] = match.group(1).strip() if match else ""
    if missing_sections:
        # The file is still written; make the blank sections visible to the user.
        print("⚠️ Sections not found in LLM output (left blank):", ", ".join(missing_sections))

    # Read EY template
    with open(ey_template_path, "r", encoding="utf-8") as f:
        template = f.read()

    # Fill template in a single pass so inserted text is never re-scanned for
    # placeholders (chained .replace calls would substitute inside LLM output).
    placeholder_re = re.compile("|".join(re.escape(key) for key in replacements))
    final_resume = placeholder_re.sub(lambda m: replacements[m.group(0)], template)

    # Save customized resume
    output_path = os.path.join(output_dir, f"{candidate_name}_customised_resume{output_suffix}.txt")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_resume)

    print("✅ Resume customized and saved to:", output_path)
    return output_path

In [2]:
# Inputs for one customise_resume run: the JD-generation prompt and the résumé file to tailor.
frontend_prompt = "Create a JD for a retail IRB model development requirement. Mention that SQL is a must."
uploaded_resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Aarti Chaba.pdf"

# Last expression of the cell, so the value returned by customise_resume is displayed.
customise_resume(frontend_prompt, uploaded_resume_path)

Step 1: Generating JD from prompt → Create a JD for a retail IRB model development requirement. Mention that SQL is a must.
Step 2: Reading resume → C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Aarti Chaba.pdf
Step 3: Customizing resume using resume + generated JD
✅ Resume customized and saved to: C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes\Aarti Chaba_customised_resumeaarti1.txt


'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\Aarti Chaba_customised_resumeaarti1.txt'

In [5]:
# Inputs for one customise_resume run: the JD-generation prompt and the résumé file to tailor.
frontend_prompt = "Create a JD for a retail IRB model development requirement. Mention that SQL is a must."
uploaded_resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Aarushi_Vohra.pdf"

# Last expression of the cell, so the value returned by customise_resume is displayed.
customise_resume(frontend_prompt, uploaded_resume_path)

Step 1: Generating JD from prompt → Create a JD for a retail IRB model development requirement. Mention that SQL is a must.
Step 2: Reading resume → C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Aarushi_Vohra.pdf
Step 3: Customizing resume using resume + generated JD
✅ Resume customized and saved to: C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes\Aarushi_Vohra_customised_resume.txt


'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\Aarushi_Vohra_customised_resume.txt'

In [7]:
# Inputs for one customise_resume run: the JD-generation prompt and the résumé file to tailor.
frontend_prompt = "Create a JD for a retail IRB model development requirement. Mention that SQL is a must."
uploaded_resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\AbhiGoswami(Insightsquared).pdf"

# Last expression of the cell, so the value returned by customise_resume is displayed.
customise_resume(frontend_prompt, uploaded_resume_path)

Step 1: Generating JD from prompt → Create a JD for a retail IRB model development requirement. Mention that SQL is a must.
Step 2: Reading resume → C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\AbhiGoswami(Insightsquared).pdf
Step 3: Customizing resume using resume + generated JD
✅ Resume customized and saved to: C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes\AbhiGoswami(Insightsquared)_customised_resume.txt


'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\AbhiGoswami(Insightsquared)_customised_resume.txt'

In [8]:
# Inputs for one customise_resume run: the JD-generation prompt and the résumé file to tailor.
frontend_prompt = "Create a JD for a retail IRB model development requirement. Mention that SQL is a must."
uploaded_resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Abhishek_Agrahari_CreditRiskModelling-1.pdf"

# Last expression of the cell, so the value returned by customise_resume is displayed.
customise_resume(frontend_prompt, uploaded_resume_path)

Step 1: Generating JD from prompt → Create a JD for a retail IRB model development requirement. Mention that SQL is a must.
Step 2: Reading resume → C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Abhishek_Agrahari_CreditRiskModelling-1.pdf
Step 3: Customizing resume using resume + generated JD
✅ Resume customized and saved to: C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes\Abhishek_Agrahari_CreditRiskModelling-1_customised_resume.txt


'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\Abhishek_Agrahari_CreditRiskModelling-1_customised_resume.txt'

In [9]:
# Inputs for one customise_resume run: the JD-generation prompt and the résumé file to tailor.
frontend_prompt = "Create a JD for a retail IRB model development requirement. Mention that SQL is a must."
uploaded_resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Abir_Sarkar.pdf"

# Last expression of the cell, so the value returned by customise_resume is displayed.
customise_resume(frontend_prompt, uploaded_resume_path)

Step 1: Generating JD from prompt → Create a JD for a retail IRB model development requirement. Mention that SQL is a must.
Step 2: Reading resume → C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Abir_Sarkar.pdf
Step 3: Customizing resume using resume + generated JD
✅ Resume customized and saved to: C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes\Abir_Sarkar_customised_resume.txt


'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\Abir_Sarkar_customised_resume.txt'

In [11]:
# Inputs for one customise_resume run; this prompt asks for a Scorecards JD with SAS as the required skill.
frontend_prompt = "Create a JD for a retail Scorecards model development requirement. Mention that SAS is a must."
uploaded_resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\Nidhika-Tomar.pdf"

# Last expression of the cell, so the value returned by customise_resume is displayed.
customise_resume(frontend_prompt, uploaded_resume_path)

Step 1: Generating JD from prompt → Create a JD for a retail Scorecards model development requirement. Mention that SAS is a must.
Step 2: Reading resume → C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\Nidhika-Tomar.pdf
Step 3: Customizing resume using resume + generated JD
✅ Resume customized and saved to: C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes\Nidhika-Tomar_customised_resume.txt


'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\Nidhika-Tomar_customised_resume.txt'

In [1]:
import os
import fitz  # PyMuPDF
import docx
import re
import json
import requests
import time

# ---- CONFIGURATION ----
# The template file and the output folder both live under one shared samples folder.
_samples_root = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples"
ey_template_path = _samples_root + r"\EY_sample_resume_template.txt"
output_dir = _samples_root + r"\final_customised_resumes"
# Create the output folder up front so later writes cannot fail on a missing directory.
os.makedirs(output_dir, exist_ok=True)

# ---- LM Studio CALL ----
def call_lmstudio(prompt, model="zephyr-7b-beta.Q4_K_M.gguf", retries=3):
    """Send a single-turn chat prompt to the local LM Studio server and return the reply.

    Args:
        prompt: Text sent as the only (user) message of the chat request.
        model: Model identifier placed in the request payload.
        retries: Maximum number of attempts; failed attempts are separated by
            a 2-second pause.

    Returns:
        The assistant message content of the first choice, or "" when every
        attempt fails (the last error is printed) or when ``retries`` allows
        no attempt at all.
    """
    url = "http://localhost:1234/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "stream": False
    }
    for attempt in range(retries):
        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=180)
            # Surface HTTP errors as such instead of a KeyError on 'choices' below.
            response.raise_for_status()
            return response.json()['choices'][0]['message']['content']
        except Exception as e:
            # Best-effort by design: any failure counts as a failed attempt.
            if attempt == retries - 1:
                print("❌ LLM call failed:", e)
                return ""
            time.sleep(2)
    # Reached only when retries <= 0; keep the "" contract callers test with `not`.
    return ""

# ---- READ RESUME ----
def read_resume_text(file_path):
    """Return the text of a résumé file, choosing the reader by file extension.

    Args:
        file_path: Path string of the résumé. ``.pdf`` is read with PyMuPDF,
            ``.docx`` with python-docx; anything else is read as UTF-8 text.
            The extension check ignores case.

    Returns:
        The extracted text: PDF pages joined by newlines, non-blank DOCX
        paragraphs joined by newlines, or the raw file content.
    """
    lowered = file_path.lower()  # so ".PDF" / ".Docx" pick the right reader
    if lowered.endswith(".pdf"):
        # Context manager closes the PDF handle once the text is extracted.
        with fitz.open(file_path) as doc:
            return "\n".join([page.get_text() for page in doc])
    elif lowered.endswith(".docx"):
        doc = docx.Document(file_path)
        return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])
    else:
        with open(file_path, "r", encoding="utf-8") as f:
            return f.read()

# ---- MAIN FUNCTION ----
def customise_resume(prompt_text, resume_path, output_suffix="nidhika1"):
    """Generate a JD from a prompt, tailor a résumé to it, and save the filled template.

    Steps: ask the LLM for a job description (printed for reference), read the
    résumé text, ask the LLM to rewrite four labelled sections, insert them
    into the template read from the module-level ``ey_template_path``, and
    write the result into the module-level ``output_dir``.

    Args:
        prompt_text: Prompt used to generate the JD; also inserted for the
            template's ``[ROLE]`` placeholder.
        resume_path: Path of the résumé file; its base name without extension
            is used as the candidate name and in the output file name.
        output_suffix: Text appended to the output file name just before
            ``.txt``. The default keeps the file name this cell has always
            produced; pass "" for an untagged name.

    Returns:
        The path of the written file, or None when either LLM call returns
        nothing.
    """
    candidate_name = os.path.splitext(os.path.basename(resume_path))[0]

    print(f"Step 1: Generating JD from prompt → {prompt_text}")
    generated_jd = call_lmstudio(f"Create a detailed job description based on this prompt:\n{prompt_text}")
    if not generated_jd:
        print("JD generation failed.")
        return

    # ✅ Print generated JD for reference
    print("Generated JD:\n", generated_jd)

    print(f"Step 2: Reading resume → {resume_path}")
    resume_text = read_resume_text(resume_path)

    print("Step 3: Customizing resume using resume + generated JD")
    custom_prompt = f"""
You are a resume writer. Given the candidate's resume and the job description, rewrite the following sections to make the resume aligned with the JD. DO NOT INVENT DETAILS. Use only the content in the resume.

Resume:
{resume_text}

Job Description:
{generated_jd}

Provide the output in this format (with labels):
SUMMARY:
...

QUALIFICATIONS:
...

EXPERIENCE:
...

TECHNICAL_SKILLS:
...
"""

    result = call_lmstudio(custom_prompt)
    if not result:
        print("Customization failed.")
        return

    # Each section runs from its own label up to the next expected label.
    section_specs = (
        ("[SUMMARY]", r"SUMMARY:(.*?)QUALIFICATIONS:"),
        ("[QUALIFICATIONS]", r"QUALIFICATIONS:(.*?)EXPERIENCE:"),
        ("[EXPERIENCE_PLACEHOLDER]", r"EXPERIENCE:(.*?)TECHNICAL_SKILLS:"),
        ("[TECHNICAL_SKILLS]", r"TECHNICAL_SKILLS:(.*)"),
    )
    replacements = {"[CANDIDATE_NAME]": candidate_name, "[ROLE]": prompt_text}
    missing_sections = []
    for placeholder, pattern in section_specs:
        match = re.search(pattern, result, re.DOTALL)
        if match is None:
            missing_sections.append(placeholder)
        replacements[placeholder] = match.group(1).strip() if match else ""
    if missing_sections:
        # The file is still written; make the blank sections visible to the user.
        print("⚠️ Sections not found in LLM output (left blank):", ", ".join(missing_sections))

    # Read EY template
    with open(ey_template_path, "r", encoding="utf-8") as f:
        template = f.read()

    # Fill template in a single pass so inserted text is never re-scanned for
    # placeholders (chained .replace calls would substitute inside LLM output).
    placeholder_re = re.compile("|".join(re.escape(key) for key in replacements))
    final_resume = placeholder_re.sub(lambda m: replacements[m.group(0)], template)

    # Save customized resume
    output_path = os.path.join(output_dir, f"{candidate_name}_customised_resume{output_suffix}.txt")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_resume)

    print("✅ Resume customized and saved to:", output_path)
    return output_path


In [2]:
# ---- EXAMPLE FRONTEND USAGE ----
# Prompt from which the job description is generated (Scorecards role, SQL required).
frontend_prompt = "Create a JD for a retail Scorecards model development requirement. Mention that SQL is a must."
# Path to the uploaded resume from the frontend
uploaded_resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\Nidhika-Tomar.pdf"

# ---- RUN FUNCTION ----
# Last expression of the cell, so the value returned by customise_resume is displayed.
customise_resume(frontend_prompt, uploaded_resume_path)


Step 1: Generating JD from prompt → Create a JD for a retail Scorecards model development requirement. Mention that SQL is a must.
Generated JD:
  "Job Title: Retail Scorecards Model Development Specialist

Job Description:
We are seeking a talented and experienced Model Development Specialist to join our team in creating Retail Scorecards utilizing SQL as the primary programming language. The ideal candidate will have a strong background in data science, statistics, and database management, with a focus on developing predictive models for retail operations.

The role involves working closely with other teams in the company to gather and analyze large amounts of data, creating and testing models using SQL, and providing valuable insights and recommendations based on the results. The Model Development Specialist will also be responsible for ensuring that all models developed are accurate, reliable, and easy to interpret.

Additional responsibilities include:
- Working with cross-functio

'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\Nidhika-Tomar_customised_resumenidhika1.txt'

In [3]:
import os
import fitz  # PyMuPDF
import docx
import re
import json
import requests
import time

# ---- CONFIGURATION ----
# Text template containing the [PLACEHOLDER] tokens that customise_resume fills in.
# NOTE: absolute, user-specific Windows path; edit before running on another machine.
ey_template_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\EY_sample_resume_template.txt"
# Folder that receives the generated resume files.
output_dir = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes"
# Create the output folder up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(output_dir, exist_ok=True)

# ---- LM Studio CALL ----
def call_lmstudio(prompt, model="zephyr-7b-beta.Q4_K_M.gguf", retries=3):
    """Send a single-turn chat prompt to the local LM Studio server.

    Args:
        prompt: Text sent as the only user message.
        model: Model identifier placed in the request payload.
        retries: Maximum number of attempts before giving up.

    Returns:
        The first choice's message content, or "" when every attempt fails
        (including when ``retries`` is zero or negative).
    """
    url = "http://localhost:1234/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "stream": False
    }
    for attempt in range(retries):
        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=180)
            # Report HTTP 4xx/5xx as such instead of a KeyError on 'choices'.
            response.raise_for_status()
            return response.json()['choices'][0]['message']['content']
        except Exception as e:
            if attempt == retries - 1:
                print("❌ LLM call failed:", e)
                return ""
            # Short pause before the next attempt.
            time.sleep(2)
    # retries <= 0: the loop never ran; still honour the "" failure contract.
    return ""

# ---- READ RESUME ----
def read_resume_text(file_path):
    """Return the plain text of a resume file.

    The reader is picked from the file extension, compared
    case-insensitively: ``.pdf`` uses PyMuPDF, ``.docx`` uses python-docx,
    and anything else is read as UTF-8 text.

    Args:
        file_path: Path to the resume file.

    Returns:
        The extracted text. PDF pages and DOCX paragraphs are joined with
        newlines; blank DOCX paragraphs are dropped.
    """
    lowered = file_path.lower()
    if lowered.endswith(".pdf"):
        doc = fitz.open(file_path)
        try:
            return "\n".join(page.get_text() for page in doc)
        finally:
            # Release the file handle once the text has been pulled out.
            doc.close()
    if lowered.endswith(".docx"):
        document = docx.Document(file_path)
        return "\n".join(p.text for p in document.paragraphs if p.text.strip())
    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()

# ---- MAIN FUNCTION ----
def customise_resume(prompt_text, resume_path, output_suffix="reema"):
    """Generate a JD from a prompt, tailor a resume to it, and save the result.

    Steps: ask the LLM for a job description built from ``prompt_text``;
    read the resume; ask the LLM to rewrite the SUMMARY / QUALIFICATIONS /
    EXPERIENCE / TECHNICAL_SKILLS sections; fill the template at
    ``ey_template_path`` and write a .txt file into ``output_dir``.

    Args:
        prompt_text: Free-text request used to generate the JD. It is also
            substituted verbatim for the template's ``[ROLE]`` placeholder.
        resume_path: Path to the candidate's resume; the file name without
            its extension is used as the candidate name.
        output_suffix: Tag appended to the output file name, so one
            definition can serve several candidates.

    Returns:
        Path of the written file, or None when JD generation fails, the
        resume yields no text, or the customization call fails.
    """
    candidate_name = os.path.splitext(os.path.basename(resume_path))[0]

    print(f"Step 1: Generating JD from prompt → {prompt_text}")
    generated_jd = call_lmstudio(f"Create a detailed job description based on this prompt:\n{prompt_text}")
    if not generated_jd:
        print("JD generation failed.")
        return

    # ✅ Print generated JD for reference
    print("Generated JD:\n", generated_jd)

    print(f"Step 2: Reading resume → {resume_path}")
    resume_text = read_resume_text(resume_path)
    if not resume_text or not resume_text.strip():
        # Without source text the LLM has nothing to rewrite.
        print("Resume text is empty; nothing to customize.")
        return

    print("Step 3: Customizing resume using resume + generated JD")
    custom_prompt = f"""
You are a resume writer. Given the candidate's resume and the job description, rewrite the following sections to make the resume aligned with the JD. DO NOT INVENT DETAILS. Use only the content in the resume.

Resume:
{resume_text}

Job Description:
{generated_jd}

Provide the output in this format (with labels):
SUMMARY:
...

QUALIFICATIONS:
...

EXPERIENCE:
...

TECHNICAL_SKILLS:
...
"""

    result = call_lmstudio(custom_prompt)
    if not result:
        print("Customization failed.")
        return

    # Each section is the text between its label and the next one, so the
    # labels must appear in the order requested in the prompt.
    section_patterns = {
        "[SUMMARY]": r"SUMMARY:(.*?)QUALIFICATIONS:",
        "[QUALIFICATIONS]": r"QUALIFICATIONS:(.*?)EXPERIENCE:",
        "[EXPERIENCE_PLACEHOLDER]": r"EXPERIENCE:(.*?)TECHNICAL_SKILLS:",
        "[TECHNICAL_SKILLS]": r"TECHNICAL_SKILLS:(.*)",
    }
    replacements = {"[CANDIDATE_NAME]": candidate_name, "[ROLE]": prompt_text}
    for placeholder, pattern in section_patterns.items():
        match = re.search(pattern, result, re.DOTALL)
        replacements[placeholder] = match.group(1).strip() if match else ""

    missing = [p for p in section_patterns if not replacements[p]]
    if missing:
        # Make blank sections visible instead of silently reporting success.
        print("⚠️ LLM output had no content for:", ", ".join(missing))

    # Read EY template
    with open(ey_template_path, "r", encoding="utf-8") as f:
        template = f.read()

    # Fill template in one pass so inserted text is never re-scanned for
    # placeholder tokens (chained str.replace would substitute inside it).
    placeholder_re = re.compile("|".join(re.escape(p) for p in replacements))
    final_resume = placeholder_re.sub(lambda m: replacements[m.group(0)], template)

    # Save customized resume
    output_path = os.path.join(output_dir, f"{candidate_name}_customised_resume{output_suffix}.txt")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_resume)

    print("✅ Resume customized and saved to:", output_path)
    return output_path


In [4]:
# Free-text request; customise_resume forwards it to the LLM to generate the job description.
frontend_prompt = "Create a JD for a wholesale IRB model development requirement. Mention that SAS is a must."
# Resume to tailor; its file name (without extension) becomes the candidate name.
uploaded_resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\Resume - Reema Panday.pdf"

# Runs the whole pipeline; the returned output path is displayed as the cell result.
customise_resume(frontend_prompt, uploaded_resume_path)

Step 1: Generating JD from prompt → Create a JD for a wholesale IRB model development requirement. Mention that SAS is a must.
Generated JD:
  The Wholesale IRB Model Development Specialist will be responsible for creating and implementing a new wholesale IRB model to streamline the review process of research protocols submitted by multiple institutions. This position requires a strong proficiency in SAS programming language as it will be used extensively to analyze data, generate reports, and conduct statistical analysis of research results. The successful candidate will work closely with stakeholders from various departments, including research, legal, and compliance, to ensure that the new model meets all regulatory requirements while reducing review time and costs. A Bachelor's or Master's degree in a related field is preferred, with at least 3 years of experience in IRB review process, clinical research, and SAS programming language.
Step 2: Reading resume → C:\Users\HN338QQ\OneDr

'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\Resume - Reema Panday_customised_resumereema.txt'

In [None]:
import os
import fitz  # PyMuPDF
import docx
import re
import json
import requests
import time

# ---- CONFIGURATION ----
# Text template containing the [PLACEHOLDER] tokens that customise_resume fills in.
# NOTE: absolute, user-specific Windows path; edit before running on another machine.
ey_template_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\EY_sample_resume_template.txt"
# Folder that receives the generated resume files.
output_dir = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes"
# Create the output folder up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(output_dir, exist_ok=True)

# ---- LM Studio CALL ----
def call_lmstudio(prompt, model="zephyr-7b-beta.Q4_K_M.gguf", retries=3):
    """Send a single-turn chat prompt to the local LM Studio server.

    Args:
        prompt: Text sent as the only user message.
        model: Model identifier placed in the request payload.
        retries: Maximum number of attempts before giving up.

    Returns:
        The first choice's message content, or "" when every attempt fails
        (including when ``retries`` is zero or negative).
    """
    url = "http://localhost:1234/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "stream": False
    }
    for attempt in range(retries):
        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=180)
            # Report HTTP 4xx/5xx as such instead of a KeyError on 'choices'.
            response.raise_for_status()
            return response.json()['choices'][0]['message']['content']
        except Exception as e:
            if attempt == retries - 1:
                print("❌ LLM call failed:", e)
                return ""
            # Short pause before the next attempt.
            time.sleep(2)
    # retries <= 0: the loop never ran; still honour the "" failure contract.
    return ""

# ---- READ RESUME ----
def read_resume_text(file_path):
    """Return the plain text of a resume file.

    The reader is picked from the file extension, compared
    case-insensitively: ``.pdf`` uses PyMuPDF, ``.docx`` uses python-docx,
    and anything else is read as UTF-8 text.

    Args:
        file_path: Path to the resume file.

    Returns:
        The extracted text. PDF pages and DOCX paragraphs are joined with
        newlines; blank DOCX paragraphs are dropped.
    """
    lowered = file_path.lower()
    if lowered.endswith(".pdf"):
        doc = fitz.open(file_path)
        try:
            return "\n".join(page.get_text() for page in doc)
        finally:
            # Release the file handle once the text has been pulled out.
            doc.close()
    if lowered.endswith(".docx"):
        document = docx.Document(file_path)
        return "\n".join(p.text for p in document.paragraphs if p.text.strip())
    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()

# ---- MAIN FUNCTION ----
def customise_resume(prompt_text, resume_path, output_suffix="reema1"):
    """Generate a JD from a prompt, tailor a resume to it, and save the result.

    Steps: ask the LLM for a job description built from ``prompt_text``;
    read the resume; ask the LLM to rewrite the SUMMARY / QUALIFICATIONS /
    EXPERIENCE / TECHNICAL_SKILLS sections; fill the template at
    ``ey_template_path`` and write a .txt file into ``output_dir``.

    Args:
        prompt_text: Free-text request used to generate the JD. It is also
            substituted verbatim for the template's ``[ROLE]`` placeholder.
        resume_path: Path to the candidate's resume; the file name without
            its extension is used as the candidate name.
        output_suffix: Tag appended to the output file name, so one
            definition can serve several candidates.

    Returns:
        Path of the written file, or None when JD generation fails, the
        resume yields no text, or the customization call fails.
    """
    candidate_name = os.path.splitext(os.path.basename(resume_path))[0]

    print(f"Step 1: Generating JD from prompt → {prompt_text}")
    generated_jd = call_lmstudio(f"Create a detailed job description based on this prompt:\n{prompt_text}")
    if not generated_jd:
        print("JD generation failed.")
        return

    # ✅ Print generated JD for reference
    print("Generated JD:\n", generated_jd)

    print(f"Step 2: Reading resume → {resume_path}")
    resume_text = read_resume_text(resume_path)
    if not resume_text or not resume_text.strip():
        # Without source text the LLM has nothing to rewrite.
        print("Resume text is empty; nothing to customize.")
        return

    print("Step 3: Customizing resume using resume + generated JD")
    custom_prompt = f"""
You are a resume writer. Given the candidate's resume and the job description, rewrite the following sections to make the resume aligned with the JD. DO NOT INVENT DETAILS. Use only the content in the resume.

Resume:
{resume_text}

Job Description:
{generated_jd}

Provide the output in this format (with labels):
SUMMARY:
...

QUALIFICATIONS:
...

EXPERIENCE:
...

TECHNICAL_SKILLS:
...
"""

    result = call_lmstudio(custom_prompt)
    if not result:
        print("Customization failed.")
        return

    # Each section is the text between its label and the next one, so the
    # labels must appear in the order requested in the prompt.
    section_patterns = {
        "[SUMMARY]": r"SUMMARY:(.*?)QUALIFICATIONS:",
        "[QUALIFICATIONS]": r"QUALIFICATIONS:(.*?)EXPERIENCE:",
        "[EXPERIENCE_PLACEHOLDER]": r"EXPERIENCE:(.*?)TECHNICAL_SKILLS:",
        "[TECHNICAL_SKILLS]": r"TECHNICAL_SKILLS:(.*)",
    }
    replacements = {"[CANDIDATE_NAME]": candidate_name, "[ROLE]": prompt_text}
    for placeholder, pattern in section_patterns.items():
        match = re.search(pattern, result, re.DOTALL)
        replacements[placeholder] = match.group(1).strip() if match else ""

    missing = [p for p in section_patterns if not replacements[p]]
    if missing:
        # Make blank sections visible instead of silently reporting success.
        print("⚠️ LLM output had no content for:", ", ".join(missing))

    # Read EY template
    with open(ey_template_path, "r", encoding="utf-8") as f:
        template = f.read()

    # Fill template in one pass so inserted text is never re-scanned for
    # placeholder tokens (chained str.replace would substitute inside it).
    placeholder_re = re.compile("|".join(re.escape(p) for p in replacements))
    final_resume = placeholder_re.sub(lambda m: replacements[m.group(0)], template)

    # Save customized resume
    output_path = os.path.join(output_dir, f"{candidate_name}_customised_resume{output_suffix}.txt")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_resume)

    print("✅ Resume customized and saved to:", output_path)
    return output_path


In [6]:
# Free-text request; customise_resume forwards it to the LLM to generate the job description.
frontend_prompt = "Create a JD for a wholesale IRB model development requirement. Mention that SQL is a must."
# Resume to tailor; its file name (without extension) becomes the candidate name.
uploaded_resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\Resume - Reema Panday.pdf"

# Runs the whole pipeline; the returned output path is displayed as the cell result.
# NOTE(review): the recorded output lacks the "Generated JD:" text that the definition
# above prints, so it appears to come from a different definition; re-run to confirm.
customise_resume(frontend_prompt, uploaded_resume_path)

Step 1: Generating JD from prompt → Create a JD for a wholesale IRB model development requirement. Mention that SQL is a must.
Step 2: Reading resume → C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\Resume - Reema Panday.pdf
Step 3: Customizing resume using resume + generated JD
✅ Resume customized and saved to: C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes\Resume - Reema Panday_customised_resumereema1.txt


'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\Resume - Reema Panday_customised_resumereema1.txt'

In [None]:
import os
import fitz  # PyMuPDF
import docx
import re
import json
import requests
import time

# ---- CONFIGURATION ----
# Text template containing the [PLACEHOLDER] tokens that customise_resume fills in.
# NOTE: absolute, user-specific Windows path; edit before running on another machine.
ey_template_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\EY_sample_resume_template.txt"
# Folder that receives the generated resume files.
output_dir = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes"
# Create the output folder up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(output_dir, exist_ok=True)

# ---- LM Studio CALL ----
def call_lmstudio(prompt, model="zephyr-7b-beta.Q4_K_M.gguf", retries=3):
    """Send a single-turn chat prompt to the local LM Studio server.

    Args:
        prompt: Text sent as the only user message.
        model: Model identifier placed in the request payload.
        retries: Maximum number of attempts before giving up.

    Returns:
        The first choice's message content, or "" when every attempt fails
        (including when ``retries`` is zero or negative).
    """
    url = "http://localhost:1234/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "stream": False
    }
    for attempt in range(retries):
        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=180)
            # Report HTTP 4xx/5xx as such instead of a KeyError on 'choices'.
            response.raise_for_status()
            return response.json()['choices'][0]['message']['content']
        except Exception as e:
            if attempt == retries - 1:
                print("❌ LLM call failed:", e)
                return ""
            # Short pause before the next attempt.
            time.sleep(2)
    # retries <= 0: the loop never ran; still honour the "" failure contract.
    return ""

# ---- READ RESUME ----
def read_resume_text(file_path):
    """Return the plain text of a resume file.

    The reader is picked from the file extension, compared
    case-insensitively: ``.pdf`` uses PyMuPDF, ``.docx`` uses python-docx,
    and anything else is read as UTF-8 text.

    Args:
        file_path: Path to the resume file.

    Returns:
        The extracted text. PDF pages and DOCX paragraphs are joined with
        newlines; blank DOCX paragraphs are dropped.
    """
    lowered = file_path.lower()
    if lowered.endswith(".pdf"):
        doc = fitz.open(file_path)
        try:
            return "\n".join(page.get_text() for page in doc)
        finally:
            # Release the file handle once the text has been pulled out.
            doc.close()
    if lowered.endswith(".docx"):
        document = docx.Document(file_path)
        return "\n".join(p.text for p in document.paragraphs if p.text.strip())
    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()

# ---- MAIN FUNCTION ----
def customise_resume(prompt_text, resume_path, output_suffix="rajvi"):
    """Generate a JD from a prompt, tailor a resume to it, and save the result.

    Steps: ask the LLM for a job description built from ``prompt_text``;
    read the resume; ask the LLM to rewrite the SUMMARY / QUALIFICATIONS /
    EXPERIENCE / TECHNICAL_SKILLS sections; fill the template at
    ``ey_template_path`` and write a .txt file into ``output_dir``.

    Args:
        prompt_text: Free-text request used to generate the JD. It is also
            substituted verbatim for the template's ``[ROLE]`` placeholder.
        resume_path: Path to the candidate's resume; the file name without
            its extension is used as the candidate name.
        output_suffix: Tag appended to the output file name, so one
            definition can serve several candidates.

    Returns:
        Path of the written file, or None when JD generation fails, the
        resume yields no text, or the customization call fails.
    """
    candidate_name = os.path.splitext(os.path.basename(resume_path))[0]

    print(f"Step 1: Generating JD from prompt → {prompt_text}")
    generated_jd = call_lmstudio(f"Create a detailed job description based on this prompt:\n{prompt_text}")
    if not generated_jd:
        print("JD generation failed.")
        return

    # ✅ Print generated JD for reference
    print("Generated JD:\n", generated_jd)

    print(f"Step 2: Reading resume → {resume_path}")
    resume_text = read_resume_text(resume_path)
    if not resume_text or not resume_text.strip():
        # Without source text the LLM has nothing to rewrite.
        print("Resume text is empty; nothing to customize.")
        return

    print("Step 3: Customizing resume using resume + generated JD")
    custom_prompt = f"""
You are a resume writer. Given the candidate's resume and the job description, rewrite the following sections to make the resume aligned with the JD. DO NOT INVENT DETAILS. Use only the content in the resume.

Resume:
{resume_text}

Job Description:
{generated_jd}

Provide the output in this format (with labels):
SUMMARY:
...

QUALIFICATIONS:
...

EXPERIENCE:
...

TECHNICAL_SKILLS:
...
"""

    result = call_lmstudio(custom_prompt)
    if not result:
        print("Customization failed.")
        return

    # Each section is the text between its label and the next one, so the
    # labels must appear in the order requested in the prompt.
    section_patterns = {
        "[SUMMARY]": r"SUMMARY:(.*?)QUALIFICATIONS:",
        "[QUALIFICATIONS]": r"QUALIFICATIONS:(.*?)EXPERIENCE:",
        "[EXPERIENCE_PLACEHOLDER]": r"EXPERIENCE:(.*?)TECHNICAL_SKILLS:",
        "[TECHNICAL_SKILLS]": r"TECHNICAL_SKILLS:(.*)",
    }
    replacements = {"[CANDIDATE_NAME]": candidate_name, "[ROLE]": prompt_text}
    for placeholder, pattern in section_patterns.items():
        match = re.search(pattern, result, re.DOTALL)
        replacements[placeholder] = match.group(1).strip() if match else ""

    missing = [p for p in section_patterns if not replacements[p]]
    if missing:
        # Make blank sections visible instead of silently reporting success.
        print("⚠️ LLM output had no content for:", ", ".join(missing))

    # Read EY template
    with open(ey_template_path, "r", encoding="utf-8") as f:
        template = f.read()

    # Fill template in one pass so inserted text is never re-scanned for
    # placeholder tokens (chained str.replace would substitute inside it).
    placeholder_re = re.compile("|".join(re.escape(p) for p in replacements))
    final_resume = placeholder_re.sub(lambda m: replacements[m.group(0)], template)

    # Save customized resume
    output_path = os.path.join(output_dir, f"{candidate_name}_customised_resume{output_suffix}.txt")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_resume)

    print("✅ Resume customized and saved to:", output_path)
    return output_path


In [16]:
# Free-text request; customise_resume forwards it to the LLM to generate the job description.
frontend_prompt = "Create a JD for a retail IRB model validation requirement. Mention that SAS is a must."
# Resume to tailor; its file name (without extension) becomes the candidate name.
uploaded_resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\Rajvi Doshi.pdf"

# Runs the whole pipeline; the returned output path is displayed as the cell result.
# NOTE(review): the recorded output names "..._customised_resume.txt", which does not match
# the "resumerajvi" suffix in the definition above, so this cell appears to have been run
# against a different definition; re-run after Restart & Run All to confirm.
customise_resume(frontend_prompt, uploaded_resume_path)

Step 1: Generating JD from prompt → Create a JD for a retail IRB model validation requirement. Mention that SAS is a must.
Step 2: Reading resume → C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\Rajvi Doshi.pdf
Step 3: Customizing resume using resume + generated JD
✅ Resume customized and saved to: C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes\Rajvi Doshi_customised_resume.txt


'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\Rajvi Doshi_customised_resume.txt'

In [None]:
import os
import fitz  # PyMuPDF
import docx
import re
import json
import requests
import time

# ---- CONFIGURATION ----
# Text template containing the [PLACEHOLDER] tokens that customise_resume fills in.
# NOTE: absolute, user-specific Windows path; edit before running on another machine.
ey_template_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\EY_sample_resume_template.txt"
# Folder that receives the generated resume files.
output_dir = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes"
# Create the output folder up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(output_dir, exist_ok=True)

# ---- LM Studio CALL ----
def call_lmstudio(prompt, model="zephyr-7b-beta.Q4_K_M.gguf", retries=3):
    """Send a single-turn chat prompt to the local LM Studio server.

    Args:
        prompt: Text sent as the only user message.
        model: Model identifier placed in the request payload.
        retries: Maximum number of attempts before giving up.

    Returns:
        The first choice's message content, or "" when every attempt fails
        (including when ``retries`` is zero or negative).
    """
    url = "http://localhost:1234/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "stream": False
    }
    for attempt in range(retries):
        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=180)
            # Report HTTP 4xx/5xx as such instead of a KeyError on 'choices'.
            response.raise_for_status()
            return response.json()['choices'][0]['message']['content']
        except Exception as e:
            if attempt == retries - 1:
                print("❌ LLM call failed:", e)
                return ""
            # Short pause before the next attempt.
            time.sleep(2)
    # retries <= 0: the loop never ran; still honour the "" failure contract.
    return ""

# ---- READ RESUME ----
def read_resume_text(file_path):
    """Return the plain text of a resume file.

    The reader is picked from the file extension, compared
    case-insensitively: ``.pdf`` uses PyMuPDF, ``.docx`` uses python-docx,
    and anything else is read as UTF-8 text.

    Args:
        file_path: Path to the resume file.

    Returns:
        The extracted text. PDF pages and DOCX paragraphs are joined with
        newlines; blank DOCX paragraphs are dropped.
    """
    lowered = file_path.lower()
    if lowered.endswith(".pdf"):
        doc = fitz.open(file_path)
        try:
            return "\n".join(page.get_text() for page in doc)
        finally:
            # Release the file handle once the text has been pulled out.
            doc.close()
    if lowered.endswith(".docx"):
        document = docx.Document(file_path)
        return "\n".join(p.text for p in document.paragraphs if p.text.strip())
    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()

# ---- MAIN FUNCTION ----
def customise_resume(prompt_text, resume_path, output_suffix="rajvi1"):
    """Generate a JD from a prompt, tailor a resume to it, and save the result.

    Steps: ask the LLM for a job description built from ``prompt_text``;
    read the resume; ask the LLM to rewrite the SUMMARY / QUALIFICATIONS /
    EXPERIENCE / TECHNICAL_SKILLS sections; fill the template at
    ``ey_template_path`` and write a .txt file into ``output_dir``.

    Args:
        prompt_text: Free-text request used to generate the JD. It is also
            substituted verbatim for the template's ``[ROLE]`` placeholder.
        resume_path: Path to the candidate's resume; the file name without
            its extension is used as the candidate name.
        output_suffix: Tag appended to the output file name, so one
            definition can serve several candidates.

    Returns:
        Path of the written file, or None when JD generation fails, the
        resume yields no text, or the customization call fails.
    """
    candidate_name = os.path.splitext(os.path.basename(resume_path))[0]

    print(f"Step 1: Generating JD from prompt → {prompt_text}")
    generated_jd = call_lmstudio(f"Create a detailed job description based on this prompt:\n{prompt_text}")
    if not generated_jd:
        print("JD generation failed.")
        return

    # ✅ Print generated JD for reference
    print("Generated JD:\n", generated_jd)

    print(f"Step 2: Reading resume → {resume_path}")
    resume_text = read_resume_text(resume_path)
    if not resume_text or not resume_text.strip():
        # Without source text the LLM has nothing to rewrite.
        print("Resume text is empty; nothing to customize.")
        return

    print("Step 3: Customizing resume using resume + generated JD")
    custom_prompt = f"""
You are a resume writer. Given the candidate's resume and the job description, rewrite the following sections to make the resume aligned with the JD. DO NOT INVENT DETAILS. Use only the content in the resume.

Resume:
{resume_text}

Job Description:
{generated_jd}

Provide the output in this format (with labels):
SUMMARY:
...

QUALIFICATIONS:
...

EXPERIENCE:
...

TECHNICAL_SKILLS:
...
"""

    result = call_lmstudio(custom_prompt)
    if not result:
        print("Customization failed.")
        return

    # Each section is the text between its label and the next one, so the
    # labels must appear in the order requested in the prompt.
    section_patterns = {
        "[SUMMARY]": r"SUMMARY:(.*?)QUALIFICATIONS:",
        "[QUALIFICATIONS]": r"QUALIFICATIONS:(.*?)EXPERIENCE:",
        "[EXPERIENCE_PLACEHOLDER]": r"EXPERIENCE:(.*?)TECHNICAL_SKILLS:",
        "[TECHNICAL_SKILLS]": r"TECHNICAL_SKILLS:(.*)",
    }
    replacements = {"[CANDIDATE_NAME]": candidate_name, "[ROLE]": prompt_text}
    for placeholder, pattern in section_patterns.items():
        match = re.search(pattern, result, re.DOTALL)
        replacements[placeholder] = match.group(1).strip() if match else ""

    missing = [p for p in section_patterns if not replacements[p]]
    if missing:
        # Make blank sections visible instead of silently reporting success.
        print("⚠️ LLM output had no content for:", ", ".join(missing))

    # Read EY template
    with open(ey_template_path, "r", encoding="utf-8") as f:
        template = f.read()

    # Fill template in one pass so inserted text is never re-scanned for
    # placeholder tokens (chained str.replace would substitute inside it).
    placeholder_re = re.compile("|".join(re.escape(p) for p in replacements))
    final_resume = placeholder_re.sub(lambda m: replacements[m.group(0)], template)

    # Save customized resume
    output_path = os.path.join(output_dir, f"{candidate_name}_customised_resume{output_suffix}.txt")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_resume)

    print("✅ Resume customized and saved to:", output_path)
    return output_path


In [8]:
# Free-text request; customise_resume forwards it to the LLM to generate the job description.
frontend_prompt = "Create a JD for a retail IRB model validation requirement. Mention that SQL is a must."
# Resume to tailor; its file name (without extension) becomes the candidate name.
uploaded_resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\Rajvi Doshi.pdf"

# Runs the whole pipeline; the returned output path is displayed as the cell result.
customise_resume(frontend_prompt, uploaded_resume_path)

Step 1: Generating JD from prompt → Create a JD for a retail IRB model validation requirement. Mention that SQL is a must.
Step 2: Reading resume → C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\Rajvi Doshi.pdf
Step 3: Customizing resume using resume + generated JD
✅ Resume customized and saved to: C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes\Rajvi Doshi_customised_resumerajvi1.txt


'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\Rajvi Doshi_customised_resumerajvi1.txt'

In [None]:
import os
import fitz  # PyMuPDF
import docx
import re
import json
import requests
import time

# ---- CONFIGURATION ----
# Text template containing the [PLACEHOLDER] tokens that customise_resume fills in.
# NOTE: absolute, user-specific Windows path; edit before running on another machine.
ey_template_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\EY_sample_resume_template.txt"
# Folder that receives the generated resume files.
output_dir = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes"
# Create the output folder up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(output_dir, exist_ok=True)

# ---- LM Studio CALL ----
def call_lmstudio(prompt, model="zephyr-7b-beta.Q4_K_M.gguf", retries=3):
    """Send a single-turn chat prompt to the local LM Studio server.

    Args:
        prompt: Text sent as the only user message.
        model: Model identifier placed in the request payload.
        retries: Maximum number of attempts before giving up.

    Returns:
        The first choice's message content, or "" when every attempt fails
        (including when ``retries`` is zero or negative).
    """
    url = "http://localhost:1234/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "stream": False
    }
    for attempt in range(retries):
        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=180)
            # Report HTTP 4xx/5xx as such instead of a KeyError on 'choices'.
            response.raise_for_status()
            return response.json()['choices'][0]['message']['content']
        except Exception as e:
            if attempt == retries - 1:
                print("❌ LLM call failed:", e)
                return ""
            # Short pause before the next attempt.
            time.sleep(2)
    # retries <= 0: the loop never ran; still honour the "" failure contract.
    return ""

# ---- READ RESUME ----
def read_resume_text(file_path):
    """Return the plain text of a resume file.

    The reader is picked from the file extension, compared
    case-insensitively: ``.pdf`` uses PyMuPDF, ``.docx`` uses python-docx,
    and anything else is read as UTF-8 text.

    Args:
        file_path: Path to the resume file.

    Returns:
        The extracted text. PDF pages and DOCX paragraphs are joined with
        newlines; blank DOCX paragraphs are dropped.
    """
    lowered = file_path.lower()
    if lowered.endswith(".pdf"):
        doc = fitz.open(file_path)
        try:
            return "\n".join(page.get_text() for page in doc)
        finally:
            # Release the file handle once the text has been pulled out.
            doc.close()
    if lowered.endswith(".docx"):
        document = docx.Document(file_path)
        return "\n".join(p.text for p in document.paragraphs if p.text.strip())
    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()

# ---- MAIN FUNCTION ----
def customise_resume(prompt_text, resume_path, output_suffix="rajviifrs9"):
    """Generate a JD from a prompt, tailor a resume to it, and save the result.

    Steps: ask the LLM for a job description built from ``prompt_text``;
    read the resume; ask the LLM to rewrite the SUMMARY / QUALIFICATIONS /
    EXPERIENCE / TECHNICAL_SKILLS sections; fill the template at
    ``ey_template_path`` and write a .txt file into ``output_dir``.

    Args:
        prompt_text: Free-text request used to generate the JD. It is also
            substituted verbatim for the template's ``[ROLE]`` placeholder.
        resume_path: Path to the candidate's resume; the file name without
            its extension is used as the candidate name.
        output_suffix: Tag appended to the output file name, so one
            definition can serve several candidates.

    Returns:
        Path of the written file, or None when JD generation fails, the
        resume yields no text, or the customization call fails.
    """
    candidate_name = os.path.splitext(os.path.basename(resume_path))[0]

    print(f"Step 1: Generating JD from prompt → {prompt_text}")
    generated_jd = call_lmstudio(f"Create a detailed job description based on this prompt:\n{prompt_text}")
    if not generated_jd:
        print("JD generation failed.")
        return

    # ✅ Print generated JD for reference
    print("Generated JD:\n", generated_jd)

    print(f"Step 2: Reading resume → {resume_path}")
    resume_text = read_resume_text(resume_path)
    if not resume_text or not resume_text.strip():
        # Without source text the LLM has nothing to rewrite.
        print("Resume text is empty; nothing to customize.")
        return

    print("Step 3: Customizing resume using resume + generated JD")
    custom_prompt = f"""
You are a resume writer. Given the candidate's resume and the job description, rewrite the following sections to make the resume aligned with the JD. DO NOT INVENT DETAILS. Use only the content in the resume.

Resume:
{resume_text}

Job Description:
{generated_jd}

Provide the output in this format (with labels):
SUMMARY:
...

QUALIFICATIONS:
...

EXPERIENCE:
...

TECHNICAL_SKILLS:
...
"""

    result = call_lmstudio(custom_prompt)
    if not result:
        print("Customization failed.")
        return

    # Each section is the text between its label and the next one, so the
    # labels must appear in the order requested in the prompt.
    section_patterns = {
        "[SUMMARY]": r"SUMMARY:(.*?)QUALIFICATIONS:",
        "[QUALIFICATIONS]": r"QUALIFICATIONS:(.*?)EXPERIENCE:",
        "[EXPERIENCE_PLACEHOLDER]": r"EXPERIENCE:(.*?)TECHNICAL_SKILLS:",
        "[TECHNICAL_SKILLS]": r"TECHNICAL_SKILLS:(.*)",
    }
    replacements = {"[CANDIDATE_NAME]": candidate_name, "[ROLE]": prompt_text}
    for placeholder, pattern in section_patterns.items():
        match = re.search(pattern, result, re.DOTALL)
        replacements[placeholder] = match.group(1).strip() if match else ""

    missing = [p for p in section_patterns if not replacements[p]]
    if missing:
        # Make blank sections visible instead of silently reporting success.
        print("⚠️ LLM output had no content for:", ", ".join(missing))

    # Read EY template
    with open(ey_template_path, "r", encoding="utf-8") as f:
        template = f.read()

    # Fill template in one pass so inserted text is never re-scanned for
    # placeholder tokens (chained str.replace would substitute inside it).
    placeholder_re = re.compile("|".join(re.escape(p) for p in replacements))
    final_resume = placeholder_re.sub(lambda m: replacements[m.group(0)], template)

    # Save customized resume
    output_path = os.path.join(output_dir, f"{candidate_name}_customised_resume{output_suffix}.txt")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_resume)

    print("✅ Resume customized and saved to:", output_path)
    return output_path


In [10]:
# Free-text request; customise_resume forwards it to the LLM to generate the job description.
frontend_prompt = "Create a JD for a retail IFRS9 model validation requirement. Mention that SAS is a must."
# Resume to tailor; its file name (without extension) becomes the candidate name.
uploaded_resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\Rajvi Doshi.pdf"

# Runs the whole pipeline; the returned output path is displayed as the cell result.
customise_resume(frontend_prompt, uploaded_resume_path)

Step 1: Generating JD from prompt → Create a JD for a retail IFRS9 model validation requirement. Mention that SAS is a must.
Step 2: Reading resume → C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\Rajvi Doshi.pdf
Step 3: Customizing resume using resume + generated JD
✅ Resume customized and saved to: C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes\Rajvi Doshi_customised_resumerajviifrs9.txt


'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\Rajvi Doshi_customised_resumerajviifrs9.txt'

In [None]:
import os
import fitz  # PyMuPDF
import docx
import re
import json
import requests
import time

# ---- CONFIGURATION ----
# Text template containing the [PLACEHOLDER] tokens that customise_resume fills in.
# NOTE: absolute, user-specific Windows path; edit before running on another machine.
ey_template_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\EY_sample_resume_template.txt"
# Folder that receives the generated resume files.
output_dir = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes"
# Create the output folder up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(output_dir, exist_ok=True)

# ---- LM Studio CALL ----
def call_lmstudio(prompt, model="zephyr-7b-beta.Q4_K_M.gguf", retries=3):
    """Send a single-turn chat prompt to the local LM Studio server.

    Args:
        prompt: Text sent as the only user message.
        model: Model identifier placed in the request payload.
        retries: Maximum number of attempts before giving up.

    Returns:
        The first choice's message content, or "" when every attempt fails
        (including when ``retries`` is zero or negative).
    """
    url = "http://localhost:1234/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "stream": False
    }
    for attempt in range(retries):
        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=180)
            # Report HTTP 4xx/5xx as such instead of a KeyError on 'choices'.
            response.raise_for_status()
            return response.json()['choices'][0]['message']['content']
        except Exception as e:
            if attempt == retries - 1:
                print("❌ LLM call failed:", e)
                return ""
            # Short pause before the next attempt.
            time.sleep(2)
    # retries <= 0: the loop never ran; still honour the "" failure contract.
    return ""

# ---- READ RESUME ----
def read_resume_text(file_path):
    """Return the plain text of a resume file.

    The reader is picked from the file extension, compared
    case-insensitively: ``.pdf`` uses PyMuPDF, ``.docx`` uses python-docx,
    and anything else is read as UTF-8 text.

    Args:
        file_path: Path to the resume file.

    Returns:
        The extracted text. PDF pages and DOCX paragraphs are joined with
        newlines; blank DOCX paragraphs are dropped.
    """
    lowered = file_path.lower()
    if lowered.endswith(".pdf"):
        doc = fitz.open(file_path)
        try:
            return "\n".join(page.get_text() for page in doc)
        finally:
            # Release the file handle once the text has been pulled out.
            doc.close()
    if lowered.endswith(".docx"):
        document = docx.Document(file_path)
        return "\n".join(p.text for p in document.paragraphs if p.text.strip())
    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()

# ---- MAIN FUNCTION ----
def customise_resume(prompt_text, resume_path, output_suffix="rajviifrs91"):
    """Generate a JD from a prompt, tailor a resume to it, and save the result.

    Steps: ask the LLM for a job description built from ``prompt_text``;
    read the resume; ask the LLM to rewrite the SUMMARY / QUALIFICATIONS /
    EXPERIENCE / TECHNICAL_SKILLS sections; fill the template at
    ``ey_template_path`` and write a .txt file into ``output_dir``.

    Args:
        prompt_text: Free-text request used to generate the JD. It is also
            substituted verbatim for the template's ``[ROLE]`` placeholder.
        resume_path: Path to the candidate's resume; the file name without
            its extension is used as the candidate name.
        output_suffix: Tag appended to the output file name, so one
            definition can serve several candidates.

    Returns:
        Path of the written file, or None when JD generation fails, the
        resume yields no text, or the customization call fails.
    """
    candidate_name = os.path.splitext(os.path.basename(resume_path))[0]

    print(f"Step 1: Generating JD from prompt → {prompt_text}")
    generated_jd = call_lmstudio(f"Create a detailed job description based on this prompt:\n{prompt_text}")
    if not generated_jd:
        print("JD generation failed.")
        return

    # ✅ Print generated JD for reference
    print("Generated JD:\n", generated_jd)

    print(f"Step 2: Reading resume → {resume_path}")
    resume_text = read_resume_text(resume_path)
    if not resume_text or not resume_text.strip():
        # Without source text the LLM has nothing to rewrite.
        print("Resume text is empty; nothing to customize.")
        return

    print("Step 3: Customizing resume using resume + generated JD")
    custom_prompt = f"""
You are a resume writer. Given the candidate's resume and the job description, rewrite the following sections to make the resume aligned with the JD. DO NOT INVENT DETAILS. Use only the content in the resume.

Resume:
{resume_text}

Job Description:
{generated_jd}

Provide the output in this format (with labels):
SUMMARY:
...

QUALIFICATIONS:
...

EXPERIENCE:
...

TECHNICAL_SKILLS:
...
"""

    result = call_lmstudio(custom_prompt)
    if not result:
        print("Customization failed.")
        return

    # Each section is the text between its label and the next one, so the
    # labels must appear in the order requested in the prompt.
    section_patterns = {
        "[SUMMARY]": r"SUMMARY:(.*?)QUALIFICATIONS:",
        "[QUALIFICATIONS]": r"QUALIFICATIONS:(.*?)EXPERIENCE:",
        "[EXPERIENCE_PLACEHOLDER]": r"EXPERIENCE:(.*?)TECHNICAL_SKILLS:",
        "[TECHNICAL_SKILLS]": r"TECHNICAL_SKILLS:(.*)",
    }
    replacements = {"[CANDIDATE_NAME]": candidate_name, "[ROLE]": prompt_text}
    for placeholder, pattern in section_patterns.items():
        match = re.search(pattern, result, re.DOTALL)
        replacements[placeholder] = match.group(1).strip() if match else ""

    missing = [p for p in section_patterns if not replacements[p]]
    if missing:
        # Make blank sections visible instead of silently reporting success.
        print("⚠️ LLM output had no content for:", ", ".join(missing))

    # Read EY template
    with open(ey_template_path, "r", encoding="utf-8") as f:
        template = f.read()

    # Fill template in one pass so inserted text is never re-scanned for
    # placeholder tokens (chained str.replace would substitute inside it).
    placeholder_re = re.compile("|".join(re.escape(p) for p in replacements))
    final_resume = placeholder_re.sub(lambda m: replacements[m.group(0)], template)

    # Save customized resume
    output_path = os.path.join(output_dir, f"{candidate_name}_customised_resume{output_suffix}.txt")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_resume)

    print("✅ Resume customized and saved to:", output_path)
    return output_path


In [12]:
# Free-text request; customise_resume forwards it to the LLM to generate the job description.
frontend_prompt = "Create a JD for a retail IFRS9 model validation requirement. Mention that SQL is a must."
# Resume to tailor; its file name (without extension) becomes the candidate name.
uploaded_resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\Rajvi Doshi.pdf"

# Runs the whole pipeline; the returned output path is displayed as the cell result.
customise_resume(frontend_prompt, uploaded_resume_path)

Step 1: Generating JD from prompt → Create a JD for a retail IFRS9 model validation requirement. Mention that SQL is a must.
Step 2: Reading resume → C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\Rajvi Doshi.pdf
Step 3: Customizing resume using resume + generated JD
✅ Resume customized and saved to: C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes\Rajvi Doshi_customised_resumerajviifrs91.txt


'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\Rajvi Doshi_customised_resumerajviifrs91.txt'

In [5]:
import os
import fitz  # PyMuPDF
import docx
import re
import json
import requests
import time

# ---- CONFIGURATION ----
# Text template containing the [PLACEHOLDER] tokens that customise_resume fills in.
# NOTE: absolute, user-specific Windows path; edit before running on another machine.
ey_template_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\EY_sample_resume_template.txt"
# Folder that receives the generated resume files.
output_dir = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes"
# Create the output folder up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(output_dir, exist_ok=True)

# ---- LM Studio CALL ----
def call_lmstudio(prompt, model="zephyr-7b-beta.Q4_K_M.gguf", retries=3):
    """Send a single-turn chat prompt to the local LM Studio server.

    Args:
        prompt: Text sent as the only user message.
        model: Model identifier placed in the request payload.
        retries: Maximum number of attempts before giving up.

    Returns:
        The first choice's message content, or "" when every attempt fails
        (including when ``retries`` is zero or negative).
    """
    url = "http://localhost:1234/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "stream": False
    }
    for attempt in range(retries):
        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=180)
            # Report HTTP 4xx/5xx as such instead of a KeyError on 'choices'.
            response.raise_for_status()
            return response.json()['choices'][0]['message']['content']
        except Exception as e:
            if attempt == retries - 1:
                print("❌ LLM call failed:", e)
                return ""
            # Short pause before the next attempt.
            time.sleep(2)
    # retries <= 0: the loop never ran; still honour the "" failure contract.
    return ""

# ---- READ RESUME ----
def read_resume_text(file_path):
    """Return the text content of a resume file.

    The file type is chosen from the extension, compared case-insensitively:
    ``.pdf`` is read with PyMuPDF, ``.docx`` with python-docx (blank paragraphs
    dropped), and anything else is read as UTF-8 text.

    Args:
        file_path: Path to the resume file.

    Returns:
        The extracted text as a single string.
    """
    ext = os.path.splitext(file_path)[1].lower()
    if ext == ".pdf":
        # Context manager closes the PDF handle once the text has been collected.
        with fitz.open(file_path) as pdf:
            return "\n".join(page.get_text() for page in pdf)
    if ext == ".docx":
        document = docx.Document(file_path)
        return "\n".join(p.text for p in document.paragraphs if p.text.strip())
    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()

# ---- MAIN FUNCTION ----
def customise_resume(prompt_text, resume_path, output_suffix="kundan"):
    """Generate a JD from a prompt, tailor a resume to it via the LLM, and save the filled template.

    Args:
        prompt_text: Free-text request for the job description; also substituted
            for the template's [ROLE] placeholder.
        resume_path: Path to the candidate's resume (read with read_resume_text).
            The file name without its extension is used as the candidate name.
        output_suffix: Text appended after "_customised_resume" in the output file
            name, so separate runs can be saved without editing this function.

    Returns:
        Path of the written file, or None when JD generation, text extraction
        or customisation produced nothing.
    """
    candidate_name = os.path.splitext(os.path.basename(resume_path))[0]

    print(f"Step 1: Generating JD from prompt → {prompt_text}")
    generated_jd = call_lmstudio(f"Create a detailed job description based on this prompt:\n{prompt_text}")
    if not generated_jd:
        print("JD generation failed.")
        return None

    # Show the generated JD so the run can be reviewed afterwards.
    print("Generated JD:\n", generated_jd)

    print(f"Step 2: Reading resume → {resume_path}")
    resume_text = read_resume_text(resume_path)
    if not resume_text.strip():
        # Without source text the model has nothing to rewrite and can only invent content.
        print("No text could be extracted from the resume.")
        return None

    print("Step 3: Customizing resume using resume + generated JD")
    custom_prompt = f"""
You are a resume writer. Given the candidate's resume and the job description, rewrite the following sections to make the resume aligned with the JD. DO NOT INVENT DETAILS. Use only the content in the resume.

Resume:
{resume_text}

Job Description:
{generated_jd}

Provide the output in this format (with labels):
SUMMARY:
...

QUALIFICATIONS:
...

EXPERIENCE:
...

TECHNICAL_SKILLS:
...
"""

    result = call_lmstudio(custom_prompt)
    if not result:
        print("Customization failed.")
        return None

    # Each section runs from its own label up to the next label in the requested format.
    section_patterns = {
        "[SUMMARY]": r"SUMMARY:(.*?)QUALIFICATIONS:",
        "[QUALIFICATIONS]": r"QUALIFICATIONS:(.*?)EXPERIENCE:",
        "[EXPERIENCE_PLACEHOLDER]": r"EXPERIENCE:(.*?)TECHNICAL_SKILLS:",
        "[TECHNICAL_SKILLS]": r"TECHNICAL_SKILLS:(.*)",
    }
    # NOTE(review): [ROLE] receives the whole prompt sentence, not a role title — confirm intent.
    replacements = {"[CANDIDATE_NAME]": candidate_name, "[ROLE]": prompt_text}
    for placeholder, pattern in section_patterns.items():
        match = re.search(pattern, result, re.DOTALL)
        if match is None:
            # The model did not follow the label format; keep going but make it visible.
            print(f"Warning: no text found for {placeholder} in the LLM output; leaving it blank.")
        replacements[placeholder] = match.group(1).strip() if match else ""

    # Read EY template
    with open(ey_template_path, "r", encoding="utf-8") as f:
        final_resume = f.read()

    # Fill template
    for placeholder, value in replacements.items():
        final_resume = final_resume.replace(placeholder, value)

    # Save customized resume
    output_path = os.path.join(output_dir, f"{candidate_name}_customised_resume{output_suffix}.txt")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_resume)

    print("✅ Resume customized and saved to:", output_path)
    return output_path


In [6]:
# Free-text request; customise_resume uses it to generate the job description.
frontend_prompt = "Create a JD for a retail IRB model validation requirement. Mention that SAS is a must."
# Resume file to customise.
# NOTE(review): absolute, machine-specific path — will not resolve on another machine.
uploaded_resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\Kundan_Kumar.pdf"

# Last expression of the cell, so the notebook echoes the returned output path.
customise_resume(frontend_prompt, uploaded_resume_path)

Step 1: Generating JD from prompt → Create a JD for a retail IRB model validation requirement. Mention that SAS is a must.
Generated JD:
  Job Title: Retail Model Validation Specialist - SAS Expert
Job Summary:
The retail model validation specialist will be responsible for ensuring the accuracy and reliability of models used in retail operations. This role will require expertise in SAS programming language to validate and test models, identify any errors or issues, and provide recommendations on improvements.

Key Responsibilities:
• Conduct regular model validations to ensure compliance with regulatory requirements and best practices
• Use SAS to analyze data, validate models, and perform statistical tests
• Identify and document any errors or inconsistencies in models and suggest corrective actions
• Collaborate with cross-functional teams including data science, analytics, IT, and operations to ensure models are accurately implemented
• Develop documentation and reports on model val

'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\Kundan_Kumar_customised_resumekundan.txt'

In [None]:
import os
import fitz  # PyMuPDF
import docx
import re
import json
import requests
import time

# ---- CONFIGURATION ----
# Text template that customise_resume() opens and fills by replacing its [PLACEHOLDER] tokens.
ey_template_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\EY_sample_resume_template.txt"
# Folder that customise_resume() writes the customised resume into.
# NOTE(review): both paths are absolute and machine-specific — consider making them configurable.
output_dir = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes"
# Create the output folder up front; exist_ok=True makes re-running the cell harmless.
os.makedirs(output_dir, exist_ok=True)

# ---- LM Studio CALL ----
def call_lmstudio(prompt, model="zephyr-7b-beta.Q4_K_M.gguf", retries=3):
    """Send a single-turn chat prompt to the local LM Studio server and return the reply.

    Args:
        prompt: Text sent as the only user message.
        model: Model identifier placed in the request payload.
        retries: Maximum number of attempts; values below 1 mean no request is sent.

    Returns:
        The reply content of the first choice on success, or "" when every
        attempt failed (or no attempt was made). Callers test the result with
        ``if not ...``, so the failure value is always an empty string.
    """
    url = "http://localhost:1234/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "stream": False
    }
    last_error = "no attempt was made (retries < 1)"
    for attempt in range(retries):
        if attempt:
            time.sleep(2)  # short pause before each retry, never after the final failure
        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=180)
            # Surface HTTP errors directly instead of as a KeyError on the error body.
            response.raise_for_status()
            return response.json()['choices'][0]['message']['content']
        except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as e:
            # Transport/HTTP failure, undecodable body, or a body without the expected shape.
            last_error = e
    print("❌ LLM call failed:", last_error)
    return ""

# ---- READ RESUME ----
def read_resume_text(file_path):
    """Return the text content of a resume file.

    The file type is chosen from the extension, compared case-insensitively:
    ``.pdf`` is read with PyMuPDF, ``.docx`` with python-docx (blank paragraphs
    dropped), and anything else is read as UTF-8 text.

    Args:
        file_path: Path to the resume file.

    Returns:
        The extracted text as a single string.
    """
    ext = os.path.splitext(file_path)[1].lower()
    if ext == ".pdf":
        # Context manager closes the PDF handle once the text has been collected.
        with fitz.open(file_path) as pdf:
            return "\n".join(page.get_text() for page in pdf)
    if ext == ".docx":
        document = docx.Document(file_path)
        return "\n".join(p.text for p in document.paragraphs if p.text.strip())
    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()

# ---- MAIN FUNCTION ----
def customise_resume(prompt_text, resume_path, output_suffix="kundanIRB1"):
    """Generate a JD from a prompt, tailor a resume to it via the LLM, and save the filled template.

    Args:
        prompt_text: Free-text request for the job description; also substituted
            for the template's [ROLE] placeholder.
        resume_path: Path to the candidate's resume (read with read_resume_text).
            The file name without its extension is used as the candidate name.
        output_suffix: Text appended after "_customised_resume" in the output file
            name, so separate runs can be saved without editing this function.

    Returns:
        Path of the written file, or None when JD generation, text extraction
        or customisation produced nothing.
    """
    candidate_name = os.path.splitext(os.path.basename(resume_path))[0]

    print(f"Step 1: Generating JD from prompt → {prompt_text}")
    generated_jd = call_lmstudio(f"Create a detailed job description based on this prompt:\n{prompt_text}")
    if not generated_jd:
        print("JD generation failed.")
        return None

    # Show the generated JD so the run can be reviewed afterwards.
    print("Generated JD:\n", generated_jd)

    print(f"Step 2: Reading resume → {resume_path}")
    resume_text = read_resume_text(resume_path)
    if not resume_text.strip():
        # Without source text the model has nothing to rewrite and can only invent content.
        print("No text could be extracted from the resume.")
        return None

    print("Step 3: Customizing resume using resume + generated JD")
    custom_prompt = f"""
You are a resume writer. Given the candidate's resume and the job description, rewrite the following sections to make the resume aligned with the JD. DO NOT INVENT DETAILS. Use only the content in the resume.

Resume:
{resume_text}

Job Description:
{generated_jd}

Provide the output in this format (with labels):
SUMMARY:
...

QUALIFICATIONS:
...

EXPERIENCE:
...

TECHNICAL_SKILLS:
...
"""

    result = call_lmstudio(custom_prompt)
    if not result:
        print("Customization failed.")
        return None

    # Each section runs from its own label up to the next label in the requested format.
    section_patterns = {
        "[SUMMARY]": r"SUMMARY:(.*?)QUALIFICATIONS:",
        "[QUALIFICATIONS]": r"QUALIFICATIONS:(.*?)EXPERIENCE:",
        "[EXPERIENCE_PLACEHOLDER]": r"EXPERIENCE:(.*?)TECHNICAL_SKILLS:",
        "[TECHNICAL_SKILLS]": r"TECHNICAL_SKILLS:(.*)",
    }
    # NOTE(review): [ROLE] receives the whole prompt sentence, not a role title — confirm intent.
    replacements = {"[CANDIDATE_NAME]": candidate_name, "[ROLE]": prompt_text}
    for placeholder, pattern in section_patterns.items():
        match = re.search(pattern, result, re.DOTALL)
        if match is None:
            # The model did not follow the label format; keep going but make it visible.
            print(f"Warning: no text found for {placeholder} in the LLM output; leaving it blank.")
        replacements[placeholder] = match.group(1).strip() if match else ""

    # Read EY template
    with open(ey_template_path, "r", encoding="utf-8") as f:
        final_resume = f.read()

    # Fill template
    for placeholder, value in replacements.items():
        final_resume = final_resume.replace(placeholder, value)

    # Save customized resume
    output_path = os.path.join(output_dir, f"{candidate_name}_customised_resume{output_suffix}.txt")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_resume)

    print("✅ Resume customized and saved to:", output_path)
    return output_path


In [4]:
# Free-text request; customise_resume uses it to generate the job description.
frontend_prompt = "Create a JD for a retail IRB model validation requirement. Mention that SQL is a must."
# Resume file to customise.
# NOTE(review): absolute, machine-specific path — will not resolve on another machine.
uploaded_resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\Kundan_Kumar.pdf"

# Last expression of the cell, so the notebook echoes the returned output path.
# NOTE(review): the recorded output ends in "resumeKundanIRB1.txt" while the definition above
# hard-codes "resumekundanIRB1.txt" — it looks like this ran against an older definition; re-run from a fresh kernel.
customise_resume(frontend_prompt, uploaded_resume_path)

Step 1: Generating JD from prompt → Create a JD for a retail IRB model validation requirement. Mention that SQL is a must.
Step 2: Reading resume → C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\Kundan_Kumar.pdf
Step 3: Customizing resume using resume + generated JD
✅ Resume customized and saved to: C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes\Kundan_Kumar_customised_resumeKundanIRB1.txt


'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\Kundan_Kumar_customised_resumeKundanIRB1.txt'

In [None]:
import os
import fitz  # PyMuPDF
import docx
import re
import json
import requests
import time

# ---- CONFIGURATION ----
# Text template that customise_resume() opens and fills by replacing its [PLACEHOLDER] tokens.
ey_template_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\EY_sample_resume_template.txt"
# Folder that customise_resume() writes the customised resume into.
# NOTE(review): both paths are absolute and machine-specific — consider making them configurable.
output_dir = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes"
# Create the output folder up front; exist_ok=True makes re-running the cell harmless.
os.makedirs(output_dir, exist_ok=True)

# ---- LM Studio CALL ----
def call_lmstudio(prompt, model="zephyr-7b-beta.Q4_K_M.gguf", retries=3):
    """Send a single-turn chat prompt to the local LM Studio server and return the reply.

    Args:
        prompt: Text sent as the only user message.
        model: Model identifier placed in the request payload.
        retries: Maximum number of attempts; values below 1 mean no request is sent.

    Returns:
        The reply content of the first choice on success, or "" when every
        attempt failed (or no attempt was made). Callers test the result with
        ``if not ...``, so the failure value is always an empty string.
    """
    url = "http://localhost:1234/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "stream": False
    }
    last_error = "no attempt was made (retries < 1)"
    for attempt in range(retries):
        if attempt:
            time.sleep(2)  # short pause before each retry, never after the final failure
        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=180)
            # Surface HTTP errors directly instead of as a KeyError on the error body.
            response.raise_for_status()
            return response.json()['choices'][0]['message']['content']
        except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as e:
            # Transport/HTTP failure, undecodable body, or a body without the expected shape.
            last_error = e
    print("❌ LLM call failed:", last_error)
    return ""

# ---- READ RESUME ----
def read_resume_text(file_path):
    """Return the text content of a resume file.

    The file type is chosen from the extension, compared case-insensitively:
    ``.pdf`` is read with PyMuPDF, ``.docx`` with python-docx (blank paragraphs
    dropped), and anything else is read as UTF-8 text.

    Args:
        file_path: Path to the resume file.

    Returns:
        The extracted text as a single string.
    """
    ext = os.path.splitext(file_path)[1].lower()
    if ext == ".pdf":
        # Context manager closes the PDF handle once the text has been collected.
        with fitz.open(file_path) as pdf:
            return "\n".join(page.get_text() for page in pdf)
    if ext == ".docx":
        document = docx.Document(file_path)
        return "\n".join(p.text for p in document.paragraphs if p.text.strip())
    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()

# ---- MAIN FUNCTION ----
def customise_resume(prompt_text, resume_path, output_suffix="kundanifrs9"):
    """Generate a JD from a prompt, tailor a resume to it via the LLM, and save the filled template.

    Args:
        prompt_text: Free-text request for the job description; also substituted
            for the template's [ROLE] placeholder.
        resume_path: Path to the candidate's resume (read with read_resume_text).
            The file name without its extension is used as the candidate name.
        output_suffix: Text appended after "_customised_resume" in the output file
            name, so separate runs can be saved without editing this function.

    Returns:
        Path of the written file, or None when JD generation, text extraction
        or customisation produced nothing.
    """
    candidate_name = os.path.splitext(os.path.basename(resume_path))[0]

    print(f"Step 1: Generating JD from prompt → {prompt_text}")
    generated_jd = call_lmstudio(f"Create a detailed job description based on this prompt:\n{prompt_text}")
    if not generated_jd:
        print("JD generation failed.")
        return None

    # Show the generated JD so the run can be reviewed afterwards.
    print("Generated JD:\n", generated_jd)

    print(f"Step 2: Reading resume → {resume_path}")
    resume_text = read_resume_text(resume_path)
    if not resume_text.strip():
        # Without source text the model has nothing to rewrite and can only invent content.
        print("No text could be extracted from the resume.")
        return None

    print("Step 3: Customizing resume using resume + generated JD")
    custom_prompt = f"""
You are a resume writer. Given the candidate's resume and the job description, rewrite the following sections to make the resume aligned with the JD. DO NOT INVENT DETAILS. Use only the content in the resume.

Resume:
{resume_text}

Job Description:
{generated_jd}

Provide the output in this format (with labels):
SUMMARY:
...

QUALIFICATIONS:
...

EXPERIENCE:
...

TECHNICAL_SKILLS:
...
"""

    result = call_lmstudio(custom_prompt)
    if not result:
        print("Customization failed.")
        return None

    # Each section runs from its own label up to the next label in the requested format.
    section_patterns = {
        "[SUMMARY]": r"SUMMARY:(.*?)QUALIFICATIONS:",
        "[QUALIFICATIONS]": r"QUALIFICATIONS:(.*?)EXPERIENCE:",
        "[EXPERIENCE_PLACEHOLDER]": r"EXPERIENCE:(.*?)TECHNICAL_SKILLS:",
        "[TECHNICAL_SKILLS]": r"TECHNICAL_SKILLS:(.*)",
    }
    # NOTE(review): [ROLE] receives the whole prompt sentence, not a role title — confirm intent.
    replacements = {"[CANDIDATE_NAME]": candidate_name, "[ROLE]": prompt_text}
    for placeholder, pattern in section_patterns.items():
        match = re.search(pattern, result, re.DOTALL)
        if match is None:
            # The model did not follow the label format; keep going but make it visible.
            print(f"Warning: no text found for {placeholder} in the LLM output; leaving it blank.")
        replacements[placeholder] = match.group(1).strip() if match else ""

    # Read EY template
    with open(ey_template_path, "r", encoding="utf-8") as f:
        final_resume = f.read()

    # Fill template
    for placeholder, value in replacements.items():
        final_resume = final_resume.replace(placeholder, value)

    # Save customized resume
    output_path = os.path.join(output_dir, f"{candidate_name}_customised_resume{output_suffix}.txt")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_resume)

    print("✅ Resume customized and saved to:", output_path)
    return output_path


In [6]:
# Free-text request; customise_resume uses it to generate the job description.
frontend_prompt = "Create a JD for a retail IRB model validation requirement. Mention that SAS is a must."
# Resume file to customise.
# NOTE(review): absolute, machine-specific path — will not resolve on another machine.
uploaded_resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\Kundan_Kumar.pdf"

# Last expression of the cell, so the notebook echoes the returned output path.
# NOTE(review): the recorded output ends in "resumeKundanIFRS9.txt" while the definition above
# hard-codes "resumekundanifrs9.txt", and the prompt asks for IRB, not IFRS9 — likely a stale kernel definition.
customise_resume(frontend_prompt, uploaded_resume_path)

Step 1: Generating JD from prompt → Create a JD for a retail IRB model validation requirement. Mention that SAS is a must.
Step 2: Reading resume → C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\Kundan_Kumar.pdf
Step 3: Customizing resume using resume + generated JD
✅ Resume customized and saved to: C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes\Kundan_Kumar_customised_resumeKundanIFRS9.txt


'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\Kundan_Kumar_customised_resumeKundanIFRS9.txt'

In [7]:
import os
import fitz  # PyMuPDF
import docx
import re
import json
import requests
import time

# ---- CONFIGURATION ----
# Text template that customise_resume() opens and fills by replacing its [PLACEHOLDER] tokens.
ey_template_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\EY_sample_resume_template.txt"
# Folder that customise_resume() writes the customised resume into.
# NOTE(review): both paths are absolute and machine-specific — consider making them configurable.
output_dir = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes"
# Create the output folder up front; exist_ok=True makes re-running the cell harmless.
os.makedirs(output_dir, exist_ok=True)

# ---- LM Studio CALL ----
def call_lmstudio(prompt, model="zephyr-7b-beta.Q4_K_M.gguf", retries=3):
    """Send a single-turn chat prompt to the local LM Studio server and return the reply.

    Args:
        prompt: Text sent as the only user message.
        model: Model identifier placed in the request payload.
        retries: Maximum number of attempts; values below 1 mean no request is sent.

    Returns:
        The reply content of the first choice on success, or "" when every
        attempt failed (or no attempt was made). Callers test the result with
        ``if not ...``, so the failure value is always an empty string.
    """
    url = "http://localhost:1234/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "stream": False
    }
    last_error = "no attempt was made (retries < 1)"
    for attempt in range(retries):
        if attempt:
            time.sleep(2)  # short pause before each retry, never after the final failure
        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=180)
            # Surface HTTP errors directly instead of as a KeyError on the error body.
            response.raise_for_status()
            return response.json()['choices'][0]['message']['content']
        except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as e:
            # Transport/HTTP failure, undecodable body, or a body without the expected shape.
            last_error = e
    print("❌ LLM call failed:", last_error)
    return ""

# ---- READ RESUME ----
def read_resume_text(file_path):
    """Return the text content of a resume file.

    The file type is chosen from the extension, compared case-insensitively:
    ``.pdf`` is read with PyMuPDF, ``.docx`` with python-docx (blank paragraphs
    dropped), and anything else is read as UTF-8 text.

    Args:
        file_path: Path to the resume file.

    Returns:
        The extracted text as a single string.
    """
    ext = os.path.splitext(file_path)[1].lower()
    if ext == ".pdf":
        # Context manager closes the PDF handle once the text has been collected.
        with fitz.open(file_path) as pdf:
            return "\n".join(page.get_text() for page in pdf)
    if ext == ".docx":
        document = docx.Document(file_path)
        return "\n".join(p.text for p in document.paragraphs if p.text.strip())
    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()

# ---- MAIN FUNCTION ----
def customise_resume(prompt_text, resume_path, output_suffix="kundanifrs91"):
    """Generate a JD from a prompt, tailor a resume to it via the LLM, and save the filled template.

    Args:
        prompt_text: Free-text request for the job description; also substituted
            for the template's [ROLE] placeholder.
        resume_path: Path to the candidate's resume (read with read_resume_text).
            The file name without its extension is used as the candidate name.
        output_suffix: Text appended after "_customised_resume" in the output file
            name, so separate runs can be saved without editing this function.

    Returns:
        Path of the written file, or None when JD generation, text extraction
        or customisation produced nothing.
    """
    candidate_name = os.path.splitext(os.path.basename(resume_path))[0]

    print(f"Step 1: Generating JD from prompt → {prompt_text}")
    generated_jd = call_lmstudio(f"Create a detailed job description based on this prompt:\n{prompt_text}")
    if not generated_jd:
        print("JD generation failed.")
        return None

    # Show the generated JD so the run can be reviewed afterwards.
    print("Generated JD:\n", generated_jd)

    print(f"Step 2: Reading resume → {resume_path}")
    resume_text = read_resume_text(resume_path)
    if not resume_text.strip():
        # Without source text the model has nothing to rewrite and can only invent content.
        print("No text could be extracted from the resume.")
        return None

    print("Step 3: Customizing resume using resume + generated JD")
    custom_prompt = f"""
You are a resume writer. Given the candidate's resume and the job description, rewrite the following sections to make the resume aligned with the JD. DO NOT INVENT DETAILS. Use only the content in the resume.

Resume:
{resume_text}

Job Description:
{generated_jd}

Provide the output in this format (with labels):
SUMMARY:
...

QUALIFICATIONS:
...

EXPERIENCE:
...

TECHNICAL_SKILLS:
...
"""

    result = call_lmstudio(custom_prompt)
    if not result:
        print("Customization failed.")
        return None

    # Each section runs from its own label up to the next label in the requested format.
    section_patterns = {
        "[SUMMARY]": r"SUMMARY:(.*?)QUALIFICATIONS:",
        "[QUALIFICATIONS]": r"QUALIFICATIONS:(.*?)EXPERIENCE:",
        "[EXPERIENCE_PLACEHOLDER]": r"EXPERIENCE:(.*?)TECHNICAL_SKILLS:",
        "[TECHNICAL_SKILLS]": r"TECHNICAL_SKILLS:(.*)",
    }
    # NOTE(review): [ROLE] receives the whole prompt sentence, not a role title — confirm intent.
    replacements = {"[CANDIDATE_NAME]": candidate_name, "[ROLE]": prompt_text}
    for placeholder, pattern in section_patterns.items():
        match = re.search(pattern, result, re.DOTALL)
        if match is None:
            # The model did not follow the label format; keep going but make it visible.
            print(f"Warning: no text found for {placeholder} in the LLM output; leaving it blank.")
        replacements[placeholder] = match.group(1).strip() if match else ""

    # Read EY template
    with open(ey_template_path, "r", encoding="utf-8") as f:
        final_resume = f.read()

    # Fill template
    for placeholder, value in replacements.items():
        final_resume = final_resume.replace(placeholder, value)

    # Save customized resume
    output_path = os.path.join(output_dir, f"{candidate_name}_customised_resume{output_suffix}.txt")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_resume)

    print("✅ Resume customized and saved to:", output_path)
    return output_path


In [8]:
# Free-text request; customise_resume uses it to generate the job description.
frontend_prompt = "Create a JD for a retail IRB model validation requirement. Mention that SQL is a must."
# Resume file to customise.
# NOTE(review): absolute, machine-specific path — will not resolve on another machine.
uploaded_resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\Kundan_Kumar.pdf"

# Last expression of the cell, so the notebook echoes the returned output path.
customise_resume(frontend_prompt, uploaded_resume_path)

Step 1: Generating JD from prompt → Create a JD for a retail IRB model validation requirement. Mention that SQL is a must.
Generated JD:
  "Job Title: Retail IRB Model Validation Specialist

Job Summary:
We are seeking a highly skilled and detail-oriented individual to join our team as a Retail IRB Model Validation Specialist. This individual will be responsible for ensuring the accuracy and integrity of data utilized in our retail model validation processes, while also leveraging SQL to analyze complex data sets. Successful candidates must have a strong background in SQL and possess excellent analytical and problem-solving skills.

Key Responsibilities:
* Conducting regular audits and validations of our retail models to ensure compliance with regulatory requirements and best practices
* Utilizing SQL to gather and analyze large data sets, including customer transactional data, sales figures, and other relevant metrics
* Identifying inconsistencies or errors in model results and develo

'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\Kundan_Kumar_customised_resumekundanifrs91.txt'

In [9]:
import os
import fitz  # PyMuPDF
import docx
import re
import json
import requests
import time

# ---- CONFIGURATION ----
# Text template that customise_resume() opens and fills by replacing its [PLACEHOLDER] tokens.
ey_template_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\EY_sample_resume_template.txt"
# Folder that customise_resume() writes the customised resume into.
# NOTE(review): both paths are absolute and machine-specific — consider making them configurable.
output_dir = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes"
# Create the output folder up front; exist_ok=True makes re-running the cell harmless.
os.makedirs(output_dir, exist_ok=True)

# ---- LM Studio CALL ----
def call_lmstudio(prompt, model="zephyr-7b-beta.Q4_K_M.gguf", retries=3):
    """Send a single-turn chat prompt to the local LM Studio server and return the reply.

    Args:
        prompt: Text sent as the only user message.
        model: Model identifier placed in the request payload.
        retries: Maximum number of attempts; values below 1 mean no request is sent.

    Returns:
        The reply content of the first choice on success, or "" when every
        attempt failed (or no attempt was made). Callers test the result with
        ``if not ...``, so the failure value is always an empty string.
    """
    url = "http://localhost:1234/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "stream": False
    }
    last_error = "no attempt was made (retries < 1)"
    for attempt in range(retries):
        if attempt:
            time.sleep(2)  # short pause before each retry, never after the final failure
        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=180)
            # Surface HTTP errors directly instead of as a KeyError on the error body.
            response.raise_for_status()
            return response.json()['choices'][0]['message']['content']
        except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as e:
            # Transport/HTTP failure, undecodable body, or a body without the expected shape.
            last_error = e
    print("❌ LLM call failed:", last_error)
    return ""

# ---- READ RESUME ----
def read_resume_text(file_path):
    """Return the text content of a resume file.

    The file type is chosen from the extension, compared case-insensitively:
    ``.pdf`` is read with PyMuPDF, ``.docx`` with python-docx (blank paragraphs
    dropped), and anything else is read as UTF-8 text.

    Args:
        file_path: Path to the resume file.

    Returns:
        The extracted text as a single string.
    """
    ext = os.path.splitext(file_path)[1].lower()
    if ext == ".pdf":
        # Context manager closes the PDF handle once the text has been collected.
        with fitz.open(file_path) as pdf:
            return "\n".join(page.get_text() for page in pdf)
    if ext == ".docx":
        document = docx.Document(file_path)
        return "\n".join(p.text for p in document.paragraphs if p.text.strip())
    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()

# ---- MAIN FUNCTION ----
def customise_resume(prompt_text, resume_path, output_suffix="arnabirb"):
    """Generate a JD from a prompt, tailor a resume to it via the LLM, and save the filled template.

    Args:
        prompt_text: Free-text request for the job description; also substituted
            for the template's [ROLE] placeholder.
        resume_path: Path to the candidate's resume (read with read_resume_text).
            The file name without its extension is used as the candidate name.
        output_suffix: Text appended after "_customised_resume" in the output file
            name, so separate runs can be saved without editing this function.

    Returns:
        Path of the written file, or None when JD generation, text extraction
        or customisation produced nothing.
    """
    candidate_name = os.path.splitext(os.path.basename(resume_path))[0]

    print(f"Step 1: Generating JD from prompt → {prompt_text}")
    generated_jd = call_lmstudio(f"Create a detailed job description based on this prompt:\n{prompt_text}")
    if not generated_jd:
        print("JD generation failed.")
        return None

    # Show the generated JD so the run can be reviewed afterwards.
    print("Generated JD:\n", generated_jd)

    print(f"Step 2: Reading resume → {resume_path}")
    resume_text = read_resume_text(resume_path)
    if not resume_text.strip():
        # Without source text the model has nothing to rewrite and can only invent content.
        print("No text could be extracted from the resume.")
        return None

    print("Step 3: Customizing resume using resume + generated JD")
    custom_prompt = f"""
You are a resume writer. Given the candidate's resume and the job description, rewrite the following sections to make the resume aligned with the JD. DO NOT INVENT DETAILS. Use only the content in the resume.

Resume:
{resume_text}

Job Description:
{generated_jd}

Provide the output in this format (with labels):
SUMMARY:
...

QUALIFICATIONS:
...

EXPERIENCE:
...

TECHNICAL_SKILLS:
...
"""

    result = call_lmstudio(custom_prompt)
    if not result:
        print("Customization failed.")
        return None

    # Each section runs from its own label up to the next label in the requested format.
    section_patterns = {
        "[SUMMARY]": r"SUMMARY:(.*?)QUALIFICATIONS:",
        "[QUALIFICATIONS]": r"QUALIFICATIONS:(.*?)EXPERIENCE:",
        "[EXPERIENCE_PLACEHOLDER]": r"EXPERIENCE:(.*?)TECHNICAL_SKILLS:",
        "[TECHNICAL_SKILLS]": r"TECHNICAL_SKILLS:(.*)",
    }
    # NOTE(review): [ROLE] receives the whole prompt sentence, not a role title — confirm intent.
    replacements = {"[CANDIDATE_NAME]": candidate_name, "[ROLE]": prompt_text}
    for placeholder, pattern in section_patterns.items():
        match = re.search(pattern, result, re.DOTALL)
        if match is None:
            # The model did not follow the label format; keep going but make it visible.
            print(f"Warning: no text found for {placeholder} in the LLM output; leaving it blank.")
        replacements[placeholder] = match.group(1).strip() if match else ""

    # Read EY template
    with open(ey_template_path, "r", encoding="utf-8") as f:
        final_resume = f.read()

    # Fill template
    for placeholder, value in replacements.items():
        final_resume = final_resume.replace(placeholder, value)

    # Save customized resume
    output_path = os.path.join(output_dir, f"{candidate_name}_customised_resume{output_suffix}.txt")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_resume)

    print("✅ Resume customized and saved to:", output_path)
    return output_path


In [10]:
# Free-text request; customise_resume uses it to generate the job description.
frontend_prompt = "Create a JD for a retail IRB model validation requirement. Mention that SQL is a must."
# Resume file to customise.
# NOTE(review): absolute, machine-specific path — will not resolve on another machine.
uploaded_resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\CV_Arnab Roy_Model_Validation.pdf"

# Last expression of the cell, so the notebook echoes the returned output path.
customise_resume(frontend_prompt, uploaded_resume_path)

Step 1: Generating JD from prompt → Create a JD for a retail IRB model validation requirement. Mention that SQL is a must.
Generated JD:
  "Job Description: We are seeking a skilled Retail IRB Model Validation Specialist with expertise in SQL to join our team. In this role, the Specialist will be responsible for validating and ensuring compliance of retail models used by our clients in their sales and marketing efforts. This position requires strong problem-solving skills, attention to detail, and a solid understanding of data analytics. If you have experience with SQL and are passionate about data validation, we encourage you to apply."
Step 2: Reading resume → C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\CV_Arnab Roy_Model_Validation.pdf
Step 3: Customizing resume using resume + generated JD
✅ Resume customized and saved to: C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes\CV_Arnab Roy_Model_Validation_cust

'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\CV_Arnab Roy_Model_Validation_customised_resumearnabirb.txt'

In [11]:
import os
import fitz  # PyMuPDF
import docx
import re
import json
import requests
import time

# ---- CONFIGURATION ----
# Text template that customise_resume() opens and fills by replacing its [PLACEHOLDER] tokens.
ey_template_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\EY_sample_resume_template.txt"
# Folder that customise_resume() writes the customised resume into.
# NOTE(review): both paths are absolute and machine-specific — consider making them configurable.
output_dir = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes"
# Create the output folder up front; exist_ok=True makes re-running the cell harmless.
os.makedirs(output_dir, exist_ok=True)

# ---- LM Studio CALL ----
def call_lmstudio(prompt, model="zephyr-7b-beta.Q4_K_M.gguf", retries=3):
    """Send a single-turn chat prompt to the local LM Studio server and return the reply.

    Args:
        prompt: Text sent as the only user message.
        model: Model identifier placed in the request payload.
        retries: Maximum number of attempts; values below 1 mean no request is sent.

    Returns:
        The reply content of the first choice on success, or "" when every
        attempt failed (or no attempt was made). Callers test the result with
        ``if not ...``, so the failure value is always an empty string.
    """
    url = "http://localhost:1234/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "stream": False
    }
    last_error = "no attempt was made (retries < 1)"
    for attempt in range(retries):
        if attempt:
            time.sleep(2)  # short pause before each retry, never after the final failure
        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=180)
            # Surface HTTP errors directly instead of as a KeyError on the error body.
            response.raise_for_status()
            return response.json()['choices'][0]['message']['content']
        except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as e:
            # Transport/HTTP failure, undecodable body, or a body without the expected shape.
            last_error = e
    print("❌ LLM call failed:", last_error)
    return ""

# ---- READ RESUME ----
def read_resume_text(file_path):
    """Return the text content of a resume file.

    The file type is chosen from the extension, compared case-insensitively:
    ``.pdf`` is read with PyMuPDF, ``.docx`` with python-docx (blank paragraphs
    dropped), and anything else is read as UTF-8 text.

    Args:
        file_path: Path to the resume file.

    Returns:
        The extracted text as a single string.
    """
    ext = os.path.splitext(file_path)[1].lower()
    if ext == ".pdf":
        # Context manager closes the PDF handle once the text has been collected.
        with fitz.open(file_path) as pdf:
            return "\n".join(page.get_text() for page in pdf)
    if ext == ".docx":
        document = docx.Document(file_path)
        return "\n".join(p.text for p in document.paragraphs if p.text.strip())
    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()

# ---- MAIN FUNCTION ----
def customise_resume(prompt_text, resume_path, output_suffix="arnabirb1"):
    """Generate a JD from a prompt, tailor a resume to it via the LLM, and save the filled template.

    Args:
        prompt_text: Free-text request for the job description; also substituted
            for the template's [ROLE] placeholder.
        resume_path: Path to the candidate's resume (read with read_resume_text).
            The file name without its extension is used as the candidate name.
        output_suffix: Text appended after "_customised_resume" in the output file
            name, so separate runs can be saved without editing this function.

    Returns:
        Path of the written file, or None when JD generation, text extraction
        or customisation produced nothing.
    """
    candidate_name = os.path.splitext(os.path.basename(resume_path))[0]

    print(f"Step 1: Generating JD from prompt → {prompt_text}")
    generated_jd = call_lmstudio(f"Create a detailed job description based on this prompt:\n{prompt_text}")
    if not generated_jd:
        print("JD generation failed.")
        return None

    # Show the generated JD so the run can be reviewed afterwards.
    print("Generated JD:\n", generated_jd)

    print(f"Step 2: Reading resume → {resume_path}")
    resume_text = read_resume_text(resume_path)
    if not resume_text.strip():
        # Without source text the model has nothing to rewrite and can only invent content.
        print("No text could be extracted from the resume.")
        return None

    print("Step 3: Customizing resume using resume + generated JD")
    custom_prompt = f"""
You are a resume writer. Given the candidate's resume and the job description, rewrite the following sections to make the resume aligned with the JD. DO NOT INVENT DETAILS. Use only the content in the resume.

Resume:
{resume_text}

Job Description:
{generated_jd}

Provide the output in this format (with labels):
SUMMARY:
...

QUALIFICATIONS:
...

EXPERIENCE:
...

TECHNICAL_SKILLS:
...
"""

    result = call_lmstudio(custom_prompt)
    if not result:
        print("Customization failed.")
        return None

    # Each section runs from its own label up to the next label in the requested format.
    section_patterns = {
        "[SUMMARY]": r"SUMMARY:(.*?)QUALIFICATIONS:",
        "[QUALIFICATIONS]": r"QUALIFICATIONS:(.*?)EXPERIENCE:",
        "[EXPERIENCE_PLACEHOLDER]": r"EXPERIENCE:(.*?)TECHNICAL_SKILLS:",
        "[TECHNICAL_SKILLS]": r"TECHNICAL_SKILLS:(.*)",
    }
    # NOTE(review): [ROLE] receives the whole prompt sentence, not a role title — confirm intent.
    replacements = {"[CANDIDATE_NAME]": candidate_name, "[ROLE]": prompt_text}
    for placeholder, pattern in section_patterns.items():
        match = re.search(pattern, result, re.DOTALL)
        if match is None:
            # The model did not follow the label format; keep going but make it visible.
            print(f"Warning: no text found for {placeholder} in the LLM output; leaving it blank.")
        replacements[placeholder] = match.group(1).strip() if match else ""

    # Read EY template
    with open(ey_template_path, "r", encoding="utf-8") as f:
        final_resume = f.read()

    # Fill template
    for placeholder, value in replacements.items():
        final_resume = final_resume.replace(placeholder, value)

    # Save customized resume; the name uses the full word "resume" before the suffix.
    output_path = os.path.join(output_dir, f"{candidate_name}_customised_resume{output_suffix}.txt")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_resume)

    print("✅ Resume customized and saved to:", output_path)
    return output_path


In [12]:
# Free-text request; customise_resume uses it to generate the job description.
frontend_prompt = "Create a JD for a retail IRB model validation requirement. Mention that SAS is a must."
# Resume file to customise.
# NOTE(review): absolute, machine-specific path — will not resolve on another machine.
uploaded_resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\CV_Arnab Roy_Model_Validation.pdf"

# Last expression of the cell, so the notebook echoes the returned output path.
customise_resume(frontend_prompt, uploaded_resume_path)

Step 1: Generating JD from prompt → Create a JD for a retail IRB model validation requirement. Mention that SAS is a must.
Generated JD:
  "Job Title: Retail IRB Model Validation Specialist

Job Summary/Description:
We are seeking an experienced and detail-oriented Retail IRB Model Validation Specialist to join our team. The ideal candidate will have a strong background in SAS and be responsible for reviewing, validating and ensuring the accuracy of data submitted by retail pharmacies to our company's regulatory board. The role requires a high degree of attention to detail, critical thinking skills, and excellent communication abilities.

Key Responsibilities:
• Review and validate retail IRB submissions using SAS software
• Ensure that all data meets compliance standards and is accurate and complete
• Identify any discrepancies or inconsistencies and document findings in a clear and concise manner
• Communicate findings to relevant stakeholders in a timely and professional manner
• Ma

'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\CV_Arnab Roy_Model_Validation_customised_resumarnabirb1.txt'

In [13]:
import os
import fitz  # PyMuPDF
import docx
import re
import json
import requests
import time

# ---- CONFIGURATION ----
# Path to a text template file; presumably read by customise_resume() later in this cell — verify.
ey_template_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\EY_sample_resume_template.txt"
# Output folder for generated files.
# NOTE(review): both paths are absolute and machine-specific — consider making them configurable.
output_dir = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes"
# Create the output folder up front; exist_ok=True makes re-running the cell harmless.
os.makedirs(output_dir, exist_ok=True)

# ---- LM Studio CALL ----
def call_lmstudio(prompt, model="zephyr-7b-beta.Q4_K_M.gguf", retries=3):
    """Send a single-turn chat prompt to the local LM Studio server.

    Args:
        prompt: Text sent as the only user message.
        model: Model identifier passed in the request payload.
        retries: Maximum number of attempts; failed attempts are separated by
            a 2 second pause.

    Returns:
        The assistant message text, or "" when every attempt fails or when
        ``retries`` is less than 1, so callers can simply test the result for
        truthiness.
    """
    url = "http://localhost:1234/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "stream": False
    }
    last_error = None
    for attempt in range(retries):
        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=180)
            # Report HTTP failures with their status code instead of an opaque
            # KeyError on 'choices' further down.
            response.raise_for_status()
            content = response.json()['choices'][0]['message']['content']
            return content or ""
        except Exception as e:
            last_error = e
            # Pause only when another attempt will follow.
            if attempt < retries - 1:
                time.sleep(2)
    if last_error is not None:
        print("❌ LLM call failed:", last_error)
    # Always return a string, including when the loop body never ran.
    return ""

# ---- READ RESUME ----
def read_resume_text(file_path):
    """Return the text content of a resume file.

    Args:
        file_path: Path to a .pdf, .docx or plain-text file. The extension is
            compared case-insensitively; anything that is not .pdf/.docx is
            read as UTF-8 text.

    Returns:
        The extracted text: PDF pages joined by newlines, the non-blank DOCX
        paragraphs joined by newlines, or the raw file content.
    """
    # Lower-case once so "CV.PDF" / "CV.Docx" are not mistaken for plain text.
    lowered_path = file_path.lower()
    if lowered_path.endswith(".pdf"):
        # The context manager closes the PyMuPDF document once the text is read.
        with fitz.open(file_path) as doc:
            return "\n".join(page.get_text() for page in doc)
    if lowered_path.endswith(".docx"):
        doc = docx.Document(file_path)
        return "\n".join(p.text for p in doc.paragraphs if p.text.strip())
    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()

# ---- MAIN FUNCTION ----
def customise_resume(prompt_text, resume_path, output_tag="arnabifrs9"):
    """Generate a JD from a prompt, tailor a resume to it and save the filled EY template.

    Args:
        prompt_text: Free-text request sent to the LLM to generate the job
            description; also written verbatim into the template's [ROLE] slot.
        resume_path: Path to the candidate's resume (.pdf, .docx or plain text).
            The file name without its extension is used as the candidate name.
        output_tag: Text appended to "<candidate>_customised_resume" in the
            output file name, so separate runs can be told apart without
            redefining this function. Defaults to the previously hard-coded tag.

    Returns:
        Path of the saved .txt file, or None when JD generation or the
        customisation call returns nothing, or the resume has no extractable text.

    Raises:
        FileNotFoundError: If the EY template file does not exist.
    """
    candidate_name = os.path.splitext(os.path.basename(resume_path))[0]

    # Load the template before any LLM call so a wrong template path fails
    # immediately rather than after minutes of generation.
    with open(ey_template_path, "r", encoding="utf-8") as f:
        template = f.read()

    print(f"Step 1: Generating JD from prompt → {prompt_text}")
    generated_jd = call_lmstudio(f"Create a detailed job description based on this prompt:\n{prompt_text}")
    if not generated_jd:
        print("JD generation failed.")
        return

    # ✅ Print generated JD for reference
    print("Generated JD:\n", generated_jd)

    print(f"Step 2: Reading resume → {resume_path}")
    resume_text = read_resume_text(resume_path)
    if not resume_text.strip():
        # With nothing to ground on, the model could only invent content.
        print("No text could be extracted from the resume; nothing to customise.")
        return

    print("Step 3: Customizing resume using resume + generated JD")
    custom_prompt = f"""
You are a resume writer. Given the candidate's resume and the job description, rewrite the following sections to make the resume aligned with the JD. DO NOT INVENT DETAILS. Use only the content in the resume.

Resume:
{resume_text}

Job Description:
{generated_jd}

Provide the output in this format (with labels):
SUMMARY:
...

QUALIFICATIONS:
...

EXPERIENCE:
...

TECHNICAL_SKILLS:
...
"""

    result = call_lmstudio(custom_prompt)
    if not result:
        print("Customization failed.")
        return

    # Each section is the text between its label and the next expected label.
    section_patterns = {
        "[SUMMARY]": r"SUMMARY:(.*?)QUALIFICATIONS:",
        "[QUALIFICATIONS]": r"QUALIFICATIONS:(.*?)EXPERIENCE:",
        "[EXPERIENCE_PLACEHOLDER]": r"EXPERIENCE:(.*?)TECHNICAL_SKILLS:",
        "[TECHNICAL_SKILLS]": r"TECHNICAL_SKILLS:(.*)",
    }
    replacements = {"[CANDIDATE_NAME]": candidate_name, "[ROLE]": prompt_text}
    missing = []
    for placeholder, pattern in section_patterns.items():
        match = re.search(pattern, result, re.DOTALL)
        if match is None:
            missing.append(placeholder)
        replacements[placeholder] = match.group(1).strip() if match else ""
    if missing:
        # Make an off-format LLM reply visible instead of silently saving blanks.
        print("Warning: sections not found in LLM output (left blank):", ", ".join(missing))

    # Fill every placeholder in one pass so inserted text is never re-scanned
    # for placeholders.
    placeholder_re = re.compile("|".join(re.escape(key) for key in replacements))
    final_resume = placeholder_re.sub(lambda m: replacements[m.group(0)], template)

    # Save customized resume
    output_path = os.path.join(output_dir, f"{candidate_name}_customised_resume{output_tag}.txt")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_resume)

    print("✅ Resume customized and saved to:", output_path)
    return output_path


In [14]:
# Free-text request that customise_resume turns into a job description.
frontend_prompt = "Create a JD for a retail IFRS9 model validation requirement. Mention that SAS is a must."
# NOTE(review): absolute, user-specific path; edit before running on another machine.
uploaded_resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\CV_Arnab Roy_Model_Validation.pdf"

# Last expression of the cell, so the returned output path is displayed as the cell result.
customise_resume(frontend_prompt, uploaded_resume_path)

Step 1: Generating JD from prompt → Create a JD for a retail IFRS9 model validation requirement. Mention that SAS is a must.
Generated JD:
  "Job Description: We are seeking a talented professional to join our team as an IFRS9 Model Validation Specialist. The successful candidate will be responsible for ensuring our models adhere to regulatory guidelines, particularly with regards to the new IFRS9 standard. To succeed in this role, the individual must have a strong background in financial modeling and a thorough understanding of SAS tools. This is an exciting opportunity to contribute to our organization's success while making a difference in the financial industry."
Step 2: Reading resume → C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\CV_Arnab Roy_Model_Validation.pdf
Step 3: Customizing resume using resume + generated JD
✅ Resume customized and saved to: C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes\CV_A

'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\CV_Arnab Roy_Model_Validation_customised_resumearnabifrs9.txt'

In [15]:
import os
import fitz  # PyMuPDF
import docx
import re
import json
import requests
import time

# ---- CONFIGURATION ----
# Text template containing the [CANDIDATE_NAME], [ROLE], [SUMMARY], ... placeholders
# that customise_resume fills in.
# NOTE(review): both paths are absolute and user-specific; edit before running elsewhere.
ey_template_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\EY_sample_resume_template.txt"
# Folder where customise_resume writes the generated .txt resumes.
output_dir = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes"
# Create the output folder up front; exist_ok keeps re-running this cell harmless.
os.makedirs(output_dir, exist_ok=True)

# ---- LM Studio CALL ----
def call_lmstudio(prompt, model="zephyr-7b-beta.Q4_K_M.gguf", retries=3):
    """Send a single-turn chat prompt to the local LM Studio server.

    Args:
        prompt: Text sent as the only user message.
        model: Model identifier passed in the request payload.
        retries: Maximum number of attempts; failed attempts are separated by
            a 2 second pause.

    Returns:
        The assistant message text, or "" when every attempt fails or when
        ``retries`` is less than 1, so callers can simply test the result for
        truthiness.
    """
    url = "http://localhost:1234/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "stream": False
    }
    last_error = None
    for attempt in range(retries):
        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=180)
            # Report HTTP failures with their status code instead of an opaque
            # KeyError on 'choices' further down.
            response.raise_for_status()
            content = response.json()['choices'][0]['message']['content']
            return content or ""
        except Exception as e:
            last_error = e
            # Pause only when another attempt will follow.
            if attempt < retries - 1:
                time.sleep(2)
    if last_error is not None:
        print("❌ LLM call failed:", last_error)
    # Always return a string, including when the loop body never ran.
    return ""

# ---- READ RESUME ----
def read_resume_text(file_path):
    """Return the text content of a resume file.

    Args:
        file_path: Path to a .pdf, .docx or plain-text file. The extension is
            compared case-insensitively; anything that is not .pdf/.docx is
            read as UTF-8 text.

    Returns:
        The extracted text: PDF pages joined by newlines, the non-blank DOCX
        paragraphs joined by newlines, or the raw file content.
    """
    # Lower-case once so "CV.PDF" / "CV.Docx" are not mistaken for plain text.
    lowered_path = file_path.lower()
    if lowered_path.endswith(".pdf"):
        # The context manager closes the PyMuPDF document once the text is read.
        with fitz.open(file_path) as doc:
            return "\n".join(page.get_text() for page in doc)
    if lowered_path.endswith(".docx"):
        doc = docx.Document(file_path)
        return "\n".join(p.text for p in doc.paragraphs if p.text.strip())
    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()

# ---- MAIN FUNCTION ----
def customise_resume(prompt_text, resume_path, output_tag="arnabifrs91"):
    """Generate a JD from a prompt, tailor a resume to it and save the filled EY template.

    Args:
        prompt_text: Free-text request sent to the LLM to generate the job
            description; also written verbatim into the template's [ROLE] slot.
        resume_path: Path to the candidate's resume (.pdf, .docx or plain text).
            The file name without its extension is used as the candidate name.
        output_tag: Text appended to "<candidate>_customised_resume" in the
            output file name, so separate runs can be told apart without
            redefining this function. Defaults to the previously hard-coded tag.

    Returns:
        Path of the saved .txt file, or None when JD generation or the
        customisation call returns nothing, or the resume has no extractable text.

    Raises:
        FileNotFoundError: If the EY template file does not exist.
    """
    candidate_name = os.path.splitext(os.path.basename(resume_path))[0]

    # Load the template before any LLM call so a wrong template path fails
    # immediately rather than after minutes of generation.
    with open(ey_template_path, "r", encoding="utf-8") as f:
        template = f.read()

    print(f"Step 1: Generating JD from prompt → {prompt_text}")
    generated_jd = call_lmstudio(f"Create a detailed job description based on this prompt:\n{prompt_text}")
    if not generated_jd:
        print("JD generation failed.")
        return

    # ✅ Print generated JD for reference
    print("Generated JD:\n", generated_jd)

    print(f"Step 2: Reading resume → {resume_path}")
    resume_text = read_resume_text(resume_path)
    if not resume_text.strip():
        # With nothing to ground on, the model could only invent content.
        print("No text could be extracted from the resume; nothing to customise.")
        return

    print("Step 3: Customizing resume using resume + generated JD")
    custom_prompt = f"""
You are a resume writer. Given the candidate's resume and the job description, rewrite the following sections to make the resume aligned with the JD. DO NOT INVENT DETAILS. Use only the content in the resume.

Resume:
{resume_text}

Job Description:
{generated_jd}

Provide the output in this format (with labels):
SUMMARY:
...

QUALIFICATIONS:
...

EXPERIENCE:
...

TECHNICAL_SKILLS:
...
"""

    result = call_lmstudio(custom_prompt)
    if not result:
        print("Customization failed.")
        return

    # Each section is the text between its label and the next expected label.
    section_patterns = {
        "[SUMMARY]": r"SUMMARY:(.*?)QUALIFICATIONS:",
        "[QUALIFICATIONS]": r"QUALIFICATIONS:(.*?)EXPERIENCE:",
        "[EXPERIENCE_PLACEHOLDER]": r"EXPERIENCE:(.*?)TECHNICAL_SKILLS:",
        "[TECHNICAL_SKILLS]": r"TECHNICAL_SKILLS:(.*)",
    }
    replacements = {"[CANDIDATE_NAME]": candidate_name, "[ROLE]": prompt_text}
    missing = []
    for placeholder, pattern in section_patterns.items():
        match = re.search(pattern, result, re.DOTALL)
        if match is None:
            missing.append(placeholder)
        replacements[placeholder] = match.group(1).strip() if match else ""
    if missing:
        # Make an off-format LLM reply visible instead of silently saving blanks.
        print("Warning: sections not found in LLM output (left blank):", ", ".join(missing))

    # Fill every placeholder in one pass so inserted text is never re-scanned
    # for placeholders.
    placeholder_re = re.compile("|".join(re.escape(key) for key in replacements))
    final_resume = placeholder_re.sub(lambda m: replacements[m.group(0)], template)

    # Save customized resume
    output_path = os.path.join(output_dir, f"{candidate_name}_customised_resume{output_tag}.txt")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_resume)

    print("✅ Resume customized and saved to:", output_path)
    return output_path


In [16]:
# Free-text request that customise_resume turns into a job description.
frontend_prompt = "Create a JD for a retail IFRS9 model validation requirement. Mention that SQL is a must."
# NOTE(review): absolute, user-specific path; edit before running on another machine.
uploaded_resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\CV_Arnab Roy_Model_Validation.pdf"

# Last expression of the cell, so the returned output path is displayed as the cell result.
customise_resume(frontend_prompt, uploaded_resume_path)

Step 1: Generating JD from prompt → Create a JD for a retail IFRS9 model validation requirement. Mention that SQL is a must.
Generated JD:
  "Job Title: IFRS9 Model Validation Specialist

Job Summary:
We are seeking an experienced and analytically-minded individual to join our team as an IFRS9 Model Validation Specialist. The candidate will be responsible for ensuring the accuracy of financial statements prepared in accordance with the International Financial Reporting Standards (IFRS) through the validation of models used in these calculations. The position requires advanced skills in SQL and strong problem-solving abilities, combined with a solid background in finance or accounting.

Key Responsibilities:
• Validation of models used in IFRS9 calculations, including review of documentation, input data, and output results.
• Identification of potential errors or inconsistencies and investigation of their root causes.
• Collaboration with cross-functional teams to ensure the accuracy an

'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\CV_Arnab Roy_Model_Validation_customised_resumearnabifrs91.txt'

In [None]:
# from sentence_transformers import SentenceTransformer, util

# # Load model (this downloads if not already present)
# model = SentenceTransformer('all-MiniLM-L6-v2')

# # Example JD and resume texts
# jd_text = "Looking for a data scientist with SQL, Python, and scorecard modeling experience."
# resume_text = "I have developed credit scorecard models and use SQL daily for querying and analysis."

# # Get embeddings
# jd_embedding = model.encode(jd_text, convert_to_tensor=True)
# resume_embedding = model.encode(resume_text, convert_to_tensor=True)

# # Compute cosine similarity
# similarity_score = util.cos_sim(jd_embedding, resume_embedding)

# print("Cosine Similarity:", similarity_score.item())  # Cosine similarity ranges from -1 to 1; values near 1 mean the texts are semantically close


ImportError: cannot import name 'cached_download' from 'huggingface_hub' (c:\Users\HN338QQ\AppData\Local\anaconda3\Lib\site-packages\huggingface_hub\__init__.py)

In [None]:
# from sentence_transformers import SentenceTransformer, util

# # Load model
# model = SentenceTransformer('all-MiniLM-L6-v2')

# # Sample JD and Resume snippet
# jd = "Looking for a candidate with experience in retail scorecard development and strong SQL skills."
# resume = "Developed retail banking scorecards and performed SQL-based data extraction for model monitoring."

# # Get embeddings
# embedding_jd = model.encode(jd, convert_to_tensor=True)
# embedding_resume = model.encode(resume, convert_to_tensor=True)

# # Calculate cosine similarity
# similarity_score = util.cos_sim(embedding_jd, embedding_resume).item()
# print(f"BERT Cosine Similarity Score: {similarity_score:.4f}")


ImportError: cannot import name 'cached_download' from 'huggingface_hub' (c:\Users\HN338QQ\AppData\Local\anaconda3\Lib\site-packages\huggingface_hub\__init__.py)

In [None]:
# import os
# import fitz  # PyMuPDF
# import docx
# from sentence_transformers import SentenceTransformer, util
# import shutil


ModuleNotFoundError: No module named 'sentence_transformers'

In [1]:
import os
import fitz  # PyMuPDF
import docx
import re
import json
import requests
import time
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# ---- CONFIGURATION ----
# Text template containing the [CANDIDATE_NAME], [ROLE], [SUMMARY], ... placeholders
# that customise_resume fills in.
# NOTE(review): both paths are absolute and user-specific; edit before running elsewhere.
ey_template_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\EY_sample_resume_template.txt"
# Folder where customise_resume writes the generated .txt resumes.
output_dir = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes"
# Create the output folder up front; exist_ok keeps re-running this cell harmless.
os.makedirs(output_dir, exist_ok=True)

# spaCy pipeline shared by keyword_coverage for tokenisation and stop-word flags.
nlp = spacy.load("en_core_web_sm")

# ---- LM Studio CALL ----
def call_lmstudio(prompt, model="zephyr-7b-beta.Q4_K_M.gguf", retries=3):
    """Send a single-turn chat prompt to the local LM Studio server.

    Args:
        prompt: Text sent as the only user message.
        model: Model identifier passed in the request payload.
        retries: Maximum number of attempts; failed attempts are separated by
            a 2 second pause.

    Returns:
        The assistant message text, or "" when every attempt fails or when
        ``retries`` is less than 1, so callers can simply test the result for
        truthiness.
    """
    url = "http://localhost:1234/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "stream": False
    }
    last_error = None
    for attempt in range(retries):
        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=180)
            # Report HTTP failures with their status code instead of an opaque
            # KeyError on 'choices' further down.
            response.raise_for_status()
            content = response.json()['choices'][0]['message']['content']
            return content or ""
        except Exception as e:
            last_error = e
            # Pause only when another attempt will follow.
            if attempt < retries - 1:
                time.sleep(2)
    if last_error is not None:
        print("LLM call failed:", last_error)
    # Always return a string, including when the loop body never ran.
    return ""

# ---- READ RESUME ----
def read_resume_text(file_path):
    """Return the text content of a resume file.

    Args:
        file_path: Path to a .pdf, .docx or plain-text file. The extension is
            compared case-insensitively; anything that is not .pdf/.docx is
            read as UTF-8 text.

    Returns:
        The extracted text: PDF pages joined by newlines, the non-blank DOCX
        paragraphs joined by newlines, or the raw file content.
    """
    # Lower-case once so "CV.PDF" / "CV.Docx" are not mistaken for plain text.
    lowered_path = file_path.lower()
    if lowered_path.endswith(".pdf"):
        # The context manager closes the PyMuPDF document once the text is read.
        with fitz.open(file_path) as doc:
            return "\n".join(page.get_text() for page in doc)
    if lowered_path.endswith(".docx"):
        doc = docx.Document(file_path)
        return "\n".join(p.text for p in doc.paragraphs if p.text.strip())
    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()

# ---- METRICS ----
def compute_tfidf_similarity(jd, resume):
    """Cosine similarity between the TF-IDF vectors of a JD and a resume.

    Args:
        jd: Job description text.
        resume: Resume text.

    Returns:
        The similarity as a plain float. 0.0 is returned when either text is
        blank or when no vocabulary can be built from the two texts.
    """
    # A blank document cannot be similar to anything; answer without fitting.
    if not jd.strip() or not resume.strip():
        return 0.0
    try:
        tfidf = TfidfVectorizer().fit_transform([jd, resume])
    except ValueError:
        # Raised for an empty vocabulary, e.g. when both texts hold only
        # punctuation or single characters.
        return 0.0
    return float(cosine_similarity(tfidf[0:1], tfidf[1:2])[0][0])

def keyword_coverage(jd, resume):
    """Measure how many JD keywords occur in the resume as whole words.

    Keywords are the distinct lower-cased alphabetic, non-stop-word tokens of
    the JD, as tokenised by the module-level spaCy pipeline ``nlp``.

    Args:
        jd: Job description text.
        resume: Resume text.

    Returns:
        A ``(ratio, matched)`` tuple: ``ratio`` is matched keywords divided by
        all JD keywords (0 when the JD yields no keywords) and ``matched`` is
        the alphabetically sorted list of matched keywords.
    """
    # Map each JD keyword to its lemma so "models" can still match "model".
    jd_keywords = {
        token.text.lower(): token.lemma_.lower()
        for token in nlp(jd)
        if token.is_alpha and not token.is_stop
    }
    # Collect the resume vocabulary as whole tokens (surface form and lemma).
    # A raw substring test would count short keywords such as "r" or "sas"
    # inside unrelated words like "disaster".
    resume_terms = set()
    for token in nlp(resume):
        resume_terms.add(token.text.lower())
        if token.lemma_:
            resume_terms.add(token.lemma_.lower())
    # Sorted so the reported list is stable between runs.
    matched = sorted(
        keyword
        for keyword, lemma in jd_keywords.items()
        if keyword in resume_terms or (lemma and lemma in resume_terms)
    )
    ratio = len(matched) / len(jd_keywords) if jd_keywords else 0
    return ratio, matched

def qualitative_relevance(jd, resume):
    """Share of distinct JD words that also occur in the resume.

    Both texts are lower-cased and split on whitespace; the result is the
    number of shared distinct words divided by the number of distinct JD
    words, or 0 when the JD contains no words.
    """
    jd_vocab = {word for word in jd.lower().split()}
    resume_vocab = {word for word in resume.lower().split()}
    if not jd_vocab:
        return 0
    shared = jd_vocab & resume_vocab
    return len(shared) / len(jd_vocab)

def section_audit(text):
    """List which section keywords occur anywhere in the text.

    The check is a case-insensitive substring search; keywords are returned
    in their fixed order: summary, qualification, experience, technical, skill.
    """
    haystack = text.lower()
    found = []
    for keyword in ("summary", "qualification", "experience", "technical", "skill"):
        if keyword in haystack:
            found.append(keyword)
    return found

# ---- MAIN FUNCTION ----
def customise_resume(prompt_text, resume_path, output_tag=""):
    """Generate a JD, tailor a resume to it, save the filled EY template and print match metrics.

    Args:
        prompt_text: Free-text request sent to the LLM to generate the job
            description; also written verbatim into the template's [ROLE] slot.
        resume_path: Path to the candidate's resume (.pdf, .docx or plain text).
            The file name without its extension is used as the candidate name.
        output_tag: Optional text appended to "<candidate>_customised_resume"
            in the output file name, so separate runs can be told apart
            without redefining this function.

    Returns:
        Path of the saved .txt file, or None when JD generation or the
        customisation call returns nothing, or the resume has no extractable text.

    Raises:
        FileNotFoundError: If the EY template file does not exist.
    """
    candidate_name = os.path.splitext(os.path.basename(resume_path))[0]

    # Load the template before any LLM call so a wrong template path fails
    # immediately rather than after minutes of generation.
    with open(ey_template_path, "r", encoding="utf-8") as f:
        template = f.read()

    print(f"Step 1: Generating JD with only Responsibilities, Requirements, Preferred Skills for prompt → {prompt_text}")
    jd_prompt = f"""Create a structured job description based only on the following three sections:
1. Responsibilities
2. Requirements
3. Preferred Skills

Prompt:
{prompt_text}
"""
    generated_jd = call_lmstudio(jd_prompt)
    if not generated_jd:
        print("JD generation failed.")
        return

    print("\nGenerated JD:\n", generated_jd)

    print(f"\nStep 2: Reading resume → {resume_path}")
    resume_text = read_resume_text(resume_path)
    if not resume_text.strip():
        # With nothing to ground on, the model could only invent content.
        print("No text could be extracted from the resume; nothing to customise.")
        return

    print("\nStep 3: Customizing resume using resume + generated JD")
    custom_prompt = f"""
You are a resume writer. Given the candidate's resume and the job description, rewrite the following sections to make the resume aligned with the JD. DO NOT INVENT DETAILS. Use only the content in the resume.

Resume:
{resume_text}

Job Description:
{generated_jd}

Provide the output in this format (with labels):
SUMMARY:
...

QUALIFICATIONS:
...

EXPERIENCE:
...

TECHNICAL_SKILLS:
...
"""

    result = call_lmstudio(custom_prompt)
    if not result:
        print("Customization failed.")
        return

    print("\n=== Raw Output from LLM ===\n")
    print(result)

    # Each section is the text between its label and the next expected label.
    section_patterns = {
        "[SUMMARY]": r"SUMMARY:(.*?)QUALIFICATIONS:",
        "[QUALIFICATIONS]": r"QUALIFICATIONS:(.*?)EXPERIENCE:",
        "[EXPERIENCE_PLACEHOLDER]": r"EXPERIENCE:(.*?)TECHNICAL_SKILLS:",
        "[TECHNICAL_SKILLS]": r"TECHNICAL_SKILLS:(.*)",
    }
    replacements = {"[CANDIDATE_NAME]": candidate_name, "[ROLE]": prompt_text}
    missing = []
    for placeholder, pattern in section_patterns.items():
        match = re.search(pattern, result, re.DOTALL | re.IGNORECASE)
        if match is None:
            missing.append(placeholder)
        replacements[placeholder] = match.group(1).strip() if match else ""
    if missing:
        # Make an off-format LLM reply visible instead of silently saving blanks.
        print("Warning: sections not found in LLM output (left blank):", ", ".join(missing))

    # Fill every placeholder in one pass so inserted text is never re-scanned
    # for placeholders.
    placeholder_re = re.compile("|".join(re.escape(key) for key in replacements))
    final_resume = placeholder_re.sub(lambda m: replacements[m.group(0)], template)

    # Save customized resume
    output_path = os.path.join(output_dir, f"{candidate_name}_customised_resume{output_tag}.txt")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_resume)

    print("✅ Resume customized and saved to:", output_path)

    # ---- ACCURACY METRICS ----
    # All metrics compare the generated JD with the final, template-filled resume.
    tfidf_score = compute_tfidf_similarity(generated_jd, final_resume)
    keyword_match_ratio, matched_keywords = keyword_coverage(generated_jd, final_resume)
    qualitative_score = qualitative_relevance(generated_jd, final_resume)
    section_flags = section_audit(final_resume)

    print("\nAccuracy Metrics:")
    print(f"TF-IDF Similarity:            {tfidf_score:.2f}")
    print(f"Keyword Coverage Ratio:       {keyword_match_ratio:.2f}")
    print(f"Qualitative Relevance:        {qualitative_score:.2f}")
    print(f"Matched Keywords:             {matched_keywords}")
    print(f"Section Presence Flags:       {section_flags}\n")

    return output_path

# ---- RUN EXAMPLE ----
# Free-text request that customise_resume turns into a job description.
prompt = "Create a JD for a retail Scorecards model development requirement. Mention that SQL is a must."
# NOTE(review): absolute, user-specific path; edit before running on another machine.
resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\Nidhika-Tomar.pdf"

# Last expression of the cell, so the returned output path is displayed as the cell result.
customise_resume(prompt, resume_path)


Step 1: Generating JD with only Responsibilities, Requirements, Preferred Skills for prompt → Create a JD for a retail Scorecards model development requirement. Mention that SQL is a must.

Generated JD:
 
Job Description - Retail Scorecards Model Development

Position Summary:
We are seeking an experienced developer with a strong background in data science and modeling to join our team in developing scorecards for the retail industry. This role involves analyzing sales, inventory, and customer data to create predictive models that will help our clients make informed business decisions. The ideal candidate should have experience working with SQL databases and be comfortable using Python or R for data analysis and visualization.

Responsibilities:
- Collaborate with sales and marketing teams to gather required data and insights
- Develop scorecards using SQL databases and Python/R for data science
- Create visualizations and presentations to communicate results to clients
- Stay up-to-d

'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\Nidhika-Tomar_customised_resume.txt'

In [None]:
# # ---- USAGE ----
# frontend_prompt = "Create a JD for a retail Scorecards model development requirement. Mention that SQL is a must."
# uploaded_resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\Nidhika-Tomar.pdf"

# customise_resume(frontend_prompt, uploaded_resume_path)


🔹 Step 1: Generating JD from prompt → Create a JD for a retail Scorecards model development requirement. Mention that SQL is a must.
🔹 Generated JD:
  "Job Title: Retail Scorecards Model Development Specialist

Job Summary/Description:
We are looking to hire a Retail Scorecards Model Development Specialist who will utilize his/her expertise in SQL to develop and optimize retail scorecards for our organization. The successful candidate will have the opportunity to work with a dynamic team and contribute to the continued growth of our business.

Key Responsibilities:
• Develop and maintain retail scorecards using SQL.
• Analyze sales data to identify trends and provide recommendations based on findings.
• Collaborate with cross-functional teams including merchandising, marketing, and operations to ensure successful implementation of scorecard results.
• Conduct regular reviews of scorecard performance and make necessary adjustments to optimize results.
• Maintain confidentiality of sens

FileNotFoundError: [Errno 2] No such file or directory: 'path\\to\\EY_sample_resume_template.txt'

In [None]:
# print("\n📊 Accuracy Metrics:")
# print(f"TF-IDF Similarity:{tfidf_score:.2f}")
# print(f"Keyword Coverage Ratio:    {keyword_match_ratio:.2f}")
# print(f"Qualitative Relevance %:   {qualitative_rate:.2f}")
# print(f"Matched Keywords:          {matched_keywords}")
# print(f"Section Presence Flags:    {section_flags}")


In [2]:
import os
import fitz  # PyMuPDF
import docx
import re
import json
import requests
import time
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# ---- CONFIGURATION ----
# Text template containing the [CANDIDATE_NAME], [ROLE], [SUMMARY], ... placeholders
# that customise_resume fills in.
# NOTE(review): both paths are absolute and user-specific; edit before running elsewhere.
ey_template_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\EY_sample_resume_template.txt"
# Folder where customise_resume writes the generated .txt resumes.
output_dir = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\final_customised_resumes"
# Create the output folder up front; exist_ok keeps re-running this cell harmless.
os.makedirs(output_dir, exist_ok=True)

# spaCy pipeline shared by keyword_coverage for tokenisation and stop-word flags.
nlp = spacy.load("en_core_web_sm")

# ---- LM Studio CALL ----
def call_lmstudio(prompt, model="zephyr-7b-beta.Q4_K_M.gguf", retries=3):
    """Send a single-turn chat prompt to the local LM Studio server.

    Args:
        prompt: Text sent as the only user message.
        model: Model identifier passed in the request payload.
        retries: Maximum number of attempts; failed attempts are separated by
            a 2 second pause.

    Returns:
        The assistant message text, or "" when every attempt fails or when
        ``retries`` is less than 1, so callers can simply test the result for
        truthiness.
    """
    url = "http://localhost:1234/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "stream": False
    }
    last_error = None
    for attempt in range(retries):
        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=180)
            # Report HTTP failures with their status code instead of an opaque
            # KeyError on 'choices' further down.
            response.raise_for_status()
            content = response.json()['choices'][0]['message']['content']
            return content or ""
        except Exception as e:
            last_error = e
            # Pause only when another attempt will follow.
            if attempt < retries - 1:
                time.sleep(2)
    if last_error is not None:
        print("LLM call failed:", last_error)
    # Always return a string, including when the loop body never ran.
    return ""

# ---- READ RESUME ----
def read_resume_text(file_path):
    """Return the text content of a resume file.

    Args:
        file_path: Path to a .pdf, .docx or plain-text file. The extension is
            compared case-insensitively; anything that is not .pdf/.docx is
            read as UTF-8 text.

    Returns:
        The extracted text: PDF pages joined by newlines, the non-blank DOCX
        paragraphs joined by newlines, or the raw file content.
    """
    # Lower-case once so "CV.PDF" / "CV.Docx" are not mistaken for plain text.
    lowered_path = file_path.lower()
    if lowered_path.endswith(".pdf"):
        # The context manager closes the PyMuPDF document once the text is read.
        with fitz.open(file_path) as doc:
            return "\n".join(page.get_text() for page in doc)
    if lowered_path.endswith(".docx"):
        doc = docx.Document(file_path)
        return "\n".join(p.text for p in doc.paragraphs if p.text.strip())
    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()

# ---- METRICS ----
def compute_tfidf_similarity(jd, resume):
    """Cosine similarity between the TF-IDF vectors of a JD and a resume.

    Args:
        jd: Job description text.
        resume: Resume text.

    Returns:
        The similarity as a plain float. 0.0 is returned when either text is
        blank or when no vocabulary can be built from the two texts.
    """
    # A blank document cannot be similar to anything; answer without fitting.
    if not jd.strip() or not resume.strip():
        return 0.0
    try:
        tfidf = TfidfVectorizer().fit_transform([jd, resume])
    except ValueError:
        # Raised for an empty vocabulary, e.g. when both texts hold only
        # punctuation or single characters.
        return 0.0
    return float(cosine_similarity(tfidf[0:1], tfidf[1:2])[0][0])

def keyword_coverage(jd, resume):
    """Measure how many JD keywords occur in the resume as whole words.

    Keywords are the distinct lower-cased alphabetic, non-stop-word tokens of
    the JD, as tokenised by the module-level spaCy pipeline ``nlp``.

    Args:
        jd: Job description text.
        resume: Resume text.

    Returns:
        A ``(ratio, matched)`` tuple: ``ratio`` is matched keywords divided by
        all JD keywords (0 when the JD yields no keywords) and ``matched`` is
        the alphabetically sorted list of matched keywords.
    """
    # Map each JD keyword to its lemma so "models" can still match "model".
    jd_keywords = {
        token.text.lower(): token.lemma_.lower()
        for token in nlp(jd)
        if token.is_alpha and not token.is_stop
    }
    # Collect the resume vocabulary as whole tokens (surface form and lemma).
    # A raw substring test would count short keywords such as "r" or "sas"
    # inside unrelated words like "disaster".
    resume_terms = set()
    for token in nlp(resume):
        resume_terms.add(token.text.lower())
        if token.lemma_:
            resume_terms.add(token.lemma_.lower())
    # Sorted so the reported list is stable between runs.
    matched = sorted(
        keyword
        for keyword, lemma in jd_keywords.items()
        if keyword in resume_terms or (lemma and lemma in resume_terms)
    )
    ratio = len(matched) / len(jd_keywords) if jd_keywords else 0
    return ratio, matched

def qualitative_relevance(jd, resume):
    """Share of distinct JD words that also occur in the resume.

    Both texts are lower-cased and split on whitespace; the result is the
    number of shared distinct words divided by the number of distinct JD
    words, or 0 when the JD contains no words.
    """
    jd_vocab = {word for word in jd.lower().split()}
    resume_vocab = {word for word in resume.lower().split()}
    if not jd_vocab:
        return 0
    shared = jd_vocab & resume_vocab
    return len(shared) / len(jd_vocab)

def section_audit(text):
    """List which section keywords occur anywhere in the text.

    The check is a case-insensitive substring search; keywords are returned
    in their fixed order: summary, qualification, experience, technical, skill.
    """
    haystack = text.lower()
    found = []
    for keyword in ("summary", "qualification", "experience", "technical", "skill"):
        if keyword in haystack:
            found.append(keyword)
    return found

# ---- MAIN FUNCTION ----
def customise_resume(prompt_text, resume_path, output_tag="rajviifrs9"):
    """Generate a JD, tailor a resume to it, save the filled EY template and print match metrics.

    Args:
        prompt_text: Free-text request sent to the LLM to generate the job
            description; also written verbatim into the template's [ROLE] slot.
        resume_path: Path to the candidate's resume (.pdf, .docx or plain text).
            The file name without its extension is used as the candidate name.
        output_tag: Text appended to "<candidate>_customised_resume" in the
            output file name, so separate runs can be told apart without
            redefining this function. Defaults to the previously hard-coded tag.

    Returns:
        Path of the saved .txt file, or None when JD generation or the
        customisation call returns nothing, or the resume has no extractable text.

    Raises:
        FileNotFoundError: If the EY template file does not exist.
    """
    candidate_name = os.path.splitext(os.path.basename(resume_path))[0]

    # Load the template before any LLM call so a wrong template path fails
    # immediately rather than after minutes of generation.
    with open(ey_template_path, "r", encoding="utf-8") as f:
        template = f.read()

    print(f"Step 1: Generating JD with only Responsibilities, Requirements, Preferred Skills for prompt → {prompt_text}")
    jd_prompt = f"""Create a structured job description based only on the following three sections:
1. Responsibilities
2. Requirements
3. Preferred Skills

Prompt:
{prompt_text}
"""
    generated_jd = call_lmstudio(jd_prompt)
    if not generated_jd:
        print("JD generation failed.")
        return

    print("\nGenerated JD:\n", generated_jd)

    print(f"\nStep 2: Reading resume → {resume_path}")
    resume_text = read_resume_text(resume_path)
    if not resume_text.strip():
        # With nothing to ground on, the model could only invent content.
        print("No text could be extracted from the resume; nothing to customise.")
        return

    print("\nStep 3: Customizing resume using resume + generated JD")
    custom_prompt = f"""
You are a resume writer. Given the candidate's resume and the job description, rewrite the following sections to make the resume aligned with the JD. DO NOT INVENT DETAILS. Use only the content in the resume.

Resume:
{resume_text}

Job Description:
{generated_jd}

Provide the output in this format (with labels):
SUMMARY:
...

QUALIFICATIONS:
...

EXPERIENCE:
...

TECHNICAL_SKILLS:
...
"""

    result = call_lmstudio(custom_prompt)
    if not result:
        print("Customization failed.")
        return

    print("\n=== Raw Output from LLM ===\n")
    print(result)

    # Each section is the text between its label and the next expected label.
    section_patterns = {
        "[SUMMARY]": r"SUMMARY:(.*?)QUALIFICATIONS:",
        "[QUALIFICATIONS]": r"QUALIFICATIONS:(.*?)EXPERIENCE:",
        "[EXPERIENCE_PLACEHOLDER]": r"EXPERIENCE:(.*?)TECHNICAL_SKILLS:",
        "[TECHNICAL_SKILLS]": r"TECHNICAL_SKILLS:(.*)",
    }
    replacements = {"[CANDIDATE_NAME]": candidate_name, "[ROLE]": prompt_text}
    missing = []
    for placeholder, pattern in section_patterns.items():
        match = re.search(pattern, result, re.DOTALL | re.IGNORECASE)
        if match is None:
            missing.append(placeholder)
        replacements[placeholder] = match.group(1).strip() if match else ""
    if missing:
        # Make an off-format LLM reply visible instead of silently saving blanks.
        print("Warning: sections not found in LLM output (left blank):", ", ".join(missing))

    # Fill every placeholder in one pass so inserted text is never re-scanned
    # for placeholders.
    placeholder_re = re.compile("|".join(re.escape(key) for key in replacements))
    final_resume = placeholder_re.sub(lambda m: replacements[m.group(0)], template)

    # Save customized resume
    output_path = os.path.join(output_dir, f"{candidate_name}_customised_resume{output_tag}.txt")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_resume)

    print("✅ Resume customized and saved to:", output_path)

    # ---- ACCURACY METRICS ----
    # All metrics compare the generated JD with the final, template-filled resume.
    tfidf_score = compute_tfidf_similarity(generated_jd, final_resume)
    keyword_match_ratio, matched_keywords = keyword_coverage(generated_jd, final_resume)
    qualitative_score = qualitative_relevance(generated_jd, final_resume)
    section_flags = section_audit(final_resume)

    print("\nAccuracy Metrics:")
    print(f"TF-IDF Similarity:            {tfidf_score:.2f}")
    print(f"Keyword Coverage Ratio:       {keyword_match_ratio:.2f}")
    print(f"Qualitative Relevance:        {qualitative_score:.2f}")
    print(f"Matched Keywords:             {matched_keywords}")
    print(f"Section Presence Flags:       {section_flags}\n")

    return output_path

# ---- RUN EXAMPLE ----
# Free-text request that customise_resume turns into a job description.
prompt = "Create a JD for a retail IFRS9 model validation requirement. Mention that SAS is a must."
# NOTE(review): absolute, user-specific path; edit before running on another machine.
resume_path = r"C:\Users\HN338QQ\OneDrive - EY\Documents\raw and ey curated samples\Rawresumes\Rajvi Doshi.pdf"

# Last expression of the cell, so the returned output path is displayed as the cell result.
customise_resume(prompt, resume_path)


Step 1: Generating JD with only Responsibilities, Requirements, Preferred Skills for prompt → Create a JD for a retail IFRS9 model validation requirement. Mention that SAS is a must.

Generated JD:
 
Job Title: Model Validation Specialist - IFRS9

Job Summary:
We are seeking a skilled Model Validation Specialist with expertise in IFRS9 and strong knowledge of SAS to join our team. In this role, you will be responsible for ensuring the accuracy and reliability of our IFRS9 models while maintaining compliance with regulatory requirements. This is an exciting opportunity to work in a dynamic and fast-paced environment where your skills and expertise will make a significant contribution to the success of the company.

Responsibilities:
• Conduct regular validation tests of IFRS9 models using SAS to ensure accuracy, reliability, and compliance with regulatory requirements.
• Prepare detailed documentation and reports on validation results for review by internal stakeholders and regulators.


'C:\\Users\\HN338QQ\\OneDrive - EY\\Documents\\raw and ey curated samples\\final_customised_resumes\\Rajvi Doshi_customised_resumerajviifrs9.txt'