In [3]:

import os
import pickle
import fitz  # PyMuPDF
from transformers import AutoTokenizer, pipeline, AutoModelForSeq2SeqLM
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# -------------------------
# PDF text extraction
# -------------------------
def extract_text_from_pdf(pdf_path) -> str:
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Location of the PDF; passed unchanged to ``fitz.open``.

    Returns:
        The text of all pages joined with no separator (an empty string
        when the document has no pages).
    """
    # Opening the document as a context manager guarantees it is closed
    # (releasing the underlying file handle) even if extraction raises.
    with fitz.open(pdf_path) as doc:
        # Join once instead of growing a string page by page.
        return "".join(page.get_text() for page in doc)

# -------------------------
# Prompt template
# -------------------------
# Prompt text for the resume-vs-job-description comparison.  ``{resume}`` and
# ``{jd}`` are the two placeholders in the text.  The leading and trailing
# newlines are part of the string and are spelled out explicitly here.
template = (
    "\n"
    "You are an expert recruiter. Compare the following resume with the job description.\n"
    "\n"
    "Resume:\n"
    "{resume}\n"
    "\n"
    "Job Description:\n"
    "{jd}\n"
    "\n"
    "Answer:\n"
    "1. Match percentage (0-100)?\n"
    "2. List top matched skills.\n"
    "3. List missing/weak skills.\n"
    "4. Is this candidate a good fit? Why?\n"
)

# Wrap the template text in a PromptTemplate, declaring its two placeholders
# ("resume" and "jd") as the input variables.
prompt = PromptTemplate(template=template, input_variables=["resume", "jd"])

# -------------------------
# Model and generation pipeline
# -------------------------
# Checkpoint identifier handed to both ``from_pretrained`` calls below.
model_name = "google/flan-t5-base"

# Tokenizer and seq2seq model are loaded from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Text-to-text generation pipeline built from the objects loaded above.
# NOTE(review): max_length=512 presumably caps the generated output length,
# not the input length -- confirm against the transformers documentation.
pipe = pipeline(
    task="text2text-generation",
    tokenizer=tokenizer,
    model=model,
    max_length=512,
)

# Wrap the Hugging Face pipeline so it can be passed to LangChain as an LLM.
llm = HuggingFacePipeline(pipeline=pipe)

# -------------------------
# LLM chain
# -------------------------
# Combine the prompt and the wrapped pipeline into a single chain object.
chain = LLMChain(prompt=prompt, llm=llm)

# -------------------------
# Save the chain to disk
# -------------------------
# Serialize the whole chain object to resume_chain.pkl in binary mode.
# NOTE: unpickling can execute arbitrary code, so this file should only ever
# be loaded from a trusted location.
with open("resume_chain.pkl", mode="wb") as f:
    pickle.dump(chain, file=f)

# Confirmation message once the file has been written.
print("✅ Model saved to resume_chain.pkl")


Device set to use cpu


✅ Model saved to resume_chain.pkl
