In [2]:
import fitz  # PyMuPDF
import joblib
import pandas as pd

# Load the persisted classifier and its matching vectorizer once, at module
# level, so predict_job_role() can reuse them for every call.
# Both paths are relative and therefore resolved against the current working
# directory.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load artifacts that come from a trusted source.
best_svm = joblib.load("svm_model.pkl")
vectorizer = joblib.load("tfidf_vectorizer.pkl")

# Extract text from a PDF file
def extract_text_from_pdf(pdf_path):
    """Return the plain text of every page in a PDF, joined by single spaces.

    Args:
        pdf_path: Path of the PDF to read; passed straight to ``fitz.open``.

    Returns:
        The concatenated page text with leading and trailing whitespace
        removed, or ``""`` if the document could not be opened or read.
    """
    try:
        # The context manager closes the document (and its underlying file
        # handle) even when text extraction fails part-way through.
        with fitz.open(pdf_path) as doc:
            return " ".join(page.get_text("text") for page in doc).strip()
    except Exception as e:
        # Deliberate best-effort: report the problem and let the caller treat
        # an empty string as "no text available".
        print(f"Error extracting text: {e}")
        return ""

# Predict job role
def predict_job_role(pdf_path):
    """Predict the job category for the resume stored in a PDF file.

    Args:
        pdf_path: Path of the resume PDF.

    Returns:
        The first element of ``best_svm.predict`` for the resume, or the
        string ``"Error: Could not extract text."`` when no text could be
        extracted from the PDF.
    """
    resume_text = extract_text_from_pdf(pdf_path)
    if not resume_text:
        return "Error: Could not extract text."

    # transform() always produces exactly the vectorizer's fitted features in
    # fitted order, so the frame built below needs no further column
    # alignment or reordering.
    feature_names = vectorizer.get_feature_names_out()
    resume_vector = vectorizer.transform([resume_text])
    # Keep the dense, named-column DataFrame as the model input, as before.
    resume_vector_df = pd.DataFrame(resume_vector.toarray(), columns=feature_names)

    return best_svm.predict(resume_vector_df)[0]

# Example usage: classify one sample resume and print the result.
# The guard keeps this from running when the file is imported as a module.
if __name__ == "__main__":
    # Relative path, resolved against the current working directory.
    pdf_path = "Resume-test.pdf"
    # predict_job_role returns an error string (it does not raise) when no
    # text can be extracted, so that message would be printed after the
    # "Predicted Job Role:" prefix.
    predicted_job = predict_job_role(pdf_path)
    print(f"Predicted Job Role: {predicted_job}")


Predicted Job Role: INFORMATION-TECHNOLOGY
