# 

In [4]:
from flask import Flask, render_template, request
import os
import socket
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

from utils.split_by_bookmark import split_pdf_by_bookmarks
from utils.extract_text import extract_text_and_info
from utils.semantic_matcher import match_skills
from utils.generate_report import save_results_to_excel, prepare_result_table
from utils.jd_reader import extract_text_from_jd

# Working directories: uploads are stored in UPLOAD_FOLDER, and the resume
# PDF is split into REPORT_FOLDER. Both are created up front so request
# handlers can save files without checking for them.
UPLOAD_FOLDER = "uploads"
REPORT_FOLDER = "static/reports"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
os.makedirs(REPORT_FOLDER, exist_ok=True)

# Spreadsheet written by /analyze and read back by /results.
EXCEL_REPORT = os.path.join(UPLOAD_FOLDER, "resume_report.xlsx")

# The Flask application object that the route decorators below attach to.
app = Flask(__name__)

def get_free_port():
    """Return a TCP port number that was free at the time of the call.

    Binds a temporary socket to port 0 so the operating system picks an
    unused port, reads the chosen port back, and releases the socket.

    Returns:
        int: The port number the OS assigned.

    Note:
        The socket is closed before returning, so another process could in
        principle claim the port before the caller binds to it.
    """
    # The context manager closes the socket even if bind()/getsockname()
    # raises, so no descriptor is leaked on the error path.
    with socket.socket() as probe:
        probe.bind(('', 0))
        return probe.getsockname()[1]

@app.route('/')
def index():
    """Serve the landing page by rendering the ``index.html`` template."""
    landing_page = render_template('index.html')
    return landing_page

def _parse_skills(raw):
    """Split a comma-separated skills string into a list of clean names.

    Surrounding whitespace is stripped and empty entries (from a blank field
    or a trailing comma) are dropped, so ``"python, sql,"`` becomes
    ``["python", "sql"]`` and ``""`` becomes ``[]``.
    """
    return [skill.strip() for skill in raw.split(',') if skill.strip()]


def _top_missing_keywords(jd_text, resume_text, limit=30):
    """Return JD keywords absent from the resume as a comma-separated string.

    A TF-IDF vocabulary is fitted on the two texts; a term counts as missing
    when it has a non-zero weight in the JD and a zero weight in the resume.
    Missing terms are ranked by their JD weight (highest first, ties broken
    alphabetically) and at most ``limit`` of them are returned. The string
    ``"None"`` is returned when nothing is missing.
    """
    tfidf = TfidfVectorizer(stop_words='english', max_features=1000)
    try:
        # The matrix is at most 2 x 1000, so densifying it is cheap.
        weights = tfidf.fit_transform([jd_text, resume_text]).toarray()
    except ValueError:
        # scikit-learn raises ValueError when no vocabulary can be built
        # (e.g. both texts are empty or contain only stop words). There is
        # nothing to compare, so report no missing keywords rather than
        # failing the whole batch.
        return "None"

    features = tfidf.get_feature_names_out()
    jd_weights, resume_weights = weights[0], weights[1]
    missing = [
        i for i in range(len(features))
        if jd_weights[i] > 0 and resume_weights[i] == 0
    ]
    if not missing:
        return "None"
    # Rank explicitly so the keywords shown really are the most important
    # ones for the JD instead of an arbitrary subset.
    missing.sort(key=lambda i: (-jd_weights[i], features[i]))
    return ", ".join(features[i] for i in missing[:limit])


@app.route('/analyze', methods=['POST'])
def analyze():
    """Handle the upload form: score every resume against the JD and skills.

    Reads the ``jd_file`` and ``resume_pdf`` uploads and the
    ``mandatory_skills`` / ``optional_skills`` form fields, saves both files
    under ``UPLOAD_FOLDER``, extracts the JD text, splits the resume PDF into
    individual resumes under ``REPORT_FOLDER``, and builds one result row per
    resume. The rows are saved to ``EXCEL_REPORT`` and rendered with the
    ``results.html`` template.

    Returns:
        The rendered results page, or a short HTML error message when the JD
        file is empty, the JD cannot be read, or the resume PDF cannot be
        split.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    jd_file = request.files['jd_file']
    resume_pdf = request.files['resume_pdf']
    mandatory_skills = _parse_skills(request.form['mandatory_skills'])
    optional_skills = _parse_skills(request.form['optional_skills'])

    # Save files
    # NOTE: fixed file names mean each request overwrites the previous upload.
    jd_ext = os.path.splitext(jd_file.filename)[1].lower()
    jd_filename = f"jd{jd_ext}"
    jd_path = os.path.join(UPLOAD_FOLDER, jd_filename)
    resume_path = os.path.join(UPLOAD_FOLDER, "resumes.pdf")
    jd_file.save(jd_path)
    resume_pdf.save(resume_path)

    if os.path.getsize(jd_path) == 0:
        return "<h3 style='color:red'>❌ JD file is empty or corrupted. Please upload a valid PDF or Word file.</h3>"

    # Exception text may echo content from the uploaded files, so it is
    # HTML-escaped before being embedded in the response.
    try:
        jd_text = extract_text_from_jd(jd_path)
    except Exception as e:
        return f"<h3 style='color:red'>❌ Failed to read JD file: {escape(str(e))}</h3>"

    try:
        resumes = split_pdf_by_bookmarks(resume_path, REPORT_FOLDER)
    except Exception as e:
        return f"<h3 style='color:red'>❌ Failed to split resume PDF: {escape(str(e))}</h3>"

    results = []
    for r in resumes:
        extracted = extract_text_and_info(r['filepath'])
        skill_match = match_skills(extracted['text'], mandatory_skills, optional_skills)

        # JD keyword gap via TF-IDF
        top_missing = _top_missing_keywords(jd_text, extracted['text'])

        results.append({
            'name': r['name'],
            'email': extracted['email'],
            'phone': extracted['phone'],
            'score': skill_match['score'],
            'missing_must': skill_match['missing_mandatory'],
            'missing_opt': skill_match['missing_optional'],
            'missing_jd': top_missing,
            'filename': r['filename']
        })

    save_results_to_excel(results, EXCEL_REPORT)
    return render_template('results.html', results=prepare_result_table(results))

@app.route('/results')
def results():
    """Show the last saved report, or a plain-text notice if none exists.

    Loads ``EXCEL_REPORT`` with pandas, substitutes ``"N/A"`` for missing
    cells, and passes the rows to the ``results.html`` template as a list of
    dictionaries.
    """
    if os.path.exists(EXCEL_REPORT):
        report = pd.read_excel(EXCEL_REPORT).fillna("N/A")
        rows = report.to_dict(orient='records')
        return render_template('results.html', results=rows)
    return "No report available. Please upload files first."

# For Jupyter Notebook Launcher
def run_app_in_notebook():
    """Start the Flask app on a free local port in a background thread.

    Applies ``nest_asyncio``, picks a port with ``get_free_port()``, prints
    the URL, and runs ``app.run`` in a new thread with the debugger and
    reloader disabled, so the call returns immediately.
    """
    import nest_asyncio
    import threading

    nest_asyncio.apply()
    port = get_free_port()
    url = f"http://127.0.0.1:{port}"
    try:
        print(f"✅ Flask app running at: {url}")
    except UnicodeEncodeError:
        # Consoles using a legacy code page (e.g. cp1252 on Windows) cannot
        # encode the check-mark character; fall back to a plain ASCII message
        # instead of aborting the launch.
        print(f"Flask app running at: {url}")

    def run():
        app.run(port=port, debug=False, use_reloader=False)

    threading.Thread(target=run).start()


python app.py

In [3]:
!python app.py

Traceback (most recent call last):
  File "C:\Users\ZY633CY\Anaconda\resume_parser_new - Multiple resumes - Copy\app.py", line 105, in <module>
    print(f"\u2705 App running at http://127.0.0.1:{port}")
  File "C:\Users\ZY633CY\AppData\Local\anaconda3\Lib\encodings\cp1252.py", line 19, in encode
    return codecs.charmap_encode(input,self.errors,encoding_table)[0]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
UnicodeEncodeError: 'charmap' codec can't encode character '\u2705' in position 0: character maps to <undefined>
