In [1]:
# Install necessary libraries
!pip install Flask PyPDF2 docx2txt scikit-learn numpy pyngrok

# Create the required folder structure for Flask
!mkdir -p templates
!mkdir -p uploads

print("Installation and directory setup ")

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting docx2txt
  Downloading docx2txt-0.9-py3-none-any.whl.metadata (529 bytes)
Collecting pyngrok
  Downloading pyngrok-7.4.1-py3-none-any.whl.metadata (8.1 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 232.6/232.6 kB 17.6 MB/s eta 0:00:00
Downloading docx2txt-0.9-py3-none-any.whl (4.0 kB)
Downloading pyngrok-7.4.1-py3-none-any.whl (25 kB)
Installing collected packages: docx2txt, PyPDF2, pyngrok
Successfully installed PyPDF2-3.0.1 docx2txt-0.9 pyngrok-7.4.1
Installation and directory setup 


In [2]:
%%writefile app.py
import os
import re
from flask import Flask, request, render_template
import PyPDF2
import docx2txt
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Flask application object; uploaded resumes are saved under UPLOAD_FOLDER.
app = Flask(__name__)
app.config.update(UPLOAD_FOLDER='uploads/')

# --- Skill List ---
# Lower-case skill keywords searched for in job descriptions and resumes.
# Grouping below is for readability only; item order is unchanged.
SKILL_SET = [
    # Programming, query and markup languages
    'python', 'java', 'c++', 'sql',
    'html', 'css', 'javascript',
    # ML / NLP topics
    'machine learning', 'deep learning', 'nlp',
    # Python web frameworks
    'flask', 'django',
    # Data and ML libraries
    'pandas', 'numpy', 'scikit-learn', 'tensorflow', 'keras',
    # Tooling, operating systems and cloud platforms
    'git', 'docker', 'linux', 'aws', 'azure',
    # JavaScript ecosystem
    'react', 'node.js',
]

# --- Text Extraction Functions ---
def extract_text_from_pdf(file_path):
    """Return the concatenated text of every page in the PDF at ``file_path``.

    Pages for which ``extract_text()`` yields nothing contribute an empty
    string. Any exception raised while opening or parsing the file is printed
    and an empty string is returned instead.
    """
    try:
        with open(file_path, 'rb') as pdf_handle:
            pdf_reader = PyPDF2.PdfReader(pdf_handle)
            # Extract while the file is still open.
            page_texts = [(pdf_page.extract_text() or "") for pdf_page in pdf_reader.pages]
    except Exception as e:
        print(f"Error reading PDF {file_path}: {e}")
        return ""
    return "".join(page_texts)

def extract_text_from_docx(file_path):
    """Return the text of the .docx file at ``file_path``.

    Mirrors the PDF extractor's error handling: if the document cannot be
    read (missing, corrupt, not a real .docx), the error is printed and an
    empty string is returned, so one bad upload does not fail the whole
    request.
    """
    try:
        # Normalise a falsy result to "" so callers always receive a string.
        return docx2txt.process(file_path) or ""
    except Exception as e:
        print(f"Error reading DOCX {file_path}: {e}")
        return ""

def extract_text_from_txt(file_path):
    """Return the contents of a plain-text file, decoded as UTF-8.

    Bytes that are not valid UTF-8 are dropped rather than raising.
    """
    with open(file_path, mode='r', encoding='utf-8', errors='ignore') as txt_handle:
        contents = txt_handle.read()
    return contents

def extract_text(file_path):
    """Extract text from a resume file, choosing the parser by extension.

    Supported extensions are ``.pdf``, ``.docx`` and ``.txt``. The check is
    case-insensitive so uploads such as ``Resume.PDF`` or ``CV.Docx`` are
    parsed instead of being treated as unsupported.

    Args:
        file_path: Path of the file to read.

    Returns:
        The extracted text, or an empty string for unsupported extensions.
    """
    # Compare on a lowered copy; pass the original path through to the parser.
    lowered_path = file_path.lower()
    if lowered_path.endswith('.pdf'):
        return extract_text_from_pdf(file_path)
    if lowered_path.endswith('.docx'):
        return extract_text_from_docx(file_path)
    if lowered_path.endswith('.txt'):
        return extract_text_from_txt(file_path)
    return ""

# --- Skill Extraction ---
def extract_skills(text, skills=None):
    """Return the set of known skills mentioned in ``text``.

    Matching is case-insensitive and whole-word: ``java`` does not match
    inside ``javascript``.

    Args:
        text: Free text to scan (job description or resume). Empty or
            ``None`` yields an empty set.
        skills: Optional iterable of skill keywords to look for. Defaults to
            the module-level ``SKILL_SET``.

    Returns:
        A set containing each entry of ``skills`` that occurs in ``text``.
    """
    if not text:
        return set()
    if skills is None:
        skills = SKILL_SET

    def _is_word_char(ch):
        return ch.isalnum() or ch == '_'

    haystack = text.lower()
    found_skills = set()
    for skill in skills:
        needle = skill.lower()
        if not needle:
            continue
        # `\b` only works next to a word character. For a skill such as
        # "c++" a trailing `\b` demands that a word character FOLLOWS the
        # "+", so "c++ ", "c++," and "c++" at end of text never matched.
        # Apply a boundary only on the sides where the skill itself starts
        # or ends with a word character.
        prefix = r'(?<!\w)' if _is_word_char(needle[0]) else ''
        suffix = r'(?!\w)' if _is_word_char(needle[-1]) else ''
        if re.search(prefix + re.escape(needle) + suffix, haystack):
            found_skills.add(skill)
    return found_skills

# --- Routes ---
@app.route("/")
def matchresume():
    """Serve the landing page that holds the job-description/upload form."""
    landing_page = render_template('index.html')
    return landing_page

@app.route("/matcher", methods=['POST'])
def matcher():
    """Rank uploaded resumes against a job description.

    Reads ``job_description`` from the submitted form and every file posted
    under ``resumes``. Each upload is saved into ``UPLOAD_FOLDER``, its text
    is extracted, and it is scored against the job description using TF-IDF
    vectors and cosine similarity. The skills found in each resume are also
    compared with those found in the job description.

    Returns:
        The rendered ``index.html`` template: either with a validation/error
        message, or with the (up to) five best matches, their similarity
        percentages, extracted skills and skill overlap breakdown.
    """
    max_results = 5

    # A whitespace-only description has no terms to match against.
    job_description = request.form.get('job_description', '').strip()
    resume_files = request.files.getlist('resumes')

    upload_dir = app.config['UPLOAD_FOLDER']
    os.makedirs(upload_dir, exist_ok=True)

    uploaded_files_info = []
    for resume_file in resume_files:
        if not resume_file.filename:
            continue
        # The filename is client-controlled. Keep only its last path
        # component (treating "\" as a separator too) so a name such as
        # "../../app.py" cannot be written outside the upload folder.
        safe_name = os.path.basename(resume_file.filename.replace('\\', '/'))
        if safe_name in ('', '.', '..'):
            continue

        filepath = os.path.join(upload_dir, safe_name)
        resume_file.save(filepath)

        uploaded_files_info.append({
            "filename": safe_name,
            "filepath": filepath,
            "text": extract_text(filepath) or ""
        })

    resumes_text_list = [info["text"] for info in uploaded_files_info]

    if not job_description or not resumes_text_list:
        return render_template('index.html',
                               message="Please upload at least one resume and enter a job description.")

    # --- Skill Extraction from JD ---
    jd_skills = extract_skills(job_description)

    # --- Skill Extraction from Resumes + Confusion Matrix ---
    # Lists are sorted so the rendered order is stable between requests
    # (set iteration order is not).
    for info in uploaded_files_info:
        resume_skills = extract_skills(info["text"])
        info["resume_skills"] = sorted(resume_skills)
        info["confusion_matrix"] = {
            # In both the job description and the resume.
            "True Positives": sorted(jd_skills & resume_skills),
            # In the resume only.
            "False Positives": sorted(resume_skills - jd_skills),
            # In the job description only.
            "False Negatives": sorted(jd_skills - resume_skills)
        }

    # --- TF-IDF Vectorization ---
    all_documents = [job_description] + resumes_text_list
    try:
        # Keep the matrix sparse; cosine_similarity accepts sparse input.
        vectors = TfidfVectorizer().fit_transform(all_documents)
    except ValueError:
        # Raised when no document yields a usable token (empty vocabulary).
        return render_template('index.html',
                               message="No analyzable text was found in the job description or resumes.")

    # Row 0 is the job description; the remaining rows are the resumes.
    similarities = cosine_similarity(vectors[0], vectors[1:])[0]

    # --- Top 5 Matching Resumes ---
    top_indices = similarities.argsort()[-max_results:][::-1]

    top_resumes = []
    similarity_scores = []
    top_resume_details = []

    for i in top_indices:
        info = uploaded_files_info[i]
        # Percentage with two decimals, as a plain Python float.
        score = round(float(similarities[i]) * 100, 2)
        top_resumes.append(info["filename"])
        similarity_scores.append(score)
        top_resume_details.append({
            "filename": info["filename"],
            "similarity": score,
            "skills": info["resume_skills"],
            "confusion_matrix": info["confusion_matrix"]
        })

    return render_template('index.html',
                           message="Top Matching Resumes:",
                           top_resumes=top_resumes,
                           similarity_scores=similarity_scores,
                           jd_skills=sorted(jd_skills),
                           top_resume_details=top_resume_details)


Writing app.py


In [3]:
# Make sure templates/ exists before the next cell writes index.html into it.
# `-p` makes this a no-op when the directory is already present (the first
# cell also creates it).
!mkdir -p templates

In [4]:
%%writefile templates/index.html
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Resume Matcher</title>
  <!-- External assets: Bootstrap 4.5.2 CSS and the Roboto web font (400/700). -->
  <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.5.2/css/bootstrap.min.css">
  <link href="https://fonts.googleapis.com/css2?family=Roboto:wght@400;700&display=swap" rel="stylesheet">
  <!-- Page-specific rules; declared after the Bootstrap stylesheet link. -->
  <style>
    /* Page background gradient and default font. */
    body {
      background: linear-gradient(to right, #e3f2fd, #fce4ec);
      font-family: 'Roboto', sans-serif;
    }
    /* Space between the top of the page and the card. */
    .container {
      margin-top: 50px;
    }
    /* White, rounded card with a soft shadow that holds the form and results. */
    .card {
      border-radius: 12px;
      box-shadow: 0 8px 16px rgba(0,0,0,0.1);
      background-color: #ffffff;
    }
    /* Gradient title bar; only the top corners are rounded to match .card. */
    .card-header {
      background: linear-gradient(to right, #007bff, #00bcd4);
      color: white;
      border-radius: 12px 12px 0 0;
      padding: 20px;
      font-size: 1.6rem;
      text-align: center;
    }
    /* Pink colour scheme for the submit button, darker on hover. */
    .btn-primary {
      background-color: #ff4081;
      border-color: #ff4081;
    }
    .btn-primary:hover {
      background-color: #e91e63;
      border-color: #e91e63;
    }
    /* Form field labels. */
    .form-group label {
      font-weight: 600;
      color: #333;
    }
    /* Green-tinted box used for the status message. */
    .alert-info {
      background-color: #f1f8e9;
      border-color: #c5e1a5;
      color: #33691e;
      font-weight: 500;
    }
    /* Result lists: no bullets, each item rendered as a tile with a blue
       left border. The `li` rule is a descendant selector, so it also
       applies to list items nested inside a result item. */
    .results-list {
      list-style: none;
      padding-left: 0;
    }
    .results-list li {
      padding: 10px;
      margin-bottom: 10px;
      background-color: #f8f9fa;
      border-left: 5px solid #007bff;
      border-radius: 6px;
    }
    /* Match-percentage badge shown next to a resume filename. */
    .badge {
      font-size: 0.9rem;
      margin-left: 10px;
    }
    /* Headings above the skills list and the resume matches. */
    .section-title {
      margin-top: 30px;
      font-size: 1.3rem;
      font-weight: bold;
      color: #444;
    }
  </style>
</head>
<body>
<div class="container">
  <div class="card">
    <div class="card-header">Job Description & Resume Matcher</div>
    <div class="card-body">
      {# Upload form: posts the job description and one or more resumes to /matcher. #}
      <form method="POST" action="/matcher" enctype="multipart/form-data">
        <div class="form-group">
          <label for="job_description">Job Description:</label>
          <textarea class="form-control" id="job_description" name="job_description" rows="5" placeholder="Paste job description here..." required></textarea>
        </div>
        <div class="form-group">
          <label for="resumes">Upload Resumes:</label>
          <input type="file" class="form-control-file" id="resumes" name="resumes" multiple required accept=".pdf,.docx,.txt">
        </div>
        <button type="submit" class="btn btn-primary btn-block">Match Resumes</button>
      </form>

      {# Everything below is rendered only when the view passes a message. #}
      {% if message %}
      <div class="mt-4">
        <p class="alert alert-info"><strong>{{ message }}</strong></p>

        {% if jd_skills %}
        <div class="section-title">Skills Required (from Job Description):</div>
        <ul class="results-list">
          {% for skill in jd_skills %}
            <li>{{ skill }}</li>
          {% endfor %}
        </ul>
        {% endif %}

        {% if top_resume_details %}
        <div class="section-title">Top Resume Matches:</div>
        {# One <ul> wraps all matches: <li> is only valid as a child of a list element, not of a <div>. #}
        <ul class="results-list">
        {% for resume in top_resume_details %}
            <li>
              <strong>{{ resume.filename }}</strong>
              <span class="badge badge-success">Match: {{ resume.similarity }}%</span>
              <br><strong>Extracted Skills:</strong> {{ resume.skills | join(', ') }}
              <br><strong>Confusion Matrix:</strong>
              <ul>
                <li><strong>True Positives:</strong> {{ resume.confusion_matrix['True Positives'] | join(', ') }}</li>
                <li><strong>False Positives:</strong> {{ resume.confusion_matrix['False Positives'] | join(', ') }}</li>
                <li><strong>False Negatives:</strong> {{ resume.confusion_matrix['False Negatives'] | join(', ') }}</li>
              </ul>
            </li>
        {% endfor %}
        </ul>
        {% endif %}
      </div>
      {% endif %}
    </div>
  </div>
</div>
</body>
</html>

Writing templates/index.html


In [None]:
# Shortcut: Run Flask app with ngrok in one go
import os
from getpass import getpass

from app import app
from pyngrok import ngrok
from IPython.display import display, HTML

# Port shared by the ngrok tunnel and the Flask development server.
PORT = 5000

# Set ngrok token (only once per session).
# The token is a credential and must never be stored in the notebook: read it
# from the NGROK_AUTHTOKEN environment variable, or prompt for it without
# echoing. Any token that was previously committed in this cell should be
# revoked in the ngrok dashboard.
ngrok_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
ngrok.set_auth_token(ngrok_token)

# Kill any previous tunnels
ngrok.kill()

# Start new tunnel
public_url = ngrok.connect(PORT).public_url

# Show clickable link
print(f"✅ Resume Matcher App is Live at: {public_url}")
display(HTML(f"<a href='{public_url}' target='_blank'>👉 Click here to open your Resume Matcher App</a>"))

# Run Flask app (blocks this cell until the server is interrupted)
app.run(port=PORT)

✅ Resume Matcher App is Live at: https://untelevised-margot-unendured.ngrok-free.dev


 * Serving Flask app 'app'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:Press CTRL+C to quit
INFO:werkzeug:127.0.0.1 - - [26/Oct/2025 05:03:31] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [26/Oct/2025 05:03:32] "GET /favicon.ico HTTP/1.1" 404 -
INFO:werkzeug:127.0.0.1 - - [26/Oct/2025 05:06:11] "POST /matcher HTTP/1.1" 200 -
