In [1]:
# Install necessary libraries
!pip install Flask PyPDF2 docx2txt scikit-learn numpy pyngrok

# Create the required folder structure for Flask
!mkdir -p templates
!mkdir -p uploads

print("Installation and directory setup ")

Installation and directory setup 


In [2]:
# 4. Create the Flask application (app.py)

%%writefile app.py
import os
import re
from flask import Flask, request, render_template
import PyPDF2
import docx2txt
from sklearn.feature_extraction.text import TfidfVectorizer
# Import euclidean_distances for distance calculation
from sklearn.metrics.pairwise import euclidean_distances

# Application object: templates are looked up in 'templates', and the
# 'UPLOAD_FOLDER' setting names the directory that receives uploaded resumes.
app = Flask(__name__, template_folder='templates')
app.config.update(UPLOAD_FOLDER='uploads/')

# --- Skill List ---
# Lower-case keywords that are searched for in the job description and in
# every resume. Order is irrelevant to matching; one entry per line keeps
# diffs small when skills are added or removed.
SKILL_SET = [
    'python',
    'java',
    'c++',
    'sql',
    'html',
    'css',
    'javascript',
    'machine learning',
    'deep learning',
    'nlp',
    'flask',
    'django',
    'pandas',
    'numpy',
    'scikit-learn',
    'tensorflow',
    'keras',
    'git',
    'docker',
    'linux',
    'aws',
    'azure',
    'react',
    'node.js',
]

# --- Text Extraction Functions ---
def extract_text_from_pdf(file_path):
    """Return the text of all pages of the PDF at ``file_path``, concatenated.

    A page whose extraction yields nothing contributes an empty string.
    This is a best-effort reader: any exception raised while opening or
    parsing the file is swallowed and an empty string is returned.
    """
    try:
        with open(file_path, 'rb') as pdf_handle:
            pdf_reader = PyPDF2.PdfReader(pdf_handle)
            # `or ""` guards against pages for which extract_text() gives a falsy result.
            page_texts = [pdf_page.extract_text() or "" for pdf_page in pdf_reader.pages]
            combined = "".join(page_texts)
    except Exception:
        # Errors are deliberately not reported (keeps Colab output clean).
        return ""
    return combined

def extract_text_from_docx(file_path):
    """Return the text that ``docx2txt.process`` extracts from ``file_path``.

    Best-effort: if processing raises any exception, an empty string is
    returned instead of propagating the error.
    """
    try:
        docx_text = docx2txt.process(file_path)
    except Exception:
        # Errors are deliberately not reported (keeps Colab output clean).
        docx_text = ""
    return docx_text

def extract_text_from_txt(file_path):
    """Read ``file_path`` as UTF-8 text and return its full contents.

    Bytes that are not valid UTF-8 are dropped (``errors='ignore'``).
    Unlike the PDF/DOCX readers, I/O errors are not caught here.
    """
    with open(file_path, 'r', encoding='utf-8', errors='ignore') as txt_handle:
        contents = txt_handle.read()
    return contents

def extract_text(file_path):
    """Extract text from ``file_path`` using the reader matching its extension.

    Supported extensions are ``.pdf``, ``.docx`` and ``.txt``. The extension
    is compared case-insensitively, so ``CV.PDF`` or ``resume.Docx`` are
    handled like their lower-case counterparts instead of being ignored.

    Returns:
        The extracted text, or an empty string for any other extension.
    """
    # Only the comparison is lower-cased; the original path is passed on untouched.
    lowered_path = file_path.lower()
    if lowered_path.endswith('.pdf'):
        return extract_text_from_pdf(file_path)
    if lowered_path.endswith('.docx'):
        return extract_text_from_docx(file_path)
    if lowered_path.endswith('.txt'):
        return extract_text_from_txt(file_path)
    return ""

# --- Skill Extraction ---
def extract_skills(text, skills=None):
    """Return the set of skills that occur in ``text`` (case-insensitive).

    Args:
        text: Free text to scan (job description or resume). ``None`` or an
            empty string yields an empty set.
        skills: Optional iterable of skill names to look for. Defaults to the
            module-level ``SKILL_SET``.

    Returns:
        A set containing each skill, spelled as given in ``skills``, that was
        found as a whole term in ``text``.
    """
    if skills is None:
        skills = SKILL_SET
    haystack = (text or "").lower()
    found_skills = set()
    for skill in skills:
        needle = skill.lower()
        if not needle:
            continue
        # A plain r'\b' only works next to a word character: for 'c++' the
        # trailing r'\b' can never match before a space, punctuation or the end
        # of the text, so the skill would never be found. Guard an edge only
        # when that edge of the skill is itself a word character.
        lead = r'(?<!\w)' if re.match(r'\w', needle[0]) else ''
        trail = r'(?!\w)' if re.match(r'\w', needle[-1]) else ''
        if re.search(lead + re.escape(needle) + trail, haystack):
            found_skills.add(skill)
    return found_skills

# --- Routes ---
@app.route("/")
def matchresume():
    """Serve the landing page (``index.html``) with no result data."""
    landing_page = render_template('index.html')
    return landing_page

def _safe_upload_name(raw_name):
    """Reduce a client-supplied filename to a bare file name, or '' if unusable."""
    # The name comes from the client and may contain directory components
    # (e.g. '../../app.py' or a Windows path); keep only the last component so
    # the file can never be written outside the upload folder.
    base_name = os.path.basename((raw_name or '').replace('\\', '/'))
    return '' if base_name in ('', '.', '..') else base_name


@app.route("/matcher", methods=['POST'])
def matcher():
    """Rank uploaded resumes against the submitted job description.

    Reads the ``job_description`` form field and the ``resumes`` file list,
    saves each resume into the upload folder, extracts its text and skills,
    and ranks the resumes by Euclidean distance between TF-IDF vectors
    (smaller distance = closer match). At most five resumes are reported.

    Returns:
        The rendered ``index.html``: either with a validation message, or with
        ``distance_scores``, ``jd_skills`` and ``top_resume_details``.
    """
    # Strip so that a whitespace-only description is treated as missing.
    job_description = request.form.get('job_description', '').strip()
    resume_files = request.files.getlist('resumes')

    uploaded_files_info = []
    os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)

    for resume_file in resume_files:
        safe_name = _safe_upload_name(resume_file.filename)
        if not safe_name:
            continue
        filepath = os.path.join(app.config['UPLOAD_FOLDER'], safe_name)
        resume_file.save(filepath)
        uploaded_files_info.append({
            "filename": safe_name,
            "filepath": filepath,
            "text": extract_text(filepath)
        })

    if not job_description or not uploaded_files_info:
        return render_template('index.html',
                               message="Please upload at least one resume and enter a job description.")

    # --- Skill Extraction from JD ---
    jd_skills = extract_skills(job_description)

    # --- Skill Extraction from Resumes + Confusion Matrix ---
    # JD skills are the "expected" set and resume skills the "predicted" set.
    # Lists are sorted so the page shows them in a stable order.
    for info in uploaded_files_info:
        resume_skills = extract_skills(info["text"])
        info["resume_skills"] = sorted(resume_skills)
        info["confusion_matrix"] = {
            "True Positives": sorted(jd_skills & resume_skills),
            "False Positives": sorted(resume_skills - jd_skills),
            "False Negatives": sorted(jd_skills - resume_skills)
        }

    # --- TF-IDF Vectorization ---
    all_documents = [job_description] + [info["text"] for info in uploaded_files_info]
    try:
        vectors = TfidfVectorizer().fit_transform(all_documents)
    except ValueError:
        # Raised when no document contains a usable token (empty vocabulary);
        # report it to the user instead of failing with a server error.
        return render_template('index.html',
                               message="No usable text was found in the job description or the uploaded resumes.")

    # --- EUCLIDEAN DISTANCE CALCULATION (Lower is better) ---
    # Row 0 is the job description; the remaining rows are the resumes.
    # The sparse matrix is passed directly, no dense copy is needed.
    distances = euclidean_distances(vectors[:1], vectors[1:])[0]

    # --- Top 5 Matching Resumes ---
    # The 5 SMALLEST distances; a stable sort keeps upload order for ties.
    top_indices = distances.argsort(kind="stable")[:5]

    distance_scores = []
    top_resume_details = []

    for index in top_indices:
        info = uploaded_files_info[int(index)]
        # Convert the numpy scalar to a plain float before handing it to the template.
        distance_score = round(float(distances[index]), 4)
        distance_scores.append(distance_score)
        top_resume_details.append({
            "filename": info["filename"],
            "distance": distance_score,
            "skills": info["resume_skills"],
            "confusion_matrix": info["confusion_matrix"]
        })

    return render_template('index.html',
                           message="Top Matching Resumes (Sorted by Lowest Euclidean Distance):",
                           distance_scores=distance_scores,
                           jd_skills=sorted(jd_skills),
                           top_resume_details=top_resume_details)

Overwriting app.py


In [3]:
# Make sure the templates directory exists before index.html is written into it
# (-p makes this a no-op when the directory is already there).
!mkdir -p templates

In [4]:
%%writefile templates/index.html
<!DOCTYPE html>
<html lang="en">
<head>
  {#
    Resume Matcher page.
    Context variables (all optional):
      message            - status or validation text shown above the results
      jd_skills          - list of skills found in the job description
      top_resume_details - list of dicts with keys: filename, distance,
                           skills, confusion_matrix
    NOTE: indent with plain spaces only. Non-breaking spaces are not
    whitespace in CSS and silently invalidate the rules in <style>.
  #}
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Resume Matcher</title>
  <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.5.2/css/bootstrap.min.css">
  <link href="https://fonts.googleapis.com/css2?family=Roboto:wght@400;700&display=swap" rel="stylesheet">
  <style>
    body {
      background: linear-gradient(to right, #e3f2fd, #fce4ec);
      font-family: 'Roboto', sans-serif;
    }
    .container {
      margin-top: 50px;
    }
    .card {
      border-radius: 12px;
      box-shadow: 0 8px 16px rgba(0,0,0,0.1);
      background-color: #ffffff;
    }
    .card-header {
      background: linear-gradient(to right, #007bff, #00bcd4);
      color: white;
      border-radius: 12px 12px 0 0;
      padding: 20px;
      font-size: 1.6rem;
      text-align: center;
    }
    .btn-primary {
      background-color: #ff4081;
      border-color: #ff4081;
    }
    .btn-primary:hover {
      background-color: #e91e63;
      border-color: #e91e63;
    }
    .form-group label {
      font-weight: 600;
      color: #333;
    }
    .alert-info {
      background-color: #f1f8e9;
      border-color: #c5e1a5;
      color: #33691e;
      font-weight: 500;
    }
    .results-list {
      list-style: none;
      padding-left: 0;
    }
    .results-list li {
      padding: 10px;
      margin-bottom: 10px;
      background-color: #f8f9fa;
      border-left: 5px solid #007bff;
      border-radius: 6px;
    }
    .badge {
      font-size: 0.9rem;
      margin-left: 10px;
    }
    .section-title {
      margin-top: 30px;
      font-size: 1.3rem;
      font-weight: bold;
      color: #444;
    }
  </style>
</head>
<body>
<div class="container">
  <div class="card">
    <div class="card-header">Job Description &amp; Resume Matcher</div>
    <div class="card-body">
      <form method="POST" action="/matcher" enctype="multipart/form-data">
        <div class="form-group">
          <label for="job_description">Job Description:</label>
          <textarea class="form-control" id="job_description" name="job_description" rows="5" placeholder="Paste job description here..." required></textarea>
        </div>
        <div class="form-group">
          <label for="resumes">Upload Resumes:</label>
          <input type="file" class="form-control-file" id="resumes" name="resumes" multiple required accept=".pdf,.docx,.txt">
        </div>
        <button type="submit" class="btn btn-primary btn-block">Match Resumes</button>
      </form>

      {% if message %}
      <div class="mt-4">
        <p class="alert alert-info"><strong>{{ message }}</strong></p>

        {% if jd_skills %}
        <div class="section-title">Skills Required (from Job Description):</div>
        <ul class="results-list">
          {% for skill in jd_skills %}
            <li>{{ skill }}</li>
          {% endfor %}
        </ul>
        {% endif %}

        {% if top_resume_details %}
        <div class="section-title">Top Resume Matches:</div>
        {# One list per resume; <li> must live inside a <ul>, not a <div>. #}
        {% for resume in top_resume_details %}
          <ul class="results-list">
            <li>
              <strong>{{ resume.filename }}</strong>
              <span class="badge badge-info match-score">Distance: {{ resume.distance }}</span>
              <br><strong>Extracted Skills:</strong> {{ resume.skills | join(', ') }}
              <br><strong>Confusion Matrix:</strong>
              <ul>
                <li><strong>True Positives:</strong> {{ resume.confusion_matrix['True Positives'] | join(', ') }}</li>
                <li><strong>False Positives:</strong> {{ resume.confusion_matrix['False Positives'] | join(', ') }}</li>
                <li><strong>False Negatives:</strong> {{ resume.confusion_matrix['False Negatives'] | join(', ') }}</li>
              </ul>
            </li>
          </ul>
        {% endfor %}
        {% endif %}
      </div>
      {% endif %}
    </div>
  </div>
</div>
</body>
</html>

Overwriting templates/index.html


In [None]:
# Shortcut: Run Flask app with ngrok in one go
import os
from getpass import getpass

from app import app
from pyngrok import ngrok
from IPython.display import display, HTML

# Port shared by the Flask server and the ngrok tunnel.
PORT = 5000

# SECURITY: the ngrok auth token is a credential and must never be written
# into the notebook. It is read from the NGROK_AUTH_TOKEN environment variable,
# falling back to an interactive prompt whose input is not echoed or saved.
# Any token that was previously committed in this cell should be revoked.
ngrok_auth_token = os.environ.get("NGROK_AUTH_TOKEN") or getpass("Enter your ngrok auth token: ")
ngrok.set_auth_token(ngrok_auth_token)

# Kill any previous tunnels
ngrok.kill()

# Start new tunnel
public_url = ngrok.connect(PORT).public_url

# Show clickable link
print(f"✅ Resume Matcher App is Live at: {public_url}")
display(HTML(f"<a href='{public_url}' target='_blank'>👉 Click here to open your Resume Matcher App</a>"))

# Run Flask app (blocks this cell until the server is interrupted)
app.run(port=PORT)

✅ Resume Matcher App is Live at: https://untelevised-margot-unendured.ngrok-free.dev


 * Serving Flask app 'app'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [26/Oct/2025 05:39:42] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [26/Oct/2025 05:40:33] "POST /matcher HTTP/1.1" 200 -
