In [1]:
!pip install Flask Flask-CORS

Collecting Flask
  Downloading https://files.pythonhosted.org/packages/9f/1a/8b6d48162861009d1e017a9740431c78d860809773b66cac220a11aa3310/Flask-2.2.5-py3-none-any.whl (101kB)
Collecting Flask-CORS
  Downloading https://files.pythonhosted.org/packages/8b/52/2aa6285f104616f73ee1ad7905a16b2b35af0143034ad0cf7b64bcba715c/Flask_Cors-4.0.1-py2.py3-none-any.whl
Collecting click>=8.0 (from Flask)
  Using cached https://files.pythonhosted.org/packages/00/2e/d53fa4befbf2cfa713304affc7ca780ce4fc1fd8710527771b58311a3229/click-8.1.7-py3-none-any.whl
Collecting itsdangerous>=2.0 (from Flask)
  Downloading https://files.pythonhosted.org/packages/68/5f/447e04e828f47465eeab35b5d408b7ebaaaee207f48b7136c5a7267a30ae/itsdangerous-2.1.2-py3-none-any.whl
Installing collected packages: click, itsdangerous, Flask, Flask-CORS
Successfully installed Flask-2.2.5 Flask-CORS-4.0.1 click-8.1.7 itsdangerous-2.1.2


In [1]:
import spacy
from flask import Flask, request, jsonify
from flask_cors import CORS
import threading
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


# Flask application object; the @app.route decorators below register their
# handlers on it.
app = Flask(__name__)
# Enable Flask-CORS for the whole app. No options are passed, so the
# extension's default policy applies.
CORS(app)


# Load the spaCy 'en_core_web_lg' pipeline once at module level; every
# extraction helper below reuses this single `nlp` object.
nlp = spacy.load('en_core_web_lg')


def extract_info(resume_text):
    """Extract skill, education and experience mentions from resume text.

    Two sources are combined: spaCy named entities and keyword regexes.
    ORG entities whose text contains "university" count as education,
    WORK_OF_ART / PRODUCT entities as skills, and DATE entities as experience.

    Args:
        resume_text: Raw resume text. ``None`` or an empty string produces
            empty result lists instead of an error.

    Returns:
        dict with the keys ``'skills'``, ``'education'`` and ``'experience'``.
        Each value is a de-duplicated list of strings; order is not
        significant because the lists are built from sets.
    """
    if not resume_text:
        # Nothing to analyse; also keeps None out of the spaCy pipeline.
        return {'skills': [], 'education': [], 'experience': []}

    doc = nlp(resume_text)
    skills = []
    education = []
    experience = []

    for ent in doc.ents:
        if ent.label_ == 'ORG' and 'university' in ent.text.lower():
            education.append(ent.text.strip())
        elif ent.label_ in ('WORK_OF_ART', 'PRODUCT'):
            skills.append(ent.text)
        elif ent.label_ == 'DATE':
            experience.append(ent.text)

    # "C++" is kept outside the trailing \b: a word boundary can never follow
    # "+" when the next character is a space, comma or end of text, so the
    # original pattern could not find "C++" in ordinary prose.
    skill_pattern = re.compile(
        r'\b(C\+\+|(?:Python|JavaScript|React|Node\.js|Java|SQL|Machine Learning|AI'
        r'|Django|Flask|HTML|CSS|jQuery|Angular|Vue\.js|Git|Linux)\b)',
        re.IGNORECASE,
    )
    skills.extend(skill_pattern.findall(resume_text))

    # Same boundary issue for the dotted abbreviations: "B.S." followed by a
    # space has no word boundary after the final ".", so only the spelled-out
    # degree names carry the trailing \b.
    education_pattern = re.compile(
        r'\b(B\.S\.|M\.S\.|Ph\.D\.|(?:Bachelor|Master|Doctorate)\b)'
        r'.*?\b(in|from)?\b.*?\b(?:[A-Z][a-z]*\b[ ]?){1,3}(University|College|Institute)',
        re.IGNORECASE,
    )
    # findall() yields one tuple of captured groups per match. Join the
    # non-empty groups here so that `education` only ever holds strings;
    # joining every entry later would split the plain entity strings added
    # above into single characters.
    for groups in education_pattern.findall(resume_text):
        education.append(' '.join(part for part in groups if part))

    # The unit alternatives are grouped so that a number is always required;
    # a bare "months?" alternative would also match inside words like "monthly".
    experience_pattern = re.compile(r'(\d+ (?:years?|months?)|\d+-\d+|\d{4}-\d{4})')
    experience.extend(experience_pattern.findall(resume_text))

    return {
        'skills': list(set(skills)),
        'education': list({edu for edu in education if edu}),
        'experience': list(set(experience)),
    }


@app.route('/parse_resume', methods=['POST'])
def parse_resume():
    """POST /parse_resume: extract structured data from resume text.

    Expects a JSON object with a string field ``text``.

    Returns:
        200 with the JSON-encoded result of ``extract_info`` on success, or
        400 with ``{"error": ...}`` when the body is not a JSON object or
        ``text`` is missing or not a string.
    """
    # silent=True returns None for a missing or malformed JSON body instead
    # of raising, so every bad payload gets the same JSON error shape.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({'error': 'Request body must be a JSON object.'}), 400

    resume_text = payload.get('text')
    if not isinstance(resume_text, str):
        return jsonify({'error': "Field 'text' is required and must be a string."}), 400

    return jsonify(extract_info(resume_text))


def preprocess_text(text):
    """Normalise text into a space-separated string of lemmas.

    The text is lower-cased and run through the shared spaCy pipeline; stop
    words and punctuation tokens are dropped and the remaining tokens are
    replaced by their lemma.

    Args:
        text: Text to normalise.

    Returns:
        str: The kept lemmas joined by single spaces.
    """
    lemmas = []
    for token in nlp(text.lower()):
        if token.is_stop or token.is_punct:
            continue
        lemmas.append(token.lemma_)
    return ' '.join(lemmas)


def extract_features(text):
    """Reduce text to a bag of feature terms for TF-IDF matching.

    Features are the lower-cased text of selected spaCy entities plus
    skill keywords and education keywords found by regex.

    Args:
        text: Resume or job-description text. ``None`` or an empty string
            yields ``''``.

    Returns:
        str: The unique, lower-cased feature terms joined by single spaces,
        in sorted order so the result is deterministic.
    """
    if not text:
        return ''

    doc = nlp(text)
    features = []

    for ent in doc.ents:
        # NOTE(review): 'SKILL' does not look like a label the stock
        # en_core_web_lg pipeline emits - confirm, or drop it if no custom
        # NER component is planned.
        if ent.label_ in ('SKILL', 'ORG', 'DATE'):
            features.append(ent.text.lower())

    # "c++" sits outside the trailing \b: no word boundary exists after "+"
    # when a space, comma or end of text follows, so it could never match.
    skill_pattern = re.compile(
        r'\b(c\+\+|(?:python|javascript|react|node\.js|java|sql|machine learning|ai'
        r'|django|flask|html|css|jquery|angular|vue\.js|git|linux)\b)',
        re.IGNORECASE,
    )
    # Lower-case the matches so "Python" and "python" collapse to one feature.
    features.extend(match.lower() for match in skill_pattern.findall(text))

    # Same boundary fix for the dotted degree abbreviations ("b.s." etc.).
    education_pattern = re.compile(
        r'\b(b\.s\.|m\.s\.|ph\.d\.|(?:bachelor|master|doctorate)\b)'
        r'.*?\b(in|from)?\b.*?\b(?:[a-z][a-z]*\b[ ]?){1,3}(university|college|institute)?\b',
        re.IGNORECASE,
    )
    # findall() returns a tuple of groups per match. Flatten the tuples and
    # skip the empty optional groups: the final join only accepts strings and
    # raised TypeError on the raw tuples.
    for groups in education_pattern.findall(text):
        features.extend(part.lower() for part in groups if part)

    return ' '.join(sorted(set(features)))

def match_job(resume_data, job_descriptions):
    """Score every job description against a parsed resume.

    The resume fields and each job description are reduced to feature strings
    with ``extract_features``, vectorised together with TF-IDF, and compared
    by cosine similarity.

    Args:
        resume_data: Mapping with the list-valued keys ``'skills'``,
            ``'education'`` and ``'experience'``. A missing key, a ``None``
            value, or ``resume_data`` itself being ``None`` is treated as an
            empty list.
        job_descriptions: List of job-description strings.

    Returns:
        list[float]: One similarity score per job description, in input
        order. ``[]`` when there are no job descriptions; all ``0.0`` when no
        document yields a usable token.
    """
    if not job_descriptions:
        # cosine_similarity rejects an empty second operand; there is nothing
        # to score anyway.
        return []

    resume_data = resume_data or {}
    resume_parts = []
    for key in ('skills', 'education', 'experience'):
        resume_parts.extend(resume_data.get(key) or [])
    resume_features = extract_features(' '.join(str(part) for part in resume_parts))

    processed_jobs = [extract_features(job) for job in job_descriptions]

    try:
        tfidf_matrix = TfidfVectorizer().fit_transform([resume_features] + processed_jobs)
    except ValueError:
        # scikit-learn raises ValueError when the fitted vocabulary is empty
        # (for example every feature string is ''); no overlap can exist then.
        return [0.0] * len(processed_jobs)

    # Row 0 is the resume, the remaining rows are the jobs. The sparse matrix
    # is passed straight through; there is no need to densify it first.
    similarities = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:]).flatten()
    return similarities.tolist()


@app.route('/match_job', methods=['POST'])
def match_job_route():
    """POST /match_job: score job descriptions against parsed resume data.

    Expects a JSON object with ``resumeData`` (an object) and
    ``jobDescriptions`` (a list of strings).

    Returns:
        200 with a JSON list of similarity scores, one per job description
        and in the same order (an empty list when no descriptions are sent),
        or 400 with ``{"error": ...}`` when the payload is malformed.
    """
    # silent=True returns None for a missing or malformed JSON body instead
    # of raising, so every bad payload gets the same JSON error shape.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({'error': 'Request body must be a JSON object.'}), 400

    resume_data = payload.get('resumeData')
    job_descriptions = payload.get('jobDescriptions')
    if not isinstance(resume_data, dict):
        return jsonify({'error': "Field 'resumeData' is required and must be an object."}), 400
    if not isinstance(job_descriptions, list) or not all(
            isinstance(job, str) for job in job_descriptions):
        return jsonify(
            {'error': "Field 'jobDescriptions' is required and must be a list of strings."}
        ), 400

    if not job_descriptions:
        # Nothing to score; answer directly rather than running the matcher.
        return jsonify([])

    return jsonify(match_job(resume_data, job_descriptions))


def run_flask():
    """Run the Flask server; blocks the calling thread until it stops."""
    # host '0.0.0.0' binds every network interface, so the API is reachable
    # from other machines and not only from localhost.
    server_options = {'host': '0.0.0.0', 'port': 5000}
    app.run(**server_options)


# Serve from a background thread so the notebook cell returns immediately.
thread = threading.Thread(target=run_flask)
thread.start()


  LARGE_SPARSE_SUPPORTED = LooseVersion(scipy_version) >= '0.14.0'
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  dtype=np.int):


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://11.6.95.81:5000
Press CTRL+C to quit
127.0.0.1 - - [15/Jul/2024 12:11:14] "OPTIONS /parse_resume HTTP/1.1" 200 -
127.0.0.1 - - [15/Jul/2024 12:11:14] "POST /parse_resume HTTP/1.1" 200 -
127.0.0.1 - - [15/Jul/2024 12:11:14] "POST /parse_resume HTTP/1.1" 200 -
127.0.0.1 - - [15/Jul/2024 12:11:14] "OPTIONS /match_job HTTP/1.1" 200 -
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  dtype = np.float
127.0.0.1 - - [15/Jul/2024 12:11:14] "POST /match_job HTTP/1.1" 200 -
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  dtype = np.float
127.0.0.1 - - [15/Jul/2024 12:11:15] "POST /match_job HTTP/1.1" 200 -
