In [6]:
!pip install pandas numpy scikit-learn sentence-transformers nltk spacy
!python -m spacy download en_core_web_sm


Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m44.8 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [3]:
import pandas as pd
import numpy as np
import re
import nltk
import spacy
from sentence_transformers import SentenceTransformer, util

nltk.download('punkt')
nlp = spacy.load("en_core_web_sm")


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [7]:
df = pd.read_csv("/content/Resume.csv")
df.head()


Unnamed: 0,Category,Resume
0,Data Science,Skills * Programming Languages: Python (pandas...
1,Data Science,Education Details \r\nMay 2013 to May 2017 B.E...
2,Data Science,"Areas of Interest Deep Learning, Control Syste..."
3,Data Science,Skills â¢ R â¢ Python â¢ SAP HANA â¢ Table...
4,Data Science,"Education Details \r\n MCA YMCAUST, Faridab..."


In [8]:
def clean_text(text):
    if type(text) != str:
        return ""
    text = re.sub(r'\n', ' ', text)
    text = re.sub(r'\s+', ' ', text)
    text = re.sub(r'[^A-Za-z0-9,. ]+', '', text)
    return text.lower()

df['clean_resume'] = df['Resume'].apply(clean_text)
df.head()


Unnamed: 0,Category,Resume,clean_resume
0,Data Science,Skills * Programming Languages: Python (pandas...,"skills programming languages python pandas, n..."
1,Data Science,Education Details \r\nMay 2013 to May 2017 B.E...,education details may 2013 to may 2017 b.e uit...
2,Data Science,"Areas of Interest Deep Learning, Control Syste...","areas of interest deep learning, control syste..."
3,Data Science,Skills â¢ R â¢ Python â¢ SAP HANA â¢ Table...,skills r python sap hana tableau sap hana...
4,Data Science,"Education Details \r\n MCA YMCAUST, Faridab...","education details mca ymcaust, faridabad, hary..."


In [10]:
skills_list = [
    "python","java","sql","nlp","machine learning","deep learning",
    "pandas","numpy","tensorflow","pytorch","docker","kubernetes",
    "power bi","tableau","communication","leadership","excel",
    "data analysis","statistics"
]

def extract_skills(text):
    extracted = []
    for skill in skills_list:
        if skill in text:
            extracted.append(skill)
    return extracted

df['skills'] = df['clean_resume'].apply(extract_skills)
df[['Resume','skills']].head()


Unnamed: 0,Resume,skills
0,Skills * Programming Languages: Python (pandas...,"[python, java, sql, machine learning, deep lea..."
1,Education Details \r\nMay 2013 to May 2017 B.E...,"[python, machine learning]"
2,"Areas of Interest Deep Learning, Control Syste...","[python, java, sql, machine learning, deep lea..."
3,Skills â¢ R â¢ Python â¢ SAP HANA â¢ Table...,"[python, sql, nlp, machine learning, deep lear..."
4,"Education Details \r\n MCA YMCAUST, Faridab...","[python, java, data analysis]"


In [11]:
job_description = """
We are hiring a Data Scientist with strong Python, Machine Learning, NLP,
Data Analysis, SQL, statistics, and cloud experience.
"""


In [13]:
model = SentenceTransformer("all-MiniLM-L6-v2")

resume_embeddings = model.encode(df['clean_resume'].tolist(), convert_to_tensor=True)
jd_embedding = model.encode(job_description, convert_to_tensor=True)

similarities = util.cos_sim(jd_embedding, resume_embeddings)[0].cpu().numpy()
df['similarity'] = similarities
df[['Resume', 'similarity']].head()


Unnamed: 0,Resume,similarity
0,Skills * Programming Languages: Python (pandas...,0.589142
1,Education Details \r\nMay 2013 to May 2017 B.E...,0.338514
2,"Areas of Interest Deep Learning, Control Syste...",0.473004
3,Skills â¢ R â¢ Python â¢ SAP HANA â¢ Table...,0.386179
4,"Education Details \r\n MCA YMCAUST, Faridab...",0.441133


In [14]:
def compute_score(row):
    skill_score = len(row['skills']) / len(skills_list)
    sim_score = row['similarity']
    final = (0.5 * sim_score) + (0.5 * skill_score)
    return final

df['final_score'] = df.apply(compute_score, axis=1)


In [16]:
top_candidates = df.sort_values("final_score", ascending=False).head(10)
top_candidates[['Resume', 'skills', 'similarity', 'final_score']]


Unnamed: 0,Resume,skills,similarity,final_score
0,Skills * Programming Languages: Python (pandas...,"[python, java, sql, machine learning, deep lea...",0.589142,0.531413
10,Skills * Programming Languages: Python (pandas...,"[python, java, sql, machine learning, deep lea...",0.589142,0.531413
20,Skills * Programming Languages: Python (pandas...,"[python, java, sql, machine learning, deep lea...",0.589142,0.531413
30,Skills * Programming Languages: Python (pandas...,"[python, java, sql, machine learning, deep lea...",0.589142,0.531413
37,Education Details \r\n B.Tech Rayat and Bahr...,"[python, java, sql, nlp, machine learning, dee...",0.526536,0.526426
7,Education Details \r\n B.Tech Rayat and Bahr...,"[python, java, sql, nlp, machine learning, dee...",0.526536,0.526426
17,Education Details \r\n B.Tech Rayat and Bahr...,"[python, java, sql, nlp, machine learning, dee...",0.526536,0.526426
27,Education Details \r\n B.Tech Rayat and Bahr...,"[python, java, sql, nlp, machine learning, dee...",0.526536,0.526426
8,Personal Skills â¢ Ability to quickly grasp t...,"[python, sql, nlp, machine learning, deep lear...",0.546006,0.457213
18,Personal Skills â¢ Ability to quickly grasp t...,"[python, sql, nlp, machine learning, deep lear...",0.546006,0.457213


In [18]:
!pip install streamlit pyngrok docx2txt pymupdf sentence-transformers


Collecting streamlit
  Downloading streamlit-1.52.1-py3-none-any.whl.metadata (9.8 kB)
Collecting pyngrok
  Downloading pyngrok-7.5.0-py3-none-any.whl.metadata (8.1 kB)
Collecting docx2txt
  Downloading docx2txt-0.9-py3-none-any.whl.metadata (529 bytes)
Collecting pymupdf
  Downloading pymupdf-1.26.6-cp310-abi3-manylinux_2_28_x86_64.whl.metadata (3.4 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.52.1-py3-none-any.whl (9.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.0/9.0 MB[0m [31m61.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.5.0-py3-none-any.whl (24 kB)
Downloading docx2txt-0.9-py3-none-any.whl (4.0 kB)
Downloading pymupdf-1.26.6-cp310-abi3-manylinux_2_28_x86_64.whl (24.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.1/24.1 MB[0m [31m72.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 

In [20]:
!ngrok config add-authtoken "36i5fDaaLkJ0ibTssEKs8cCb5wF_5RnyxQ62ifNeDN25jiA9G"


Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [21]:
from pyngrok import ngrok

# Kill any old tunnels
ngrok.kill()

# Start a new ngrok tunnel
public_url = ngrok.connect(8501)
print("Your Streamlit App URL:", public_url)

# Run the app
!streamlit run resume_app.py & sleep 3


Your Streamlit App URL: NgrokTunnel: "https://prelawfully-lifeful-ivelisse.ngrok-free.dev" -> "http://localhost:8501"

Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.23.183.201:8501[0m
[0m
[34m  Stopping...[0m
