In [25]:
!pip install PyPDF2 nltk scikit-learn




In [26]:
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [27]:
import PyPDF2

def extract_text_from_pdf(pdf_path):
    text = ""
    with open(pdf_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        for page in reader.pages:
            text += page.extract_text()
    return text

resume_text = extract_text_from_pdf('/content/Mahmud Hasan Shanto_FullStack.pdf')
print(resume_text[:1000])


 
Education
 Bachelor ofScience in Computer Science & Engineering (BSCSE)
United International University, Dhaka
Expected Graduation: March 2026
CGPA: 3.08/4.00
SkillsExperience
Personal ProjectsBSc in Computer Science & Engineering
United International University, Dhaka, Bangladesh
Final Year Design Project – LLMs + Knowledge Graph (BioMamba S6)
UIU, Dhaka | June 2025 – Present
Developed a biomedical AI model with advanced reasoning by combining Mamba S6 and a custom Knowledge Graph,
reducing hallucinations and improving medical answer quality.
Full-Stack Developer – Laravel & React Projects
• Developed book review and job portal systems using Laravel (backend, DB design, REST APIs)
• Currently building an e-learning platform with Laravel and React, including frontend integration and dynamic UI
DevOps Training (Ongoing)
Learning DevOps fundamentals with hands-on work in Linux, Git, Docker, CI/CD, Kubernetes, and Cloud deployments.
Blockchain Project – Secure E-Health Records
• Built a

In [28]:
import re
from nltk.corpus import stopwords


In [29]:
stop_words = set(stopwords.words('english'))

def clean_text(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z\s]', ' ', text)
    text = re.sub(r'\s+', ' ', text)
    words = text.split()
    words = [w for w in words if w not in stop_words]
    return ' '.join(words)
clean_resume = clean_text(resume_text)
print(clean_resume[:1000])


education bachelor ofscience computer science engineering bscse united international university dhaka expected graduation march cgpa skillsexperience personal projectsbsc computer science engineering united international university dhaka bangladesh final year design project llms knowledge graph biomamba uiu dhaka june present developed biomedical ai model advanced reasoning combining mamba custom knowledge graph reducing hallucinations improving medical answer quality full stack developer laravel react projects developed book review job portal systems using laravel backend db design rest apis currently building e learning platform laravel react including frontend integration dynamic ui devops training ongoing learning devops fundamentals hands work linux git docker ci cd kubernetes cloud deployments blockchain project secure e health records built tested smart contracts ethereum ganache metamask truffle integrated web react python medical record operations compared aes rsa ecc homomorp

In [30]:
skill_set = [
    'python', 'java', 'c++', 'c', 'javascript',
    'machine learning', 'deep learning', 'nlp',
    'data science', 'sql', 'mysql',
    'html', 'css', 'react', 'node',
    'tensorflow', 'keras', 'pytorch',
    'git', 'github', 'linux',
    'docker', 'aws'
]


In [31]:
def extract_skills(text, skill_list):
    found_skills = set()
    for skill in skill_list:
        if skill in text:
            found_skills.add(skill)
    return found_skills

resume_skills = extract_skills(clean_resume, skill_set)
print("Skills found in resume:", resume_skills)


Skills found in resume: {'linux', 'github', 'c', 'sql', 'css', 'mysql', 'docker', 'git', 'html', 'java', 'javascript', 'react', 'python'}


In [32]:
## This is where automation is needed or a website link for the post that they are hiring for or posted for jobs. It's being done manually for now.
## Changing the job description to your own industry requirements.
job_description = """
We are looking for a Machine Learning Engineer with experience in Python,
Machine Learning, NLP, SQL, Docker, and AWS.
"""

clean_job = clean_text(job_description)
job_skills = extract_skills(clean_job, skill_set)

print("Required job skills:", job_skills)


Required job skills: {'c', 'sql', 'docker', 'aws', 'machine learning', 'nlp', 'python'}


In [33]:
##Compare Resume vs Job Skills
matched_skills = resume_skills.intersection(job_skills)
missing_skills = job_skills - resume_skills

print("Matched Skills:", matched_skills)
print("Missing Skills:", missing_skills)


Matched Skills: {'python', 'sql', 'c', 'docker'}
Missing Skills: {'nlp', 'machine learning', 'aws'}


In [34]:
#------------------------------I------M--------P------O---------R-------------------------------------
##technique used in industry-standard.------------------------------------kk--------------------
#------------------------------T-----------A---------------N----------T-------------------------------

In [35]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [36]:
##Prepare Clean Texts
documents = [clean_resume, clean_job]


##Apply TF-IDF Vectorization
tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(documents)


##Calculate Cosine Similarity
similarity_score = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
match_percentage = similarity_score[0][0] * 100

print(f"Resume Match Percentage: {match_percentage:.2f}%")


Resume Match Percentage: 5.98%


In [37]:
##Combine with Skill Matching
print("Matched Skills:", matched_skills)
print("Missing Skills:", missing_skills)


Matched Skills: {'python', 'sql', 'c', 'docker'}
Missing Skills: {'nlp', 'machine learning', 'aws'}


In [38]:
#------------------------------I------M--------P------O---------R-------------------------------------
## The real game is here. now new and improved logic with REGEX
## Helps to Get All Skills and perfect score mag
#------------------------------T-----------A---------------N----------T-------------------------------

In [39]:
#Improved Skill Dictionary
skill_set = [
    'python', 'java', 'c\\+\\+', 'c', 'javascript',
    'machine learning', 'deep learning', 'natural language processing', 'nlp',
    'data science', 'data analysis',
    'sql', 'mysql', 'postgresql',
    'html', 'css', 'react', 'node',
    'tensorflow', 'keras', 'pytorch',
    'git', 'github', 'linux',
    'docker', 'aws'
]


In [40]:
#Advanced Skill Extraction Using Regex

def extract_skills_advanced(text, skills):
    found = set()
    for skill in skills:
        pattern = r'\b' + skill + r'\b'
        if re.search(pattern, text):
            found.add(skill)
    return found


In [41]:
resume_skills = extract_skills_advanced(clean_resume, skill_set)
job_skills = extract_skills_advanced(clean_job, skill_set)

print("Resume Skills:", resume_skills)
print("Job Skills:", job_skills)


Resume Skills: {'linux', 'github', 'c', 'css', 'mysql', 'docker', 'git', 'html', 'java', 'javascript', 'react', 'python'}
Job Skills: {'sql', 'docker', 'aws', 'machine learning', 'nlp', 'python'}


In [42]:
#Improved Skill Comparison
matched_skills = resume_skills & job_skills
missing_skills = job_skills - resume_skills

print("Matched Skills:", matched_skills)
print("Missing Skills:", missing_skills)


Matched Skills: {'python', 'docker'}
Missing Skills: {'nlp', 'sql', 'aws', 'machine learning'}


In [43]:
print("===== ATS RESULT =====")
print(f"Match Score: {match_percentage:.2f}%")
print(f"Matched Skills ({len(matched_skills)}):", ', '.join(matched_skills))
print(f"Missing Skills ({len(missing_skills)}):", ', '.join(missing_skills))


===== ATS RESULT =====
Match Score: 5.98%
Matched Skills (2): python, docker
Missing Skills (4): nlp, sql, aws, machine learning


In [44]:
#Resume Improvement Suggestions
if missing_skills:
    print("\n Resume Improvement Suggestions:")
    for skill in missing_skills:
        print(f"- Consider adding experience or projects related to: {skill}")
else:
    print("\n✅ Your resume matches all required skills!")



 Resume Improvement Suggestions:
- Consider adding experience or projects related to: nlp
- Consider adding experience or projects related to: sql
- Consider adding experience or projects related to: aws
- Consider adding experience or projects related to: machine learning


In [45]:
# next lvl

In [60]:
from ats_engine import calculate_ats_score


In [61]:
result = calculate_ats_score(
    resume_text,
    job_description,
    skill_set
)

print("========= ATS FINAL REPORT =========")
print(f"Semantic Match (TF-IDF): {result['semantic_score']}%")
print(f"Skill Match Score: {result['skill_score']}%")
print(f"FINAL ATS SCORE: {result['final_score']}%\n")

print(f"Matched Skills ({len(result['matched_skills'])}):")
print(", ".join(result['matched_skills']))

print(f"\nMissing Skills ({len(result['missing_skills'])}):")
print(", ".join(result['missing_skills']))


Semantic Match (TF-IDF): 5.98%
Skill Match Score: 33.33%
FINAL ATS SCORE: 14.19%

Matched Skills (2):
python, docker

Missing Skills (4):
nlp, sql, aws, machine learning


In [62]:
from ats_engine import extract_text_from_pdf, calculate_ats_score
import os


In [53]:
#Load & Score All Resumes
resume_folder = "/content"

results = []

for file in os.listdir(resume_folder):
    if file.endswith(".pdf"):
        with open(os.path.join(resume_folder, file), "rb") as f:
            resume_text = extract_text_from_pdf(f)
            score = calculate_ats_score(
                resume_text,
                job_description,
                skill_set
            )
            score["candidate"] = file
            results.append(score)


In [63]:
#Rank Candidates
ranked_candidates = sorted(
    results,
    key=lambda x: x["final_score"],
    reverse=True
)


In [64]:
#
print("======= CANDIDATE RANKING =======")

for i, candidate in enumerate(ranked_candidates, start=1):
    print(f"\nRank {i}: {candidate['candidate']}")
    print(f"Final ATS Score: {candidate['final_score']}%")
    print(f"Matched Skills: {', '.join(candidate['matched_skills'])}")



Rank 1: Mahmud Hasan Shanto_FullStack.pdf
Final ATS Score: 14.19%
Matched Skills: python, docker

Rank 2: Resume.pdf
Final ATS Score: 3.6%
Matched Skills: 


In [56]:
#Shortlist Top N Candidates
TOP_N = 2

shortlisted = ranked_candidates[:TOP_N]

print("\n======= SHORTLISTED CANDIDATES =======")

for c in shortlisted:
    print(f"- {c['candidate']} ({c['final_score']}%)")



- Mahmud Hasan Shanto_FullStack.pdf (14.19%)
- Resume.pdf (3.6%)


In [57]:
# for a web app
!pip install streamlit pyngrok


Collecting streamlit
  Downloading streamlit-1.52.2-py3-none-any.whl.metadata (9.8 kB)
Collecting pyngrok
  Downloading pyngrok-7.5.0-py3-none-any.whl.metadata (8.1 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.52.2-py3-none-any.whl (9.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.0/9.0 MB[0m [31m24.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.5.0-py3-none-any.whl (24 kB)
Downloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m61.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyngrok, pydeck, streamlit
Successfully installed pydeck-0.9.1 pyngrok-7.5.0 streamlit-1.52.2


In [58]:
!streamlit run app.py &>/content/logs.txt &


In [None]:
from pyngrok import ngrok
public_url = ngrok.connect(8501)
public_url