In [1]:
!apt-get install tesseract-ocr
!pip install pytesseract pillow nltk scikit-learn

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
tesseract-ocr is already the newest version (4.1.1-2.1build1).
0 upgraded, 0 newly installed, 0 to remove and 2 not upgraded.
Collecting pytesseract
  Downloading pytesseract-0.3.13-py3-none-any.whl.metadata (11 kB)
Downloading pytesseract-0.3.13-py3-none-any.whl (14 kB)
Installing collected packages: pytesseract
Successfully installed pytesseract-0.3.13


In [2]:
import pytesseract
from PIL import Image
import nltk
import re

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
nltk.download('stopwords')
from nltk.corpus import stopwords

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [4]:
from google.colab import files

# Open Colab's upload dialog; the returned mapping is keyed by uploaded filename.
uploaded = files.upload()

# Fail with an actionable message when nothing was uploaded (e.g. the dialog
# was dismissed) instead of hitting an IndexError on an empty key list.
if not uploaded:
    raise ValueError(
        "No file was uploaded. Re-run this cell and select a resume image."
    )

# Only the first uploaded file is used as the resume image.
image_path = next(iter(uploaded))

Saving Simple-Fresher-Resume-Template1.jpg to Simple-Fresher-Resume-Template1.jpg


In [5]:
def extract_text_from_image(image_path):
    """Run Tesseract OCR on an image file and return the recognized text.

    Args:
        image_path: Location of the image, in any form accepted by
            ``PIL.Image.open``.

    Returns:
        The string produced by ``pytesseract.image_to_string`` for the image.
    """
    # Use the image as a context manager so its underlying file handle is
    # released as soon as OCR finishes rather than whenever the object is
    # eventually garbage-collected.
    with Image.open(image_path) as img:
        return pytesseract.image_to_string(img)

In [6]:
def clean_text(text, stop_words=None):
    """Normalize text for keyword comparison.

    The text is lower-cased, every run of non-word characters is replaced by
    a single space, and stop words are removed.

    Args:
        text: Raw text to normalize.
        stop_words: Optional iterable of lower-case words to drop. When
            omitted, NLTK's English stop-word list is used (the ``stopwords``
            corpus must already be downloaded).

    Returns:
        The remaining tokens joined by single spaces.
    """
    if stop_words is None:
        # Fetch the list once per call; previously it was fetched again for
        # every token and searched as a list.
        stop_words = stopwords.words('english')
    # A set gives constant-time membership checks in the filter below.
    stop_words = set(stop_words)

    tokens = re.sub(r'\W+', ' ', text.lower()).split()
    return ' '.join(token for token in tokens if token not in stop_words)

In [7]:
def resume_score(resume, job_desc):
    """Score how closely a resume matches a job description.

    Both texts are vectorized together with TF-IDF and compared using cosine
    similarity.

    Args:
        resume: Cleaned resume text.
        job_desc: Cleaned job-description text.

    Returns:
        The similarity as a percentage, rounded to two decimals, as a
        built-in ``float``. Returns ``0.0`` when either text is blank.

    Raises:
        ValueError: Propagated from ``TfidfVectorizer`` if it cannot build a
            vocabulary from non-blank input (presumably when neither text
            contains a token it recognizes; confirm against scikit-learn).
    """
    # A blank document shares no terms with anything. Returning early also
    # avoids the vectorizer's empty-vocabulary error when both texts are blank.
    if not resume.strip() or not job_desc.strip():
        return 0.0

    tfidf_matrix = TfidfVectorizer().fit_transform([resume, job_desc])

    # Row 0 is the resume, row 1 the job description; the result is a 1x1 matrix.
    similarity = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])

    # Cast to a built-in float so callers get a plain number, not a NumPy scalar.
    return round(float(similarity[0][0]) * 100, 2)

In [8]:
def missing_keywords(resume, job_desc, limit=15):
    """List job-description words that do not appear in the resume.

    Args:
        resume: Cleaned resume text (whitespace-separated tokens).
        job_desc: Cleaned job-description text (whitespace-separated tokens).
        limit: Maximum number of keywords to return; ``None`` returns all of
            them. Defaults to 15.

    Returns:
        Unique missing words, in order of first appearance in ``job_desc``.
    """
    resume_words = set(resume.split())

    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # selection and ordering are reproducible across runs (slicing a set of
    # strings is not).
    absent = [word for word in dict.fromkeys(job_desc.split())
              if word not in resume_words]

    return absent[:limit]

In [9]:
# OCR the uploaded resume image, then normalize the recognized text with
# clean_text so it can be compared against the job description.
resume_text = extract_text_from_image(image_path)
resume_clean = clean_text(resume_text)

# NOTE(review): this message is printed unconditionally; nothing here checks
# that OCR actually recognized any text.
print("‚úÖ Resume extracted from image successfully!")

‚úÖ Resume extracted from image successfully!


In [10]:
# Job posting the resume is scored against; edit this text to target another role.
job_description = """
Looking for a Python developer with experience in machine learning,
data analysis, NLP, scikit-learn, and resume parsing.
"""

# Normalize with the same clean_text step applied to the resume so both texts
# are compared in the same form.
job_clean = clean_text(job_description)

In [11]:
# Compute the match percentage and the job keywords absent from the resume.
score = resume_score(resume_clean, job_clean)
missing = missing_keywords(resume_clean, job_clean)

print("üìä ATS Resume Score:", score, "%")

# One bullet per missing keyword.
print("\n‚ùå Missing Skills / Keywords:")
for word in missing:
    print("-", word)

# Verdict bands: 75 and above is a strong match, 50 up to 75 needs work,
# anything lower is reported as unsuitable.
if score >= 75:
    print("\n‚úÖ Resume is highly suitable for this job.")
elif score >= 50:
    print("\n‚ö†Ô∏è Resume needs some improvements.")
else:
    print("\n‚ùå Resume is not suitable for this job role.")

üìä ATS Resume Score: 1.14 %

‚ùå Missing Skills / Keywords:
- learning
- resume
- nlp
- machine
- parsing
- python
- data
- scikit
- developer
- analysis
- experience
- looking

‚ùå Resume is not suitable for this job role.
