<a href="https://colab.research.google.com/github/21WH1A1243-USHA/Event_management/blob/main/test_candidate.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Place this at the top of your notebook in a separate cell
from IPython.display import display, Javascript

def run_all():
    """Display a JavaScript snippet that calls ``google.colab.kernel.executeCells([1])``."""
    # Index 1 targets the second cell, since the indices are zero-based.
    trigger = Javascript('google.colab.kernel.executeCells([1])')
    display(trigger)

# Fire the request as soon as this cell is executed.
run_all()


In [None]:
!pip install pdfplumber
import logging
logging.getLogger("pdfminer").setLevel(logging.ERROR)
import pdfplumber
from google.colab import files
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import spacy
import re

# Downloads
# 'punkt_tab' is fetched alongside 'punkt': the recorded run of this notebook
# shows it being downloaded, and word_tokenize is used below.
for resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet'):
    nltk.download(resource)
import spacy.cli

# Load SpaCy and Lemmatizer
# Try the installed model first and only download when it is missing, so a
# re-run of the cell does not reinstall the package every time.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    spacy.cli.download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")
lemmatizer = WordNetLemmatizer()

# Known Skills List
# Lower-case skill names that the extraction helpers test tokens against.
# Each skill appears exactly once; lookups are plain membership tests.
KNOWN_SKILLS = [
    'java', 'python', 'sql', 'html', 'css', 'javascript', 'c', 'c++', 'ruby', 'r',
    'data analysis', 'machine learning', 'deep learning', 'ai', 'data science', 'cybersecurity', 'cloud computing',
    'devops', 'etl', 'software development', 'project management', 'networking', 'database', 'linux', 'windows',
    'cloud', 'docker', 'kubernetes', 'sql server', 'business intelligence', 'aws', 'azure', 'git'
]

# Extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF as a single string.

    Args:
        pdf_file: Passed unchanged to ``pdfplumber.open``.

    Returns:
        The per-page texts joined with newlines. A page whose
        ``extract_text()`` yields nothing contributes an empty string.
    """
    with pdfplumber.open(pdf_file) as pdf:
        # Separate pages with a newline so the last word of one page is not
        # glued onto the first word of the next.
        return "\n".join(page.extract_text() or '' for page in pdf.pages)

# Preprocess text
def preprocess_text(text):
    """Normalise *text* into a space-separated string of lemmatised words.

    Whitespace runs are collapsed, the text is lower-cased and tokenised,
    and only purely alphabetic tokens that are not English stop words are
    kept (each passed through the module-level lemmatizer).

    Args:
        text: Raw text to normalise.

    Returns:
        The surviving lemmas joined by single spaces.
    """
    text = re.sub(r'\s+', ' ', text)
    # Build the stop-word set once; evaluating stopwords.words() inside the
    # filter repeats the call for every token.
    stop_words = set(stopwords.words('english'))
    tokens = word_tokenize(text.lower())
    kept = [
        lemmatizer.lemmatize(word)
        for word in tokens
        if word.isalpha() and word not in stop_words
    ]
    return ' '.join(kept)

# Extract noun chunks as keywords
def extract_keywords(text):
    """Return the distinct lower-cased noun chunks of *text*.

    Chunks whose text is a single character are skipped. The list is built
    from a set, so its order carries no meaning.
    """
    parsed = nlp(text)
    unique_chunks = set()
    for chunk in parsed.noun_chunks:
        phrase = chunk.text
        if len(phrase) > 1:
            unique_chunks.add(phrase.lower())
    return list(unique_chunks)

# Detect career gaps
# Matches explicit spans such as "2010-2015", "2010 – 2015", "2010 to Dec 2015"
# or "01/2010 - 12/2015": start year, a dash or the word "to", an optional
# month (name or number), then the end year.
_YEAR_SPAN_RE = re.compile(
    r'\b((?:19|20)\d{2})\s*(?:[-\u2013\u2014]|\bto\b)\s*'
    r'(?:[A-Za-z]{3,9}\.?\s+|\d{1,2}/)?((?:19|20)\d{2})\b'
)


def detect_career_gaps(text):
    """Return the years after which more than one year passes unmentioned.

    Every standalone four-digit year from 1900 to 2099 in *text* counts as
    covered. Years inside an explicit span ("2010 - 2015") are covered too,
    so a continuous multi-year role is not reported as a gap. Open-ended
    spans such as "2018 - Present" are not expanded.

    Args:
        text: Free text to scan for years.

    Returns:
        A sorted list of covered years whose next covered year is more than
        one year later. Empty when fewer than two years are found.
    """
    covered = set(map(int, re.findall(r'\b(19\d{2}|20\d{2})\b', text)))
    for start, end in _YEAR_SPAN_RE.findall(text):
        # sorted() tolerates spans written newest-first ("2015 - 2010").
        first, last = sorted((int(start), int(end)))
        covered.update(range(first, last + 1))
    years = sorted(covered)
    return [earlier for earlier, later in zip(years, years[1:]) if later - earlier > 1]

# Extract actual skill tokens from text
def extract_skills(text):
    """Return the distinct noun / proper-noun tokens of *text* found in KNOWN_SKILLS.

    The text is lower-cased and stripped of punctuation before it is tagged.
    The list is built from a set, so its order carries no meaning.
    """
    normalized = re.sub(r'[^\w\s]', '', text.lower())
    matches = set()
    for token in nlp(normalized):
        if token.pos_ not in ('NOUN', 'PROPN'):
            continue
        if token.text in KNOWN_SKILLS:
            matches.add(token.text)
    return list(matches)

# Upload and process files
import io

print("Upload Resume (PDF)")
resume_file = files.upload()
if not resume_file:
    raise ValueError("No resume was uploaded.")
resume_key = next(iter(resume_file))
# Parse the uploaded bytes rather than reopening the file by name: Colab can
# save a re-upload under a different name ("Saving X.pdf to X (3).pdf"), so a
# name-based open is not guaranteed to read the file that was just chosen.
resume_text = extract_text_from_pdf(io.BytesIO(resume_file[resume_key]))

print("Upload Job Description (PDF)")
jd_file = files.upload()
if not jd_file:
    raise ValueError("No job description was uploaded.")
jd_key = next(iter(jd_file))
jd_text = extract_text_from_pdf(io.BytesIO(jd_file[jd_key]))

# Cosine Similarity Score
# Both documents get the same preprocessing and are vectorised together, so
# row 0 (resume) and row 1 (job description) share one TF-IDF vocabulary.
resume_processed, jd_processed = map(preprocess_text, (resume_text, jd_text))
vectorizer = TfidfVectorizer()
corpus = [resume_processed, jd_processed]
vectors = vectorizer.fit_transform(corpus)
resume_vector, jd_vector = vectors[0:1], vectors[1:2]
similarity = cosine_similarity(resume_vector, jd_vector)[0][0]
fit_score = round(similarity, 2)
print(f"\nJob Fit Score: {fit_score}")

# Skill and keyword extraction
resume_keywords = set(extract_keywords(resume_text))
jd_keywords = set(extract_keywords(jd_text))
matched_keywords = resume_keywords.intersection(jd_keywords)
# Sort before truncating: slicing a set's arbitrary iteration order would
# pick a different five keywords from one run to the next.
top_keywords = sorted(matched_keywords)[:5]
skills_string = ', '.join(top_keywords)

# Detect red flags
# An empty result from detect_career_gaps means nothing to report.
career_gaps = detect_career_gaps(resume_text)
if not career_gaps:
    print("\nRed Flag Detection: No significant gaps found.")
else:
    print("\nRed Flag Detected: Career gaps found around years:", career_gaps)

# Generate personalized CV
# Fall back to a generic phrase when the resume and the job description share
# no keywords, so the template sentences stay grammatical.
skills_phrase = skills_string or "the skills your role calls for"
personalized_cv = f"""
\nPersonalized CV:

Dear Hiring Manager,

I am writing to express my keen interest in the position at your organization. My background and experiences align well with your job requirements. I have hands-on experience in {skills_phrase}, which are highlighted as key areas in your job description.

Throughout my previous roles, I have effectively used {skills_phrase} to contribute meaningfully to team projects and organizational goals. These skills have helped me deliver results and handle challenges efficiently.

I am excited about the opportunity to apply my expertise and continue growing in a dynamic work environment. Thank you for considering my application.

Sincerely,
[Your Name]
"""

# The letter is printed with the matched keywords left in place. Stripping
# them back out with a regex removed the very terms the template had just
# inserted (and any boilerplate word that happened to match), which left
# sentences such as "I have hands-on experience in which are highlighted".
cv_cleaned = personalized_cv

print(cv_cleaned.strip())

# Skill Gap Detection
def clean_text_for_skills(text):
    """Lower-case *text* and reduce it to words separated by single spaces.

    Every character that is neither a word character nor whitespace is
    replaced by a space, so words joined only by punctuation
    ("Python/Java,SQL") stay separate instead of fusing into one token.

    Args:
        text: Raw text to normalise.

    Returns:
        The normalised string. Leading or trailing whitespace collapses to a
        single space rather than being stripped.
    """
    # Replace punctuation first, then collapse, so the spaces introduced by
    # the replacement are folded together with the existing whitespace.
    text = re.sub(r'[^\w\s]', ' ', text)
    text = re.sub(r'\s+', ' ', text)
    return text.lower()

def extract_known_skills(text):
    """Return the set of KNOWN_SKILLS entries that occur in *text*.

    A skill counts as present when it equals the lemma of a non-stop-word
    token, or when it appears verbatim as a whole-word phrase in the
    tokenised text. The phrase check is what lets multi-word skills
    ("machine learning") match, and it also catches names the lemmatizer
    may alter (presumably plural-looking ones such as "windows").

    Entries containing punctuation (e.g. "c++") still cannot match, because
    clean_text_for_skills removes punctuation before tokenising.

    Args:
        text: Raw resume or job-description text.

    Returns:
        A set of skill names taken from KNOWN_SKILLS.
    """
    text = clean_text_for_skills(text)
    tokens = word_tokenize(text)
    # Build the stop-word set once instead of once per token.
    stop_words = set(stopwords.words('english'))
    lemmas = {lemmatizer.lemmatize(word) for word in tokens if word not in stop_words}
    # Pad with spaces so a phrase lookup only ever matches whole words.
    padded = ' ' + ' '.join(tokens) + ' '
    return {
        skill
        for skill in KNOWN_SKILLS
        if skill in lemmas or f' {skill} ' in padded
    }

# Skills named in the job description that the resume does not mention.
resume_skills = extract_known_skills(resume_text)
jd_skills = extract_known_skills(jd_text)
missing_skills = jd_skills.difference(resume_skills)

if not missing_skills:
    print("\nNo skill gaps found. The resume covers all skills in the job description.")
else:
    print(f"\nBased on the job description, the following skills are missing from the resume: {', '.join(sorted(missing_skills))}.\nIt is recommended to include these skills to better match the job requirements.")




[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.
Upload Resume (PDF)


Saving 10554236.pdf to 10554236 (3).pdf
Upload Job Description (PDF)


Saving QiIdOtq6tp.pdf to QiIdOtq6tp (3).pdf

Job Fit Score: 0.25

Red Flag Detected: Career gaps found around years: [1995, 2000, 2005]
Personalized CV:

Dear Hiring Manager,

I am writing to express my keen interest in the position at your organization. My background and experiences align well with your job requirements. I have hands-on experience in which are highlighted as key areas in your job description.

Throughout my previous roles, I have effectively used to contribute meaningfully to team projects and organizational goals. These skills have helped me deliver results and handle challenges efficiently.

I am excited about the opportunity to apply my expertise and continue growing in a dynamic work environment. Thank you for considering my application.

Sincerely,
[Your Name]

No skill gaps found. The resume covers all skills in the job description.
