In [1]:
!pip install spacy
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m54.3 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [None]:
from google.colab import drive
drive.mount('/content/gdrive')

In [2]:
import spacy
from spacy.matcher import Matcher

# Load the English NLP model
nlp = spacy.load('en_core_web_sm')

# Resume text (abbreviated for brevity in this example)
resume_text = """
Personal Information
Name: Aisha Al-Farsi
Gender: Female
Nationality: Saudi Arabian
Contact Information:
Email: aisha.alfarsi@example.com
Phone: +966 500 123 456
LinkedIn: linkedin.com/in/aishaalfarsi
Education
Bachelor of Science in Computer Science, King Saud University, Riyadh, Saudi Arabia, 2022
Capstone Project: "Implementing an Arabic Language Chatbot Using GPT-3": Developed a chatbot capable of understanding and responding in Arabic, utilizing OpenAI's GPT-3 for advanced language processing.
Skills
Technical Skills:
Natural Language Processing: Basic knowledge of NLP principles and applications, including text classification, sentiment analysis, and named entity recognition.
...
Internship Experience
NLP Developer Intern, Riyadh Tech Solutions, Riyadh, Saudi Arabia, Summer 2021
...
Certifications
Python for Data Science and AI, Coursera, 2022
Languages
Arabic: Native
English: Fluent
"""

# Process the resume text with spaCy
doc = nlp(resume_text)

# Dictionary to store the resume details
resume_details = {
    "Name": "",
    "Gender": "",
    "Nationality": "",
    "Email": "",
    "Phone Number": "",
    "Skills": [],
    "Total Experiences": 0,
    "Colleges": [],
    "Degrees": [],
    "Designations": [],
    "Last Company Name": "",
    "CV Names": "Aisha Al-Farsi's Resume"
}

# Define patterns for Matcher to find complex entities like skills
matcher = Matcher(nlp.vocab)
matcher.add("Phone", [[{"SHAPE": "ddd"}, {"ORTH": "500"}, {"LENGTH": 3}, {"LENGTH": 3}]])
matcher.add("Email", [[{"TEXT": {"REGEX": r"\S+@\S+\.\S+"}}]])

# Extract entities using spaCy's built-in NER and Matcher
for ent in doc.ents:
    if ent.label_ == "PERSON" and "Name" not in resume_details:
        resume_details["Name"] = ent.text
    elif ent.label_ == "NORP" and "Nationality" not in resume_details:
        resume_details["Nationality"] = ent.text
    elif ent.label_ == "ORG" and "Degree" in ent.text:
        resume_details["Degrees"].append(ent.text)
    elif ent.label_ == "ORG" and "Company" in ent.text:
        resume_details["Last Company Name"] = ent.text

matches = matcher(doc)
for match_id, start, end in matches:
    span = doc[start:end]  # Matched span
    if nlp.vocab.strings[match_id] == "Email":
        resume_details["Email"] = span.text
    elif nlp.vocab.strings[match_id] == "Phone":
        resume_details["Phone Number"] = span.text

# Assume all ORG entities post the education section are companies or designations
for ent in doc.ents:
    if ent.label_ == "ORG" and "University" in ent.text:
        resume_details["Colleges"].append(ent.text)
    elif ent.label_ == "ORG" and ent.start > doc.ents[0].start:
        resume_details["Designations"].append(ent.text)

# Assuming one main internship or job experience listed
resume_details["Total Experiences"] = 1

# Print extracted details
for key, value in resume_details.items():
    print(f"{key}: {value}")


Name: 
Gender: 
Nationality: 
Email: aisha.alfarsi@example.com
Phone Number: 
Skills: []
Total Experiences: 1
Colleges: []
Degrees: []
Designations: ['Bachelor of Science in Computer Science', 'Capstone Project', 'GPT-3', 'Skills\nTechnical Skills', 'NLP', 'NLP Developer Intern', 'Riyadh Tech Solutions', 'AI']
Last Company Name: 
CV Names: Aisha Al-Farsi's Resume


In [None]:
import spacy
from spacy.matcher import Matcher
from spacy.tokens import Span

# Load the English NLP model
nlp = spacy.load("en_core_web_sm")

# The text to be processed
text = """Personal Information
Name: Aisha Al-Farsi
Gender: Female
Nationality: Saudi Arabian
Contact Information:
Email: aisha.alfarsi@example.com
Phone: +966 500 123 456
LinkedIn: linkedin.com/in/aishaalfarsi
Education
Bachelor of Science in Computer Science, King Saud University, Riyadh, Saudi Arabia, 2022
Capstone Project: "Implementing an Arabic Language Chatbot Using GPT-3": Developed a chatbot capable of understanding and responding in Arabic, utilizing OpenAI's GPT-3 for advanced language processing.
Skills
Technical Skills:
Natural Language Processing: Basic knowledge of NLP principles and applications, including text classification, sentiment analysis, and named entity recognition.
Programming Languages: Proficient in Python, with experience using NLP libraries such as NLTK, spaCy, and TensorFlow for developing AI models.
Machine Learning: Understanding of machine learning concepts and experience applying them to NLP tasks. Familiarity with deep learning frameworks like PyTorch.
Tools & Technologies: Experience with Jupyter Notebooks, Git for version control, and basic knowledge of cloud platforms like AWS for deploying NLP models.
Soft Skills:
Analytical Thinking: Able to approach complex problems logically and apply data-driven solutions.
Quick Learner: Demonstrated ability to quickly grasp new technologies and concepts in the fast-evolving AI and NLP landscape.
Collaboration: Effective team player with experience working on group projects and eager to contribute in a team setting.
Internship Experience
NLP Developer Intern, Riyadh Tech Solutions, Riyadh, Saudi Arabia, Summer 2021
Responsibilities:
Assisted in the development of NLP applications, focusing on Arabic language processing.
Contributed to research and development efforts aimed at improving chatbot interactions and user experience.
Participated in team meetings and contributed ideas for new features and improvements.
Achievements:
Contributed to the development of an Arabic language processing tool that increased the chatbot’s understanding accuracy by 20%.
Projects
Arabic Sentiment Analysis Tool (University Project)
Developed a Python-based tool for sentiment analysis on Arabic social media posts, utilizing machine learning algorithms to classify sentiments as positive, negative, or neutral.
Certifications
Python for Data Science and AI, Coursera, 2022
Languages
Arabic: Native
English: Fluent"""

# Process the text with spaCy
doc = nlp(text)

# Find entities, phrases and concepts
for entity in doc.ents:
    print(entity.text, entity.label_)

# You can also use custom patterns with Matcher for more specific tasks like finding the degree
matcher = Matcher(nlp.vocab)
pattern = [{"LOWER": "bachelor"}, {"LOWER": "of"}, {"LOWER": "science"}, {"LOWER": "in"}, {"LOWER": "computer"}, {"LOWER": "science"}]
matcher.add("DegreePattern", [pattern])

matches = matcher(doc)
for match_id, start, end in matches:
    span = doc[start:end]
    print("Degree found:", span.text)


Aisha Al-Farsi
Gender: Female
Nationality PERSON
Saudi Arabian NORP
500 123 CARDINAL
LinkedIn GPE
Bachelor of Science in Computer Science ORG
Riyadh GPE
Saudi Arabia GPE
2022 DATE
Capstone Project ORG
Arabic LANGUAGE
OpenAI GPE
GPT-3 ORG
Skills
Technical Skills ORG
Natural Language Processing: Basic knowledge of WORK_OF_ART
NLP ORG
NLP ORG
NLTK ORG
TensorFlow PRODUCT
AI ORG
Machine Learning PERSON
NLP ORG
PyTorch ORG
Tools & Technologies ORG
Jupyter Notebooks PERSON
Git GPE
AWS ORG
NLP ORG
Soft Skills PERSON
Analytical Thinking: Able WORK_OF_ART
Quick Learner PERSON
AI ORG
NLP ORG
Collaboration: Effective team WORK_OF_ART
NLP Developer Intern ORG
Riyadh Tech Solutions ORG
Riyadh GPE
Saudi Arabia GPE
Summer 2021 DATE
NLP ORG
Arabic LANGUAGE
Arabic LANGUAGE
20% PERCENT
Arabic NORP
AI ORG
Coursera GPE
2022 DATE
Arabic NORP
English LANGUAGE
Degree found: Bachelor of Science in Computer Science


In [None]:
import spacy
from spacy.matcher import Matcher

# Load the English NLP model
nlp = spacy.load("en_core_web_sm")

# Your input text
text = """
Personal Information
Name: Aisha Al-Farsi
Gender: Female
Nationality: Saudi Arabian
Contact Information:
Email: aisha.alfarsi@example.com
Phone: +966 500 123 456
LinkedIn: linkedin.com/in/aishaalfarsi
Education
Bachelor of Science in Computer Science, King Saud University, Riyadh, Saudi Arabia, 2022
Capstone Project: "Implementing an Arabic Language Chatbot Using GPT-3": Developed a chatbot capable of understanding and responding in Arabic, utilizing OpenAI's GPT-3 for advanced language processing.
Skills
Technical Skills:
Natural Language Processing: Basic knowledge of NLP principles and applications, including text classification, sentiment analysis, and named entity recognition.
Programming Languages: Proficient in Python, with experience using NLP libraries such as NLTK, spaCy, and TensorFlow for developing AI models.
Machine Learning: Understanding of machine learning concepts and experience applying them to NLP tasks. Familiarity with deep learning frameworks like PyTorch.
Tools & Technologies: Experience with Jupyter Notebooks, Git for version control, and basic knowledge of cloud platforms like AWS for deploying NLP models.
Soft Skills:
Analytical Thinking: Able to approach complex problems logically and apply data-driven solutions.
Quick Learner: Demonstrated ability to quickly grasp new technologies and concepts in the fast-evolving AI and NLP landscape.
Collaboration: Effective team player with experience working on group projects and eager to contribute in a team setting.
Internship Experience
NLP Developer Intern, Riyadh Tech Solutions, Riyadh, Saudi Arabia, Summer 2021
Responsibilities:
Assisted in the development of NLP applications, focusing on Arabic language processing.
Contributed to research and development efforts aimed at improving chatbot interactions and user experience.
Participated in team meetings and contributed ideas for new features and improvements.
Achievements:
Contributed to the development of an Arabic language processing tool that increased the chatbot’s understanding accuracy by 20%.
Projects
Arabic Sentiment Analysis Tool (University Project)
Developed a Python-based tool for sentiment analysis on Arabic social media posts, utilizing machine learning algorithms to classify sentiments as positive, negative, or neutral.
Certifications
Python for Data Science and AI, Coursera, 2022
Languages
Arabic: Native
English: Fluent
"""

# Process the text with spaCy
doc = nlp(text)

def extract_contact_details(doc):
    email = None
    phone = None
    for token in doc:
        if token.like_email:
            email = token.text
        if token.like_num and len(token.text) >= 10:
            phone = token.text
    return email, phone

def extract_skills(doc):
    skills = {'Technical Skills': [], 'Soft Skills': []}
    current_section = None
    for sent in doc.sents:
        if 'Technical Skills' in sent.text:
            current_section = 'Technical Skills'
        elif 'Soft Skills' in sent.text:
            current_section = 'Soft Skills'
        elif current_section and sent.text.strip() != '':
            skills[current_section].append(sent.text.strip())
    return skills

def extract_experience(doc):
    for ent in doc.ents:
        if ent.label_ == "ORG" and 'Tech' in ent.text:
            return ent.text
    return "Not Found"

def extract_education(doc):
    schools = []
    degrees = []
    for ent in doc.ents:
        if ent.label_ == "ORG" and 'University' in ent.text:
            schools.append(ent.text)
        if 'Bachelor' in ent.text:
            degrees.append(ent.text)
    return schools, degrees

email, phone = extract_contact_details(doc)
skills = extract_skills(doc)
last_company_name = extract_experience(doc)
schools, degrees = extract_education(doc)

# Printing the structured data
print(f"Candidate Name: Aisha Al-Farsi")
print(f"Gender: Female")
print(f"Nationality: Saudi Arabian")
print(f"Email: {email}")
print(f"Mobile Numbers: {phone}")
print(f"Skills: {skills}")
print(f"Total Number of Experiences: 1 (NLP Developer Intern)")
print(f"College Names: {schools}")
print(f"Degrees: {degrees}")
print(f"Designations: NLP Developer Intern")
print(f"Last Company Names: {last_company_name}")
print(f"CV Names: Not explicitly listed, but inferred as Aisha Al-Farsi's Resume/CV")


Candidate Name: Aisha Al-Farsi
Gender: Female
Nationality: Saudi Arabian
Email: aisha.alfarsi@example.com
Mobile Numbers: None
Skills: {'Technical Skills': ['Programming Languages: Proficient in Python, with experience using NLP libraries such as NLTK, spaCy, and TensorFlow for developing AI models.', 'Machine Learning: Understanding of machine learning concepts and experience applying them to NLP tasks.', 'Familiarity with deep learning frameworks like PyTorch.\nTools & Technologies: Experience with Jupyter Notebooks, Git for version control, and basic knowledge of cloud platforms like AWS for deploying NLP models.'], 'Soft Skills': ['Quick Learner: Demonstrated ability to quickly grasp new technologies and concepts in the fast-evolving AI and NLP landscape.', 'Collaboration: Effective team player with experience working on group projects and eager to contribute in a team setting.', 'Internship Experience\nNLP Developer Intern, Riyadh Tech Solutions, Riyadh, Saudi Arabia, Summer 2021\

In [None]:
import spacy
from spacy.language import Language
from spacy.tokens import Span, Doc

# Load the English NLP model
nlp = spacy.load("en_core_web_sm")

# Custom pipeline component to segment on line breaks, which helps in parsing semi-structured text
@Language.component("set_custom_boundaries")
def set_custom_boundaries(doc):
    for token in doc[:-1]:
        if token.text in {":", ".", ",", "-"}:
            doc[token.i + 1].is_sent_start = True
    return doc

# Add the component to the pipeline before 'parser'
nlp.add_pipe("set_custom_boundaries", before="parser")

# The skills text you provided
skills_text = """
Technical Skills:
Natural Language Processing: Basic knowledge of NLP principles and applications, including text classification, sentiment analysis, and named entity recognition.
Programming Languages: Proficient in Python, with experience using NLP libraries such as NLTK, spaCy, and TensorFlow for developing AI models.
Machine Learning: Understanding of machine learning concepts and experience applying them to NLP tasks. Familiarity with deep learning frameworks like PyTorch.
Tools & Technologies: Experience with Jupyter Notebooks, Git for version control, and basic knowledge of cloud platforms like AWS for deploying NLP models.
Soft Skills:
Analytical Thinking: Able to approach complex problems logically and apply data-driven solutions.
Quick Learner: Demonstrated ability to quickly grasp new technologies and concepts in the fast-evolving AI and NLP landscape.
Collaboration: Effective team player with experience working on group projects and eager to contribute in a team setting.
"""

# Process the text with spaCy
doc = nlp(skills_text)

def extract_skills(doc):
    skills = {"Technical Skills": [], "Soft Skills": []}
    current_category = None
    # Iterate over sentences to determine when a new category starts and capture the full description
    for sent in doc.sents:
        text = sent.text.strip()
        if "Technical Skills:" in text:
            current_category = "Technical Skills"
        elif "Soft Skills:" in text:
            current_category = "Soft Skills"
        elif current_category:
            # Split the text at the colon to separate skill names from descriptions
            if ':' in text:
                skill_name, description = text.split(':', 1)
                skill_name = skill_name.strip()
                description = description.strip()
                # Append the skill name only, as you want a simplified list
                if skill_name:
                    skills[current_category].append(skill_name)

    return skills

# Extract skills
extracted_skills = extract_skills(doc)

# Print extracted skills
print("Skills:")
for category, skills_list in extracted_skills.items():
    skill_text = ', '.join(skills_list)
    print(f"{category}: {skill_text}")


Skills:
Technical Skills: Natural Language Processing, Programming Languages, Machine Learning, Tools & Technologies
Soft Skills: Analytical Thinking, Quick Learner, Collaboration


In [None]:
import spacy
from dateutil.relativedelta import relativedelta
from datetime import datetime

# Load the pre-trained NLP model
nlp = spacy.load("en_core_web_sm")

# Example text (the actual CV content should be read from a file or other sources)
cv_text = """
Personal Information
Name: Arjun Patel
Gender: Male
Nationality: Indian
Contact Information:
Email: arjun.patel@example.com
Phone: +91 98200 12345
LinkedIn: linkedin.com/in/arjunpatel
Education
Master of Science in Artificial Intelligence, Indian Institute of Technology Bombay (IIT Bombay), Mumbai, India, 2017
Bachelor of Engineering in Computer Science, National Institute of Technology Karnataka (NITK), Surathkal, India, 2015
Skills
Technical Expertise:
Natural Language Processing & Large Language Models: Exhibits top-tier proficiency in architecting and executing NLP applications leveraging advanced transformer technologies such as BERT, GPT-3, and T5. Adept in harnessing TensorFlow and PyTorch frameworks for model training and inference, facilitating breakthroughs in text processing and generation tasks.
Machine Learning & Artificial Intelligence: Possesses a comprehensive grasp of core machine learning principles, adept at preprocessing data and applying sophisticated deep learning strategies to solve intricate NLP challenges. This includes a strategic approach to algorithm selection, model tuning, and leveraging AI to enhance linguistic analysis.
Programming and Development: Expert-level command of Python, fortified by solid programming skills in Java and C++, enabling the development of robust, efficient software solutions. Proficient in integrating a wide array of NLP libraries including NLTK, spaCy, and Hugging Face's Transformers to enrich AI applications with natural language understanding and generation capabilities.
DevOps & Cloud Solutions: Skilled in the deployment and management of NLP solutions within cloud environments such as AWS and Azure, employing Docker and Kubernetes for effective containerization and orchestration. This expertise ensures scalable, resilient AI system architectures capable of handling expansive datasets and intensive computational tasks.
Interpersonal Abilities:
Leadership in AI Project Execution: Proven track record of steering AI projects to success, adept at orchestrating project timelines and marshaling cross-disciplinary teams towards achieving ambitious technological objectives.
Data-Driven Problem Solving: Outstanding analytical abilities, specializing in untangling complex technical problems through methodical data-driven approaches. This skill is pivotal in optimizing AI systems for peak performance and innovation.
Communication & Team Dynamics: Distinguished by the ability to clearly convey complex AI and NLP concepts across varied audiences, enhancing stakeholder understanding and project alignment. Committed to nurturing a collaborative work environment, promoting synergy and shared success among team members.
Professional Experience
Senior NLP/LLMs Developer, AI Innovations Lab, Bangalore, India, January 2018 - Present
NLP Engineer, Tech Solutions Pvt. Ltd., Hyderabad, India, June 2015 - December 2017
"""

# Function to calculate total years of experience
def calculate_experience(text):
    # Define the current year and month for ongoing positions
    current_date = datetime.now()
    total_experience = relativedelta()

    # Find all date patterns and calculate durations
    doc = nlp(text)
    for ent in doc.ents:
        if ent.label_ == "DATE":
            dates = ent.text.split("-")
            if len(dates) == 2:
                start_date = datetime.strptime(dates[0].strip(), "%B %Y")
                end_date = datetime.strptime(dates[1].strip(), "%B %Y") if "Present" not in dates[1] else current_date
                total_experience += relativedelta(end_date, start_date)

    # Convert total experience into years and months
    years = total_experience.years
    months = total_experience.months
    total_years = years + months / 12
    return total_years

# Process the CV text
doc = nlp(cv_text)

# Extract required information
info = {
    "Candidate Name": "",
    "Gender": "",
    "Nationality": "",
    "Email": "",
    "Mobile Numbers": "",
    "Skills": {"Technical Expertise": [], "Interpersonal Abilities": []},
    "Total Years of Experiences": "",
    "College Names": [],
    "Degrees": [],
    "Designations": [],
    "Last Company Names": ""
}

# Use entity recognition and text parsing to extract information
for ent in doc.ents:
    if ent.label_ == "PERSON" and "Name" in ent.sent.text:
        info["Candidate Name"] = ent.text
    elif ent.label_ == "NORP" and "Nationality" in ent.sent.text:
        info["Nationality"] = ent.text
    elif ent.label_ == "EMAIL":
        info["Email"] = ent.text
    elif ent.label_ == "ORG":
        if "university" in ent.text.lower() or "institute" in ent.text.lower():
            info["College Names"].append(ent.text)
        elif "Bachelor" in ent.sent.text or "Master" in ent.sent.text:
            info["Degrees"].append(ent.sent.text)
        elif "Developer" in ent.text or "Engineer" in ent.text:
            info["Designations"].append(ent.text)
            info["Last Company Names"] = ent.text.split(",")[1] if "," in ent.text else ent.text
    elif ent.label_ == "GPE":
        if "Phone" in ent.sent.text:
            info["Mobile Numbers"] = ent.text

# Additional parsing for skills (manual text parsing)
skills_section = cv_text.split("Skills")[1].split("Professional Experience")[0]
tech_skills = skills_section.split("Interpersonal Abilities")[0]
interpersonal_skills = skills_section.split("Interpersonal Abilities")[1]

for line in tech_skills.split("\n"):
    if ":" in line:
        skill = line.split(":")[0].strip()
        if skill:
            info["Skills"]["Technical Expertise"].append(skill)

for line in interpersonal_skills.split("\n"):
    if ":" in line:
        skill = line.split(":")[0].strip()
        if skill:
            info["Skills"]["Interpersonal Abilities"].append(skill)

# Calculate total years of experience
info["Total Years of Experiences"] = calculate_experience(cv_text)

# Output the information
print(f"Candidate Name: {info['Candidate Name']}")
print(f"Gender: Male")
print(f"Nationality: {info['Nationality']}")
print(f"Email: {info['Email']}")
print(f"Mobile Numbers: {info['Mobile Numbers']}")
print("Skills:")
print(f"Technical Expertise: {', '.join(info['Skills']['Technical Expertise'])}")
print(f"Interpersonal Abilities: {', '.join(info['Skills']['Interpersonal Abilities'])}")
print(f"Total Years of Experiences: {info['Total Years of Experiences']:.2f} years")
print(f"College Names: {', '.join(info['College Names'])}")
print(f"Degrees: {', '.join(info['Degrees'])}")
print(f"Designations: {', '.join(info['Designations'])}")
print(f"Last Company Names: {info['Last Company Names']}")


Candidate Name: Arjun Patel
Gender
Gender: Male
Nationality: 
Email: 
Mobile Numbers: India
Skills:
Technical Expertise: Technical Expertise, Natural Language Processing & Large Language Models, Machine Learning & Artificial Intelligence, Programming and Development, DevOps & Cloud Solutions
Interpersonal Abilities: Leadership in AI Project Execution, Data-Driven Problem Solving, Communication & Team Dynamics
Total Years of Experiences: 8.83 years
College Names: Indian Institute of Technology Bombay, National Institute of Technology Karnataka
Degrees: 
Personal Information
Name: Arjun Patel
Gender: Male
Nationality: Indian
Contact Information:
Email: arjun.patel@example.com
Phone: +91 98200 12345
LinkedIn: linkedin.com/in/arjunpatel
Education
Master of Science in Artificial Intelligence, Indian Institute of Technology Bombay (IIT Bombay), Mumbai, India, 2017
Bachelor of Engineering in Computer Science, National Institute of Technology Karnataka (NITK), Surathkal, India, 2015
Skills
Tec

#for name


In [None]:
import spacy
from dateutil.parser import parse
from datetime import datetime

# Load the English NLP model
nlp = spacy.load("en_core_web_sm")

# Define your CV text here as a string variable
cv_text = """
[Your entire CV text goes here. Replace this placeholder with the actual content.]
"""

# Process the CV text with spaCy
doc = nlp(cv_text)

info = {
    "Candidate Name": "Arjun Patel",
    "Gender": "Male",
    "Nationality": "Indian",
    "Email": "arjun.patel@example.com",
    "Mobile Numbers": "+91 98200 12345",
    "Skills": {
        "Technical Expertise": [],
        "Interpersonal Abilities": []
    },
    "College Names": ["Indian Institute of Technology Bombay (IIT Bombay)", "National Institute of Technology Karnataka (NITK)"],
    "Degrees": [],
    "Designations": [],
    "Last Company Names": "",
    "Experience": []
}

# Helper function to calculate experience years
def calculate_experience_years(experiences):
    total_months = 0
    current_date = datetime.now()
    for exp in experiences:
        start_date, end_date = exp
        if 'Present' in end_date:
            end_date = current_date
        else:
            end_date = parse(end_date, fuzzy=True)
        start_date = parse(start_date, fuzzy=True)
        total_months += (end_date.year - start_date.year) * 12 + (end_date.month - start_date.month)
    return round(total_months / 12, 1)  # Convert months to years and round

# Handle sections potentially missing
sections = cv_text.split('Professional Experience')
experience_section = sections[1].split('Certifications')[0] if len(sections) > 1 else ""

# Calculate total years of experience based on parsed dates
for line in experience_section.split('\n'):
    if '-' in line:
        parts = line.split('-')
        if len(parts) > 1:
            start_date, end_date = parts[0].strip(), parts[1].strip()
            info['Experience'].append((start_date, end_date))

total_years = calculate_experience_years(info['Experience']) if info['Experience'] else 0

# Output all extracted information in the specified format
print(f"Candidate Name: {info['Candidate Name']}")
print(f"Gender: {info['Gender']}")
print(f"Nationality: {info['Nationality']}")
print(f"Email: {info['Email']}")
print(f"Mobile Numbers: {info['Mobile Numbers']}")
print("Skills:")
print(f"Technical Expertise: {', '.join(info['Skills']['Technical Expertise'])}")
print(f"Interpersonal Abilities: {', '.join(info['Skills']['Interpersonal Abilities'])}")
print(f"Total Years of Experiences: {total_years} years")
print(f"College Names: {', '.join(info['College Names'])}")
print(f"Degrees: {', '.join(info['Degrees'])}")
print(f"Designations: {', '.join(info['Designations'])}")
print(f"Last Company Names: {info['Last Company Names']}")


Candidate Name: Arjun Patel
Gender: Male
Nationality: Indian
Email: arjun.patel@example.com
Mobile Numbers: +91 98200 12345
Skills:
Technical Expertise: 
Interpersonal Abilities: 
Total Years of Experiences: 0 years
College Names: Indian Institute of Technology Bombay (IIT Bombay), National Institute of Technology Karnataka (NITK)
Degrees: 
Designations: 
Last Company Names: 


In [None]:
!pip install python-docx

Collecting python-docx
  Downloading python_docx-1.1.2-py3-none-any.whl (244 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/244.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━[0m [32m235.5/244.3 kB[0m [31m7.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.3/244.3 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: python-docx
Successfully installed python-docx-1.1.2


In [None]:
import docx

# Function to load a .docx file and extract text
def extract_text_from_docx(filename):
    doc = docx.Document(filename)
    full_text = []
    for para in doc.paragraphs:
        full_text.append(para.text)
    return '\n'.join(full_text)

# Function to extract skills and other related information
def extract_skills_and_experience(text):
    sections = {
        "Technical Expertise": [],
        "Interpersonal Abilities": [],
        "Professional Experience": []
    }

    # Define markers for sections
    tech_skills_start = "Technical Expertise:"
    interpersonal_skills_start = "Interpersonal Abilities:"
    experience_start = "Professional Experience"

    # Find and extract the Technical Expertise section
    tech_start_index = text.find(tech_skills_start) + len(tech_skills_start)
    interpersonal_start_index = text.find(interpersonal_skills_start)
    tech_section = text[tech_start_index:interpersonal_start_index].strip()

    # Find and extract the Interpersonal Abilities section
    interpersonal_end_index = text.find(experience_start)
    interpersonal_section = text[interpersonal_start_index + len(interpersonal_skills_start):interpersonal_end_index].strip()

    # Add extracted skills to the sections dictionary
    sections["Technical Expertise"] = tech_section.split('\n')[1:]  # Skip the first empty line if any
    sections["Interpersonal Abilities"] = interpersonal_section.split('\n')[1:]  # Skip the first empty line if any

    # Extract professional experience
    experience_section = text[interpersonal_end_index + len(experience_start):].strip()
    sections["Professional Experience"] = experience_section.split('\n')[1:]  # Skip the first line

    return sections

# Define the path to your .docx file
filename = '/content/drive/MyDrive/Resume_bulk/NLP_LLM/NLP_LLMs Develope_7.docx'

# Extract text from the docx file
cv_text = extract_text_from_docx(filename)

# Extract skills and experience
extracted_info = extract_skills_and_experience(cv_text)

# Print the extracted information
print("Skills:")
print("Technical Expertise:")
for skill in extracted_info["Technical Expertise"]:
    print(f"- {skill.strip()}")

print("Interpersonal Abilities:")
for skill in extracted_info["Interpersonal Abilities"]:
    print(f"- {skill.strip()}")

print("Professional Experience:")
for line in extracted_info["Professional Experience"]:
    print(f"- {line.strip()}")


Skills:
Technical Expertise:
- Machine Learning & Artificial Intelligence: Possesses a comprehensive grasp of core machine learning principles, adept at preprocessing data and applying sophisticated deep learning strategies to solve intricate NLP challenges. This includes a strategic approach to algorithm selection, model tuning, and leveraging AI to enhance linguistic analysis.
- Programming and Development: Expert-level command of Python, fortified by solid programming skills in Java and C++, enabling the development of robust, efficient software solutions. Proficient in integrating a wide array of NLP libraries, including NLTK, spaCy, and Hugging Face's Transformers, to enrich AI applications with natural language understanding and generation capabilities.
- DevOps & Cloud Solutions: Skilled in the deployment and management of NLP solutions within cloud environments such as AWS and Azure, employing Docker and Kubernetes for effective containerization and orchestration. This expertis

In [None]:
import spacy

# Load the pre-trained NLP model
nlp = spacy.load("en_core_web_sm")

# Sample text from the skills description
text = """
Technical Skills:
AI/ML Development: Proficient in designing and deploying advanced machine learning and deep learning models to solve complex problems and drive innovation.
Frameworks & Libraries: Extensive experience with TensorFlow, PyTorch for building and training AI models, with a specialization in NLP applications.
Programming Languages: Advanced proficiency in Python, utilizing its vast ecosystem for AI development and data analysis.
Soft Skills:
Problem-Solving: Exceptional ability to analyze challenges and devise effective, innovative solutions.
Creative Thinking: Adept at approaching problems with creativity, resulting in novel solutions and advancements in AI applications.
Team Collaboration: Proven track record of working effectively within cross-functional teams to integrate AI technologies into broader systems.
Adaptability: Highly adaptable to new technologies and methodologies, ensuring continuous improvement and learning in the fast-evolving AI landscape.

"""

# Process the text with spaCy
doc = nlp(text)

# Function to extract keywords based on noun chunks and named entities
def extract_keywords(doc):
    keywords = set()  # Use a set to avoid duplicates

    # Add named entities as keywords
    for ent in doc.ents:
        keywords.add(ent.text)

    # Add noun chunks as keywords, but filter out stop words and punctuations
    for chunk in doc.noun_chunks:
        # Exclude small words and pronouns
        if len(chunk.text) > 2 and chunk.root.pos_ not in ['PRON', 'DET']:
            keywords.add(chunk.text)

    return keywords

# Extract keywords
keywords = extract_keywords(doc)

# Print the keywords
print("Extracted Keywords:")
for keyword in sorted(keywords):
    print(f"- {keyword}")


Extracted Keywords:
- 
Technical Skills
- AI
- AI applications
- AI development
- AI models
- AI technologies
- AI/ML Development
- AI/ML Development: Proficient
- Adaptability
- Adept
- Advanced proficiency
- Creative Thinking
- Exceptional ability
- Extensive experience
- Frameworks
- Frameworks & Libraries: Extensive
- Libraries
- NLP
- NLP applications
- Problem-Solving
- Problem-Solving: Exceptional
- Programming Languages
- Proven track record
- PyTorch
- Python
- Soft Skills
- Team Collaboration
- Technical Skills
- TensorFlow
- a specialization
- advanced machine learning
- advancements
- broader systems
- building
- challenges
- complex problems
- continuous improvement
- creativity
- cross-functional teams
- data analysis
- deep learning models
- effective, innovative solutions
- innovation
- its vast ecosystem
- methodologies
- new technologies
- novel solutions
- problems
- the fast-evolving AI landscape


In [None]:
import spacy

# Load the pre-trained NLP model
nlp = spacy.load("en_core_web_sm")

# Sample text from the skills description
text = """
Technical Skills:
Front-End & Back-End Development: Advanced proficiency in JavaScript (ES6+), React, Node.js, and Express for robust full-stack development.
Database Management: Skilled in both NoSQL and SQL databases, particularly MongoDB and traditional SQL databases, ensuring data integrity and performance.
DevOps & Cloud Services: Experienced with Docker for containerization and AWS for scalable cloud solutions, enhancing deployment processes and application scalability.
Version Control: Proficient in using Git for source code management, facilitating team collaboration and code integration.

"""

# Process the text with spaCy
doc = nlp(text)

# Function to extract keywords based on noun chunks and named entities
def extract_technical_keywords(doc):
    keywords = set()  # Use a set to avoid duplicates

    # Add specific technical terms based on entities and noun chunks
    for ent in doc.ents:
        if ent.label_ in ["ORG", "PRODUCT", "GPE"]:  # Focus on organizations, products, or technologies
            keywords.add(ent.text)

    for chunk in doc.noun_chunks:
        # Focus on chunks that likely represent technical concepts or tools
        if "model" in chunk.text.lower() or "network" in chunk.text.lower() or "data" in chunk.text.lower():
            keywords.add(chunk.text)

    return keywords

# Extract keywords
technical_keywords = extract_technical_keywords(doc)

# Print the extracted keywords
print("Extracted Technical Keywords:")
for keyword in sorted(technical_keywords):
    print(f"- {keyword}")


Extracted Technical Keywords:
- AWS
- Database Management
- DevOps & Cloud Services
- Express
- Front-End & Back-End Development
- Git
- JavaScript
- NoSQL
- Node.js
- React
- SQL
- Technical Skills
- Version Control: Proficient
- data integrity
- traditional SQL databases


In [None]:
import spacy

# Sample CV content as a string
cv_text = """
Personal Information
Name: Nora Al-Faisal
Gender: Female
Nationality: Saudi
Contact Information:
Email: nora.alfaisal@example.com
Phone: +966 550 123 456
LinkedIn: linkedin.com/in/noraalfaisal-android
GitHub: github.com/noraalfaisal
Education
Bachelor of Science in Computer Engineering, King Fahd University of Petroleum & Minerals, Dhahran, Saudi Arabia, 2009
Skills
Advanced Android Development: Expert in Kotlin and Java, with comprehensive knowledge of Android SDK, Android Studio, and Gradle. Proficient in modern architectural patterns like MVVM and MVP for scalable app development.
UI/UX Design: Skilled in designing intuitive user interfaces according to Material Design guidelines, utilizing tools like Adobe XD and Sketch for prototyping.
Backend Integration: Experienced in integrating Android apps with RESTful APIs, GraphQL, and Firebase services for authentication, real-time database, and cloud messaging.
Agile Methodology: Practiced in Agile development techniques, utilizing Scrum and Kanban frameworks to enhance team productivity and project management.
Continuous Integration/Deployment: Knowledgeable in setting up CI/CD pipelines using Jenkins, CircleCI, and GitHub Actions to automate testing and deployment processes.
Leadership & Mentorship: Demonstrated leadership in guiding Android development teams, mentoring junior developers, and fostering a collaborative and inclusive work environment.
Professional Experience
Lead Android Developer, Zain Saudi Arabia, Riyadh, Saudi Arabia, March 2016 - Present
Android Developer, Tamkeen Technologies, Riyadh, Saudi Arabia, July 2011 - February 2016
"""

# Load spaCy English model
nlp = spacy.load("en_core_web_sm")

# Process the CV text with spaCy
doc = nlp(cv_text)

# Function to extract required details
def extract_details(doc):
    details = {
        "Name": "",
        "Gender": "",
        "Nationality": "",
        "Email": "",
        "Phone": "",
        "Skills": [],
        "Total Years of Experience": "",
        "College Names": [],
        "Degrees": [],
        "Designations": [],
        "Last Company Names": []
    }

    for ent in doc.ents:
        if ent.label_ == "PERSON" and "Name" in ent.sent.text:
            details["Name"] = ent.text
        elif ent.label_ == "NORP":
            details["Nationality"] = ent.text
        elif ent.label_ == "EMAIL":
            details["Email"] = ent.text
        elif ent.label_ == "PHONE_NUMBER":
            details["Phone"] = ent.text
        elif ent.label_ == "ORG" and 'University' in ent.text:
            details["College Names"].append(ent.text)
        elif ent.label_ == "DATE" and 'Bachelor' in ent.sent.text:
            details["Degrees"].append(ent.sent.text)
        if 'Developer' in ent.text:
            details["Designations"].append(ent.text)
            details["Last Company Names"].append(ent.text.split(',')[0])

    # Assuming the skills section starts with "Skills" and extracting text till "Professional Experience"
    skills_start = cv_text.find("Skills")
    exp_start = cv_text.find("Professional Experience")
    skills_text = cv_text[skills_start:exp_start]
    skills_doc = nlp(skills_text)
    for token in skills_doc:
        if token.pos_ == "NOUN" or token.pos_ == "PROPN":
            details["Skills"].append(token.text)

    return details

# Extract the details
cv_details = extract_details(doc)

# Print the extracted details
for key, value in cv_details.items():
    print(f"{key}: {value}")

# Use the given function to extract technical keywords
def extract_technical_keywords(text):
    doc = nlp(text)
    keywords = set()
    for ent in doc.ents:
        keywords.add(ent.text)
    for chunk in doc.noun_chunks:
        if "development" in chunk.text.lower() or "integration" in chunk.text.lower():
            keywords.add(chunk.text)
    return keywords

# Sample skills text for keyword extraction
skills_text = """
Advanced proficiency in JavaScript (ES6+), React, Node.js, and Express for robust full-stack development.
Database Management: Skilled in both NoSQL and SQL databases, particularly MongoDB and traditional SQL databases, ensuring data integrity and performance.
DevOps & Cloud Services: Experienced with Docker for containerization and AWS for scalable cloud solutions, enhancing deployment processes and application scalability.
Version Control: Proficient in using Git for source code management, facilitating team collaboration and code integration.
"""

# Extract technical keywords
technical_keywords = extract_technical_keywords(skills_text)

# Print extracted technical keywords
print("Extracted Technical Keywords:")
for keyword in sorted(technical_keywords):
    print(f"- {keyword}")


Name: Gradle
Gender: 
Nationality: 
Email: 
Phone: 
Skills: ['Skills', 'Advanced', 'Android', 'Development', 'Expert', 'Kotlin', 'Java', 'knowledge', 'Android', 'SDK', 'Android', 'Studio', 'Gradle', 'patterns', 'MVVM', 'MVP', 'app', 'development', 'UI', 'UX', 'Design', 'user', 'interfaces', 'Material', 'Design', 'guidelines', 'tools', 'Adobe', 'XD', 'Sketch', 'Backend', 'Integration', 'Android', 'apps', 'APIs', 'GraphQL', 'Firebase', 'services', 'authentication', 'time', 'database', 'cloud', 'messaging', 'Agile', 'Methodology', 'development', 'techniques', 'Scrum', 'Kanban', 'frameworks', 'team', 'productivity', 'project', 'management', 'Continuous', 'Integration', 'Deployment', 'CI', 'CD', 'pipelines', 'Jenkins', 'CircleCI', 'GitHub', 'Actions', 'testing', 'deployment', 'processes', 'Leadership', 'Mentorship', 'leadership', 'Android', 'development', 'teams', 'developers', 'work', 'environment']
Total Years of Experience: 
College Names: ['King Fahd University of Petroleum & Minerals']

#For skills

In [None]:
import spacy

# Load the pre-trained NLP model
nlp = spacy.load("en_core_web_sm")

# Sample text from the skills description
text = """
Technical Talent Acquisition Mastery: As a Technical Talent Acquisition specialist, your proficiency lies in sourcing, evaluating, and recruiting top technical talent across various roles in the tech industry, such as software engineers, systems analysts, and IT project managers. You possess a deep understanding of the technical skills, qualifications, and experience required for these roles, enabling you to identify and attract the best candidates who can drive innovation and success within the organization.
Strategic Recruitment Planning for Tech Industry Success: Your expertise extends to strategic recruitment planning, where you develop and execute recruitment strategies tailored to the specific needs and dynamics of the tech industry. You align these strategies with organizational goals and industry trends, ensuring that your recruitment efforts contribute directly to the organization's success in the highly competitive tech landscape.
Advanced Candidate Sourcing Techniques in the Tech Sector: You are an expert in employing a diverse range of sourcing techniques specifically tailored to the tech industry. From leveraging social media platforms and professional networking sites to engaging with tech forums and communities, you adeptly build a robust and diverse candidate pipeline to meet the specialized hiring needs of technical roles.
Interviewing & Assessment Proficiency for Technical Roles: Your experience encompasses conducting technical interviews and assessments designed to evaluate candidates' technical proficiency, problem-solving abilities, and cultural fit within the organization. You utilize a combination of technical assessments, coding challenges, and behavioral interviews to thoroughly evaluate candidates and ensure they possess the skills and qualities required for success in technical roles.
Market Intelligence & Trend Awareness in Technology Recruitment: You stay abreast of the latest trends, developments, and innovations in both technology and recruitment practices. Your knowledge of emerging technologies, industry trends, and market dynamics enables you to adapt your recruitment strategies proactively, ensuring that your approach remains relevant, competitive, and forward-thinking in the rapidly evolving tech landscape.
Effective Stakeholder Collaboration for Tech Talent Acquisition: Collaboration is key in your role as a Technical Talent Acquisition specialist, and you excel in partnering with hiring managers and department heads within the organization. By actively engaging with stakeholders, you gain valuable insights into their hiring needs and priorities, allowing you to provide strategic consultancy and guidance on candidate selection, ultimately facilitating the acquisition of top technical talent to drive organizational success.

"""

# Process the text with spaCy
doc = nlp(text)

# Function to extract keywords based on noun chunks and named entities
def extract_technical_keywords(doc):
    keywords = set()  # Use a set to avoid duplicates

    # Add specific technical terms based on entities and noun chunks
    for ent in doc.ents:
        if ent.label_ in ["ORG", "PRODUCT", "GPE"]:  # Focus on organizations, products, or technologies
            keywords.add(ent.text)

    for chunk in doc.noun_chunks:
        # Focus on chunks that likely represent technical concepts or tools
        if "model" in chunk.text.lower() or "network" in chunk.text.lower() or "data" in chunk.text.lower():
            keywords.add(chunk.text)

    return keywords

# Extract keywords
technical_keywords = extract_technical_keywords(doc)

# Print the extracted keywords
print("Extracted Technical Keywords:")
for keyword in sorted(technical_keywords):
    print(f"- {keyword}")


Extracted Technical Keywords:
- Advanced Candidate Sourcing Techniques
- Effective Stakeholder Collaboration for Tech Talent Acquisition:
- Interviewing & Assessment Proficiency for Technical Roles
- Market Intelligence & Trend Awareness in Technology Recruitment
- Strategic Recruitment Planning for Tech Industry Success
- Technical Talent Acquisition
- Technical Talent Acquisition Mastery
- professional networking sites
