<a href="https://colab.research.google.com/github/AdarshChintada/Daily_tasks/blob/main/PhraseMatcher.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Prepare PhraseMatcher patterns for resume parsing.**

In [5]:
import spacy
from spacy.matcher import PhraseMatcher

# Load a spaCy NLP model
nlp = spacy.load("en_core_web_sm")

# Sample resume text
resume_text = """
KUSHAL PATEL
Personal Data
Date of Birth:
Address:
Phone:
email:
Linkedin Id:
18 th June, 1996
B/403 Sagar Sangeet Heights, Sola, Ahmedabad, Gujarat
+91 8401547060
patelkushal96@gmail.com
https://www.linkedin.com/in/patel-kushal/
Career Objectives
Machine Learning and data science enthusiast with experience of executing data-driven so-
lutions of real world problems. Looking to use my Machine learning and data science knowl-
edge to manage statistical machine learning and data-related solutions.
Education
2017-2019 Master of Technology in Structural Engineering
Indian Institute of Technology, Madras
Gpa: 9.11/10
2013-2017 Bachelor of Engineering in Civil Engineering
Vishwakarma Government Engineering College, Gandhinagar
Gpa: 7.76/10
... (the rest of the resume text)
"""

# Initialize the PhraseMatcher with the pipeline's shared vocabulary
matcher = PhraseMatcher(nlp.vocab)

# Map each section label to the literal phrases that should be tagged with it
patterns = {
    "Name": ["KUSHAL PATEL"],
    "Date_of_Birth": ["18 th June, 1996"],
    "Address": ["B/403 Sagar Sangeet Heights, Sola, Ahmedabad, Gujarat"],
    "Phone": ["+91 8401547060"],
    "Email": ["patelkushal96@gmail.com"],
    "LinkedIn": ["https://www.linkedin.com/in/patel-kushal/"],
    # Add more patterns for other sections...
}

# Add the patterns to the PhraseMatcher.
# - nlp.make_doc only tokenizes, which is all the matcher needs here;
#   calling nlp(phrase) would run every pipeline component on each phrase.
# - The list of pattern Docs is passed as the second argument, the same call
#   shape the Matcher cells in this notebook use; (label, None, *docs) is the
#   legacy spaCy v2 form.
for label, phrase_list in patterns.items():
    phrase_patterns = [nlp.make_doc(phrase) for phrase in phrase_list]
    matcher.add(label, phrase_patterns)

# Process the resume text with spaCy
doc = nlp(resume_text)

# Apply the PhraseMatcher to the document; yields (match_id, start, end) triples
matches = matcher(doc)

# Extract annotations: label -> matched text.
# If a label matches more than once, the last match wins (dict assignment).
annotations = {}
for match_id, start, end in matches:
    # match_id is the hash of the label; look the string back up in the vocab
    section_label = nlp.vocab.strings[match_id]
    section_text = doc[start:end].text.strip()
    annotations[section_label] = section_text

# Print the annotations
for section, content in annotations.items():
    print(f"{section}: {content}")


Name: KUSHAL PATEL
Date_of_Birth: 18 th June, 1996
Address: B/403 Sagar Sangeet Heights, Sola, Ahmedabad, Gujarat
Phone: +91 8401547060
Email: patelkushal96@gmail.com
LinkedIn: https://www.linkedin.com/in/patel-kushal/


**RULE-BASED MATCHING EXAMPLES (token-based `Matcher`, entities, lexical attributes)**

In [6]:
import spacy
from spacy.matcher import Matcher

# Pipeline plus a token-based matcher bound to the same vocabulary
nlp = spacy.load("en_core_web_sm")
matcher = Matcher(nlp.vocab)

doc = nlp(
    "i downloaded Fortnite on my laptop and can't open the game at all. Help? "
    "so when I was downloading Minecraft, I got the Windows version where it "
    "is the '.zip' folder and I used the default program to unpack it... do "
    "I also need to download Winzip?"
)

# Two-token pattern: a token whose lemma is "download", then a proper noun
pattern = [{"LEMMA": "download"}, {"POS": "PROPN"}]

# Register the pattern under its key, then run the matcher over the doc
matcher.add("DOWNLOAD_THINGS_PATTERN", [pattern])
matches = matcher(doc)
print("Total matches found:", len(matches))

# Each match is (match_id, start, end); slice the doc to recover the span
for _, span_start, span_end in matches:
    matched_span = doc[span_start:span_end]
    print("Match found:", matched_span.text)

Total matches found: 3
Match found: downloaded Fortnite
Match found: downloading Minecraft
Match found: download Winzip


In [7]:
# Relies on `nlp` already being defined by a preceding cell
doc = nlp("Apple is looking at buying U.K. startup for $1 billion")

# Walk the predicted entities, showing each one's text beside its label
for entity in doc.ents:
    print(entity.text, entity.label_)

Apple ORG
U.K. GPE
$1 billion MONEY


In [8]:
import spacy

nlp = spacy.blank("en")

# Process the text
doc = nlp(
    "In 1990, more than 60% of people in East Asia were in extreme poverty. "
    "Now less than 4% are."
)

# Report every number-like token that is immediately followed by "%"
for token in doc:
    # A number-like token at the very end of the doc has no successor, so
    # guard the lookahead: doc[token.i + 1] would otherwise raise IndexError.
    if token.like_num and token.i + 1 < len(doc):
        # Get the next token in the document
        next_token = doc[token.i + 1]
        # Check if the next token's text equals "%"
        if next_token.text == "%":
            print("Percentage found:", token.text)

Percentage found: 60
Percentage found: 4


In [9]:
import spacy

# Import the Matcher
from spacy.matcher import Matcher

# Create the nlp object from the small English pipeline
nlp = spacy.load("en_core_web_sm")

# The matcher is constructed from the pipeline's vocab
matcher = Matcher(nlp.vocab)

# Two consecutive tokens whose exact texts are "iPhone" and "X"
pattern = [{"TEXT": "iPhone"}, {"TEXT": "X"}]
matcher.add("IPHONE_PATTERN", [pattern])

# Run the pipeline on a sample sentence
doc = nlp("Upcoming iPhone X release date leaked")

# Apply the matcher; each result is a (match_id, start, end) triple
matches = matcher(doc)

for _, span_start, span_end in matches:
    # Slice the doc with the match boundaries and show the span's text
    print(doc[span_start:span_end].text)

iPhone X


In [10]:
import spacy

# Import the Matcher
from spacy.matcher import Matcher

nlp = spacy.load("en_core_web_sm")
doc = nlp("Upcoming iPhone X release date leaked as Apple reveals pre-orders")

# Matcher built on the pipeline's vocabulary
matcher = Matcher(nlp.vocab)

# Exact-text pattern: "iPhone" immediately followed by "X"
pattern = [{"TEXT": "iPhone"}, {"TEXT": "X"}]

# Register it under its key
matcher.add("IPHONE_X_PATTERN", [pattern])

# Run the matcher and collect the text of every matched span
matches = matcher(doc)
matched_texts = []
for _, span_start, span_end in matches:
    matched_texts.append(doc[span_start:span_end].text)
print("Matches:", matched_texts)

Matches: ['iPhone X']


In [11]:
import spacy
from spacy.matcher import Matcher

nlp = spacy.load("en_core_web_sm")

# Matcher for the exact token "iOS" followed by an all-digit token
matcher = Matcher(nlp.vocab)
pattern = [{"TEXT": "iOS"}, {"IS_DIGIT": True}]
matcher.add("iPhone_text", [pattern])

doc = nlp(
    "After making the iOS update you won't notice a radical system-wide "
    "redesign: nothing like the aesthetic upheaval we got with iOS 7. Most of "
    "iOS 11's furniture remains the same as in iOS 10. But you will discover "
    "some tweaks once you delve a little deeper."
)

# Apply the matcher and report how many spans it found
matches = matcher(doc)
print("Total matches found:", len(matches))

# Print the text of each matched span
for _, span_start, span_end in matches:
    print("Matches are:", doc[span_start:span_end].text)


Total matches found: 3
Matches are: iOS 7
Matches are: iOS 11
Matches are: iOS 10
