# 7. Implement semantic role labelling and identify named entities.

In [12]:
# Install necessary libraries
!pip install spacy pandas
!python -m spacy download en_core_web_sm

2024-11-24 10:57:46.261566: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-24 10:57:46.281180: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-24 10:57:46.287172: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Collecting en-core-web-sm==3.3.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.3.0/en_core_web_sm-3.3.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m78.1 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You can now load

In [13]:
# Import necessary libraries
import spacy
import pandas as pd
from spacy import displacy

In [14]:
# Raise pandas' "display.max_rows" option to 200 for the rest of the session.
pd.options.display.max_rows = 200

In [15]:
# Print a status line, then load the installed "en_core_web_sm" pipeline.
# The resulting `nlp` object is what later cells call on the input text.
print("Loading SpaCy NER model...")
nlp = spacy.load(name="en_core_web_sm")

Loading SpaCy NER model...


In [16]:
# Sample passage to analyse. It is assembled from short fragments purely for
# readability; the fragments concatenate into one two-sentence string.
text = (
    "Mahesh Raut, an AI and Data Science student pursuing a B.Tech degree, "
    + "hails from Kaij in the Beed district. "
    + "Currently based in Pune, he is actively seeking internships and "
    + "opportunities in Machine Learning and Data Science, "
    + "leveraging his skills in Python, SQL, and AI to work on innovative "
    + "projects that solve real-world problems."
)


In [17]:
# Run the loaded spaCy pipeline over the input text. The resulting `doc` is
# what the following cells read entities (`doc.ents`) and per-token
# dependency information (`token.dep_`, `token.head`, `token.children`) from.
doc = nlp(text)

In [18]:
# Extract named entities as (text, start_char, end_char, label) tuples and
# print one line per entity.
print("\nNamed Entities:")
entities = [(ent.text, ent.start_char, ent.end_char, ent.label_) for ent in doc.ents]
# The loop variable is deliberately NOT called `text`: notebook cells share one
# namespace, so reusing that name would overwrite the input passage with the
# last entity string and silently corrupt any later re-run of `nlp(text)`.
for ent_text, start, end, label in entities:
    print(f"Text: {ent_text}, Start: {start}, End: {end}, Label: {label}")



Named Entities:
Text: Mahesh Raut, Start: 0, End: 11, Label: PERSON
Text: Kaij, Start: 81, End: 85, Label: GPE
Text: Pune, Start: 127, End: 131, Label: ORG
Text: Machine Learning and Data Science, Start: 189, End: 222, Label: FAC
Text: Python, Start: 249, End: 255, Label: GPE
Text: SQL, Start: 257, End: 260, Label: ORG


In [19]:
# Print a heading, then ask displaCy to render the entity annotations of
# `doc` (style "ent") directly in the notebook output.
print("\nVisualizing Named Entities:")
displacy.render(
    doc,
    jupyter=True,
    style="ent",
)



Visualizing Named Entities:


In [20]:
# Approximate semantic roles with spaCy's dependency parse: keep the tokens
# whose dependency label marks a core argument (subject / object) or the
# sentence root, together with their head and immediate children.
print("\nSemantic Roles:")

# Dependency labels treated as role-bearing.
# - "nsubjpass" covers subjects of passive clauses, which "nsubj" alone misses.
# - "dative" is the label spaCy's English models use for indirect objects;
#   "iobj" is kept so the filter also works with models that use that label.
ROLE_DEPENDENCIES = frozenset(
    {"nsubj", "nsubjpass", "dobj", "pobj", "iobj", "dative", "ROOT"}
)

# Each record: (token text, dependency label, head text, head POS, child texts)
semantic_roles = []
for token in doc:
    if token.dep_ in ROLE_DEPENDENCIES:
        semantic_roles.append(
            (
                token.text,
                token.dep_,
                token.head.text,
                token.head.pos_,
                [child.text for child in token.children],
            )
        )



Semantic Roles:


In [21]:
# Print one line per collected role record. Each record is a 5-tuple of
# (token text, dependency label, head text, head POS, child texts), so it is
# unpacked into named fields instead of being indexed by position.
for token_text, dep_label, head_text, head_pos, child_texts in semantic_roles:
    print(f"Token: {token_text}, Dependency: {dep_label}, Head: {head_text}, Head POS: {head_pos}, Children: {child_texts}")


Token: Raut, Dependency: nsubj, Head: hails, Head POS: VERB, Children: ['Mahesh', ',', 'student', ',']
Token: degree, Dependency: dobj, Head: pursuing, Head POS: VERB, Children: ['a', 'B.Tech']
Token: hails, Dependency: ROOT, Head: hails, Head POS: VERB, Children: ['Raut', 'from', 'in', '.']
Token: Kaij, Dependency: pobj, Head: from, Head POS: ADP, Children: []
Token: district, Dependency: pobj, Head: in, Head POS: ADP, Children: ['the', 'Beed']
Token: Pune, Dependency: pobj, Head: in, Head POS: ADP, Children: []
Token: he, Dependency: nsubj, Head: seeking, Head POS: VERB, Children: []
Token: seeking, Dependency: ROOT, Head: seeking, Head POS: VERB, Children: ['based', ',', 'he', 'is', 'actively', 'internships', ',', 'leveraging', '.']
Token: internships, Dependency: dobj, Head: seeking, Head POS: VERB, Children: ['and', 'opportunities', 'in']
Token: Learning, Dependency: pobj, Head: in, Head POS: ADP, Children: ['Machine', 'and', 'Science']
Token: skills, Dependency: dobj, Head: lever

In [22]:
# Convert the role records into a DataFrame, one row per record and one
# column per tuple field.
df_roles = pd.DataFrame(semantic_roles, columns=["Token", "Dependency", "Head", "Head_POS", "Children"])
print("\nSemantic Roles DataFrame:")
# End the cell with the frame itself rather than print(df_roles): the notebook
# then renders it as a table instead of plain text that wraps the wide
# "Children" column onto a separate block of lines.
df_roles


Semantic Roles DataFrame:
          Token Dependency        Head Head_POS  \
0          Raut      nsubj       hails     VERB   
1        degree       dobj    pursuing     VERB   
2         hails       ROOT       hails     VERB   
3          Kaij       pobj        from      ADP   
4      district       pobj          in      ADP   
5          Pune       pobj          in      ADP   
6            he      nsubj     seeking     VERB   
7       seeking       ROOT     seeking     VERB   
8   internships       dobj     seeking     VERB   
9      Learning       pobj          in      ADP   
10       skills       dobj  leveraging     VERB   
11       Python       pobj          in      ADP   
12     projects       pobj          on      ADP   
13         that      nsubj       solve     VERB   
14     problems       dobj       solve     VERB   

                                             Children  
0                             [Mahesh, ,, student, ,]  
1                                         [a