In [70]:
import spacy
from spacy import displacy

In [7]:
# Load the "en_core_web_sm" spaCy pipeline once; the cells below call `nlp(...)` on their text.
nlp = spacy.load("en_core_web_sm")

In [56]:
# Sample paragraph about UC3M, written one sentence per literal; adjacent string
# literals are concatenated, so `text` is a single line with no embedded newlines.
text = (
    "Carlos III University of Madrid (UC3M) is a public university based in Getafe, in the Community of Madrid (Spain). "
    "It was founded by Gregorio Peces-Barba on May 5, 1989, under the framework of the University Reform Law of 1983. "
    "UC3M offers undergraduate and postgraduate studies in Social Sciences and Law; Humanities, Communication, and Documentation; and Engineering, as well as a degree in Sciences. "
    "It ranks 35th worldwide and is 12th in Europe in the QS ranking of the top 50 universities in the world under 50 years old and is included in the THE academic university rankings. "
    "UC3M also stands out for the high employability of its graduates, which reaches 90.6%."
)

# Run the pipeline over the paragraph, then echo every sentence it yields.
document = nlp(text)
for sent in document.sents:
    print(f"--  {sent}")

--  Carlos III University of Madrid (UC3M) is a public university based in Getafe, in the Community of Madrid (Spain).
--  It was founded by Gregorio Peces-Barba on May 5, 1989, under the framework of the University Reform Law of 1983.
--  UC3M offers undergraduate and postgraduate studies in Social Sciences and Law; Humanities, Communication, and Documentation; and Engineering, as well as a degree in Sciences.
--  It ranks 35th worldwide and is 12th in Europe in the QS ranking of the top 50 universities in the world under 50 years old and is included in the THE academic university rankings.
--  UC3M also stands out for the high employability of its graduates, which reaches 90.6%.


In [57]:
# Column titles and the width each column is left-padded to. Driving both the
# header line and the token rows from these two lists keeps them from drifting
# apart (previously `header` was defined but the titles were retyped by hand).
header = ["Token", "Lemma", "POS", "Tag", "Dep", "Shape", "Is_Stop"]
widths = [15, 15, 10, 10, 15, 15, 10]

# Print the header row.
print(" ".join(f"{title:<{width}}" for title, width in zip(header, widths)))

# Print one row per token, with the attributes in the same order as `header`.
for token in document:
    fields = (
        token.text,
        token.lemma_,
        token.pos_,
        token.tag_,
        token.dep_,
        token.shape_,
        str(token.is_stop),  # bool -> text so it pads like the other columns
    )
    print(" ".join(f"{field:<{width}}" for field, width in zip(fields, widths)))

Token           Lemma           POS        Tag        Dep             Shape           Is_Stop   
Carlos          Carlos          PROPN      NNP        compound        Xxxxx           False     
III             III             PROPN      NNP        compound        XXX             False     
University      University      PROPN      NNP        nsubj           Xxxxx           False     
of              of              ADP        IN         prep            xx              True      
Madrid          Madrid          PROPN      NNP        pobj            Xxxxx           False     
(               (               PUNCT      -LRB-      punct           (               False     
UC3             UC3             PROPN      NNP        compound        XXd             False     
M               M               PROPN      NNP        appos           X               False     
)               )               PUNCT      -RRB-      punct           )               False     
is              be            

In [69]:
# `document.sents` hands out the sentences one at a time; take the first as a sample.
sentence = document.sents
sample = next(sentence)  # built-in next() instead of calling the __next__ dunder directly
sample

Carlos III University of Madrid (UC3M) is a public university based in Getafe, in the Community of Madrid (Spain).

In [80]:
# Shorter, single-sentence example reused by the remaining cells through `doc`.
text = "Carlos III University is a public university based in Getafe."
doc = nlp(text)

# For every noun chunk, show the full chunk text and the text of its root token,
# preceded by a blank line to separate the chunks.
for noun_chunk in doc.noun_chunks:
    print(
        "",
        f"text chunks:  {noun_chunk.text}",
        f"root text:  {noun_chunk.root.text}",
        sep="\n",
    )


text chunks:  Carlos III University
root text:  University

text chunks:  a public university
root text:  university

text chunks:  Getafe
root text:  Getafe


In [81]:
# Draw the "dep" style visualisation of `doc` inline in the notebook.
displacy.render(doc, style="dep", jupyter=True)

### Entity identification

In [87]:
# Column titles for the entity table; the same list drives the header line.
header = ["entity", "text", "label", "start", "end"]

# One row of strings per entity found in `doc`, in the same order as `header`.
rows = [
    [str(e), e.text, e.label_, str(e.start_char), str(e.end_char)]
    for e in doc.ents
]

# Each column is at least 20 characters wide (the original layout) but grows to
# fit its longest cell, so an entity longer than 20 characters no longer pushes
# the following columns out of alignment.
min_width = 20
widths = [max(min_width, *map(len, column)) for column in zip(header, *rows)]

# Print the header followed by each entity row, left-aligned and space-separated.
for line in [header, *rows]:
    print(" ".join(f"{cell:<{width}}" for cell, width in zip(line, widths)))

entity               text                 label                start                end                 
Carlos III University Carlos III University ORG                  0                    21                  
Getafe               Getafe               GPE                  54                   60                  


In [88]:
# Draw the "ent" style visualisation of `doc` inline in the notebook.
displacy.render(doc, style="ent", jupyter=True)