In [1]:
# Importing the Libraries
import spacy
import pandas as pd

In [2]:
# Loading the English Models
nlp = spacy.load("en_core_web_sm")

In [3]:
# Creating the Doc Object
text = """
Quantum computing is the use of quantum-mechanical phenomena such as superposition and entanglement to perform computation. A quantum computer is used to perform such computation, which can be implemented theoretically or physically.
"""
text = text.lower()

doc = nlp(text)

In [None]:
for token in doc:
    print(token.text, end=',')


,Quantum,computing,is,the,use,of,quantum,-,mechanical,phenomena,such,as,superposition,and,entanglement,to,perform,computation,.,A,quantum,computer,is,used,to,perform,such,computation,,,which,can,be,implemented,theoretically,or,physically,.,
,

In [None]:
# Displaying the tokens
tokens = list()

for token in doc:
    tokens.append(token.text)

print(tokens)

['\n', 'quantum', 'computing', 'is', 'the', 'use', 'of', 'quantum', '-', 'mechanical', 'phenomena', 'such', 'as', 'superposition', 'and', 'entanglement', 'to', 'perform', 'computation', '.', 'a', 'quantum', 'computer', 'is', 'used', 'to', 'perform', 'such', 'computation', ',', 'which', 'can', 'be', 'implemented', 'theoretically', 'or', 'physically', '.', '\n']


In [None]:
# Tokenization
tokens = [ token.text for token in doc]
print(tokens)

# Sentence tokenization
sentences = [token.text for token in doc.sents]
print(sentences)

['\n', 'Quantum', 'computing', 'is', 'the', 'use', 'of', 'quantum', '-', 'mechanical', 'phenomena', 'such', 'as', 'superposition', 'and', 'entanglement', 'to', 'perform', 'computation', '.', 'A', 'quantum', 'computer', 'is', 'used', 'to', 'perform', 'such', 'computation', ',', 'which', 'can', 'be', 'implemented', 'theoretically', 'or', 'physically', '.', '\n']
['\nQuantum computing is the use of quantum-mechanical phenomena such as superposition and entanglement to perform computation.', 'A quantum computer is used to perform such computation, which can be implemented theoretically or physically.\n']


In [None]:
# Filtering stop words and removing the duplicates
filtered_words = list()

for word in doc:
    if word.is_stop==False:
        filtered_words.append(word)

filtered_words = list(dict.fromkeys(filtered_words))
print(filtered_words)

[
, quantum, computing, use, quantum, -, mechanical, phenomena, superposition, entanglement, perform, computation, ., quantum, computer, perform, computation, ,, implemented, theoretically, physically, ., 
]


In [None]:
# Printing the Lemma for each word
doc = nlp("implemented cooking stopping died played disturbing")

words = list()
lemma_list = list()

for word in doc:
    words.append(word.text)
    lemma_list.append(word.lemma_)

In [None]:
df = pd.DataFrame({"Word": words,
                  "Lemma": lemma_list})
df

Unnamed: 0,Word,Lemma
0,implemented,implement
1,cooking,cooking
2,stopping,stopping
3,died,die
4,played,play
5,disturbing,disturb


## Tables

|Keywords | Details |
|---------|---------|
| PERSON | People, including fictional. |
| NORP | Nationalities or religious and political groups |
| FAC | Buildings, airports, highways, bridges, etc. |
| ORG | Companies, agencies, institutions, etc. |
| GPE | Countries, cities, states. |
| LOC | Non-GPE locations, mountain ranges, and bodies of water |
| PRODUCT | Objects, vehicles, foods, etc. (Not services) |
| EVENT | Named hurricanes, battles, wars, sports events, etc. |
| WORK_OF_ART | Titles of books, songs, etc. |
| LAW | Named documents made into laws. |
| LANGUAGE | Any named language. |
| DATE | Absolute or relative dates or periods. |
| TIME | Times smaller than a day. |
| PERCENT | Percentage, including ”%“. |
| MONEY | Monetary values, including unit. |
| QUANTITY | Measurements, as of weight or distance. |
| ORDINAL | “first”, “second”, etc. |
| CARDINAL | Numerals that do not fall under another type. |



In [None]:
# Named Entity Recognizer Function
def named_entity_recognizer(doc):

    words = list()
    named_entities = list()

    for ent in doc.ents:
        words.append(ent.text)
        named_entities.append(ent.label_)

    df = pd.DataFrame({"Word" : words,
                      "Named Entity" : named_entities})
    print(df)

In [None]:
# Example 1
# Creating the Doc Object
text = """The European Commission said on Thursday it disagreed with German advice to consumers to shun
British lamb until scientists determine whether mad cow disease can be transmitted to sheep. Germany’s
representative to the European Union’s veterinary committee Werner Zwingmann said on Wednesday
consumers should buy sheepmeat from countries other than Britain until the scientiﬁc advice was clearer."""

doc = nlp(text)
# Printing Named Entities
named_entity_recognizer(doc)

                      Word Named Entity
0  The European Commission          ORG
1                 Thursday         DATE
2                   German         NORP
3                  British         NORP
4                  Germany          GPE
5     the European Union’s          ORG
6         Werner Zwingmann       PERSON
7                Wednesday         DATE
8                  Britain          GPE


In [None]:
# Creating the Doc Object
text = """Albert Einstein was born in Ulm, in the Kingdom of Württemberg in the German Empire, on 14 March 1879.
His parents were Hermann Einstein, a salesman and engineer, and Pauline Koch. In 1880, the family moved to Munich, where Einstein's father
and his uncle Jakob founded Elektrotechnische Fabrik J. Einstein & Cie, a company that manufactured
electrical equipment based on direct current."""

doc = nlp(text)
# doc = nlp(text.lower())
# Printing Named Entities
named_entity_recognizer(doc)

                       Word Named Entity
0           albert einstein       PERSON
1               württemberg          GPE
2                    german         NORP
3             14 march 1879         DATE
4              pauline koch       PERSON
5                      1880         DATE
6                    munich          GPE
7  fabrik j. einstein & cie       PERSON


In [None]:
# table = """PERSON	People, including fictional.
# NORP	Nationalities or religious and political groups
# FAC	Buildings, airports, highways, bridges, etc.
# ORG	Companies, agencies, institutions, etc.
# GPE	Countries, cities, states.
# LOC	Non-GPE locations, mountain ranges, and bodies of water
# PRODUCT	Objects, vehicles, foods, etc. (Not services)
# EVENT	Named hurricanes, battles, wars, sports events, etc.
# WORK_OF_ART	Titles of books, songs, etc.
# LAW	Named documents made into laws.
# LANGUAGE	Any named language.
# DATE	Absolute or relative dates or periods.
# TIME	Times smaller than a day.
# PERCENT	Percentage, including ”%“.
# MONEY	Monetary values, including unit.
# QUANTITY	Measurements, as of weight or distance.
# ORDINAL	“first”, “second”, etc.
# CARDINAL	Numerals that do not fall under another type."""
# for line in table.split("\n"):
#     words = line.split("\t")
#     print("|", words[0], "|"," ".join(words[1:]), "|")

In [4]:
from spacy import displacy

def dependency_parser(doc):
    # Dependency Parsing Function
     displacy.render(doc, style="dep", jupyter= True)

# Creating the Doc Object
text = """I prefer the morning flight  through Denver."""
doc = nlp(text)
# Visualizing Dependency Parse
dependency_parser(doc)

In [None]:
# Creating the Doc Object
text = """Autonomous cars shift insurance liability toward manufacturers"""
doc = nlp(text)
# Visualizing Dependency Parse
dependency_parser(doc)

In [5]:
text = "Tesla Inc is one of the world's most valuable companies and remains the world's most valuable automaker with a market capitalization of more than US$840 billion."


displacy.render(nlp(text), jupyter=True, style='ent')

In [6]:
text = "Tesla is one of the world's most valuable companies and remains the world's most valuable automaker with a market capitalization of more than US$840 billion."
displacy.render(nlp(text), jupyter=True, style='ent')

In [7]:
text = "tesla inc is one of the world's most valuable companies and remains the world's most valuable automaker with a market capitalization of more than US$840 billion."
displacy.render(nlp(text), jupyter=True, style='ent')

In [8]:
text = "tesla is one of the world's most valuable companies and remains the world's most valuable automaker with a market capitalization of more than US$840 billion."
displacy.render(nlp(text), jupyter=True, style='ent')