In [3]:
import spacy
import textblob

In [None]:
#  Here We Will Deal With The TextBlob with Its Workings in Spacy.
# Textblob is amazing for TextProcessing

In [None]:
# Create a TextBlob

In [1]:
from textblob import TextBlob

In [2]:
text1 = TextBlob("Textblob is useful for textpreprocessing and works fantastically well in python.")

In [3]:
text1.tags  # POS Tagging

[('Textblob', 'NNP'),
 ('is', 'VBZ'),
 ('useful', 'JJ'),
 ('for', 'IN'),
 ('textpreprocessing', 'VBG'),
 ('and', 'CC'),
 ('works', 'VBZ'),
 ('fantastically', 'RB'),
 ('well', 'RB'),
 ('in', 'IN'),
 ('python', 'NN')]

In [4]:
text1.noun_phrases # Noun Phrase Extraction 

WordList(['textblob'])

In [None]:
# Sentiment Analysis

In [5]:
sent1 = TextBlob("Textblob is useful for text preprocessing")
sent1.sentiment
sent1.sentiment.polarity

0.3

In [None]:
# Tokenization

In [6]:
text1.words

WordList(['Textblob', 'is', 'useful', 'for', 'textpreprocessing', 'and', 'works', 'fantastically', 'well', 'in', 'python'])

In [7]:
text1.sentences

[Sentence("Textblob is useful for textpreprocessing and works fantastically well in python.")]

In [8]:
for s in text1.sentences:
    print(s.sentiment)

Sentiment(polarity=0.3, subjectivity=0.0)


In [9]:
# word inflection and lemmatization

In [10]:
sent2 = TextBlob("It is my favorite flower and it is prettier.")
sent2.words  # word object

WordList(['It', 'is', 'my', 'favorite', 'flower', 'and', 'it', 'is', 'prettier'])

In [11]:
sent2.words[4].pluralize()

'flowers'

In [12]:
sent2.words[4].singularize()

'flower'

In [None]:
# lemmatize

In [16]:
from textblob import Word
w = Word("sleepings")
w.lemmatize()

'sleeping'

In [17]:
w = Word("left")
w.lemmatize("v") # pass in WordNet pos(Verb)

'leave'

In [None]:
# WordNet Integration

In [18]:
from textblob import Word
from textblob.wordnet import VERB
word = Word("pen")
word.synsets

[Synset('pen.n.01'),
 Synset('pen.n.02'),
 Synset('playpen.n.01'),
 Synset('penitentiary.n.01'),
 Synset('pen.n.05'),
 Synset('write.v.01')]

In [21]:
Word("pen").get_synsets(pos=VERB)

[Synset('write.v.01')]

In [24]:
Word("pen").definitions  # for acquiring the definition

['a writing implement with a point from which ink flows',
 'an enclosure for confining livestock',
 'a portable enclosure in which babies may be left to play',
 'a correctional institution for those convicted of major crimes',
 'female swan',
 'produce a literary work']

In [None]:
# Creating Synsets directly

In [29]:
from textblob.wordnet import Synset
pen = Synset('pen.v.01')
pencil = Synset('pencil.v.01')
pen.path_similarity(pencil)

0.14285714285714285

In [27]:
# to check whether pencil has any similar definition with pen.
word = Word("pencil")
word.synsets

[Synset('pencil.n.01'),
 Synset('pencil.n.02'),
 Synset('pencil.n.03'),
 Synset('pencil.n.04'),
 Synset('pencil.v.01')]

In [28]:
Word("pencil").definitions

['a thin cylindrical pointed writing implement; a rod of marking substance encased in wood',
 'graphite (or a similar substance) used in such a way as to be a medium of communication',
 'a figure formed by a set of straight lines or light rays meeting at a point',
 'a cosmetic in a long thin stick; designed to be applied to a particular part of the face',
 'write, draw, or trace with a pencil']

In [None]:
# WordLists

In [30]:
art_obj = TextBlob("Pencil Pen Color Page Eraser")
art_obj.words

WordList(['Pencil', 'Pen', 'Color', 'Page', 'Eraser'])

In [31]:
art_obj.words.pluralize()

WordList(['Pencils', 'Pens', 'Colors', 'Pages', 'Erasers'])

In [None]:
# Spelling correction

In [33]:
text2 = TextBlob("Textblob reconize the erors.")
print(text2.correct())

Textblob recognize the errors.


In [34]:
from textblob import Word 

In [35]:
w = Word('efort') # returns a list of (word, confidence) with spelling suggestions.
w.spellcheck()

[('effort', 0.87248322147651), ('fort', 0.12751677852348994)]

In [None]:
# get word and noun phrase frequencies

In [40]:
text3 = TextBlob("Today Tomorrow Today Afterwards later later later maybe never.")
text3.word_counts['later']

3

In [43]:
text3.word_counts['Today'] # sometimes the words are not found thus the freq is displayed to be 0

0

In [41]:
text3.words.count('Today') # another way of getting the word count

2

In [44]:
text3.words.count('Today', case_sensitive=True)  # specify whether the search is case sensitive or not.

2

In [46]:
text2.noun_phrases.count('flower')

0

In [None]:
# Parsing

In [47]:
text4 = TextBlob("the method of parsing the sentences.")
print(text4.parse())

the/DT/B-NP/O method/NN/I-NP/O of/IN/B-PP/B-PNP parsing/VBG/B-VP/I-PNP the/DT/B-NP/I-PNP sentences/NNS/I-NP/I-PNP ././O/O


In [None]:
# Textblobs are like pyhton strings

In [48]:
sentence = "Textblob is amazing with python and spacy for the purpose of textpreprocessing."

In [49]:
sentence

'Textblob is amazing with python and spacy for the purpose of textpreprocessing.'

In [51]:
sentence[0:-1]

'Textblob is amazing with python and spacy for the purpose of textpreprocessing'

In [52]:
sentence.upper()

'TEXTBLOB IS AMAZING WITH PYTHON AND SPACY FOR THE PURPOSE OF TEXTPREPROCESSING.'

In [53]:
sentence.lower()

'textblob is amazing with python and spacy for the purpose of textpreprocessing.'

In [54]:
sentence.find("amazing")

12

In [None]:
# Comparing Textblob and strings

In [56]:
pen_blob = TextBlob('pen')
pencil_blob = TextBlob('pencil')
pen_blob < pencil_blob

True

In [57]:
pen_blob == 'pen'

True

In [58]:
pencil_blob < pen_blob

False

In [59]:
pencil_blob == 'pencil'

True

In [60]:
pen_blob == 'pencil'

False

In [None]:
# Concatenate and interpolate TextBlob and Strings

In [62]:
pen_blob + ' and/or ' + pencil_blob

TextBlob("pen and/or pencil")

In [63]:
"{0} and/or {1}".format(pen_blob, pencil_blob)

'pen and/or pencil'

In [None]:
#n-grams

In [64]:
blob_ = TextBlob("the text has no sentence.")
blob_.ngrams(n=3)

[WordList(['the', 'text', 'has']),
 WordList(['text', 'has', 'no']),
 WordList(['has', 'no', 'sentence'])]

In [65]:
blob_.ngrams(n=2)

[WordList(['the', 'text']),
 WordList(['text', 'has']),
 WordList(['has', 'no']),
 WordList(['no', 'sentence'])]

In [66]:
blob_.ngrams(n=4)

[WordList(['the', 'text', 'has', 'no']),
 WordList(['text', 'has', 'no', 'sentence'])]

In [None]:
# Start and End Indicides of Sentences

In [67]:
for s in text2.sentences:
    print(s)
    print("----- Starts at index {}, Ends at index {}".format(s.start, s.end))

Textblob reconize the erors.
----- Starts at index 0, Ends at index 28


In [None]:
# Detecting language

In [68]:
detect = TextBlob("hola")
detect.detect_language()

'es'

In [None]:
# Translation

In [69]:
trans_late = TextBlob("today is a lively")
trans_late.translate(to='de')  # de=German

TextBlob("Heute ist ein lebhafter")

In [70]:
# Define
W = Word("Graph")
W.define

<bound method Word.define of 'Graph'>