# Installing TextBlob

pip install textblob

In [1]:
from textblob import TextBlob

import requests
from urllib.request import urlopen

In [2]:
# Creating some functions

def get_text(url):
    """Download ``url`` and return its body as text.

    ``requests`` is tried first; if that attempt raises, the same URL is
    fetched again with ``urllib`` and the bytes are decoded as UTF-8.

    Args:
        url: Address of the resource to download.

    Returns:
        The response body as a ``str``.

    Raises:
        Any exception raised by the ``urllib`` fallback propagates to the
        caller when both attempts fail.
    """
    try:
        return requests.get(url).text
    except Exception:
        # Deliberately broad: any failure of the requests path falls back to
        # urllib. Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate so a download can still be interrupted.
        with urlopen(url) as response:  # ensure the connection is closed
            return response.read().decode('utf8')

In [3]:
def get_speech(url):
    """Fetch the document at ``url`` and return it as a single line of text.

    The downloaded page is split on newline characters, the first two
    resulting lines are discarded, and the remaining lines are joined with
    single spaces.
    """
    lines = get_text(url).split('\n')
    body_lines = lines[2:]  # drop the two leading lines
    return " ".join(body_lines)

In [4]:
# Fetch the Clinton speech text and display it.
# NOTE: the variable name `clinoton_speech` is misspelled, but later cells
# refer to it by this exact spelling, so it is kept as-is here.
clinton_url = 'https://raw.githubusercontent.com/sul-cidr/python_workshops/master/data/clinton2000.txt'
clinoton_speech = get_speech(clinton_url)
clinoton_speech

'Mr. Speaker, Mr. Vice President, members of Congress, honored guests, my fellow Americans:  We are fortunate to be alive at this moment in history. Never before has our nation enjoyed, at once, so much prosperity and social progress with so little internal crisis and so few external threats. Never before have we had such a blessed opportunity and, therefore, such a profound obligation to build the more perfect Union of our Founders’ dreams.  We begin the new century with over 20 million new jobs; the fastest economic growth in more than 30 years; the lowest unemployment rates in 30 years; the lowest poverty rates in 20 years; the lowest African-American and Hispanic unemployment rates on record; the first back-to-back surpluses in 42 years; and next month, America will achieve the longest period of economic growth in our entire history. We have built a new economy.  And our economic revolution has been matched by a revival of the American spirit: crime down by 20 percent, to its lowes

In [5]:
clinton_blob = TextBlob(clinoton_speech[:500])
clinton_blob.string == clinoton_speech[0:500]

True

In [6]:
clinton_blob

TextBlob("Mr. Speaker, Mr. Vice President, members of Congress, honored guests, my fellow Americans:  We are fortunate to be alive at this moment in history. Never before has our nation enjoyed, at once, so much prosperity and social progress with so little internal crisis and so few external threats. Never before have we had such a blessed opportunity and, therefore, such a profound obligation to build the more perfect Union of our Founders’ dreams.  We begin the new century with over 20 million new jobs")

In [7]:
clinton_blob.sentences

[Sentence("Mr. Speaker, Mr. Vice President, members of Congress, honored guests, my fellow Americans:  We are fortunate to be alive at this moment in history."),
 Sentence("Never before has our nation enjoyed, at once, so much prosperity and social progress with so little internal crisis and so few external threats."),
 Sentence("Never before have we had such a blessed opportunity and, therefore, such a profound obligation to build the more perfect Union of our Founders’ dreams."),
 Sentence("We begin the new century with over 20 million new jobs")]

In [8]:
clinton_blob.words

WordList(['Mr', 'Speaker', 'Mr', 'Vice', 'President', 'members', 'of', 'Congress', 'honored', 'guests', 'my', 'fellow', 'Americans', 'We', 'are', 'fortunate', 'to', 'be', 'alive', 'at', 'this', 'moment', 'in', 'history', 'Never', 'before', 'has', 'our', 'nation', 'enjoyed', 'at', 'once', 'so', 'much', 'prosperity', 'and', 'social', 'progress', 'with', 'so', 'little', 'internal', 'crisis', 'and', 'so', 'few', 'external', 'threats', 'Never', 'before', 'have', 'we', 'had', 'such', 'a', 'blessed', 'opportunity', 'and', 'therefore', 'such', 'a', 'profound', 'obligation', 'to', 'build', 'the', 'more', 'perfect', 'Union', 'of', 'our', 'Founders', '’', 'dreams', 'We', 'begin', 'the', 'new', 'century', 'with', 'over', '20', 'million', 'new', 'jobs'])

In [9]:
clinton_blob.noun_phrases

WordList(['mr.', 'mr.', 'vice president', 'congress', 'never', 'social progress', 'internal crisis', 'external threats', 'never', 'profound obligation', 'perfect union', 'founders', '’ dreams', 'new century', 'new jobs'])

In [10]:
clinton_blob.ngrams(n=5)

[WordList(['Mr', 'Speaker', 'Mr', 'Vice', 'President']),
 WordList(['Speaker', 'Mr', 'Vice', 'President', 'members']),
 WordList(['Mr', 'Vice', 'President', 'members', 'of']),
 WordList(['Vice', 'President', 'members', 'of', 'Congress']),
 WordList(['President', 'members', 'of', 'Congress', 'honored']),
 WordList(['members', 'of', 'Congress', 'honored', 'guests']),
 WordList(['of', 'Congress', 'honored', 'guests', 'my']),
 WordList(['Congress', 'honored', 'guests', 'my', 'fellow']),
 WordList(['honored', 'guests', 'my', 'fellow', 'Americans']),
 WordList(['guests', 'my', 'fellow', 'Americans', 'We']),
 WordList(['my', 'fellow', 'Americans', 'We', 'are']),
 WordList(['fellow', 'Americans', 'We', 'are', 'fortunate']),
 WordList(['Americans', 'We', 'are', 'fortunate', 'to']),
 WordList(['We', 'are', 'fortunate', 'to', 'be']),
 WordList(['are', 'fortunate', 'to', 'be', 'alive']),
 WordList(['fortunate', 'to', 'be', 'alive', 'at']),
 WordList(['to', 'be', 'alive', 'at', 'this']),
 WordList(

In [11]:
def count_chars(text):
    """Return the total number of characters across the words of ``text``.

    Only characters that belong to words, as tokenized by TextBlob, are
    counted.
    """
    words = TextBlob(text).words
    return sum(map(len, words))

In [12]:
count_chars(" Hey there. This is Sudhanshu Live online")

33

In [13]:
clinton_blob.tags

[('Mr.', 'NNP'),
 ('Speaker', 'NNP'),
 ('Mr.', 'NNP'),
 ('Vice', 'NNP'),
 ('President', 'NNP'),
 ('members', 'NNS'),
 ('of', 'IN'),
 ('Congress', 'NNP'),
 ('honored', 'VBD'),
 ('guests', 'NNS'),
 ('my', 'PRP$'),
 ('fellow', 'JJ'),
 ('Americans', 'NNPS'),
 ('We', 'PRP'),
 ('are', 'VBP'),
 ('fortunate', 'JJ'),
 ('to', 'TO'),
 ('be', 'VB'),
 ('alive', 'JJ'),
 ('at', 'IN'),
 ('this', 'DT'),
 ('moment', 'NN'),
 ('in', 'IN'),
 ('history', 'NN'),
 ('Never', 'RB'),
 ('before', 'RB'),
 ('has', 'VBZ'),
 ('our', 'PRP$'),
 ('nation', 'NN'),
 ('enjoyed', 'VBN'),
 ('at', 'IN'),
 ('once', 'RB'),
 ('so', 'RB'),
 ('much', 'JJ'),
 ('prosperity', 'NN'),
 ('and', 'CC'),
 ('social', 'JJ'),
 ('progress', 'NN'),
 ('with', 'IN'),
 ('so', 'RB'),
 ('little', 'JJ'),
 ('internal', 'JJ'),
 ('crisis', 'NN'),
 ('and', 'CC'),
 ('so', 'RB'),
 ('few', 'JJ'),
 ('external', 'JJ'),
 ('threats', 'NNS'),
 ('Never', 'RB'),
 ('before', 'RB'),
 ('have', 'VBP'),
 ('we', 'PRP'),
 ('had', 'VBD'),
 ('such', 'JJ'),
 ('a', 'DT'),
 (

In [14]:
for word, pos in clinton_blob.tags:
    print(word, pos)

Mr. NNP
Speaker NNP
Mr. NNP
Vice NNP
President NNP
members NNS
of IN
Congress NNP
honored VBD
guests NNS
my PRP$
fellow JJ
Americans NNPS
We PRP
are VBP
fortunate JJ
to TO
be VB
alive JJ
at IN
this DT
moment NN
in IN
history NN
Never RB
before RB
has VBZ
our PRP$
nation NN
enjoyed VBN
at IN
once RB
so RB
much JJ
prosperity NN
and CC
social JJ
progress NN
with IN
so RB
little JJ
internal JJ
crisis NN
and CC
so RB
few JJ
external JJ
threats NNS
Never RB
before RB
have VBP
we PRP
had VBD
such JJ
a DT
blessed JJ
opportunity NN
and CC
therefore RB
such PDT
a DT
profound JJ
obligation NN
to TO
build VB
the DT
more RBR
perfect JJ
Union NNP
of IN
our PRP$
Founders NNS
’ VBP
dreams NNS
We PRP
begin VBP
the DT
new JJ
century NN
with IN
over IN
20 CD
million CD
new JJ
jobs NNS


In [15]:
clinton_blob.parse()

'Mr./NNP/B-NP/O Speaker/NNP/I-NP/O ,/,/O/O Mr./NNP/B-NP/O Vice/NNP/I-NP/O President/NNP/I-NP/O ,/,/O/O members/NNS/B-NP/O of/IN/B-PP/B-PNP Congress/NNP/B-NP/I-PNP ,/,/O/O honored/VBN/B-VP/O guests/NNS/B-NP/O ,/,/O/O my/PRP$/B-NP/O fellow/NN/I-NP/O Americans/NNPS/I-NP/O :/:/O/O We/PRP/B-NP/O are/VBP/B-VP/O fortunate/JJ/B-ADJP/O to/TO/B-PP/O be/VB/B-VP/O alive/JJ/B-ADJP/O at/IN/B-PP/B-PNP this/DT/B-NP/I-PNP moment/NN/I-NP/I-PNP in/IN/B-PP/B-PNP history/NN/B-NP/I-PNP ././O/O\nNever/RB/B-ADVP/O before/IN/B-PP/O has/VBZ/B-VP/O our/PRP$/B-NP/O nation/NN/I-NP/O enjoyed/VBD/B-VP/O ,/,/O/O at/IN/B-PP/O once/RB/B-ADVP/O ,/,/O/O so/RB/B-NP/O much/JJ/I-NP/O prosperity/NN/I-NP/O and/CC/O/O social/JJ/B-NP/O progress/NN/I-NP/O with/IN/B-PP/B-PNP so/RB/B-NP/I-PNP little/JJ/I-NP/I-PNP internal/JJ/I-NP/I-PNP crisis/NN/I-NP/I-PNP and/CC/O/O so/RB/B-NP/O few/JJ/I-NP/O external/JJ/I-NP/O threats/NNS/I-NP/O ././O/O\nNever/RB/B-ADVP/O before/IN/B-PP/O have/VBP/B-VP/O we/PRP/B-NP/O had/VBD/B-VP/O such/JJ/B-AD

## Word Transformations

In [16]:
from textblob import Word

In [17]:
w = Word("alumni")
w

'alumni'

In [18]:
w.lemmatize()

'alumnus'

In [19]:
# python -m textblob.download_corpora

In [20]:
v = Word("packages")
v.lemmatize()

'package'

In [21]:
v

'packages'

In [22]:
clinton_blob.word_counts

defaultdict(int,
            {'mr': 2,
             'speaker': 1,
             'vice': 1,
             'president': 1,
             'members': 1,
             'of': 2,
             'congress': 1,
             'honored': 1,
             'guests': 1,
             'my': 1,
             'fellow': 1,
             'americans': 1,
             'we': 3,
             'are': 1,
             'fortunate': 1,
             'to': 2,
             'be': 1,
             'alive': 1,
             'at': 2,
             'this': 1,
             'moment': 1,
             'in': 1,
             'history': 1,
             'never': 2,
             'before': 2,
             'has': 1,
             'our': 2,
             'nation': 1,
             'enjoyed': 1,
             'once': 1,
             'so': 3,
             'much': 1,
             'prosperity': 1,
             'and': 3,
             'social': 1,
             'progress': 1,
             'with': 2,
             'little': 1,
             'internal': 1,
     

In [23]:
clinton_blob.word_counts['congress']

1

In [24]:
def get_lexicon(text, n):
    """Return the lemmas of frequent nouns, adjectives and verbs in ``text``.

    A word qualifies when its part-of-speech tag starts with N, J or V and it
    occurs at least ``n`` times in the text, counted case-insensitively.

    Args:
        text: Raw text to analyse.
        n: Minimum number of occurrences for a word to be included.

    Returns:
        A set of lemma strings. Lemmas keep the casing of the word they came
        from, so the same lemma may appear in more than one casing.
    """
    from collections import Counter

    blob = TextBlob(text)
    # Tally lower-cased words once instead of calling ``blob.words.count`` for
    # every tagged word, which rescanned the whole word list each time. The
    # lower-casing mirrors the case-insensitive default of ``WordList.count``.
    occurrences = Counter(word.lower() for word in blob.words)
    wanted_tags = {'n', 'j', 'v'}
    return {word.lemma for word, tag in blob.tags
            if tag[0].lower() in wanted_tags and occurrences[word.lower()] >= n}


In [25]:
get_lexicon(clinoton_speech, 25)

{'A',
 'America',
 'Children',
 'New',
 'Thank',
 'Tonight',
 'ask',
 'be',
 'child',
 'do',
 'have',
 'help',
 'make',
 'more',
 'new',
 'people',
 's',
 'thank',
 'tonight',
 'want',
 'work',
 'year',
 '’'}

# Sentiment Analysis

In [26]:
clinton_blob.sentiment

Sentiment(polarity=0.1688683712121212, subjectivity=0.4484848484848485)

In [28]:
for sentences in clinton_blob.sentences:
    print(sentences, sentences.sentiment.polarity)

Mr. Speaker, Mr. Vice President, members of Congress, honored guests, my fellow Americans:  We are fortunate to be alive at this moment in history. 0.25
Never before has our nation enjoyed, at once, so much prosperity and social progress with so little internal crisis and so few external threats. 0.049404761904761896
Never before have we had such a blessed opportunity and, therefore, such a profound obligation to build the more perfect Union of our Founders’ dreams. 0.3166666666666667
We begin the new century with over 20 million new jobs 0.13636363636363635


In [35]:
sad_sent = "Life is good"
sad_blob = TextBlob(sad_sent)
sad_blob.sentiment.polarity

0.7

In [40]:
from  textblob.sentiments import NaiveBayesAnalyzer

# Build a blob from the first 500 characters with a NaiveBayesAnalyzer attached.
blob = TextBlob(clinoton_speech[:500],analyzer=NaiveBayesAnalyzer())

# NOTE(review): the values printed by this loop are identical to the ones
# printed by the earlier `sentences.sentiment.polarity` loop, so `.polarity`
# does not appear to use the NaiveBayesAnalyzer passed above — confirm, and
# read `sentence.sentiment` instead to see the analyzer's own result.
for sentence in blob.sentences:
    print(sentence, sentence.polarity)

Mr. Speaker, Mr. Vice President, members of Congress, honored guests, my fellow Americans:  We are fortunate to be alive at this moment in history. 0.25
Never before has our nation enjoyed, at once, so much prosperity and social progress with so little internal crisis and so few external threats. 0.049404761904761896
Never before have we had such a blessed opportunity and, therefore, such a profound obligation to build the more perfect Union of our Founders’ dreams. 0.3166666666666667
We begin the new century with over 20 million new jobs 0.13636363636363635


In [54]:
para = "Life is okay. Life is good. Life is amazing. life is superb. life sucks. I hate bad flavours. I hate brocolli. I love icecream. John love soda. Tom love tasty Soda"
sent_blob = TextBlob(para)

for sent in sent_blob.sentences:
    print(sent, sent.polarity)

Life is okay. 0.5
Life is good. 0.7
Life is amazing. 0.6000000000000001
life is superb. 1.0
life sucks. -0.3
I hate bad flavours. -0.75
I hate brocolli. -0.8
I love icecream. 0.5
John love soda. 0.5
Tom love tasty Soda 0.5


In [55]:
sent_blob_nb = TextBlob(para, analyzer=NaiveBayesAnalyzer())
for sent in sent_blob_nb.sentences:
    print(sent, sent.sentiment)

Life is okay. Sentiment(classification='pos', p_pos=0.56821946288025, p_neg=0.4317805371197495)
Life is good. Sentiment(classification='pos', p_pos=0.5995917017800413, p_neg=0.4004082982199584)
Life is amazing. Sentiment(classification='pos', p_pos=0.7119750687268934, p_neg=0.2880249312731062)
life is superb. Sentiment(classification='pos', p_pos=0.8430652769324404, p_neg=0.15693472306755937)
life sucks. Sentiment(classification='neg', p_pos=0.12196027933237585, p_neg=0.8780397206676244)
I hate bad flavours. Sentiment(classification='neg', p_pos=0.3562265388680344, p_neg=0.6437734611319655)
I hate brocolli. Sentiment(classification='pos', p_pos=0.523148148148148, p_neg=0.4768518518518517)
I love icecream. Sentiment(classification='pos', p_pos=0.7817055393586007, p_neg=0.21829446064139904)
John love soda. Sentiment(classification='neg', p_pos=0.29335973086122363, p_neg=0.7066402691387763)
Tom love tasty Soda Sentiment(classification='neg', p_pos=0.4597971218038753, p_neg=0.5402028781961

In [58]:
# Creating an average sentiment

def avg_sentiment(text):
    """Return the mean positive-class probability over the sentences of ``text``.

    Each sentence is scored with TextBlob's ``NaiveBayesAnalyzer`` and the
    resulting ``p_pos`` values are averaged.

    Args:
        text: Raw text to analyse.

    Returns:
        The average ``p_pos`` across all sentences.

    Raises:
        ValueError: If ``text`` contains no sentences (e.g. an empty string),
            because an average is undefined in that case.
    """
    sentences = TextBlob(text, analyzer=NaiveBayesAnalyzer()).sentences
    if not sentences:
        # Without this guard the division below fails with an unexplained
        # ZeroDivisionError.
        raise ValueError("cannot average sentiment: text contains no sentences")
    return sum(s.sentiment.p_pos for s in sentences) / len(sentences)

In [59]:
para = "Life is okay. Life is good. Life is amazing. life is superb. life sucks. I hate bad flavours. I hate brocolli. I love icecream. John love soda. Tom love tasty Soda"
avg_sentiment(para)

0.5259048868691883

In [60]:
avg_sentiment(clinoton_speech)

0.718676998227738

In [61]:
clinton_url = "https://raw.githubusercontent.com/sul-cidr/python_workshops/master/data/clinton2000.txt"
bush_url = "https://raw.githubusercontent.com/sul-cidr/python_workshops/master/data/bush2008.txt"
obama_url = "https://raw.githubusercontent.com/sul-cidr/python_workshops/master/data/obama2016.txt"
trump_url = "https://raw.githubusercontent.com/sul-cidr/python_workshops/master/data/trump.txt"

In [62]:
bush_speech = get_speech(bush_url)
bush_speech

'Madam Speaker, Vice President Cheney, members of Congress, distinguished guests, and fellow citizens:  Seven years have passed since I first stood before you at this rostrum. In that time, our country has been tested in ways none of us could have imagined. We faced hard decisions about peace and war, rising competition in the world economy, and the health and welfare of our citizens. These issues call for vigorous debate, and I think it\'s fair to say, we\'ve answered the call. Yet history will record that amid our differences, we acted with purpose, and together we showed the world the power and resilience of American self-government.  All of us were sent to Washington to carry out the people\'s business. That is the purpose of this body. It is the meaning of our oath. It remains our charge to keep.  The actions of the 110th Congress will affect the security and prosperity of our nation long after this session has ended. In this election year, let us show our fellow Americans that we

In [63]:
avg_sentiment(bush_speech)

0.8006456103438159

In [64]:
obama_speech = get_speech(obama_url)
avg_sentiment(obama_speech)

0.7064523842839437

In [None]:
trump_speech = get_speech(trump_url)
avg_sentiment(trump_speech)