In [1]:
import re

from textblob import TextBlob

In [None]:
# helper function to clean tweets
def processTweet(tweet):
    """Normalise a raw tweet into lower-case text for sentiment scoring.

    Steps, in order: drop HTML entities, @mentions and $tickers; lower-case;
    drop hyperlinks and #hashtags; drop words of one or two characters; drop
    characters outside the Basic Multilingual Plane; collapse every run of
    whitespace (newlines included) to a single space; strip leading spaces.

    Punctuation is NOT removed, and only leading spaces are stripped, so a
    single trailing space may remain.

    Args:
        tweet (str): the raw tweet text.

    Returns:
        str: the cleaned tweet.
    """
    # Remove HTML special entities (e.g. &amp;)
    tweet = re.sub(r'&\w*;', '', tweet)
    # Remove @username mentions entirely (raw string: '\S' is a regex escape,
    # not a Python string escape)
    tweet = re.sub(r'@\S+', '', tweet)
    # Remove tickers
    tweet = re.sub(r'\$\w*', '', tweet)
    # To lowercase
    tweet = tweet.lower()
    # Remove hyperlinks. Match only up to the next whitespace: a greedy '.*'
    # would also swallow any later text on the line that contains a '/'.
    tweet = re.sub(r'https?://\S+', '', tweet)
    # Remove hashtags
    tweet = re.sub(r'#\w*', '', tweet)
    # Remove words with 2 or fewer letters
    tweet = re.sub(r'\b\w{1,2}\b', '', tweet)
    # Remove characters beyond Basic Multilingual Plane (BMP) of Unicode.
    # Done before whitespace collapsing so a removed emoji leaves no double space.
    tweet = ''.join(c for c in tweet if c <= '\uFFFF')
    # Collapse all whitespace runs (including single new line characters)
    tweet = re.sub(r'\s+', ' ', tweet)
    # Remove the single space remaining at the front of the tweet.
    return tweet.lstrip(' ')

In [2]:
tweet_1 = """#angry 

No temperament to be President.

#unhinged

@CoryBooker is a unfit to be President 

#DemocraticDebate https://t.co/e6tWmcZuYx
"""
tweet_2 = "@JohnFromOhio7 @TulsiGabbard @SCDemocrats When she takes the gloves off in the debates and shreds the war loving bitches that are the MSDNC (save for Bernie)"
tweet_3 = "@jwhoopes2 @DearAuntCrabby @amjoyshow @CoryBooker I think most of the candidates would make excellent cabinet officials. I like Cory Booker very much. I just pray the debates don’t turn into an attack festival."

In [8]:
tweet = TextBlob(tweet_1)
tweet.sentiment

Sentiment(polarity=-0.5, subjectivity=1.0)

In [None]:
tweet = TextBlob(tweet_2)
tweet.sentiment

In [None]:
tweet = TextBlob(tweet_3)
tweet.sentiment

In [None]:
tweet.noun_phrases

In [None]:
tweet.words

In [None]:
tweet.sentences

In [None]:
tweet_4 = "It’s a busy weekend in South Carolina, but there’s always time for a birthday song. Happy birthday, @ewarren! https://t.co/p9noaO5Zxe"

In [None]:
tweet = TextBlob(tweet_4)
tweet.sentiment

In [None]:
tweet_5 = "RT @Bizzysims: @CoryBooker How is this dumbass who has a 0% chance even on my twitter feed"

In [None]:
tweet = TextBlob(tweet_5)
tweet.sentiment

In [None]:
tweet.words

In [14]:
tweet = TextBlob("If my history is correct @ASDem, is the first African-American male to run a presidential campaign for a major party candidate. @CoryBooker's manager is a fantastic strategist and joined us on @TheElectables podcast this week. Enjoy! https://t.co/Aaydbaxv8A")
tweet.sentiment

Sentiment(polarity=0.2425, subjectivity=0.4666666666666667)

In [None]:
tweet.noun_phrases

In [None]:
processTweet(tweet_5)

In [None]:
# Score the cleaned tweet. processTweet leaves punctuation in place, so the
# '%' left over from "0%" is stripped here before building the TextBlob.
tweet = TextBlob(processTweet(tweet_5).replace('%', ''))
tweet.sentiment

In [None]:
tweet

In [3]:
from bs4 import BeautifulSoup
from nltk.tokenize import WordPunctTokenizer

tok = WordPunctTokenizer()
# Twitter handles may contain underscores; without `_` in the class,
# "@user_name" would leave "name" behind as a stray word.
pat1 = r'@[A-Za-z0-9_]+'
# Match up to the next whitespace so URLs containing characters such as
# '-', '_', '?', '=' or '%' are removed whole instead of leaving fragments.
pat2 = r'https?://\S+'
combined_pat = r'|'.join((pat1, pat2))
def tweet_cleaner(text):
    """Reduce a raw tweet to lower-case, space-separated alphabetic words.

    The text is passed through BeautifulSoup (lxml parser) to drop markup and
    decode HTML entities, @mentions and http(s) URLs are removed, every
    non-ASCII-letter character is replaced by a space, and the result is
    lower-cased and re-joined with single spaces.

    Args:
        text (str): the raw tweet text.

    Returns:
        str: the cleaned tweet, with no leading or trailing whitespace.
    """
    souped = BeautifulSoup(text, 'lxml').get_text()
    stripped = re.sub(combined_pat, '', souped)
    # The former `stripped.decode("utf-8-sig")` step was Python 2 residue: str
    # has no .decode in Python 3, so it always raised and was swallowed by a
    # bare except. Any BOM or U+FFFD character is a non-letter and is turned
    # into a space below, so dropping that step does not change the result.
    letters_only = re.sub("[^a-zA-Z]", " ", stripped)
    lower_case = letters_only.lower()
    # Replacing non-letters with spaces creates runs of whitespace;
    # tokenizing and re-joining collapses them to single spaces.
    words = tok.tokenize(lower_case)
    return " ".join(words).strip()

In [4]:
tweet_cleaner(tweet_1)

'angry no temperament to be president unhinged is a unfit to be president democraticdebate'

In [5]:
tweet_1

'#angry \n\nNo temperament to be President.\n\n#unhinged\n\n@CoryBooker is a unfit to be President \n\n#DemocraticDebate https://t.co/e6tWmcZuYx\n'

In [7]:
tweet = TextBlob(tweet_cleaner(tweet_1))
tweet.sentiment

Sentiment(polarity=-0.5, subjectivity=1.0)

In [9]:
t = "If my history is correct @ASDem, is the first African-American male to run a presidential campaign for a major party candidate. @CoryBooker's manager is a fantastic strategist and joined us on @TheElectables podcast this week. Enjoy! https://t.co/Aaydbaxv8A"

In [11]:
tweet_cleaner(t)

'if my history is correct is the first african american male to run a presidential campaign for a major party candidate s manager is a fantastic strategist and joined us on podcast this week enjoy'

In [12]:
t

"If my history is correct @ASDem, is the first African-American male to run a presidential campaign for a major party candidate. @CoryBooker's manager is a fantastic strategist and joined us on @TheElectables podcast this week. Enjoy! https://t.co/Aaydbaxv8A"

In [13]:
tweet = TextBlob(tweet_cleaner(t))
tweet.sentiment

Sentiment(polarity=0.15892857142857145, subjectivity=0.33333333333333337)

In [1]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyser = SentimentIntensityAnalyzer()

In [5]:
tweet_1

'#angry \n\nNo temperament to be President.\n\n#unhinged\n\n@CoryBooker is a unfit to be President \n\n#DemocraticDebate https://t.co/e6tWmcZuYx\n'

In [6]:
tweet_2

'@JohnFromOhio7 @TulsiGabbard @SCDemocrats When she takes the gloves off in the debates and shreds the war loving bitches that are the MSDNC (save for Bernie)'

In [9]:
tweet_3

'@jwhoopes2 @DearAuntCrabby @amjoyshow @CoryBooker I think most of the candidates would make excellent cabinet officials. I like Cory Booker very much. I just pray the debates don’t turn into an attack festival.'

In [10]:
analyser.polarity_scores(tweet_3)

{'neg': 0.079, 'neu': 0.614, 'pos': 0.306, 'compound': 0.8344}

In [8]:
# NOTE(review): `score` is never assigned in any visible cell, so this raises
# NameError on Restart & Run All. Presumably it held an
# analyser.polarity_scores(...) result from a since-deleted cell — confirm
# which tweet it was computed from and restore that assignment.
score

{'neg': 0.231, 'neu': 0.653, 'pos': 0.116, 'compound': -0.5994}