## Notebook Goals

The goal of this notebook is to connect to the twitter API and do some basic NLP analysis on the U.S. presidential candidates mentioned in the table below.

## Candidate List

| **Republican**  |**Twitter Handle** |
|:--------------- |:----------------- |
|Donald Trump     |@realDonaldTrump   |



| **Democrat**    |**Twitter Handle** |
|:--------------- |:----------------- |
|Joe Biden        |@JoeBiden          |
|Elizabeth Warren |@ewarren           |
|Bernie Sanders   |@BernieSanders     |
|Pete Buttigieg   |@PeteButtigieg     |
|Tulsi Gabbard    |@TulsiGabbard      |
|Amy Klobuchar    |@amyklobuchar      |
|Kamala Harris    |@KamalaHarris      |
|Andrew Yang      |@AndrewYang        |
|Corey Booker     |@CoreyBooker       |
|Michael Bloomberg|@MikeBloomberg     |
|Tom Steyer       |@TomSteyer         |


### Twitter Dev Information


In [1]:
# Local file contains twitter API Keys
import config

ModuleNotFoundError: No module named 'config'

In [2]:
import tweepy
from textblob import TextBlob 
import re
import numpy as np
import pandas as pd
import datetime

In [8]:
TWITTER_AUTH = tweepy.OAuthHandler(config.TWITTER_CONSUMER_KEY,
                                   config.TWITTER_CONSUMER_SECRET)
TWITTER_AUTH.set_access_token(config.TWITTER_ACCESS_TOKEN,
                              config.TWITTER_ACCESS_TOKEN_SECRET)

In [9]:
TWITTER = tweepy.API(TWITTER_AUTH)

In [10]:
donaldTrump = TWITTER.get_user('realDonaldTrump')
joeBiden = TWITTER.get_user('JoeBiden')
elizabethWarren =  TWITTER.get_user('ewarren')
bernieSanders = TWITTER.get_user('BernieSanders')


In [11]:
donaldTrump

User(_api=<tweepy.api.API object at 0x1a17deb710>, _json={'id': 25073877, 'id_str': '25073877', 'name': 'Donald J. Trump', 'screen_name': 'realDonaldTrump', 'location': 'Washington, DC', 'profile_location': None, 'description': '45th President of the United States of America🇺🇸', 'url': 'https://t.co/OMxB0x7xC5', 'entities': {'url': {'urls': [{'url': 'https://t.co/OMxB0x7xC5', 'expanded_url': 'http://www.Instagram.com/realDonaldTrump', 'display_url': 'Instagram.com/realDonaldTrump', 'indices': [0, 23]}]}, 'description': {'urls': []}}, 'protected': False, 'followers_count': 67304350, 'friends_count': 47, 'listed_count': 111078, 'created_at': 'Wed Mar 18 13:46:38 +0000 2009', 'favourites_count': 7, 'utc_offset': None, 'time_zone': None, 'geo_enabled': True, 'verified': True, 'statuses_count': 46679, 'lang': None, 'status': {'created_at': 'Sat Dec 07 00:28:48 +0000 2019', 'id': 1203109065070665728, 'id_str': '1203109065070665728', 'text': 'Why is the World Bank loaning money to China? Can 

In [32]:
warrenTweets = elizabethWarren.timeline(
      count=250, exclude_replies=True, include_rts=False,
      tweet_mode='extended')

In [33]:
warrenTweets[86].full_text

'I knew Mark Zandi was skeptical, so I had him check the numbers on my plan to pay for #MedicareForAll. He confirmed they add up. https://t.co/drGc6eGNCD'

## Taken from Geeks for Geeks

In [34]:
class TwitterClient(object): 
    ''' 
    Generic Twitter Class for sentiment analysis. 
    '''
    def __init__(self): 
        ''' 
        Class constructor or initialization method. 
        '''
        # keys and tokens from the Twitter Dev Console 
        consumer_key = '4iNaXAi4Fj5IzvGk1LyUcb5hf'
        consumer_secret = '58nT5sWiKRGvh4RKpD6Vgxeq9K3Fs9VaFVHpdFEavT7tgnSSvA'
        access_token = '1140717029957222401-deCFUkHbshjBn3j9J9SeMEJ7cQjafx'
        access_token_secret = 'xozAhB01RpR934YABm2NPns6SMpff1hLI7XGIhzFyDubx'
  
        # attempt authentication 
        try: 
            # create OAuthHandler object 
            self.auth = tweepy.OAuthHandler(consumer_key, consumer_secret) 
            # set access token and secret 
            self.auth.set_access_token(access_token, access_token_secret) 
            # create tweepy API object to fetch tweets 
            self.api = tweepy.API(self.auth) 
        except: 
            print("Error: Authentication Failed") 
  
    def clean_tweet(self, tweet): 
        ''' 
        Utility function to clean tweet text by removing links, special characters 
        using simple regex statements. 
        '''
        return ' '.join(re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ", tweet).split()) 
  
    def get_tweet_sentiment(self, tweet): 
        ''' 
        Utility function to classify sentiment of passed tweet 
        using textblob's sentiment method 
        '''
        # create TextBlob object of passed tweet text 
        analysis = TextBlob(self.clean_tweet(tweet)) 
        # set sentiment 
        if analysis.sentiment.polarity > 0: 
            return 'positive'
        elif analysis.sentiment.polarity == 0: 
            return 'neutral'
        else: 
            return 'negative'
  
    def get_tweets(self, query, count = 10): 
        ''' 
        Main function to fetch tweets and parse them. 
        '''
        # empty list to store parsed tweets 
        tweets = [] 
  
        try: 
            # call twitter api to fetch tweets 
            fetched_tweets = self.api.search(q = query, count = count) 
  
            # parsing tweets one by one 
            for tweet in fetched_tweets: 
                # empty dictionary to store required params of a tweet 
                parsed_tweet = {} 
  
                # saving text of tweet 
                parsed_tweet['text'] = tweet.text 
                # saving sentiment of tweet 
                parsed_tweet['sentiment'] = self.get_tweet_sentiment(tweet.text) 
  
                # appending parsed tweet to tweets list 
                if tweet.retweet_count > 0: 
                    # if tweet has retweets, ensure that it is appended only once 
                    if parsed_tweet not in tweets: 
                        tweets.append(parsed_tweet) 
                else: 
                    tweets.append(parsed_tweet) 
  
            # return parsed tweets 
            return tweets 
  
        except tweepy.TweepError as e: 
            # print error (if any) 
            print("Error : " + str(e))

In [35]:
def reportPositiveAndNegativeTweetPercentage(targetName): 
    # creating object of TwitterClient Class 
    api = TwitterClient() 
    # calling function to get tweets 
    tweets = api.get_tweets(query = targetName, count = 200) 
  
    # picking positive tweets from tweets 
    ptweets = [tweet for tweet in tweets if tweet['sentiment'] == 'positive'] 
    # percentage of positive tweets 
    print("{}'s positive tweets percentage: {} %".format(targetName, 100*len(ptweets)/len(tweets))) 
    # picking negative tweets from tweets 
    ntweets = [tweet for tweet in tweets if tweet['sentiment'] == 'negative'] 
    # percentage of negative tweets 
    print("{}'s negative tweets percentage: {} %".format(targetName, 100*len(ntweets)/len(tweets))) 
    # percentage of neutral tweets 
    #print("Neutral tweets percentage: {} % ".format(100*(len(tweets) - len(ntweets) - len(ptweets)))/len(tweets)) 
    

In [11]:
# printing first 5 positive tweets 
print("\n\nPositive tweets:") 
for tweet in ptweets[:10]: 
    print(tweet['text'])
    
print('---------------------------------------')

# printing first 5 negative tweets 
print("\n\nNegative tweets:") 
for tweet in ntweets[:10]: 
    print(tweet['text']) 



Positive tweets:


NameError: name 'ptweets' is not defined

In [16]:
for targetName in targetNameList:
    reportPositiveAndNegativeTweetPercentage(targetName)
    print('-----------------------------------------------------')
    print('|||||||||||||||||||||||||||||||||||||||||||||||||||||')
    print('-----------------------------------------------------')

Donald Trump's positive tweets percentage: 23.68421052631579 %
Donald Trump's negative tweets percentage: 27.63157894736842 %
-----------------------------------------------------
|||||||||||||||||||||||||||||||||||||||||||||||||||||
-----------------------------------------------------
Joe Biden's positive tweets percentage: 33.9622641509434 %
Joe Biden's negative tweets percentage: 24.528301886792452 %
-----------------------------------------------------
|||||||||||||||||||||||||||||||||||||||||||||||||||||
-----------------------------------------------------
Elizabeth Warren's positive tweets percentage: 35.416666666666664 %
Elizabeth Warren's negative tweets percentage: 18.75 %
-----------------------------------------------------
|||||||||||||||||||||||||||||||||||||||||||||||||||||
-----------------------------------------------------
Bernie Sanders's positive tweets percentage: 25.0 %
Bernie Sanders's negative tweets percentage: 10.0 %
-----------------------------------------

In [36]:
targetNameList = [
    'Donald Trump',
    'Joe Biden',
    'Elizabeth Warren',
    'Bernie Sanders',
    'Pete Buttigieg',
    'Tulsi Gabbard',
    'Amy Klobuchar',
    'Kamala Harris',
    'Andrew Yang',
    'Corey Booker',
    'Michael Bloomberg',
    'Tom Steyer'
]

In [37]:
presidentialCandidateSentimentTracker = {
    'name': [],
    'timestamp': [],
    'Positive Sentiment Percentage': [],
    'Negative Sentiment Percentage': []
}

In [38]:
def recordPositiveTweetPercentage(targetName): 
    # creating object of TwitterClient Class 
    api = TwitterClient() 
    # calling function to get tweets 
    tweets = api.get_tweets(query = targetName, count = 200) 
  
    # picking positive tweets from tweets 
    posTweets = [tweet for tweet in tweets if tweet['sentiment'] == 'positive'] 
    # percentage of positive tweets 
    posTweetsPercentage = 100*len(posTweets)/len(tweets)
    
    return posTweetsPercentage
    

In [39]:
def recordNegativeTweetPercentage(targetName):
    # creating object of TwitterClient Class 
    api = TwitterClient() 
    # calling function to get tweets 
    tweets = api.get_tweets(query = targetName, count = 200) 
    
    # picking negative tweets from tweets 
    negTweets = [tweet for tweet in tweets if tweet['sentiment'] == 'negative'] 
    # percentage of negative tweets 
    negTweetsPercentage = 100*len(negTweets)/len(tweets)
    
    return negTweetsPercentage

In [40]:
actionCounter = 0

In [41]:
import time as t
from datetime import datetime
import pandas

In [42]:
while actionCounter != 6:
    for target in targetNameList:
            presidentialCandidateSentimentTracker['name'].append(target)
            presidentialCandidateSentimentTracker['timestamp'].append(datetime.now())
            presidentialCandidateSentimentTracker['Positive Sentiment Percentage'].append(recordPositiveTweetPercentage(target))
            presidentialCandidateSentimentTracker['Negative Sentiment Percentage'].append(recordNegativeTweetPercentage(target))    
    actionCounter+=1
    print(presidentialCandidateSentimentTracker)
    t.sleep(600)

{'name': ['Donald Trump', 'Joe Biden', 'Elizabeth Warren', 'Bernie Sanders', 'Pete Buttigieg', 'Tulsi Gabbard', 'Amy Klobuchar', 'Kamala Harris', 'Andrew Yang', 'Corey Booker', 'Michael Bloomberg', 'Tom Steyer'], 'timestamp': [datetime.datetime(2019, 11, 20, 21, 52, 31, 371023), datetime.datetime(2019, 11, 20, 21, 52, 32, 800462), datetime.datetime(2019, 11, 20, 21, 52, 34, 213570), datetime.datetime(2019, 11, 20, 21, 52, 35, 669507), datetime.datetime(2019, 11, 20, 21, 52, 37, 349844), datetime.datetime(2019, 11, 20, 21, 52, 38, 678261), datetime.datetime(2019, 11, 20, 21, 52, 40, 187704), datetime.datetime(2019, 11, 20, 21, 52, 41, 694149), datetime.datetime(2019, 11, 20, 21, 52, 43, 240999), datetime.datetime(2019, 11, 20, 21, 52, 44, 680465), datetime.datetime(2019, 11, 20, 21, 52, 46, 38206), datetime.datetime(2019, 11, 20, 21, 52, 47, 414074)], 'Positive Sentiment Percentage': [27.419354838709676, 28.0, 34.24657534246575, 35.483870967741936, 40.0, 23.333333333333332, 32.0, 19.354

{'name': ['Donald Trump', 'Joe Biden', 'Elizabeth Warren', 'Bernie Sanders', 'Pete Buttigieg', 'Tulsi Gabbard', 'Amy Klobuchar', 'Kamala Harris', 'Andrew Yang', 'Corey Booker', 'Michael Bloomberg', 'Tom Steyer', 'Donald Trump', 'Joe Biden', 'Elizabeth Warren', 'Bernie Sanders', 'Pete Buttigieg', 'Tulsi Gabbard', 'Amy Klobuchar', 'Kamala Harris', 'Andrew Yang', 'Corey Booker', 'Michael Bloomberg', 'Tom Steyer', 'Donald Trump', 'Joe Biden', 'Elizabeth Warren', 'Bernie Sanders', 'Pete Buttigieg', 'Tulsi Gabbard', 'Amy Klobuchar', 'Kamala Harris', 'Andrew Yang', 'Corey Booker', 'Michael Bloomberg', 'Tom Steyer', 'Donald Trump', 'Joe Biden', 'Elizabeth Warren', 'Bernie Sanders', 'Pete Buttigieg', 'Tulsi Gabbard', 'Amy Klobuchar', 'Kamala Harris', 'Andrew Yang', 'Corey Booker', 'Michael Bloomberg', 'Tom Steyer', 'Donald Trump', 'Joe Biden', 'Elizabeth Warren', 'Bernie Sanders', 'Pete Buttigieg', 'Tulsi Gabbard', 'Amy Klobuchar', 'Kamala Harris', 'Andrew Yang', 'Corey Booker', 'Michael Bloomb

In [58]:
posSentimentList = []
negSentimentList = []
for entry in presidentialCandidateSentimentTracker:
    posSentimentList.append(presidentialCandidateSentimentTracker[entry][0])
    negSentimentList.append(presidentialCandidateSentimentTracker[entry][1])

## Plotting

In [59]:
import matplotlib.pyplot as plt

In [183]:
presidentialCandidateSentimentTracker

{'name': ['Donald Trump',
  'Joe Biden',
  'Elizabeth Warren',
  'Bernie Sanders',
  'Pete Buttigieg',
  'Tulsi Gabbard',
  'Amy Klobuchar',
  'Kamala Harris',
  'Andrew Yang',
  'Corey Booker',
  'Michael Bloomberg',
  'Tom Steyer',
  'Donald Trump',
  'Joe Biden',
  'Elizabeth Warren',
  'Bernie Sanders',
  'Pete Buttigieg',
  'Tulsi Gabbard',
  'Amy Klobuchar',
  'Kamala Harris',
  'Andrew Yang',
  'Corey Booker',
  'Michael Bloomberg',
  'Tom Steyer',
  'Donald Trump',
  'Joe Biden',
  'Elizabeth Warren',
  'Bernie Sanders',
  'Pete Buttigieg',
  'Tulsi Gabbard',
  'Amy Klobuchar',
  'Kamala Harris',
  'Andrew Yang',
  'Corey Booker',
  'Michael Bloomberg',
  'Tom Steyer',
  'Donald Trump',
  'Joe Biden',
  'Elizabeth Warren',
  'Bernie Sanders',
  'Pete Buttigieg',
  'Tulsi Gabbard',
  'Amy Klobuchar',
  'Kamala Harris',
  'Andrew Yang',
  'Corey Booker',
  'Michael Bloomberg',
  'Tom Steyer',
  'Donald Trump',
  'Joe Biden',
  'Elizabeth Warren',
  'Bernie Sanders',
  'Pete Butti

In [46]:
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

In [43]:
df = pd.DataFrame(presidentialCandidateSentimentTracker)

In [50]:
df[df['name']=='Kamala Harris']

Unnamed: 0,name,timestamp,Positive Sentiment Percentage,Negative Sentiment Percentage
7,Kamala Harris,2019-11-20 21:52:41.694149,19.354839,39.68254
19,Kamala Harris,2019-11-20 22:02:58.688369,24.285714,37.142857
31,Kamala Harris,2019-11-20 22:13:14.910944,28.125,31.746032
43,Kamala Harris,2019-11-20 22:23:31.626915,27.868852,24.590164
55,Kamala Harris,2019-11-20 22:33:48.244718,39.0625,19.047619
67,Kamala Harris,2019-11-20 22:44:05.087092,27.777778,20.754717


In [51]:
df[df['name']=='Pete Buttigieg']

Unnamed: 0,name,timestamp,Positive Sentiment Percentage,Negative Sentiment Percentage
4,Pete Buttigieg,2019-11-20 21:52:37.349844,40.0,33.333333
16,Pete Buttigieg,2019-11-20 22:02:54.427637,33.823529,23.880597
28,Pete Buttigieg,2019-11-20 22:13:10.984172,38.983051,23.728814
40,Pete Buttigieg,2019-11-20 22:23:27.563050,33.783784,21.917808
52,Pete Buttigieg,2019-11-20 22:33:44.061301,38.983051,26.666667
64,Pete Buttigieg,2019-11-20 22:44:00.549075,29.850746,28.125


In [54]:
df[df['name']=='Tulsi Gabbard']

Unnamed: 0,name,timestamp,Positive Sentiment Percentage,Negative Sentiment Percentage
5,Tulsi Gabbard,2019-11-20 21:52:38.678261,23.333333,18.644068
17,Tulsi Gabbard,2019-11-20 22:02:55.694670,25.925926,26.25
29,Tulsi Gabbard,2019-11-20 22:13:12.317247,40.816327,21.153846
41,Tulsi Gabbard,2019-11-20 22:23:28.903375,44.067797,17.241379
53,Tulsi Gabbard,2019-11-20 22:33:45.543108,34.042553,25.531915
65,Tulsi Gabbard,2019-11-20 22:44:02.052408,36.363636,25.0
