In [3]:
import tweepy           # To consume Twitter's API
import pandas as pd     # To handle data
import numpy as np      # For number computing

# For plotting and visualization:
from IPython.display import display
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [4]:
#import our access keys:
from credentials import *    # This will allow us to use the keys as variables

In [5]:
# API's setup:
def twitter_setup():
    """
    Build an authenticated client for Twitter's API.

    Reads the credentials from the names CONSUMER_KEY, CONSUMER_SECRET,
    ACCESS_TOKEN and ACCESS_SECRET, which must already be in scope.

    Returns:
        The `tweepy.API` object created from the OAuth handler.
    """
    # Consumer credentials identify the app; the access token identifies the user.
    oauth_handler = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
    oauth_handler.set_access_token(ACCESS_TOKEN, ACCESS_SECRET)

    return tweepy.API(oauth_handler)

In [6]:
# Create the authenticated API client (the "extractor"):
extractor = twitter_setup()

# Request the timeline of @realDonaldTrump, asking for 200 tweets (count=200),
# and report how many were actually returned:
tweets = extractor.user_timeline(screen_name="realDonaldTrump", count=200)
print("Number of tweets extracted: {}.\n".format(len(tweets)))

Number of tweets extracted: 200.



In [10]:
# Store the text of every extracted tweet in a single-column DataFrame:
data = pd.DataFrame(data=[status.text for status in tweets], columns=['Tweets'])

# Show the first 10 rows of the DataFrame:
display(data.iloc[:10])

Unnamed: 0,Tweets
0,“Statement by President Trump on the Apprehens...
1,....earth shattering. He and his brother could...
2,"The biggest story yesterday, the one that has ..."
3,I hope people will start to focus on our Massi...
4,....came to the campaign. Few people knew the ...
5,The Fake News is working overtime. As Paul Man...
6,"....Also, there is NO COLLUSION!"
7,"Sorry, but this is years ago, before Paul Mana..."
8,Great job by MichaelCaputo on @foxandfriends.
9,"Report out that Obama Campaign paid $972,000 t..."


In [11]:
# Text length is derived from the tweet text; the remaining columns are
# plain attributes copied from each tweet object.
data['len'] = np.array([len(status.text) for status in tweets])
for column, attribute in (
    ('ID', 'id'),
    ('Date', 'created_at'),
    ('Source', 'source'),
    ('Likes', 'favorite_count'),
    ('RTs', 'retweet_count'),
):
    data[column] = np.array([getattr(status, attribute) for status in tweets])

In [12]:
# Preview the first rows to check the newly added metadata columns.
data.head()

Unnamed: 0,Tweets,len,ID,Date,Source,Likes,RTs
0,“Statement by President Trump on the Apprehens...,134,925373392827666432,2017-10-31 14:46:24,Twitter for iPhone,11073,3269
1,....earth shattering. He and his brother could...,134,925364408364171265,2017-10-31 14:10:42,Twitter for iPhone,21357,5429
2,"The biggest story yesterday, the one that has ...",140,925363340553211909,2017-10-31 14:06:27,Twitter for iPhone,24579,6886
3,I hope people will start to focus on our Massi...,136,925336826717593600,2017-10-31 12:21:06,Twitter for iPhone,41108,10236
4,....came to the campaign. Few people knew the ...,139,925335577217683456,2017-10-31 12:16:08,Twitter for iPhone,32485,8115


In [13]:
# Create time series indexed by tweet date for length, likes and retweets.
# Not useful so far:
tlen, tfav, tret = (
    pd.Series(data=data[column].values, index=data['Date'])
    for column in ('len', 'Likes', 'RTs')
)

In [15]:
from textblob import TextBlob
import re
#clean the text in a tweet by removing mentions, links and special characters using regex.
def clean_tweet(tweet):
    """
    Return `tweet` reduced to space-separated ASCII letters and digits.

    At each position the first matching rule below is replaced by a space:
      * an @mention (handles may contain letters, digits and underscores);
      * any single character that is not an ASCII letter, digit, space or tab;
      * a link of the form ``scheme://rest``.
    Runs of whitespace are then collapsed into single spaces and leading /
    trailing whitespace is dropped.

    Args:
        tweet: The raw tweet text (a ``str``).

    Returns:
        The cleaned text as a ``str`` (empty if nothing survives).
    """
    # Raw string so \w and \S reach the regex engine instead of being treated
    # as (invalid) Python string escapes. The underscore in the mention class
    # ensures "@foo_bar" is removed whole rather than leaving "bar" behind.
    pattern = r"(@[A-Za-z0-9_]+)|([^0-9A-Za-z \t])|(\w+://\S+)"
    return ' '.join(re.sub(pattern, " ", tweet).split())
#classify the polarity of a tweet using textblob.
def analize_sentiment(tweet):
    """
    Map a tweet's TextBlob polarity to a sign label.

    The text is passed through `clean_tweet` before being analysed.

    Returns:
        1 when the polarity is above zero, 0 when it equals zero,
        and -1 otherwise.
    """
    polarity = TextBlob(clean_tweet(tweet)).sentiment.polarity
    if polarity > 0:
        return 1
    return 0 if polarity == 0 else -1

In [16]:
# Add a column holding the sentiment label (1, 0 or -1) of every tweet:
data['SA'] = np.array(list(map(analize_sentiment, data['Tweets'])))

In [35]:
# Preview up to the first 20 rows, now including the 'SA' sentiment column.
data.head(20)

Unnamed: 0,Tweets,len,ID,Date,Source,Likes,RTs,SA
0,“Statement by President Trump on the Apprehens...,134,925373392827666432,2017-10-31 14:46:24,Twitter for iPhone,11073,3269,-1
1,....earth shattering. He and his brother could...,134,925364408364171265,2017-10-31 14:10:42,Twitter for iPhone,21357,5429,-1
2,"The biggest story yesterday, the one that has ...",140,925363340553211909,2017-10-31 14:06:27,Twitter for iPhone,24579,6886,-1
3,I hope people will start to focus on our Massi...,136,925336826717593600,2017-10-31 12:21:06,Twitter for iPhone,41108,10236,0
4,....came to the campaign. Few people knew the ...,139,925335577217683456,2017-10-31 12:16:08,Twitter for iPhone,32485,8115,-1
5,The Fake News is working overtime. As Paul Man...,140,925333956110757888,2017-10-31 12:09:41,Twitter for iPhone,34041,8787,-1
6,"....Also, there is NO COLLUSION!",32,925006418989715456,2017-10-30 14:28:10,Twitter for iPhone,87131,22172,0
7,"Sorry, but this is years ago, before Paul Mana...",143,925005659569041409,2017-10-30 14:25:09,Twitter for iPhone,115978,34284,-1
8,Great job by MichaelCaputo on @foxandfriends.,45,924966722544848896,2017-10-30 11:50:26,Twitter for iPhone,40596,7353,1
9,"Report out that Obama Campaign paid $972,000 t...",132,924963492645437441,2017-10-30 11:37:36,Twitter for iPhone,102496,32613,1


In [18]:
import pysentiment as ps

In [42]:
# Score a single tweet (row label 7) with pysentiment's LM analyzer:
# tokenize the text first, then compute the score from the tokens.
lm = ps.LM()
tokens = lm.tokenize(data.loc[7, 'Tweets'])
score = lm.get_score(tokens)

In [43]:
# Display the score returned by lm.get_score for the sample tweet.
score

{'Negative': 0, 'Polarity': 0.0, 'Positive': 0, 'Subjectivity': 0.0}

In [24]:
# Show the complete text of the first tweet (DataFrame previews truncate it).
data['Tweets'][0]

'“Statement by President Trump on the Apprehension of Mustafa al-Imam for His Alleged Role in Benghazi Attacks” https://t.co/2U7WJmfGlM'