#### Import libraries
Install the necessary packages and import the relevant libraries.

In [None]:
# !pip install tweepy
# !pip install credentials
# !pip install textblob

In [None]:
# General:
import tweepy           # This is an easy-to-use Python library for accessing the Twitter API
import pandas as pd     # To create and manage necessary data strucutres
import numpy as np      # For scientific number computing


# For plotting and visualization:
from IPython.display import display
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

#### Creating a Twitter App:
In order to extract tweets for analysis, we need access to a Twitter account and must create an app. The website to do this is https://apps.twitter.com/.

From this app that we're creating we will save the following information in a script called credentials.py:

Consumer Key (API Key)
Consumer Secret (API Secret)
Access Token
Access Token Secret

The following code extracts the tweet information and stores it in an object called tweets. Each tweet in this object has the following properties:
* id
* tweet text
* created_at
* source
* favorite_count
* retweet_count
* geo
* coordinates
* entities

The attributes and methods available on a tweet object can be displayed with dir(tweets[0]).

In [None]:
# Twitter API credentials.
# Secrets must never be stored in the notebook itself, so the four values are
# read from environment variables; export them before starting the kernel.
# Each name is None when its variable is unset.
# NOTE(review): the keys that used to be hardcoded in this cell have been
# exposed and should be revoked/regenerated in the Twitter developer console.
import os

consumer_key = os.environ.get("TWITTER_CONSUMER_KEY")
consumer_secret = os.environ.get("TWITTER_CONSUMER_SECRET")
access_token = os.environ.get("TWITTER_ACCESS_TOKEN")
access_token_secret = os.environ.get("TWITTER_ACCESS_TOKEN_SECRET")

In [None]:
# Now we define a function that authenticates with the Twitter API so we can extract Twitter data

from credentials import *    # This will allow us to use the keys as variables

# API's setup:
def twitter_setup():
    """Set up the Twitter API client with our access keys.

    Reads the module-level ``consumer_key``, ``consumer_secret``,
    ``access_token`` and ``access_token_secret`` values.

    Returns:
        The ``tweepy.API`` object built from the configured OAuth handler.
    """
    # Authentication and access using keys:
    oauth_handler = tweepy.OAuthHandler(consumer_key, consumer_secret)
    oauth_handler.set_access_token(access_token, access_token_secret)

    # Hand back the API wrapper bound to that handler:
    return tweepy.API(oauth_handler)

In [None]:
# Build the API client used to pull the tweets:
extractor = twitter_setup()

# Request the account's timeline (count=200) and keep the result as `tweets`:
tweets = extractor.user_timeline(screen_name="realDonaldTrump", count=200)
print("Number of tweets extracted: {}.\n".format(len(tweets)))

# Show the text of the first 5 tweets returned, each followed by a blank line:
print("5 recent tweets:\n")
for tweet in tweets[:5]:
    print(tweet.text, end="\n\n")

#### Collecting and structuring tweet data
Here a pandas dataframe is structured to receive all the tweet information.

In [None]:
# Collect the tweet texts into a single-column dataframe:
tweet_texts = [status.text for status in tweets]
data = pd.DataFrame(tweet_texts, columns=['Tweets'])

# Preview the first 10 rows:
display(data.head(10))

In [None]:
# Print a selection of attributes of the first tweet, one per line:
for field in (
    'id',
    'created_at',
    'source',
    'favorite_count',
    'retweet_count',
    'geo',
    'coordinates',
    'entities',
):
    print(getattr(tweets[0], field))

In [None]:
# Add one column per tweet attribute of interest.
# 'len' is derived from the text; the remaining columns copy an attribute as-is.
data['len'] = np.array([len(status.text) for status in tweets])
for column, attribute in (
    ('ID', 'id'),
    ('Date', 'created_at'),
    ('Source', 'source'),
    ('Likes', 'favorite_count'),
    ('RTs', 'retweet_count'),
):
    data[column] = np.array([getattr(status, attribute) for status in tweets])

Display some statistics for the tweet data extracted.

In [None]:
# Average number of characters per tweet:
mean = data['len'].mean()

print("The avarage length of tweets: {}".format(mean))

In [None]:
# Locate the tweets with the highest like count and the highest retweet count.

fav_max = data['Likes'].max()
rt_max = data['RTs'].max()

# Index label of the first row that reaches each maximum:
fav = data.loc[data['Likes'] == fav_max].index[0]
rt = data.loc[data['RTs'] == rt_max].index[0]

# Max FAVs:
print(
    "The tweet with more likes is: \n{}".format(data.loc[fav, 'Tweets']),
    "Number of likes: {}".format(fav_max),
    "{} characters.\n".format(data.loc[fav, 'len']),
    sep="\n",
)

# Max RTs:
print(
    "The tweet with more retweets is: \n{}".format(data.loc[rt, 'Tweets']),
    "Number of retweets: {}".format(rt_max),
    "{} characters.\n".format(data.loc[rt, 'len']),
    sep="\n",
)

#### Visualization of tweet data:
See how tweet lengths, likes and retweets change as time progresses. Create time series for tweet length, number of likes and number of retweets.

In [None]:
# Build one date-indexed series per metric: tweet length, likes and retweets.

tlen, tfav, tret = (
    pd.Series(data[metric].values, index=data['Date'])
    for metric in ('len', 'Likes', 'RTs')
)

In [None]:
# Lengths along time.
# The axes are created explicitly and given a title and axis labels so the
# figure can be understood on its own when the notebook is skimmed.
fig, ax = plt.subplots(figsize=(16, 4))
tlen.plot(ax=ax, color='r')
ax.set(title="Tweet length over time", xlabel="Date", ylabel="Length (characters)");

In [None]:
# Likes vs retweets visualization.
# Both series are drawn on the same, explicitly created axes instead of relying
# on matplotlib's implicit "current axes"; title and labels make the figure
# self-explanatory.
fig, ax = plt.subplots(figsize=(16, 4))
tfav.plot(ax=ax, label="Likes", legend=True)
tret.plot(ax=ax, label="Retweets", legend=True)
ax.set(title="Likes vs retweets over time", xlabel="Date", ylabel="Count");

#### Simple sentiment analysis
TextBlob is a Python library for processing textual data. It provides a simple API for diving into common natural language processing (NLP) tasks. We will use this module to perform some basic sentiment analysis.

In [None]:
from textblob import TextBlob
import re

def clean_tweet(tweet):
    """Clean the text of a tweet by removing mentions, links and special characters.

    Args:
        tweet: The raw tweet text (a string).

    Returns:
        The text with every ``@mention``, every ``scheme://...`` link and every
        character other than ASCII letters, digits, spaces and tabs replaced by
        a space, then with all runs of whitespace collapsed to single spaces.
    """
    # Raw string: "\w" and "\S" are regex escapes, not string escapes, and a
    # non-raw literal makes Python warn about invalid escape sequences.
    # The mention branch includes "_" so that a handle such as "@some_user" is
    # removed whole instead of leaving "user" behind in the cleaned text.
    noise = r"(@[A-Za-z0-9_]+)|([^0-9A-Za-z \t])|(\w+://\S+)"
    return ' '.join(re.sub(noise, " ", tweet).split())

In [None]:
# Example cleaning
# str1 = "abc@def:a?/,    gef;:'' I am a bachelor a@b"
# ' '.join(re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ", str1).split())

In [None]:
def analyze_sentiment(tweet):
    """Classify the polarity of a tweet using TextBlob.

    The tweet is passed through ``clean_tweet`` before being scored.

    Returns:
        1 when the polarity is above zero, 0 when it is exactly zero,
        and -1 otherwise.
    """
    polarity = TextBlob(clean_tweet(tweet)).sentiment.polarity
    if polarity > 0:
        return 1
    if polarity == 0:
        return 0
    return -1

In [None]:
# Quick check of TextBlob's scores on a short phrase:
# polarity first, then subjectivity.
word1 = 'not bad'
word1_sentiment = TextBlob(word1).sentiment
print(word1_sentiment.polarity)
print(word1_sentiment.subjectivity)

In [None]:
# Peek at the first five rows of the dataframe:
data.head(n=5)

In [None]:
# Score every tweet and store the result in a new 'SA' column:
data['SA'] = np.array(list(map(analyze_sentiment, data['Tweets'])))

# Show the first 10 rows, now including 'SA':
display(data.head(10))

In [None]:
# Split the tweet texts into three lists by sentiment score.
# Boolean masks pair each tweet with its own 'SA' value through the dataframe
# index, so the result stays correct even if `data` does not carry the default
# 0..n-1 index (looking 'SA' up by enumerate() position would not).

pos_tweets = data.loc[data['SA'] > 0, 'Tweets'].tolist()
neu_tweets = data.loc[data['SA'] == 0, 'Tweets'].tolist()
neg_tweets = data.loc[data['SA'] < 0, 'Tweets'].tolist()

In [None]:
# Share of each sentiment class among all tweets, as a percentage:
total_tweets = len(data['Tweets'])
print("Percentage of positive tweets: {}%".format(len(pos_tweets) * 100 / total_tweets))
print("Percentage of neutral tweets: {}%".format(len(neu_tweets) * 100 / total_tweets))
print("Percentage of negative tweets: {}%".format(len(neg_tweets) * 100 / total_tweets))

In [None]:
#############  End of Lab Session    ###################