Reference: https://www.earthdatascience.org/courses/use-data-open-source-python/intro-to-apis/twitter-data-in-python/

In [None]:
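# ONE-OFF INSTALL (uncomment and run once per environment)
# NOTE: the cells below call `api.search`, which tweepy 4.0 renamed to
# `api.search_tweets`, so a 3.x release is pinned to run the notebook as written.
# %pip install "tweepy<4"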

#### Libraries and Credentials

In [None]:
import os
import tweepy as tw
import pandas as pd

import credentials # Import api/access_token keys from credentials.py

In [None]:
# Build the OAuth handler from the application key/secret kept in credentials.py,
# then attach the account's access token pair to it.
auth = tw.OAuthHandler(
    credentials.API_KEY,
    credentials.API_SECRET_KEY,
)
auth.set_access_token(
    credentials.ACCESS_TOKEN,
    credentials.ACCESS_TOKEN_SECRET,
)

# Authenticated client used by every query below; wait_on_rate_limit is enabled
# so rate-limit handling is delegated to tweepy.
api = tw.API(auth, wait_on_rate_limit=True)

#### Search Twitter for Tweets

In [None]:
# Query inputs shared by the search cells that follow
search_words = "#uefa"       # hashtag to look for
date_since = "2020-02-16"    # date string passed as the `since` argument

In [None]:
# Search English-language tweets for the hashtag, requesting 5 items from the cursor.
# NOTE(review): `since` is passed straight through to the search call - confirm the
# installed tweepy / Twitter API version actually applies it as a date filter.
tweets = tw.Cursor(
    api.search,
    q=search_words,
    lang="en",
    since=date_since,
).items(5)

# Show only the text of each returned tweet
[status.text for status in tweets]

#### Remove retweets

In [None]:
# Same search term with the retweet-exclusion filter appended to the query string
new_search = f"{search_words} -filter:retweets"
new_search

In [None]:
# Repeat the search with the retweet-excluding query, again requesting 5 items
tweets = tw.Cursor(
    api.search,
    q=new_search,
    lang="en",
    since=date_since,
).items(5)

# Show only the text of each returned tweet
[status.text for status in tweets]

#### Who is tweeting?

In [None]:
# Run the retweet-excluding search once more, requesting 5 items
tweets = tw.Cursor(
    api.search,
    q=new_search,
    lang="en",
    since=date_since,
).items(5)

# One [screen_name, location] pair per returned tweet, taken from the tweet's user
users_locs = [
    [status.user.screen_name, status.user.location]
    for status in tweets
]
users_locs

#### Create a Pandas Dataframe From A List of Tweet Data

In [None]:
# Tabulate the [screen_name, location] pairs: one row per tweet, two named columns
tweet_text = pd.DataFrame(users_locs, columns=["user", "location"])
tweet_text

#### Customizing Twitter Queries

Exercise -

extract 1000 tweets on climate change and filter out retweets

In [None]:
# Query for "climate change" with retweets filtered out
new_search = "climate+change -filter:retweets"

# Request 1000 English-language items for that query
tweets = tw.Cursor(
    api.search,
    q=new_search,
    lang="en",
    since='2021-02-16',
).items(1000)

# Keep just the tweet text and preview the first five entries
all_tweets = [status.text for status in tweets]
all_tweets[:5]

Exercise -

extract last 5 tweets by Cristiano Ronaldo

In [None]:
# Account whose timeline is queried
screen_name = "@Cristiano"

# The exercise asks for the last 5 tweets, so request 5 timeline entries
# (the previous value of 10 returned twice as many as asked for).
tweets = api.user_timeline(screen_name=screen_name, count=5)

# Keep only the text of each returned tweet and display the list
all_tweets = [tweet.text for tweet in tweets]
all_tweets

Exercise - get as much info as possible about coronavirus and export it

In [None]:
# Reference:
# https://towardsdatascience.com/how-to-scrape-more-information-from-tweets-on-twitter-44fd540b8a1f

text_query = 'Coronavirus'
max_tweets = 150

# Search for the query term, limiting the cursor to max_tweets items
tweets = tw.Cursor(api.search, q=text_query).items(max_tweets)

# One row per tweet: tweet-level fields first, then fields of the tweet's user.
# Add or remove attributes in this list to change what gets exported.
tweets_list = [
    [
        status.text,
        status.created_at,
        status.id_str,
        status.user.name,
        status.user.screen_name,
        status.user.id_str,
        status.user.location,
        status.user.url,
        status.user.description,
        status.user.verified,
        status.user.followers_count,
        status.user.friends_count,
        status.user.favourites_count,
        status.user.statuses_count,
        status.user.listed_count,
        status.user.created_at,
        status.user.profile_image_url_https,
        status.user.default_profile,
        status.user.default_profile_image,
    ]
    for status in tweets
]

# Column names are intentionally omitted to keep the example short,
# so the frame (and the exported CSV) uses pandas' default integer labels.
tweets_df = pd.DataFrame(tweets_list)
tweets_df.to_csv("covid_export.csv")

In [None]:
# Quick sanity check: preview the first five exported rows
tweets_df.head(n=5)

#### Further references

https://www.earthdatascience.org/courses/use-data-open-source-python/intro-to-apis/calculate-tweet-word-frequencies-in-python/

#### Sentiment Analysis

In [None]:
# Exercise -

# Set up a user prompt for a twitter hashtag, including no. of tweets to analyse, 
# return a breakdown of tweet polarity (+ve, -ve and neutral sentiment)
# and then plot a pie chart

In [None]:
# Ask the user what to search for and how many tweets to score.
# Surrounding whitespace is stripped so it does not end up in the query or plot title.
keyword = input("Please enter keyword or hashtag to search: ").strip()
noOfTweet = int(input("Please enter how many tweets to analyze: "))

# Reject unusable answers here instead of letting them reach the search call.
if not keyword:
    raise ValueError("The search keyword must not be empty.")
if noOfTweet <= 0:
    # NOTE(review): tweepy's Cursor.items() appears to treat a non-positive limit
    # as "no limit", which would turn this into an unbounded download - confirm.
    raise ValueError("The number of tweets to analyze must be a positive integer.")

In [None]:
# Search for the user-supplied keyword, limiting the cursor to the requested number of items
tweets = tw.Cursor(
    api.search,
    q=keyword,
).items(noOfTweet)

In [None]:
from textblob import TextBlob  # API for NLP https://textblob.readthedocs.io/en/dev/
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# NOTE(review): VADER presumably needs the `vader_lexicon` NLTK resource
# (nltk.download("vader_lexicon")) installed once - confirm on a fresh environment.
# NOTE(review): `tweets` looks like a one-shot iterator; re-running this cell without
# re-running the search cell would probably leave every total at 0 - confirm.

# Running totals, reset every time the cell runs
tweet_list = []   # text of every analysed tweet
positive = 0      # tweets whose VADER 'pos' score exceeds 'neg'
negative = 0      # tweets whose VADER 'neg' score exceeds 'pos'
neutral = 0       # tweets whose 'pos' and 'neg' scores are equal
polarity = 0      # sum of TextBlob polarity over all tweets

# Build the analyser once; it does not depend on the tweet being scored,
# so there is no reason to reconstruct it on every iteration.
analyzer = SentimentIntensityAnalyzer()

# Loop through the tweets, scoring each one with both libraries
for tweet in tweets:
    tweet_list.append(tweet.text)

    # TextBlob contributes the running polarity sum
    analysis = TextBlob(tweet.text)
    polarity += analysis.sentiment.polarity

    # VADER decides which bucket the tweet is counted in
    score = analyzer.polarity_scores(tweet.text)
    if score['neg'] > score['pos']:
        negative += 1
    elif score['pos'] > score['neg']:
        positive += 1
    else:
        neutral += 1

In [None]:
# checks

In [None]:
# Text of every tweet appended by the sentiment loop
tweet_list

In [None]:
# Number of tweets whose VADER 'neg' score was greater than their 'pos' score
negative

In [None]:
# Number of tweets whose VADER 'pos' score was greater than their 'neg' score
positive

In [None]:
# Number of tweets whose VADER 'pos' and 'neg' scores were equal
neutral

In [None]:
# Sum of the TextBlob polarity values accumulated over all analysed tweets
polarity

In [None]:
# pie chart

import matplotlib.pyplot as plt

# Select the style before anything is drawn; calling it after plt.pie()
# (as before) is too late to affect the figure being built.
plt.style.use('default')

names = ['Positive', 'Neutral', 'Negative']
sizes = [positive, neutral, negative]
total = sum(sizes)

if total == 0:
    # No tweets were classified, so there are no shares to compute or draw
    print("No tweets were analysed - nothing to plot.")
else:
    # The labels carry a '%' sign, so convert the raw counts to percentages of
    # all analysed tweets (previously the counts themselves were shown as '%').
    labels = [
        "{} [{:.1f}%]".format(name, 100 * count / total)
        for name, count in zip(names, sizes)
    ]
    colors = ['green', 'blue', 'red']

    patches, texts = plt.pie(sizes, colors=colors, startangle=90)
    plt.legend(patches, labels)
    plt.title("Sentiment Analysis Result for keyword= " + keyword)
    plt.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
    plt.show()