# Gather tweets from the Twitter API

### Imports

In [1]:
import os
import re

import pandas as pd
import tweepy
from tweepy import OAuthHandler

### Access to the Twitter API

In [2]:
# Read the Twitter API credentials from the environment so that real secrets
# never have to be written into the notebook. The 'xxx' placeholders are kept
# as fallbacks for when a variable is not set.
consumer_key = os.environ.get('TWITTER_CONSUMER_KEY', 'xxx')
consumer_secret = os.environ.get('TWITTER_CONSUMER_SECRET', 'xxx')
access_token = os.environ.get('TWITTER_ACCESS_TOKEN', 'xxx')
access_secret = os.environ.get('TWITTER_ACCESS_SECRET', 'xxx')

# Build the OAuth handler from the consumer credentials, then attach the access token
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)

# API client used by the search cell
api = tweepy.API(auth)

### Search for Tweets

Constants

In [3]:
# Emoticon referentials: substrings counted in a tweet's text to decide its polarity
CONST_positive_emoticons = [
    ':)',
    ':-)',
    ': )',
    ':D',
    '=)',
    ':p',
    ';)',
]
CONST_negative_emoticons = [
    ':(',
    ':-(',
    ': (',
]

# Sentiment scores stored with each tweet (positive / negative)
CONST_positive_sentiment_score, CONST_negative_sentiment_score = 4, 0

# Number of tweets requested per search query
CONST_nbTweets = 100

# CSV file the scored tweets are appended to
CONST_exportFilePath = 'trainingandtestdata/trainingSet.csv'

Global variables

In [4]:
# Module-level accumulator: one list (row) per tweet that was kept
parsed_tweets = list()

Functions

In [5]:
def parse_tweet(tweet, sum_positive, sum_negative, query):
    """Append `tweet` to the global `parsed_tweets` list with its sentiment score.

    A tweet is kept only when its emoticons are all of one polarity: tweets
    with both positive and negative emoticons, and tweets without any, are
    ignored.

    Parameters:
        tweet: object exposing `id`, `created_at`, `author.name` and `text`.
        sum_positive: number of positive emoticons counted in the tweet.
        sum_negative: number of negative emoticons counted in the tweet.
        query: the search query that returned the tweet (stored as-is).

    Returns:
        None. The row is appended to `parsed_tweets` as a side effect.
    """
    only_positive = sum_positive > 0 and sum_negative == 0
    only_negative = sum_negative > 0 and sum_positive == 0

    # Guard clause: nothing to store for mixed or emoticon-free tweets
    if not (only_positive or only_negative):
        return

    if only_positive:
        score = CONST_positive_sentiment_score
    else:
        score = CONST_negative_sentiment_score

    row = [score, tweet.id, tweet.created_at, query, tweet.author.name, tweet.text]
    parsed_tweets.append(row)

In [6]:
def calculate_polarity_by_emoticons(tweetsList, query):
    """Count the positive and negative emoticons of each tweet and hand it to `parse_tweet`.

    Parameters:
        tweetsList: iterable of tweet objects exposing a `text` string.
        query: the search query that produced `tweetsList`; passed through
            to `parse_tweet` unchanged.

    Returns:
        None. Storing the tweets is done by `parse_tweet`.
    """
    for tweet in tweetsList:
        # Occurrences of every positive emoticon, added together
        positive_hits = sum(
            tweet.text.count(emoticon) for emoticon in CONST_positive_emoticons
        )
        # Same for the negative emoticons
        negative_hits = sum(
            tweet.text.count(emoticon) for emoticon in CONST_negative_emoticons
        )

        parse_tweet(tweet, positive_hits, negative_hits, query)

Main algorithm

In [7]:
# Start from an empty accumulator so that re-running this cell does not store
# the tweets of a previous run a second time (they would then be exported twice).
parsed_tweets.clear()

# Search for public tweets with a positive emoticon, in english
# NOTE(review): Tweepy 4 renamed API.search to API.search_tweets - confirm against the installed version
positive_public_tweets = api.search(CONST_positive_emoticons[0], lang='en', count=CONST_nbTweets)

# Same with negative ones
negative_public_tweets = api.search(CONST_negative_emoticons[0], lang='en', count=CONST_nbTweets)

# Calculate tweets polarity and store them into a Python list
calculate_polarity_by_emoticons(positive_public_tweets, CONST_positive_emoticons[0])
calculate_polarity_by_emoticons(negative_public_tweets, CONST_negative_emoticons[0])

# Convert the list into a Pandas dataframe
df = pd.DataFrame(data = parsed_tweets , columns = ['sentiment_score', 'id', 'date', 'query', 'author', 'tweet'])

df.head()

Unnamed: 0,sentiment_score,id,date,query,author,tweet
0,4,1079461781200912386,2018-12-30 19:38:38,:),lexx ;),happiness in quite a few aspects of my life ha...
1,4,1079461779506368518,2018-12-30 19:38:38,:),Pål Nes,@JeevanSChagger Ole coached Ronaldo as a strik...
2,4,1079461776905908225,2018-12-30 19:38:37,:),♡ 𝒞𝒶𝓂𝒾𝓁𝒶 ♡,"RT @jeonha_32: 2017, Wanna One❤\n2018, Wanna O..."
3,4,1079461772338184192,2018-12-30 19:38:36,:),80K daaru,RT @Art_for_NaMo: Thank you for liking the art...
4,4,1079461771910365184,2018-12-30 19:38:36,:),Mervkd,RT @SportsGamingg: $500 GIVEAWAY!!!\nFOLLOW ME...


### Append the tweets to the training dataset CSV file

In [8]:
# Export - append the rows to the CSV file (no header row is written).
# encoding='utf-8': tweet texts and author names contain non-ASCII characters
# that the platform default encoding may not be able to encode.
# newline='': pandas asks for file objects to be opened this way so that line
# endings are not translated a second time.
with open(CONST_exportFilePath, 'a', encoding='utf-8', newline='') as f:
    df.to_csv(f, header=False)

### Author : Thibaut BREMAND
- thibaut.bremand [at] gmail.com
- https://github.com/ThibautBremand

### Sources :  
- https://marcobonzanini.com/2015/03/02/mining-twitter-data-with-python-part-1/