In [1]:
# This is Main function.
# Extracting streaming data from Twitter, pre-processing, and loading into MySQL
import credentials # Import api/access_token keys from credentials.py
import settings # Import related setting constants from settings.py 

import re
import tweepy
import mysql.connector
import pandas as pd
from textblob import TextBlob
# Streaming With Tweepy 
# http://docs.tweepy.org/en/v3.4.0/streaming_how_to.html#streaming-with-tweepy


# Override tweepy.StreamListener to add logic to on_status
class MyStreamListener(tweepy.StreamListener):
    '''
    Tweets are known as “status updates”. So the Status class in tweepy has properties describing the tweet.
    https://developer.twitter.com/en/docs/tweets/data-dictionary/overview/tweet-object.html
    '''
    
    def on_status(self, status):
        '''
        Extract info from tweets
        '''
        
        if status.retweeted:
            # Avoid retweeted info, and only original tweets will be received
            return True
        # Extract attributes from each tweet
        id_str = status.id_str
        created_at = status.created_at
        text = deEmojify(status.text)    # Pre-processing the text  
        sentiment = TextBlob(text).sentiment
        polarity = sentiment.polarity
        subjectivity = sentiment.subjectivity
        
        user_created_at = status.user.created_at
        user_location = deEmojify(status.user.location)
        user_description = deEmojify(status.user.description)
        user_followers_count =status.user.followers_count
        longitude = None
        latitude = None
        if status.coordinates:
            longitude = status.coordinates['coordinates'][0]
            latitude = status.coordinates['coordinates'][1]
            
        retweet_count = status.retweet_count
        favorite_count = status.favorite_count
        
        print(status.text)
        print("Long: {}, Lati: {}".format(longitude, latitude))
        
        # Store all data in MySQL
        if mydb.is_connected():
            mycursor = mydb.cursor()
            sql = "INSERT INTO {} (id_str, created_at, text, polarity, subjectivity, user_created_at, user_location, user_description, user_followers_count, longitude, latitude, retweet_count, favorite_count) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)".format(settings.TABLE_NAME)
            val = (id_str, created_at, text, polarity, subjectivity, user_created_at, user_location, \
                user_description, user_followers_count, longitude, latitude, retweet_count, favorite_count)
            mycursor.execute(sql, val)
            mydb.commit()
            mycursor.close()
    
    
    def on_error(self, status_code):
        '''
        Since Twitter API has rate limits, stop srcraping data as it exceed to the thresold.
        '''
        if status_code == 420:
            # return False to disconnect the stream
            return False


In [2]:
def clean_tweet(self, tweet): 
    ''' 
    Use sumple regex statemnents to clean tweet text by removing links and special characters
    '''
    return ' '.join(re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t]) \
                                |(\w+:\/\/\S+)", " ", tweet).split()) 
def deEmojify(text):
    '''
    Strip all non-ASCII characters to remove emoji characters
    '''
    if text:
        return text.encode('ascii', 'ignore').decode('ascii')
    else:
        return None

In [3]:

mydb = mysql.connector.connect(
    host="localhost",
    user="root",
    passwd="gaurav",
    database = "TwitterDB",
    charset= 'utf8'
)
if mydb.is_connected():
    '''
    Check if this table exits. If not, then create a new one.
    '''
    mycursor = mydb.cursor()
    mycursor.execute("""
        SELECT COUNT(*)
        FROM information_schema.tables
        WHERE table_name = '{0}'
        """.format("coronavirus"))
    if mycursor.fetchone()[0] != 1:
        mycursor.execute("CREATE TABLE {} ({})".format("coronavirus", "id_str VARCHAR(255), created_at DATETIME, text VARCHAR(255), \
            polarity INT, subjectivity INT, user_created_at VARCHAR(255), user_location VARCHAR(255), \
            user_description VARCHAR(255), user_followers_count INT, longitude DOUBLE, latitude DOUBLE, \
            retweet_count INT, favorite_count INT"))
        mydb.commit()
    mycursor.close()
    



In [4]:

auth  = tweepy.OAuthHandler("t6jSdRu6YKVYDzRmOzodOimKP", "ONAi7xpDfNy5Rcf369Ysdcskt7mjpj06CddiJuRjYQzh29ak5B")
auth.set_access_token("68491006-g5qhhCcvQqGM7hmSPKoRBjMyL2nG3ryk1dsUVEUKo", "hueMWPssCHEgV0HwdPaJYuZGdtKV4g79V4FnEFFWuvSLf")
api = tweepy.API(auth)

In [5]:
myStreamListener = MyStreamListener()
myStream = tweepy.Stream(auth = api.auth, listener = myStreamListener)
myStream.filter(languages=["en"], track = settings.TRACK_WORDS)
# Close the MySQL connection as it finished
# However, this won't be reached as the stream listener won't stop automatically
# Press STOP button to finish the process.
mydb.close()

None
Long: None, Lati: None
RT @MyFavsTrash: This is the most Florida tweet I’ve ever seen.
Long: None, Lati: None
Coronavirus Hello! There is no cure ,Toronto , Australia , South by Sout... https://t.co/AhozX8KWk4 via @YouTube
Long: None, Lati: None
RT @AstikDas142: #NoSir
Where we are going? 
Already #CoronaVirus is ruining the Mankind?
Who will save Humanity?😠😠 https://t.co/9habpHSnlI
Long: None, Lati: None
Sign of the times: Little Leaguers elbow bumping instead of shaking hands at end of game. #coronavirus https://t.co/0UvzUWk4ZJ
Long: None, Lati: None
*Coronavirus enters Mzanzi*
No other country ever:
South Africans: "Let's give it a nickname and do a challenge"… https://t.co/jiX2Z0UQvM
Long: None, Lati: None
RT @Ziggy_Daddy: In an interview with Maddow, without notes, Warren just have a detailed explanation of how susceptible our economy is to a…
Long: None, Lati: None
RT @Oylue1: @marcorubio And China owes US a sorry https://t.co/eplCvtlkds
Long: None, Lati: None
@realDonaldTru

KeyboardInterrupt: 