In [28]:
# This is Main function.
# Extracting streaming data from Twitter, pre-processing, and loading into MySQL
import credentials # Import api/access_token keys from credentials.py
import settings # Import related setting constants from settings.py 

import re
import tweepy
import mysql.connector
import pandas as pd
from textblob import TextBlob
# Streaming With Tweepy 
# http://docs.tweepy.org/en/v3.4.0/streaming_how_to.html#streaming-with-tweepy


# Override tweepy.StreamListener to add logic to on_status


In [29]:
class MyStreamListener(tweepy.StreamListener):
    '''
    Tweets are known as “status updates”. So the Status class in tweepy has properties describing the tweet.
    https://developer.twitter.com/en/docs/tweets/data-dictionary/overview/tweet-object.html
    '''
    
    def on_status(self, status):
        '''
        Extract info from tweets
        '''
        
        if status.retweeted:
            # Avoid retweeted info, and only original tweets will be received
            return True
        # Extract attributes from each tweet
        id_str = status.id_str
        created_at = status.created_at
        text = deEmojify(status.text)    # Pre-processing the text  
        sentiment = TextBlob(text).sentiment
        polarity = sentiment.polarity
        subjectivity = sentiment.subjectivity
        
        user_created_at = status.user.created_at
        user_location = deEmojify(status.user.location)
        user_description = deEmojify(status.user.description)
        user_followers_count =status.user.followers_count
        longitude = None
        latitude = None
        if status.coordinates:
            longitude = status.coordinates['coordinates'][0]
            latitude = status.coordinates['coordinates'][1]
            
        retweet_count = status.retweet_count
        favorite_count = status.favorite_count
        
        print(status.text)
        print("Long: {}, Lati: {}".format(longitude, latitude))
        
        # Store all data in MySQL
        if mydb.is_connected():
            mycursor = mydb.cursor()
            sql = "INSERT INTO {} (id_str, created_at, text, polarity, subjectivity, user_created_at, user_location, user_description, user_followers_count, longitude, latitude, retweet_count, favorite_count) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)".format(settings.TABLE_NAME)
            val = (id_str, created_at, text, polarity, subjectivity, user_created_at, user_location, \
                user_description, user_followers_count, longitude, latitude, retweet_count, favorite_count)
            mycursor.execute(sql, val)
            mydb.commit()
            mycursor.close()
    
    
    def on_error(self, status_code):
        '''
        Since Twitter API has rate limits, stop srcraping data as it exceed to the thresold.
        '''
        if status_code == 420:
            # return False to disconnect the stream
            return False

In [30]:
def clean_tweet(self, tweet): 
    ''' 
    Use sumple regex statemnents to clean tweet text by removing links and special characters
    '''
    return ' '.join(re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t]) \
                                |(\w+:\/\/\S+)", " ", tweet).split()) 
def deEmojify(text):
    '''
    Strip all non-ASCII characters to remove emoji characters
    '''
    if text:
        return text.encode('ascii', 'ignore').decode('ascii')
    else:
        return None

In [31]:
mydb = mysql.connector.connect(
    host="localhost",
    user="root",
    passwd="password",
    database="twitterdb",
    charset = 'utf8'
)
if mydb.is_connected():
    '''
    Check if this table exits. If not, then create a new one.
    '''
    mycursor = mydb.cursor()
    mycursor.execute("""
        SELECT COUNT(*)
        FROM information_schema.tables
        WHERE table_name = '{0}'
        """.format(settings.TABLE_NAME))
    if mycursor.fetchone()[0] != 1:
        mycursor.execute("CREATE TABLE {} ({})".format(settings.TABLE_NAME, settings.TABLE_ATTRIBUTES))
        mydb.commit()
    mycursor.close()

In [32]:
auth  = tweepy.OAuthHandler(credentials.API_KEY, credentials.API_SECRET_KEY)
auth.set_access_token(credentials.ACCESS_TOEKN, credentials.ACCESS_TOKEN_SECRET)
api = tweepy.API(auth)

In [33]:
myStreamListener = MyStreamListener()
myStream = tweepy.Stream(auth = api.auth, listener = myStreamListener)
myStream.filter(languages=["en"], track = settings.TRACK_WORDS)

mydb.close()

RT @1YoungHustle_: 😂😂😂 Detroit a different breed dog look at what this Coronavirus shit done did to niggas https://t.co/Eho8czHAAQ
Long: None, Lati: None
RT @nayibbukele: Thank you @JackMa and @AlibabaGroup for your donation to our country.

We received 100,000 masks; 10,080 testing kits for…
Long: None, Lati: None
RT @lizzkatherine_: BREAKING NEWS: Louisiana just released COVID-19 data which shows that African-Americans account for 70% of ALL DEATHS i…
Long: None, Lati: None
Coronavirus: News and live updates https://t.co/PwjHH7031I I am so happy with you, Yahoo..so smart , now we don't h… https://t.co/cTulSR9QW6
Long: None, Lati: None
RT @WHO: "We encourage countries that are considering the use of masks for the general population to study their effectiveness so we can al…
Long: None, Lati: None
RT @oldyfan2012: Trump "Has My Mom's Blood on His Hands": NYC Comptroller Stringer Accuses President in Coronavirus Death of His Elderly Mo…
Long: None, Lati: None
RT @NewsHour: "We governors

RT @Atunwa_1: With the confirmation of two cases of #COVID19  in Kwara, one of whom was the wife of a suspected #COVID19 patient who died.…
Long: None, Lati: None
Another Garden supply unboxing video is up! Again, I put affiliate links to the items I bought in the description o… https://t.co/PrWrfssEvr
Long: None, Lati: None
RT @sharonwaxman: This took my breath away https://t.co/h3zKcaQEfo
Long: None, Lati: None
RT @wis10: Video shows the man being held down after shoppers said he was spitting in the produce aisle. https://t.co/RikFMFLcDi
Long: None, Lati: None
RT @realDonaldTrump: Congratulations to State Representative Karen Whitsett of Michigan. So glad you are getting better! https://t.co/v6z46…
Long: None, Lati: None
RT @sahouraxo: Cuba, a country suffering under a brutal US blockade, sends doctors to nations all over the world to fight #COVID19.

In con…
Long: None, Lati: None
RT @erynnabdullah_: Sedih gila tengok ig live kucai, he choose randomly to invite someone go live with 

@juliaioffe “Every patient I've prescribed it to has been very, very ill and within 8 to 12 hours, they were basica… https://t.co/YTLGlotDIA
Long: None, Lati: None
Tennessee went from 4,000 to 500 today. And no mention of it anywhere.
Long: None, Lati: None
RT @Education4Libs: Twitter suspended me 12 hours followed by a WEEK for these tweets.

They claimed I was “promoting or encouraging suicid…
Long: None, Lati: None
RT @FINCA: Crises exacerbate gender inequalities for women, girls, and other vulnerable populations. The global community's response to #CO…
Long: None, Lati: None
Long: None, Lati: None
RT @Jali_Cat: ‼️Dr. Fauci MARCH 26, 2020: 

"[Coronavirus] is akin to a severe flu season. Mortality rates more likely in the 0.1% range".…
Long: None, Lati: None
RT @FrancisBrennan: “A Democratic state representative from Detroit is crediting hydroxychloroquine — and Republican President Donald Trump…
Long: None, Lati: None
@TIME CANADA CORRUPTION!JUST AS🇨🇳! TWIN COUNTRY!GOD BLESS AMERIC

KeyboardInterrupt: 

In [12]:
mydb.close()

In [14]:
mydb

AttributeError: 'CMySQLConnection' object has no attribute 'show'

In [36]:
db_connection = mysql.connector.connect(
    host="localhost",
    user="root",
    passwd="password",
    database="twitterdb",
    charset = 'utf8'
)

In [37]:
db_connection

<mysql.connector.connection_cext.CMySQLConnection at 0x2351e5858c8>

In [38]:
df = pd.read_sql('SELECT id_str, text, created_at, polarity, user_location FROM {}'.format(settings.TABLE_NAME), con=db_connection)

In [39]:
df

Unnamed: 0,id_str,text,created_at,polarity,user_location
0,1247302110938529793,"RT @RealCandaceO: Apparently, doctors and nurs...",2020-04-06 23:16:11,0,Arizona
1,1247302111093592065,RT @BiggBossBites: We clean oil spots on our c...,2020-04-06 23:16:11,0,ca
2,1247302110988972038,Everything about this angers me.,2020-04-06 23:16:11,0,
3,1247302111274192896,RT @lizzkatherine_: BREAKING NEWS: Louisiana j...,2020-04-06 23:16:11,0,Orangeburg SC to Harlem NY
4,1247302111215394816,RT @BlondieVex: So let me get this straight......,2020-04-06 23:16:11,0,"Battle Creek, MI"
...,...,...,...,...,...
829,1247309298155302918,Clearly this poll needs a truth injection.,2020-04-06 23:44:45,0,MAGA COUNTRY
830,1247309298230865921,Fox News @KristinFisher got busted trying to g...,2020-04-06 23:44:45,0,
831,1247309298323030018,RT @ossoff: NEW: U.S. Senator David Perdue (R-...,2020-04-06 23:44:45,0,USA
832,1247309298541244418,RT @Jeanna2007: OH. Look what this ole war mon...,2020-04-06 23:44:45,0,United States
