### Import Libraries

In [1]:
# Import twitter dependencies
import tweepy
from config import *

In [2]:
# Import transformation dependencies
import pandas as pd
from datetime import datetime, date
import json
import re 

In [3]:
# For sentiment and subjectivity analysis
from textblob import TextBlob

# Tweepy Setup

In [4]:
# Tweepy Setup
auth = tweepy.OAuthHandler(api_key, api_secret_key)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)

In [17]:
# Tweepy search parameters
search_terms = ['covid+vaccine OR covid-19+vaccine OR pfizer OR pfizer-biontech OR moderna OR astrazeneca OR astra+zeneca -filter:retweets']
end_date = date(2021,3,11)
max_id ='1370180022342926339'
limit = 900 # this is my 15 minute limit :'( breaks if this becomes 1000

# Save files
output_csv ='csvs/tw15.csv'
output_json ='json/raw15.json'

# Functions

Collect and save data

In [6]:
# Pulls data from api and returns 2 lists:
    # one which is parsed for selected variables
    # one with all the raw data

data = []
raw = []
tweets = tweepy.Cursor(api.search, location='canada', max_id = max_id, place='canada', lang ='en', q=search_terms, tweet_mode='extended').items(limit)
for t in tweets: 
    tweet ={
        'created' :t.created_at,
        'id_str' : t.id_str,
        'text' : t.full_text,
        'user_id' :t.user.id,
        'screen_name': t.user.screen_name,
        'location': t.user.location,
        'followers_count': t.user.followers_count,
        'user_favourites_count': t.user.favourites_count,
        'time_zone' :t.user.time_zone,
        'geo_enabled' :t.user.geo_enabled,
        'verified' :t.user.verified,
        'status_count': t.user.statuses_count,
        'geo':t.geo,
        'coords':t.coordinates,
        'retweet_count': t.retweet_count,
        'tweet_favourite_count' : t.favorite_count
    }
    # add sorted data to one list 
    data.append(tweet)
    # keep raw data in case I realize later that I've removed something important 
    raw.append(t)
print('done!')

done!


In [7]:
# Add parsed data to dataframe, convert to csv
tweets_df = pd.DataFrame(data)

# Save raw data to json 
with open(output_json, 'w') as file:
    for resp in raw:
        jso = json.dumps(resp._json,sort_keys=True, indent=3)
        file.write(jso)
tweets_df.tail(3)

Unnamed: 0,created,id_str,text,user_id,screen_name,location,followers_count,user_favourites_count,time_zone,geo_enabled,verified,status_count,geo,coords,retweet_count,tweet_favourite_count
897,2021-03-12 01:09:09,1370180024171700224,RT @davematt88: I talked to somebody in law en...,805570008873058304,SpiritofBellamy,,2977,168237,,False,False,51833,,,119,0
898,2021-03-12 01:09:09,1370180024003932160,RT @engxl: 24hrs since my covid vaccine update...,1426568444,burntpita,,326,54205,,False,False,34440,,,7,0
899,2021-03-12 01:09:08,1370180022342926339,@John74024187 @ferocioussal I read a statement...,1258670574571671553,helloamysnow,"California, USA",1999,18198,,False,False,4103,,,0,0


### Clean tweets

In [8]:
# not working when I put these into a function
tweets_df = tweets_df.drop_duplicates(subset=['id_str'])
tweets_df = tweets_df.dropna(subset=['id_str', 'screen_name'])

In [9]:
# Makes tweets more readable 
def CleanTweets(txt): 
    # remove @ username 
    # txt = re.sub(r'@[A-Za-z0-9)]+','', txt)

    # remove @ from username
    txt = txt.replace('@','')    
    
    # remove RT    
    txt = re.sub(r'RT[\s]+','',txt)
    
    # remove # but leave txt
    txt = re.sub(r'#','', txt)
    
    # remove hyperlinks
    txt = re.sub(r'https?:\/\/\S+', '', txt)
    return txt

In [10]:
# Clean the text field 
tweets_df['text'] = tweets_df['text'].apply(CleanTweets)

In [11]:
# to get subjectivity 
def RateSubjectivity(txt):
    return TextBlob(txt).sentiment.subjectivity

In [12]:
# to get polarity
def RatePolarity(txt):
    return TextBlob(txt).sentiment.polarity

In [13]:
# add worded sentiment based on polarity score
def GetSentiment(num):
    if num < 0: 
        return 'negative' 
    elif num == 0:
        return 'neutral'
    else:
        return 'positive'

In [14]:
# Create Columns
def RateTweets(df):
    df['Subjectivity'] = df['text'].apply(RateSubjectivity)
    df['Polarity'] = df['text'].apply(RatePolarity)
    df['Sentiment'] = df['Polarity'].apply(GetSentiment)
    return df 

In [15]:
tweets_df = RateTweets(tweets_df)
tweets_df.to_csv(output_csv, index=False, encoding='UTF-8')
tweets_df.tail(3)

Unnamed: 0,created,id_str,text,user_id,screen_name,location,followers_count,user_favourites_count,time_zone,geo_enabled,verified,status_count,geo,coords,retweet_count,tweet_favourite_count,Subjectivity,Polarity,Sentiment
897,2021-03-12 01:09:09,1370180024171700224,davematt88: I talked to somebody in law enforc...,805570008873058304,SpiritofBellamy,,2977,168237,,False,False,51833,,,119,0,0.4,-0.2,negative
898,2021-03-12 01:09:09,1370180024003932160,engxl: 24hrs since my covid vaccine update: no...,1426568444,burntpita,,326,54205,,False,False,34440,,,7,0,0.35,0.0,neutral
899,2021-03-12 01:09:08,1370180022342926339,John74024187 ferocioussal I read a statement f...,1258670574571671553,helloamysnow,"California, USA",1999,18198,,False,False,4103,,,0,0,0.0,0.0,neutral


# SQLAlchemy
### Connecting to a certain other host 

In [160]:
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine

In [161]:
engine = create_engine(conn, echo=False)

In [162]:
Base = automap_base()
Base.prepare(engine, reflect=True)  

In [163]:
# Test connection
# it works!!!
Base.classes.keys()

['tweets1']

### Don't run yet
Clean up your tweets! 

In [164]:
cxn = engine.connect()
tweets_df.to_sql(name='del', con=engine, if_exists='append', index=True)

ProgrammingError: (psycopg2.errors.UndefinedColumn) column "Subjectivity" of relation "del" does not exist
LINE 1: ...eo, coords, retweet_count, tweet_favourite_count, "Subjectiv...
                                                             ^

[SQL: INSERT INTO del (index, created, id_str, text, user_id, screen_name, location, followers_count, user_favourites_count, time_zone, geo_enabled, verified, status_count, geo, coords, retweet_count, tweet_favourite_count, "Subjectivity", "Polarity", "Sentiment") VALUES (%(index)s, %(created)s, %(id_str)s, %(text)s, %(user_id)s, %(screen_name)s, %(location)s, %(followers_count)s, %(user_favourites_count)s, %(time_zone)s, %(geo_enabled)s, %(verified)s, %(status_count)s, %(geo)s, %(coords)s, %(retweet_count)s, %(tweet_favourite_count)s, %(Subjectivity)s, %(Polarity)s, %(Sentiment)s)]
[parameters: ({'index': 0, 'created': datetime.datetime(2021, 3, 12, 1, 21, 25), 'id_str': '1370183110994882563', 'text': 'Joy_Villa Fact 400 000 people died of covid under trump administration. Also scientist developed the vaccine not trump.', 'user_id': 1324129031890051072, 'screen_name': 'LindoFarolito', 'location': '', 'followers_count': 0, 'user_favourites_count': 63, 'time_zone': None, 'geo_enabled': False, 'verified': False, 'status_count': 415, 'geo': None, 'coords': None, 'retweet_count': 0, 'tweet_favourite_count': 1, 'Subjectivity': 0.3, 'Polarity': 0.1, 'Sentiment': 'positive'}, {'index': 1, 'created': datetime.datetime(2021, 3, 12, 1, 21, 24), 'id_str': '1370183109484769283', 'text': 'LincolnsBible: My god... I cannot imagine how wretched the vaccine and Covid infection situation in America would be right now, if Jare…', 'user_id': 987795508449103872, 'screen_name': 'ItsmeDonna8', 'location': 'Jacksonville, Florida ', 'followers_count': 7556, 'user_favourites_count': 59091, 'time_zone': None, 'geo_enabled': False, 'verified': False, 'status_count': 23915, 'geo': None, 'coords': None, 'retweet_count': 460, 'tweet_favourite_count': 0, 'Subjectivity': 0.5357142857142857, 'Polarity': 0.2857142857142857, 'Sentiment': 'positive'}, {'index': 2, 'created': datetime.datetime(2021, 3, 12, 1, 21, 24), 'id_str': '1370183108696403972', 'text': 'CBSNews: BREAKING: President Biden announces he is directing all states, tribes and territories to make all U.S. adults 18 and older el…', 'user_id': 4897042768, 'screen_name': 'GoKasichGo', 'location': 'Wisconsin, USA', 'followers_count': 601, 'user_favourites_count': 34074, 'time_zone': None, 'geo_enabled': False, 'verified': False, 'status_count': 20182, 'geo': None, 'coords': None, 'retweet_count': 247, 'tweet_favourite_count': 0, 'Subjectivity': 0.3333333333333333, 'Polarity': 0.16666666666666666, 'Sentiment': 'positive'}, {'index': 3, 'created': datetime.datetime(2021, 3, 12, 1, 21, 24), 'id_str': '1370183108314681348', 'text': 'nbcwashington: A new COVID vaccine online waitlist system called Dr. B was created to keep precious temperature-regulated doses from go…', 'user_id': 3845051536, 'screen_name': 'ThiccCannon', 'location': '', 'followers_count': 404, 'user_favourites_count': 162499, 'time_zone': None, 'geo_enabled': False, 'verified': False, 'status_count': 84072, 'geo': None, 'coords': None, 'retweet_count': 5, 'tweet_favourite_count': 0, 'Subjectivity': 0.7272727272727273, 'Polarity': 0.3181818181818182, 'Sentiment': 'positive'}, {'index': 4, 'created': datetime.datetime(2021, 3, 12, 1, 21, 24), 'id_str': '1370183108272734214', 'text': '“A July 4th with your loved ones is the goal,” President Biden said. \n\nAll adult Americans will be eligible to get the vaccine by May 1. \n\n', 'user_id': 602532288, 'screen_name': 'SoniaDasgupta', 'location': 'Baltimore, MD', 'followers_count': 1364, 'user_favourites_count': 2837, 'time_zone': None, 'geo_enabled': True, 'verified': True, 'status_count': 22602, 'geo': None, 'coords': None, 'retweet_count': 0, 'tweet_favourite_count': 0, 'Subjectivity': 0.55, 'Polarity': 0.39999999999999997, 'Sentiment': 'positive'}, {'index': 5, 'created': datetime.datetime(2021, 3, 12, 1, 21, 24), 'id_str': '1370183108088233985', 'text': 'navfornol: LOOK: NFNL starts inoculation of Troops against COVID-19 Vaccine\n \nRead more about this here: \n\nMode…', 'user_id': 1300722530081214466, 'screen_name': 'navforcentral', 'location': 'HNFC, Lapu-Lapu City, Cebu', 'followers_count': 86, 'user_favourites_count': 2172, 'time_zone': None, 'geo_enabled': False, 'verified': False, 'status_count': 1354, 'geo': None, 'coords': None, 'retweet_count': 1, 'tweet_favourite_count': 0, 'Subjectivity': 0.5, 'Polarity': 0.5, 'Sentiment': 'positive'}, {'index': 6, 'created': datetime.datetime(2021, 3, 12, 1, 21, 24), 'id_str': '1370183107765239810', 'text': "CBCToronto: Here's what you need to know about getting a COVID-19 vaccine at Ontario pharmacies  ", 'user_id': 195971292, 'screen_name': 'Brdcaster', 'location': 'Toronto', 'followers_count': 4530, 'user_favourites_count': 7491, 'time_zone': None, 'geo_enabled': False, 'verified': True, 'status_count': 52545, 'geo': None, 'coords': None, 'retweet_count': 3, 'tweet_favourite_count': 0, 'Subjectivity': 0.0, 'Polarity': 0.0, 'Sentiment': 'neutral'}, {'index': 7, 'created': datetime.datetime(2021, 3, 12, 1, 21, 24), 'id_str': '1370183106947391488', 'text': 'OD_ant: If you die from Covid, but had a comorbidity...you MOST definitely died from Covid.\n\nBUT- if you die from a VACCINE and have a…', 'user_id': 942209429914734592, 'screen_name': 'mmooch1970', 'location': 'United States', 'followers_count': 785, 'user_favourites_count': 83069, 'time_zone': None, 'geo_enabled': False, 'verified': False, 'status_count': 29636, 'geo': None, 'coords': None, 'retweet_count': 154, 'tweet_favourite_count': 0, 'Subjectivity': 0.5, 'Polarity': 0.25, 'Sentiment': 'positive'}  ... displaying 10 of 900 total bound parameter sets ...  {'index': 898, 'created': datetime.datetime(2021, 3, 12, 1, 18), 'id_str': '1370182250810183683', 'text': 'Everyone will be eligible to receive a covid vaccine after May 1st. Bitch that’s my birthday 🥳', 'user_id': 775012078516068352, 'screen_name': 'ubetterrecogni', 'location': 'Ann Arbor, MI', 'followers_count': 115, 'user_favourites_count': 4256, 'time_zone': None, 'geo_enabled': False, 'verified': False, 'status_count': 1210, 'geo': None, 'coords': None, 'retweet_count': 0, 'tweet_favourite_count': 4, 'Subjectivity': 0.0, 'Polarity': 0.0, 'Sentiment': 'neutral'}, {'index': 899, 'created': datetime.datetime(2021, 3, 12, 1, 17, 59), 'id_str': '1370182250386530306', 'text': 'AmyKremer: I don’t want to hear Joe Biden talk about Covid and the vaccine as long as he is letting illegal aliens into this country th…', 'user_id': 2523556454, 'screen_name': 'DianaChic1', 'location': '', 'followers_count': 2900, 'user_favourites_count': 12208, 'time_zone': None, 'geo_enabled': False, 'verified': False, 'status_count': 464378, 'geo': None, 'coords': None, 'retweet_count': 157, 'tweet_favourite_count': 0, 'Subjectivity': 0.45, 'Polarity': -0.275, 'Sentiment': 'negative'})]
(Background on this error at: http://sqlalche.me/e/13/f405)

In [None]:
# session things
session = Session(engine)
session.close()
