### Import Libraries

In [1]:
# Import twitter dependencies
import tweepy
from config import *

In [2]:
# Import transformation dependencies
import pandas as pd
from datetime import datetime, date
import json
import re 

In [3]:
# For sentiment and subjectivity analysis
from textblob import TextBlob
#import nltk


In [4]:
#nltk.download('stopwords')
#nltk.download('punkt')

In [5]:
# For stats 
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_rel.html
from scipy import stats

# Tweepy Setup

In [6]:
# Tweepy Setup
auth = tweepy.OAuthHandler(api_key, api_secret_key)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)

In [52]:
# Tweepy search parameters
search_terms = ['covid+vaccine OR covid-19+vaccine OR corona+virus+vaccine OR coronavirus+vaccine OR pfizer OR pfizer-biontech OR pfizerbiontech OR moderna OR astrazeneca OR astra+zeneca -filter:retweets']
end_date = date(2021,3,11)
max_id = '1370041703395102726'
limit = 1400 # this is my 15 minute limit :'( 

# Save files
output_csv ='csvs/tw139.csv'
output_json ='json/raw139.json'

# Functions

Collect and save data

In [53]:
# Pulls data from api and returns 2 lists:
    # one which is parsed for selected variables
    # one with all the raw data

data = []
raw = []
tweets = tweepy.Cursor(api.search, max_id = max_id, lang ='en', q=search_terms, tweet_mode='extended').items(limit)
for t in tweets: 
    tweet ={
        'created' :t.created_at,
        'id_str' : t.id_str,
        'text' : t.full_text,
        'user_id' :t.user.id,
        'screen_name': t.user.screen_name,
        'location': t.user.location,
        'followers_count': t.user.followers_count,
        'user_favourites_count': t.user.favourites_count,
        'time_zone' :t.user.time_zone,
        'geo_enabled' :t.user.geo_enabled,
        'verified' :t.user.verified,
        'status_count': t.user.statuses_count,
        'geo':t.geo,
        'coords':t.coordinates,
        'retweet_count': t.retweet_count,
        'tweet_favourite_count' : t.favorite_count
    }
    # add sorted data to one list 
    data.append(tweet)
    # keep raw data in case I realize later that I've removed something important 
    raw.append(t)
print('done!')

done!


In [54]:
# Add parsed data to dataframe, convert to csv
tweets_df = pd.DataFrame(data)

# Save raw data to json 
with open(output_json, 'w') as file:
    for resp in raw:
        jso = json.dumps(resp._json,sort_keys=True, indent=3)
        file.write(jso)
tweets_df.tail(2)

Unnamed: 0,created,id_str,text,user_id,screen_name,location,followers_count,user_favourites_count,time_zone,geo_enabled,verified,status_count,geo,coords,retweet_count,tweet_favourite_count
1398,2021-03-11 15:56:10,1370040863372546071,RT @EMA_News: 📢 EMA has just recommended grant...,780980779,claudiavaca5,Colombia,8622,28240,,True,False,29275,,,1211,0
1399,2021-03-11 15:56:10,1370040861732433920,RT @NarvekarMilind_: Took my first shot of COV...,1351480349990141952,Akshada15400665,"Mumbai, India",11,1,,False,False,351,,,67,0


### Clean tweets

In [55]:
# not working when I put these into a function
tweets_df = tweets_df.drop_duplicates(subset=['id_str'])
tweets_df = tweets_df.dropna(subset=['id_str', 'screen_name'])

In [56]:
# Makes tweets more readable 
def CleanTweets(txt): 
    # remove @ username 
    # txt = re.sub(r'@[A-Za-z0-9)]+','', txt)
    # this removes names, which are sometimes vaccine names, so not good for my filter

    # remove @ from username
    txt = txt.replace('@','')   
    
    # remove RT    
    txt = re.sub(r'RT[\s]+','',txt)
    
    # remove # but leave txt
    txt = re.sub(r'#','', txt)
    
    # remove hyperlinks
    txt = re.sub(r'https?:\/\/\S+', '', txt)
    
    txt = re.sub('\n','', txt)
    return txt

In [57]:
# Clean the text field 
tweets_df['text'] = tweets_df['text'].apply(CleanTweets)

In [58]:
# to get subjectivity 
def RateSubjectivity(txt):
    return TextBlob(txt).sentiment.subjectivity

In [59]:
# to get polarity
def RatePolarity(txt):
    return TextBlob(txt).sentiment.polarity

In [60]:
# add worded sentiment based on polarity score
def GetSentiment(num):
    if num < 0: 
        return 'negative' 
    elif num == 0:
        return 'neutral'
    else:
        return 'positive'

In [61]:
# Create Columns
def RateTweets(df):
    df['Subjectivity'] = df['text'].apply(RateSubjectivity)
    df['Polarity'] = df['text'].apply(RatePolarity)
    df['Sentiment'] = df['Polarity'].apply(GetSentiment)
    return df 

In [62]:
tweets_df = RateTweets(tweets_df)
tweets_df.to_csv(output_csv, index=False, encoding='UTF-8')
tweets_df.tail(3)

Unnamed: 0,created,id_str,text,user_id,screen_name,location,followers_count,user_favourites_count,time_zone,geo_enabled,verified,status_count,geo,coords,retweet_count,tweet_favourite_count,Subjectivity,Polarity,Sentiment
1397,2021-03-11 15:56:10,1370040863968100357,NHSuk: If you have a health condition and you'...,2740752309,kidneydoc101,"London, UK",7187,54272,,False,False,41216,,,1217,0,0.0,0.0,neutral
1398,2021-03-11 15:56:10,1370040863372546071,EMA_News: 📢 EMA has just recommended granting ...,780980779,claudiavaca5,Colombia,8622,28240,,True,False,29275,,,1211,0,0.0,0.0,neutral
1399,2021-03-11 15:56:10,1370040861732433920,NarvekarMilind_: Took my first shot of COVID v...,1351480349990141952,Akshada15400665,"Mumbai, India",11,1,,False,False,351,,,67,0,0.333333,0.25,positive


In [122]:
import geograpy

In [123]:
tweets_df.tail(3)

Unnamed: 0,created,id_str,text,user_id,screen_name,location,followers_count,user_favourites_count,time_zone,geo_enabled,verified,status_count,geo,coords,retweet_count,tweet_favourite_count,Subjectivity,Polarity,Sentiment
1397,2021-03-11 16:24:50,1370048079009869830,BarackObama: Michelle and I got vaccinated aga...,1157188440011747328,PartenMichael,,188,90941,,False,False,14865,,,10712,0,0.3,1.0,positive
1398,2021-03-11 16:24:50,1370048077793484804,thehill: Denmark and Norway temporarily suspen...,1188758191821279232,EthingsG,España,780,13683,,False,False,68912,,,55,0,0.0,0.0,neutral
1399,2021-03-11 16:24:50,1370048077004902401,POTUS: One in four adults in the U.S. has rece...,1154292071601266693,CarlaMeckelborg,,2558,79385,,False,False,27531,,,16534,0,0.333333,0.25,positive


In [9]:
from os import listdir

In [10]:
filepaths = ['csvs/' + f for f in listdir("csvs")]
df = pd.concat(map(pd.read_csv, filepaths))
df = df.drop_duplicates(subset=['id_str'])
len(df)

139727

In [36]:
df = df.reset_index()
df = df.drop(columns=['index'])
df.tail(1)

Unnamed: 0,created,id_str,text,user_id,screen_name,location,followers_count,user_favourites_count,time_zone,geo_enabled,verified,status_count,geo,coords,retweet_count,tweet_favourite_count,Subjectivity,Polarity,Sentiment
139726,2021-03-15 00:02:57,1371250530123718660,Found my old WHO vaccine cert booklet from whe...,92157908,Juliagoolia1982,Anywhere but here,761,16382,,True,False,13107,,,0,0,0.2,0.1,positive


In [12]:
df['tweet_favourite_count'].sum()

405558

# SQLAlchemy
### Connecting to a certain other host 

In [13]:
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine

In [14]:
engine = create_engine(conn, echo=False)

In [15]:
Base = automap_base()
Base.prepare(engine, reflect=True)  

In [16]:
# Test connection
Base.classes.keys()

['twt100']

In [17]:
tweets = Base.classes.twt100

In [20]:
session = Session(engine)
qu = session.query(tweets.tweet_favourite_count).filter(tweets.tweet_favourite_count > 0).all()
for line in qu:
    print(line)

(1,)
(1,)
(1,)
(1,)
(2,)
(1,)
(1,)
(2,)
(2,)
(2,)
(3,)
(3,)
(2,)
(2,)
(8,)
(139,)
(2,)
(10,)
(1,)
(2,)
(10,)
(1,)
(3,)
(1,)
(4,)
(1,)
(8,)
(2,)
(2,)
(1,)
(12,)
(1,)
(6,)
(1,)
(8,)
(19,)
(4,)
(1,)
(1,)
(1,)
(7,)
(1,)
(11,)
(2,)
(4,)
(1,)
(157,)
(5,)
(11,)
(1,)
(1,)
(4,)
(2,)
(4,)
(1,)
(1,)
(27,)
(1,)
(23,)
(688,)
(30,)
(1,)
(4,)
(10,)
(12,)
(142,)
(70,)
(11,)
(25,)
(61,)
(8,)
(5,)
(1,)
(1,)
(7,)
(1,)
(2,)
(1,)
(2,)
(2,)
(4,)
(4,)
(1,)
(20,)
(1,)
(36,)
(5,)
(1,)
(15,)
(1,)
(2,)
(21,)
(4,)
(152,)
(23,)
(3,)
(1,)
(1,)
(46,)
(2,)
(2,)
(1,)
(3,)
(12,)
(1,)
(2,)
(4,)
(2,)
(4,)
(1,)
(1,)
(1,)
(30,)
(1,)
(12,)
(1,)
(1,)
(1,)
(3,)
(3,)
(2,)
(4,)
(2,)
(1,)
(9,)
(1,)
(5,)
(5,)
(2,)
(1,)
(2,)
(1,)
(1,)
(21,)
(1,)
(1,)
(3,)
(7,)
(1,)
(1,)
(1,)
(1,)
(4,)
(3,)
(35,)
(3,)
(5,)
(59,)
(2,)
(1,)
(1,)
(1,)
(1,)
(12,)
(2,)
(11,)
(28,)
(1,)
(2,)
(2,)
(10,)
(1,)
(2,)
(6,)
(23,)
(42,)
(2,)
(55,)
(1,)
(3,)
(2,)
(1,)
(55,)
(1,)
(1,)
(27,)
(117,)
(2,)
(1,)
(2,)
(1,)
(70,)
(4,)
(1,)
(2,)
(2,)
(1,)
(1,)
(1,)
(238,)

(1,)
(1,)
(2,)
(11,)
(8,)
(5,)
(1,)
(1,)
(59,)
(4,)
(4,)
(1,)
(1,)
(1,)
(1,)
(15,)
(1,)
(5,)
(3,)
(8,)
(1,)
(6,)
(6,)
(1,)
(8,)
(1,)
(2,)
(59,)
(1,)
(1,)
(3,)
(1,)
(1,)
(4,)
(69,)
(2,)
(2,)
(63,)
(1,)
(18,)
(3,)
(14,)
(91,)
(1,)
(4,)
(23,)
(10,)
(1,)
(1,)
(35,)
(217,)
(2,)
(303,)
(9,)
(10,)
(4,)
(1,)
(2,)
(2,)
(16,)
(3,)
(1,)
(5,)
(35,)
(1,)
(1,)
(89,)
(2,)
(1,)
(1,)
(1,)
(3,)
(1,)
(2,)
(1,)
(17,)
(2,)
(1,)
(3,)
(1,)
(1,)
(1,)
(2,)
(1,)
(1,)
(1,)
(1,)
(2,)
(4,)
(1,)
(9,)
(6,)
(11,)
(7,)
(6,)
(1,)
(3,)
(1,)
(3,)
(6,)
(1,)
(106,)
(2,)
(5,)
(7,)
(40,)
(4,)
(4,)
(3,)
(1,)
(5,)
(1,)
(3,)
(67,)
(2,)
(1,)
(4,)
(2,)
(1,)
(4,)
(2,)
(1,)
(7,)
(1,)
(1,)
(1,)
(1,)
(6,)
(2,)
(1,)
(2,)
(1,)
(3,)
(118,)
(1,)
(1,)
(7,)
(8,)
(1,)
(1,)
(1,)
(2,)
(2,)
(1,)
(1,)
(4,)
(2,)
(6,)
(16,)
(1,)
(2,)
(1,)
(1,)
(35,)
(1,)
(2,)
(1,)
(1,)
(3,)
(1,)
(2,)
(1,)
(3,)
(1,)
(7,)
(23,)
(2,)
(8,)
(3,)
(1,)
(63,)
(1,)
(1,)
(1,)
(1,)
(3,)
(2,)
(3,)
(4,)
(1,)
(253,)
(2,)
(10,)
(38,)
(25,)
(2,)
(1,)
(1,)
(1,)
(8,)
(2,)
(20,)
(2

(1,)
(1,)
(3,)
(12,)
(1,)
(21,)
(47,)
(11,)
(1,)
(2,)
(3,)
(1,)
(1,)
(3,)
(4,)
(6,)
(3,)
(2,)
(3,)
(17,)
(4,)
(2,)
(1,)
(1,)
(5,)
(1,)
(282,)
(1,)
(16,)
(1,)
(61,)
(172,)
(2,)
(5,)
(2,)
(21,)
(4,)
(56,)
(1,)
(2,)
(77,)
(13,)
(17,)
(2,)
(1,)
(1,)
(10,)
(2,)
(4,)
(1,)
(3,)
(3,)
(2,)
(1,)
(2,)
(1,)
(8,)
(18,)
(4,)
(2,)
(1,)
(2,)
(6,)
(13,)
(1,)
(1,)
(1,)
(2,)
(4,)
(13,)
(35,)
(1,)
(1,)
(50,)
(1,)
(1,)
(9,)
(2,)
(2,)
(1,)
(19,)
(4,)
(9,)
(1,)
(1,)
(1,)
(1,)
(1,)
(9,)
(7,)
(1,)
(2,)
(19,)
(1,)
(3,)
(1,)
(3,)
(1,)
(3,)
(4,)
(1,)
(3,)
(4,)
(18,)
(9,)
(5,)
(2,)
(1,)
(48,)
(11,)
(4,)
(9,)
(1,)
(2,)
(8,)
(1,)
(1,)
(2,)
(3,)
(11,)
(2,)
(2,)
(1,)
(6,)
(1,)
(22,)
(8,)
(1,)
(4,)
(1,)
(1,)
(1,)
(3,)
(16,)
(5,)
(1,)
(1,)
(4,)
(1,)
(1,)
(2,)
(8,)
(1,)
(37,)
(1,)
(1,)
(12,)
(1,)
(6,)
(1,)
(2,)
(5,)
(1,)
(1,)
(19,)
(1,)
(1,)
(15,)
(2,)
(1,)
(1,)
(3,)
(1,)
(3,)
(1,)
(2,)
(13,)
(1,)
(1,)
(1,)
(2,)
(31,)
(1,)
(3,)
(1,)
(1,)
(2,)
(1,)
(1,)
(5,)
(17,)
(3,)
(2,)
(3,)
(49,)
(4,)
(2,)
(1,)
(1,)
(1,)
(5,)
(1,)
(3

(2,)
(85,)
(4,)
(3,)
(1,)
(2,)
(13,)
(1,)
(3,)
(2,)
(1,)
(1,)
(1,)
(1,)
(2,)
(8,)
(3,)
(1,)
(16,)
(5,)
(7,)
(5,)
(5,)
(2,)
(2,)
(1,)
(3,)
(92,)
(18,)
(2,)
(5,)
(6,)
(2061,)
(42,)
(8,)
(1,)
(4,)
(4,)
(1,)
(1,)
(8,)
(2,)
(1,)
(3,)
(4,)
(1,)
(55,)
(6,)
(1,)
(8,)
(5,)
(1,)
(9,)
(2,)
(2,)
(9,)
(6,)
(2,)
(5,)
(2,)
(1,)
(3,)
(2,)
(3,)
(4,)
(4,)
(4,)
(11,)
(70,)
(3,)
(6,)
(1,)
(1,)
(3,)
(9,)
(18,)
(2,)
(147,)
(4,)
(81,)
(29,)
(5,)
(2,)
(1,)
(3,)
(36,)
(1,)
(1,)
(2,)
(15,)
(1,)
(40,)
(7,)
(1,)
(61,)
(2,)
(496,)
(1,)
(11,)
(6,)
(2,)
(1,)
(1,)
(2,)
(84,)
(1,)
(33,)
(1,)
(5,)
(2,)
(2,)
(1,)
(30,)
(12,)
(4,)
(4,)
(1,)
(32,)
(2,)
(4,)
(7,)
(3,)
(1,)
(1,)
(6,)
(26,)
(57,)
(1,)
(1,)
(4,)
(5,)
(1,)
(39,)
(1,)
(1,)
(2,)
(39,)
(2,)
(40,)
(32,)
(35,)
(6,)
(1,)
(1,)
(1,)
(1,)
(71,)
(2,)
(1,)
(14,)
(6,)
(1,)
(2,)
(1,)
(1,)
(8,)
(1,)
(5,)
(1,)
(1,)
(3,)
(1,)
(1,)
(28,)
(43,)
(1,)
(17,)
(4,)
(1,)
(13,)
(1,)
(2,)
(23,)
(12,)
(39,)
(1,)
(1,)
(1,)
(81,)
(5,)
(1,)
(1,)
(5,)
(2,)
(1,)
(1,)
(14,)
(7,)
(2,)
(20,)
(1

(1,)
(11,)
(15,)
(3,)
(3,)
(2,)
(7,)
(81,)
(3,)
(10,)
(17,)
(13,)
(4,)
(24,)
(3,)
(1,)
(2,)
(1,)
(1,)
(1,)
(1,)
(2,)
(40,)
(6,)
(3,)
(2,)
(61,)
(1,)
(11,)
(1,)
(45,)
(1,)
(1,)
(5,)
(2,)
(7,)
(1,)
(1,)
(2,)
(13,)
(1,)
(1,)
(1,)
(2,)
(39,)
(43,)
(2,)
(1,)
(5,)
(10,)
(1,)
(3,)
(2,)
(5,)
(1,)
(4,)
(1,)
(1,)
(1,)
(5,)
(8,)
(5,)
(3,)
(34,)
(12,)
(2,)
(1,)
(1,)
(1,)
(1,)
(2,)
(10,)
(9,)
(1,)
(1,)
(1,)
(29,)
(3,)
(2,)
(6,)
(1,)
(1,)
(1,)
(2,)
(8,)
(1,)
(3,)
(1,)
(3,)
(2,)
(2,)
(1,)
(1,)
(36,)
(5,)
(2,)
(1,)
(2,)
(4,)
(4,)
(8,)
(1,)
(5,)
(1,)
(9,)
(3,)
(1,)
(16,)
(1,)
(20,)
(1,)
(4,)
(1,)
(7,)
(1,)
(3,)
(1,)
(5,)
(2,)
(1,)
(31,)
(2,)
(2,)
(15,)
(2,)
(2,)
(1,)
(1,)
(1,)
(2,)
(1,)
(22,)
(3,)
(3,)
(4,)
(2,)
(9,)
(4,)
(2,)
(652,)
(3,)
(1,)
(1,)
(2,)
(7,)
(4,)
(1,)
(32,)
(1,)
(1,)
(2,)
(3,)
(65,)
(6,)
(1,)
(1,)
(1,)
(8,)
(2,)
(3,)
(7,)
(1,)
(2,)
(25,)
(15,)
(1,)
(1,)
(1,)
(1,)
(1,)
(1,)
(5,)
(1,)
(1,)
(6,)
(16,)
(2,)
(16,)
(2,)
(1,)
(1,)
(12,)
(1,)
(1,)
(1,)
(2,)
(1,)
(4,)
(28,)
(3,)
(3,)
(23,)
(1,)

(1,)
(1,)
(4,)
(1,)
(3,)
(2,)
(2,)
(1,)
(7,)
(1,)
(8,)
(5,)
(1,)
(1,)
(61,)
(1,)
(1,)
(13,)
(2,)
(1,)
(1,)
(2,)
(1,)
(5,)
(4,)
(1,)
(19,)
(3,)
(10,)
(12,)
(2,)
(5,)
(17,)
(2,)
(4,)
(2,)
(2,)
(3,)
(1,)
(17,)
(6,)
(3,)
(1,)
(7,)
(1,)
(1,)
(2,)
(1,)
(1,)
(1,)
(3,)
(1,)
(56,)
(3,)
(7,)
(2,)
(20,)
(2,)
(2,)
(1,)
(2,)
(1,)
(1,)
(10,)
(4,)
(29,)
(2,)
(2,)
(1,)
(1,)
(4,)
(3,)
(1,)
(4,)
(1,)
(2,)
(1,)
(7,)
(3,)
(8,)
(1,)
(1,)
(8,)
(1,)
(1,)
(7,)
(1,)
(1,)
(996,)
(52,)
(4,)
(7,)
(1,)
(4,)
(2,)
(1,)
(1,)
(7,)
(8,)
(2,)
(8,)
(1,)
(4,)
(1,)
(3,)
(12,)
(2,)
(3,)
(1,)
(1,)
(1,)
(1,)
(7,)
(102,)
(1,)
(1,)
(5,)
(6,)
(1,)
(7,)
(1,)
(5,)
(2,)
(3,)
(1,)
(1,)
(3,)
(6,)
(2,)
(31,)
(8,)
(2,)
(2,)
(2,)
(3,)
(1,)
(1,)
(1,)
(1,)
(3,)
(7,)
(2,)
(62,)
(58,)
(5,)
(1,)
(1,)
(20,)
(1,)
(1,)
(1,)
(19,)
(1,)
(2,)
(1,)
(8,)
(4,)
(3,)
(3,)
(2,)
(7,)
(3,)
(8,)
(2,)
(3,)
(1,)
(33,)
(22,)
(1,)
(1,)
(7,)
(1,)
(1,)
(1,)
(1,)
(1,)
(5,)
(2,)
(1,)
(34,)
(1,)
(7,)
(2,)
(3,)
(5,)
(5,)
(2,)
(71,)
(9,)
(2,)
(5,)
(3,)
(631,)
(1,)
(9

(1,)
(2,)
(3,)
(3,)
(1,)
(15,)
(3,)
(1,)
(6,)
(2,)
(2,)
(54,)
(21,)
(2,)
(1,)
(4,)
(1,)
(2,)
(3,)
(2,)
(1,)
(3,)
(1,)
(1,)
(1,)
(3,)
(2,)
(10,)
(56,)
(1,)
(4,)
(1,)
(11,)
(1,)
(4,)
(2,)
(1,)
(3,)
(8,)
(13,)
(1,)
(1,)
(2,)
(3,)
(1,)
(6,)
(1,)
(2,)
(3,)
(3,)
(2,)
(1,)
(23,)
(2,)
(12,)
(3,)
(1,)
(1,)
(2,)
(1,)
(37,)
(4,)
(66,)
(6,)
(1,)
(2,)
(2,)
(5,)
(5,)
(3,)
(4,)
(4,)
(17,)
(2,)
(22,)
(5,)
(1,)
(1,)
(2,)
(1,)
(13,)
(3,)
(1,)
(18,)
(4,)
(2,)
(1,)
(3,)
(10,)
(8,)
(1,)
(1,)
(3,)
(2,)
(3,)
(4,)
(5,)
(3,)
(1,)
(3,)
(12,)
(4,)
(1,)
(3,)
(4,)
(1,)
(23,)
(10,)
(5,)
(253,)
(35,)
(1,)
(1,)
(7,)
(17,)
(5,)
(2,)
(12,)
(2,)
(1,)
(5,)
(1,)
(11,)
(28,)
(422,)
(3,)
(1,)
(1,)
(1,)
(3,)
(1,)
(5,)
(1,)
(11,)
(17,)
(12,)
(1,)
(1,)
(3,)
(5,)
(9,)
(1,)
(1,)
(8,)
(7,)
(5,)
(12,)
(4,)
(1,)
(12,)
(1,)
(83,)
(3,)
(2,)
(5,)
(4,)
(3,)
(1,)
(1,)
(5,)
(3,)
(1,)
(18,)
(1,)
(1,)
(6,)
(1,)
(3,)
(1,)
(4,)
(1,)
(2,)
(37,)
(7,)
(1,)
(11,)
(17,)
(23,)
(7,)
(23,)
(1,)
(1,)
(172,)
(1,)
(8,)
(2,)
(6,)
(14,)
(5,)
(1,)
(24,)
(

(9,)
(14,)
(2,)
(2,)
(3,)
(1,)
(1,)
(18,)
(2,)
(1,)
(4,)
(1,)
(1,)
(8,)
(9,)
(3,)
(6,)
(19,)
(1,)
(1,)
(1,)
(1,)
(13,)
(9,)
(2,)
(2,)
(2,)
(14,)
(16,)
(1,)
(1,)
(7,)
(2,)
(1,)
(71,)
(5,)
(13,)
(8,)
(2,)
(5,)
(1,)
(1,)
(83,)
(1,)
(2,)
(20,)
(8,)
(9,)
(1,)
(3,)
(2,)
(1,)
(3,)
(65,)
(2,)
(8,)
(1,)
(1,)
(1,)
(9,)
(2,)
(3,)
(9,)
(2,)
(1,)
(9,)
(2,)
(1,)
(1,)
(4,)
(9,)
(3,)
(1,)
(1,)
(20,)
(2,)
(2,)
(1,)
(1,)
(1,)
(2,)
(1,)
(75,)
(10,)
(1,)
(2,)
(1,)
(1,)
(1,)
(1,)
(5,)
(3,)
(1,)
(1,)
(1,)
(2,)
(1,)
(1,)
(1,)
(1,)
(1,)
(1,)
(1,)
(1,)
(1,)
(13,)
(1,)
(9,)
(8,)
(15,)
(9,)
(2,)
(1,)
(1,)
(1,)
(3,)
(1,)
(1,)
(2,)
(1,)
(1,)
(12,)
(1,)
(1,)
(2,)
(1,)
(2,)
(1,)
(1,)
(5,)
(1,)
(4,)
(1,)
(3,)
(1,)
(1,)
(1,)
(6,)
(3,)
(1,)
(2,)
(1,)
(4,)
(1,)
(2,)
(2,)
(24,)
(2,)
(2,)
(1,)
(2,)
(147,)
(16,)
(13,)
(1,)
(2,)
(1,)
(86,)
(8,)
(5,)
(6,)
(12,)
(1,)
(4,)
(5,)
(1,)
(1,)
(4,)
(1,)
(45,)
(9,)
(1,)
(14,)
(2,)
(17,)
(1,)
(5,)
(1,)
(30,)
(7,)
(9,)
(1,)
(2,)
(1,)
(119,)
(2,)
(2,)
(1,)
(10,)
(2,)
(1,)
(1,)
(1,)
(1,)

(2,)
(12,)
(10,)
(5,)
(110,)
(6,)
(1,)
(1,)
(13,)
(35,)
(2,)
(1,)
(2,)
(7,)
(13,)
(16,)
(3,)
(2,)
(1,)
(2,)
(1,)
(6,)
(1,)
(1,)
(1,)
(2,)
(4,)
(3,)
(3,)
(1,)
(2,)
(13,)
(4,)
(1,)
(3,)
(2,)
(1,)
(1,)
(14,)
(1,)
(87,)
(1,)
(6,)
(5,)
(7,)
(60,)
(6,)
(2,)
(1,)
(5,)
(1,)
(6,)
(26,)
(8,)
(1,)
(17,)
(13,)
(2,)
(31,)
(172,)
(12,)
(1,)
(5,)
(5,)
(1,)
(4,)
(1,)
(12,)
(2,)
(1,)
(2,)
(7,)
(1,)
(5,)
(33,)
(5,)
(1,)
(1,)
(25,)
(2,)
(36,)
(6,)
(6,)
(9,)
(1,)
(7,)
(4,)
(3,)
(31,)
(1,)
(12,)
(4,)
(3,)
(1,)
(1,)
(1,)
(40,)
(1,)
(1,)
(7,)
(4,)
(1,)
(128,)
(2,)
(18,)
(2,)
(1,)
(1,)
(1,)
(5,)
(4,)
(1,)
(191,)
(1,)
(171,)
(2,)
(3,)
(4,)
(1,)
(1,)
(11869,)
(23,)
(1,)
(2,)
(1,)
(1,)
(4,)
(4,)
(4,)
(2,)
(1,)
(1,)
(2,)
(1,)
(2,)
(543,)
(1,)
(449,)
(237,)
(19,)
(7,)
(10,)
(1,)
(2,)
(4,)
(1,)
(9,)
(1,)
(1,)
(16,)
(2,)
(13,)
(1,)
(6,)
(2,)
(17,)
(1,)
(23,)
(1,)
(31,)
(1,)
(1,)
(1,)
(1,)
(3,)
(4,)
(1362,)
(4,)
(4,)
(1,)
(13,)
(5,)
(3,)
(3,)
(58,)
(3,)
(1,)
(2,)
(12,)
(2,)
(6,)
(1,)
(4,)
(1,)
(7,)
(1,)
(8,)
(10,)
(1

(1,)
(1,)
(1,)
(30,)
(4,)
(1,)
(1,)
(3,)
(1,)
(1,)
(1,)
(2,)
(3,)
(4,)
(1,)
(3,)
(1,)
(2,)
(7,)
(2,)
(1,)
(1,)
(1,)
(1,)
(4,)
(8,)
(6,)
(1,)
(2,)
(1,)
(2,)
(1,)
(1,)
(1,)
(1,)
(1,)
(1,)
(1,)
(1,)
(43,)
(1,)
(1,)
(292,)
(4,)
(26,)
(1,)
(1,)
(7,)
(1,)
(3,)
(1,)
(2,)
(1,)
(5,)
(3,)
(3,)
(2,)
(1,)
(19,)
(2,)
(1,)
(1,)
(2,)
(1,)
(1,)
(3,)
(2,)
(1,)
(2,)
(1,)
(1,)
(2,)
(1,)
(1,)
(1,)
(2,)
(24,)
(5,)
(1,)
(1,)
(12,)
(1,)
(2,)
(1,)
(4,)
(1,)
(2,)
(1,)
(1,)
(10,)
(1,)
(5,)
(2,)
(75,)
(1,)
(1,)
(3,)
(1481,)
(3,)
(189,)
(1,)
(7,)
(11,)
(1,)
(4,)
(1,)
(2,)
(1,)
(1,)
(1,)
(1,)
(1,)
(2,)
(1,)
(3,)
(1,)
(1,)
(1,)
(1,)
(4,)
(1,)
(1,)
(2,)
(1,)
(4,)
(2,)
(2,)
(1,)
(2,)
(1,)
(8,)
(1,)
(1,)
(1,)
(1,)
(2,)
(5,)
(1,)
(10,)
(4,)
(1,)
(2,)
(1,)
(1,)
(1,)
(27,)
(7,)
(1,)
(1,)
(6,)
(22,)
(1,)
(1,)
(13,)
(1,)
(1,)
(1,)
(63,)
(1,)
(15,)
(1,)
(1,)
(2,)
(2,)
(1,)
(2,)
(1,)
(1,)
(18,)
(3,)
(1,)
(1,)
(1,)
(1,)
(1,)
(3,)
(1,)
(1,)
(1,)
(3,)
(3,)
(3,)
(1,)
(1,)
(10,)
(20,)
(8,)
(3,)
(5,)
(1,)
(2,)
(3,)
(1,)
(1,)
(2,)


### Don't run yet
Clean up your tweets! 

In [186]:
# Send data frame to database 
# last in 118
#df.to_sql(name='twt100', con=engine, if_exists='append', index=True)
print('added!')

added!


In [None]:
# session things in case I ever need to open one
session = Session(engine)
session.close()


In [344]:
import schedule
import time

In [None]:
def EveryFifteen():
    print('time')
    
schedule.every(15).minutes


### Stopwords 

In [163]:
def Tokenize(txt):
    return TextBlob(txt).words

In [140]:
dt = [word for word in tt if word not in stop_words]
dt

['mother', 'mashed', 'mnms']

In [98]:
teststring ='my mother mashed my mnms'
tt = TextBlob(teststring).words
tt

WordList(['my', 'mother', 'mashed', 'my', 'mnms'])

In [None]:
def RemoveStopWord(txt):
    stop_words = stopwords.words('english')
    no_stops = [word for word in lst if word not in stopwords]
    return 


In [139]:
from nltk.corpus import stopwords
stop_words = stopwords.words('english')