# Filtering/parsing tweets

In [48]:
import pickle
from datetime import datetime
import pandas as pd

# Directory that holds the pickled tweet files. The trailing slash matters:
# file paths are built from this value by plain string concatenation.
PATH_TO_TWEETS = "../data/"

#### Pickled Twitter response data used in this example: https://drive.google.com/open?id=0B2x-3KrueUw1b2ozbzM2eWVzWGs



In [11]:
# Load the pickled Twitter response from disk.
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only load pickle files that come from a source you trust.
# The context manager closes the file handle as soon as loading finishes.
with open(PATH_TO_TWEETS + "tweet0.p", "rb") as tweet_file:
    tweets = pickle.load(tweet_file)

#### Boolean expressions for filtering

In [12]:
def no_url(json):
    """Return True when the tweet dict's ``entities["urls"]`` is empty (falsy)."""
    return not json["entities"]["urls"]


def no_media(json):
    """Return True when the tweet dict's ``entities`` has no ``"media"`` key."""
    return "media" not in json["entities"]


def no_mentions(json):
    """Return True when the tweet dict's ``entities["user_mentions"]`` is empty (falsy)."""
    return not json["entities"]["user_mentions"]

#### Keep only tweets that have no embedded URLs, no media (picture/video), and no mentions (@).

In [63]:
# Keep a status only when every predicate accepts it. The predicates are
# evaluated in the order URL -> media -> mentions and stop at the first failure.
filtered_tweets = [
    status
    for status in tweets["statuses"]
    if all(keep(status) for keep in (no_url, no_media, no_mentions))
]

#### Create a pandas DataFrame for the tweet data

In [78]:
# Raw text of every tweet that survived filtering.
original_texts = [tweet["text"] for tweet in filtered_tweets]

# Parse each tweet's "created_at" string into a timezone-aware datetime
# (the "%z" directive reads the UTC offset); these become the row index.
created_at = [
    datetime.strptime(tweet["created_at"], "%a %b %d %H:%M:%S %z %Y")
    for tweet in filtered_tweets
]

data = {
    "original_tweet": original_texts,
    # Same text with every newline replaced by a single space.
    "parsed_tweet": [text.replace("\n", " ") for text in original_texts],
}
tweet_df = pd.DataFrame(data, index=created_at)

In [80]:
# Preview the first rows to sanity-check the datetime index and both text columns.
tweet_df.head()

Unnamed: 0,original_tweet,parsed_tweet
2017-01-25 00:04:40+00:00,Should dead people be allowed to vote?\n\n#CNN...,Should dead people be allowed to vote? #CNN D...
2017-01-24 20:38:08+00:00,Apparently #Trump is all for horse abuse and d...,Apparently #Trump is all for horse abuse and d...
2017-01-24 17:11:42+00:00,#Trump is firing #FBID James #Comey &amp; appo...,#Trump is firing #FBID James #Comey &amp; appo...
2017-01-23 05:45:42+00:00,No matter what happens #dearrussiantourist you...,No matter what happens #dearrussiantourist you...
2017-01-23 04:10:39+00:00,"Since my team didn't win, I should riot like t...","Since my team didn't win, I should riot like t..."


In [82]:
# Length of each raw tweet in characters, computed with pandas' vectorized
# string accessor instead of a per-row Python lambda.
# NOTE(review): HTML entities such as "&amp;" are still escaped in the text, so
# each one is counted at its escaped length — confirm that this is intended.
tweet_df["character_count"] = tweet_df["original_tweet"].str.len()
tweet_df.head()

Unnamed: 0,original_tweet,parsed_tweet,character_count
2017-01-25 00:04:40+00:00,Should dead people be allowed to vote?\n\n#CNN...,Should dead people be allowed to vote? #CNN D...,139
2017-01-24 20:38:08+00:00,Apparently #Trump is all for horse abuse and d...,Apparently #Trump is all for horse abuse and d...,126
2017-01-24 17:11:42+00:00,#Trump is firing #FBID James #Comey &amp; appo...,#Trump is firing #FBID James #Comey &amp; appo...,142
2017-01-23 05:45:42+00:00,No matter what happens #dearrussiantourist you...,No matter what happens #dearrussiantourist you...,132
2017-01-23 04:10:39+00:00,"Since my team didn't win, I should riot like t...","Since my team didn't win, I should riot like t...",136


In [88]:
# Raise the per-column display width so each tweet is shown in full instead of
# being truncated with "...". The fully qualified option name is used because
# shorthand names are resolved by pattern matching and can become ambiguous.
# This changes a global pandas display setting for the rest of the session.
pd.set_option("display.max_colwidth", 400)
tweet_df.head()

Unnamed: 0,original_tweet,parsed_tweet,character_count
2017-01-25 00:04:40+00:00,Should dead people be allowed to vote?\n\n#CNN Debates it tonite all night tune in #lolol #Sarcasm #TRUMP #SeanSpicerSays\n#Trumpismypresident,Should dead people be allowed to vote? #CNN Debates it tonite all night tune in #lolol #Sarcasm #TRUMP #SeanSpicerSays #Trumpismypresident,139
2017-01-24 20:38:08+00:00,Apparently #Trump is all for horse abuse and delays outlawing inhumane 'soring' practice. Way to go. #offtoagoodstart #sarcasm,Apparently #Trump is all for horse abuse and delays outlawing inhumane 'soring' practice. Way to go. #offtoagoodstart #sarcasm,126
2017-01-24 17:11:42+00:00,"#Trump is firing #FBID James #Comey &amp; appointing a totally legit new guy, Jim Comey. Should be an exciting 4 yrs! #Sarcasm #Disappointment","#Trump is firing #FBID James #Comey &amp; appointing a totally legit new guy, Jim Comey. Should be an exciting 4 yrs! #Sarcasm #Disappointment",142
2017-01-23 05:45:42+00:00,No matter what happens #dearrussiantourist you won't get a discount #sorrynotsorry not even if #Trump is #president #sarcasm #retail,No matter what happens #dearrussiantourist you won't get a discount #sorrynotsorry not even if #Trump is #president #sarcasm #retail,132
2017-01-23 04:10:39+00:00,"Since my team didn't win, I should riot like the liberals since I didn't get my way!! #sarcasm #Superbowl #Falcons #Patriots #NFL #Trump","Since my team didn't win, I should riot like the liberals since I didn't get my way!! #sarcasm #Superbowl #Falcons #Patriots #NFL #Trump",136


In [92]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric
# column of the frame, i.e. the per-tweet character count.
tweet_df.describe()

Unnamed: 0,character_count
count,20.0
mean,129.7
std,17.767801
min,62.0
25%,128.25
50%,135.0
75%,139.25
max,143.0
