### News Mood

In this activity, we are going to review Tweepy.

BBC, CBS, CNN, Fox, and New York Times
- - -

In [1]:
# Dependencies
# All imports live in one block at the top of the cell so a fresh kernel
# fails fast on a missing package, before any API object is created.

# Standard library
import json
import time
from datetime import datetime

# Third-party
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tweepy
from matplotlib import style
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Local: Twitter credentials are read from the `config` module rather than
# being written into the notebook
from config import (consumer_key, consumer_secret,
                    access_token, access_token_secret)

# Plot styling
style.use('ggplot')

# Setup Tweepy API Authentication
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# JSONParser: responses are consumed below as plain dicts
# (e.g. public_tweets["statuses"])
api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())

# Initialize Sentiment Analyzer
analyzer = SentimentIntensityAnalyzer()

In [2]:
# "Real Person" Filters
# Only accounts strictly inside these bounds are kept by the search loop.
lang = "en"                           # required value of the user's "lang" field
min_tweets, max_tweets = 5, 10000     # statuses_count must fall strictly between these
max_followers = max_following = 2500  # followers_count / friends_count must each be below this

In [6]:
# Run a single search with no extra parameters and display the raw response
# dict (top-level keys: 'search_metadata' and 'statuses') to inspect its shape
api.search("gavin newsom")

{'search_metadata': {'completed_in': 0.054,
  'count': 15,
  'max_id': 1006725143178153985,
  'max_id_str': '1006725143178153985',
  'next_results': '?max_id=1006719468742103039&q=gavin%20newsom&include_entities=1',
  'query': 'gavin+newsom',
  'refresh_url': '?since_id=1006725143178153985&q=gavin%20newsom&include_entities=1',
  'since_id': 0,
  'since_id_str': '0'},
 'statuses': [{'contributors': None,
   'coordinates': None,
   'created_at': 'Wed Jun 13 02:29:12 +0000 2018',
   'entities': {'hashtags': [],
    'symbols': [],
    'urls': [{'display_url': 'yahoo.com/news/californi…',
      'expanded_url': 'https://www.yahoo.com/news/californias-gavin-newsom-wants-lead-way-post-bernie-post-hillary-party-160041273.html?.tsrc=fauxdal',
      'indices': [85, 108],
      'url': 'https://t.co/RcZ9rA7xbb'},
     {'display_url': 'twitter.com/i/web/status/1…',
      'expanded_url': 'https://twitter.com/i/web/status/1006725143178153985',
      'indices': [117, 140],
      'url': 'https://t.co/qJ

In [3]:
# Search for People Tweeting about Gavin Newsom
search_term = "Gavin Newsom"

# Upper bound (max_id) for the next request; None for the first request
oldest_tweet = None

# Unique tweet IDs in first-seen order, plus a set for constant-time
# membership checks
unique_ids = []
seen_ids = set()

# One dict per kept tweet; duplicates are never added
results = []

# Number of tweets that passed the "Real Person" filters (duplicates
# included, so comparing it with len(unique_ids) reveals repeated tweets)
counter = 0

# Loop through 5 times (up to 100 tweets per request, 500 in total)
for x in range(5):

    # Retrieve 100 most recent tweets -- specifying a max_id
    public_tweets = api.search(search_term,
                               count=100,
                               result_type="recent",
                               max_id=oldest_tweet)

    statuses = public_tweets["statuses"]

    # Nothing older to page through: stop instead of re-issuing the
    # same request for the remaining iterations
    if not statuses:
        break

    for tweet in statuses:

        tweet_id = tweet["id"]
        user = tweet["user"]

        # Use filters to check if user meets conditions
        if (user["followers_count"] < max_followers and
                min_tweets < user["statuses_count"] < max_tweets and
                user["friends_count"] < max_following and
                user["lang"] == lang):

            # Increase counter by 1 for every tweet that passes the filters
            counter += 1

            # Record the tweet only the first time its id is seen, so
            # `results` never contains duplicate rows
            if tweet_id not in seen_ids:
                seen_ids.add(tweet_id)
                unique_ids.append(tweet_id)
                results.append({"User": user["screen_name"],
                                "Tweet": tweet["text"],
                                "Date": tweet["created_at"],
                                "Tweet ID Str": tweet["id_str"],
                                "Tweet ID": tweet["id"],
                                "Candidate": search_term})

    # Reassign the oldest tweet (i.e. the max_id) from the smallest id on
    # this page, regardless of the order the statuses arrived in.
    # Subtract 1 so the previous oldest isn't included in the new search
    oldest_tweet = min(status["id"] for status in statuses) - 1

In [5]:
# Number of tweet records collected into `results`
len(results)

127

In [38]:
# Build a DataFrame with one row per collected tweet record
results_pd = pd.DataFrame(results)

# Preview the first five rows
results_pd.head(5)

Unnamed: 0,Candidate,Date,Tweet,Tweet ID,Tweet ID Str,User
0,Gavin Newsom,Wed Jun 13 02:19:21 +0000 2018,RT @KamVTV: I just paid OVER $4 for gas!\n\nJe...,1006722662549344256,1006722662549344256,J44morris
1,Gavin Newsom,Wed Jun 13 02:11:51 +0000 2018,"@Kate1Kincaid @skramerbyu_82 Unfortunately, it...",1006720776882479106,1006720776882479106,Leslie97606493
2,Gavin Newsom,Wed Jun 13 02:07:27 +0000 2018,RT @KamVTV: I just paid OVER $4 for gas!\n\nJe...,1006719669108199430,1006719669108199430,MySuebell42
3,Gavin Newsom,Wed Jun 13 02:04:10 +0000 2018,The loudest cheers were for KD (w/ MVP chants)...,1006718841643077632,1006718841643077632,_emptyyourmind
4,Gavin Newsom,Wed Jun 13 01:53:07 +0000 2018,RT @KamVTV: I just paid OVER $4 for gas!\n\nJe...,1006716060358225920,1006716060358225920,tblbkb


In [26]:
# Print the number of tweets that passed the "Real Person" filters
# (`counter` is incremented once per accepted tweet in the search loop)
print(counter)

129


In [27]:
# Print the number of unique ids retrieved
# (matches `counter` when no accepted tweet was seen more than once)
print(len(unique_ids))


129


In [39]:
# Export to CSV
# File name is the current local date (MM-DD-YY) followed by "-Tweets.csv"
date_stamp = time.strftime("%m-%d-%y")
file_name = "{}-Tweets.csv".format(date_stamp)
results_pd.to_csv(file_name, encoding="utf-8")