# Setting up query on twython

In [3]:
from twython import Twython  
import json

# Load credentials from json file
with open("twitter_credentials.json", "r") as file:  
    creds = json.load(file)

# Instantiate an object
python_tweets = Twython(creds['CONSUMER_KEY'], creds['CONSUMER_SECRET'])

# Create our query
query = {'q': 'BJP',  
        'result_type': 'popular',
        'count': 10,
        'lang': 'en',
        }



# Perform the search

#### Note about search from twitter docs
Before digging in, it’s important to know that the standard search API is focused on relevance and not completeness. This means that some Tweets and users may be missing from search results. If you want to match for completeness you should consider the premium or enterprise search APIs

In [4]:
import pandas as pd

# Search tweets
dict_ = {'user': [], 'date': [], 'text': [], 'favorite_count': []}  
for status in python_tweets.search(**query)['statuses']:  
    dict_['user'].append(status['user']['screen_name'])
    dict_['date'].append(status['created_at'])
    dict_['text'].append(status['text'])
    dict_['favorite_count'].append(status['favorite_count'])

# Structure data in a pandas DataFrame for easier manipulation
df = pd.DataFrame(dict_)  
df.sort_values(by='favorite_count', inplace=True, ascending=False)  
df.head(5)  

Unnamed: 0,date,favorite_count,text,user
2,Tue Mar 19 06:52:03 +0000 2019,30016,BJP leaders must stop saying publicly that Ind...,Swamy39
0,Tue Mar 19 05:28:24 +0000 2019,17708,CMs picked by BJP\n\nDevendra Fadnavis: 48 yea...,Tejasvi_Surya
5,Mon Mar 18 13:37:40 +0000 2019,7751,Rahul is transferring millions of votes via hi...,muglikar_
4,Tue Mar 19 04:47:43 +0000 2019,5928,"Out of BJP’s 12 incumbent chief ministers, fiv...",akhileshsharma1
3,Mon Mar 18 17:18:08 +0000 2019,5461,Aaj Tak channel spots lone anti-BJP voice in a...,UnSubtleDesi


# For the Streaming API

Collect Data into csv as and when twitter sends

For fields you may want to filter on, check out https://developer.twitter.com/en/docs/tweets/data-dictionary/overview/tweet-object.html

In [25]:
from twython import TwythonStreamer  
import csv

# Filter out unwanted data
def process_tweet(tweet):  
    d = {}
    d['hashtags'] = [hashtag['text'] for hashtag in tweet['entities']['hashtags']]
    d['text'] = tweet['text']
    d['user'] = tweet['user']['screen_name']
    d['user_loc'] = tweet['user']['location']
    return d


# Create a class that inherits TwythonStreamer
class MyVanillaStreamer(TwythonStreamer):     

    # Received data
    def on_success(self, data):

        # Only collect tweets in English
        if data['lang'] == 'en':
            tweet_data = process_tweet(data)
            self.save_to_csv(tweet_data)

    # Problem with the API
    def on_error(self, status_code, data):
        print(status_code, data)
        self.disconnect()

    # Save each tweet to csv file
    def save_to_csv(self, tweet):
        with open(r'saved_tweets.csv', 'a') as file:
            writer = csv.writer(file)
            writer.writerow(list(tweet.values()))

In [31]:
from twython import TwythonStreamer  
import csv

# Filter out unwanted data
def process_tweet(tweet):  
    ## bypassing the processing
    # return tweet
    d = {}  
    d['hashtags'] = [hashtag['text'] for hashtag in tweet['entities']['hashtags']]
    d['text'] = tweet['text']
    d['user'] = tweet['user']['screen_name']
    d['user_loc'] = tweet['user']['location']
    return d


# Create a class that inherits TwythonStreamer
class MyStreamer(TwythonStreamer):
    # csv_counter
    csv_counter = 0
    
    def __init__(self, *args, **kwargs):
        self.keyword = ''
        
        if 'tweet_count' in kwargs:
            self.tweet_count = kwargs.pop('tweet_count')
        if 'csv_name' in kwargs:
            self.csv_name = kwargs.pop('csv_name')
        else:
            MyStreamer.csv_counter += 1
            self.csv_name = 'saved_tweets_'+str(MyStreamer.csv_counter)
        super().__init__(*args, **kwargs)
            
    # Received data
    def on_success(self, data):
        if self.tweet_count <= 0:
            self.disconnect()
        # Only collect tweets in English
        if data['lang'] == 'en':
            tweet_data = process_tweet(data)
            self.save_to_csv(tweet_data)
            self.tweet_count -= 1

            

    # Problem with the API
    def on_error(self, status_code, data):
        print(status_code, data)
        self.disconnect()

    # Save each tweet to csv file
    def save_to_csv(self, tweet):
        with open(self.csv_name + '_'+ self.keyword + r'.csv', 'a') as file:
            writer = csv.writer(file)
            writer.writerow(list(tweet.values()))
            

# Get the data

Must find way to gracefully interrupt streaming  
Real time may be necessary or not - depending on if we want trends. Twitter has an api to filter trends based on location - but IDK if twython has bindings to it or we have to play around with requests  
Trends may be found in a different way to - needs to be explored

In [30]:
# Modded
# Instantiate from our streaming class
stream = MyStreamer(creds['CONSUMER_KEY'], creds['CONSUMER_SECRET'],  
                    creds['ACCESS_TOKEN'], creds['ACCESS_SECRET'], tweet_count = 100, csv_name = 'election_data')
# Start the stream
# specify the keyword to be tracked
stream.keyword = 'BJP'
stream.statuses.filter(track=stream.keyword)  

In [27]:
# Vanilla
# Instantiate from our streaming class
vanilla_stream = MyVanillaStreamer(creds['CONSUMER_KEY'], creds['CONSUMER_SECRET'],  
                    creds['ACCESS_TOKEN'], creds['ACCESS_SECRET'])
# Start the stream
# specify the keyword to be tracked
keyword = 'BJP'
vanilla_stream.statuses.filter(track=keyword)  

KeyboardInterrupt: 

# Read csv

In [17]:
import pandas as pd

file_name = 'saved_tweets_2.csv'
tweets = pd.read_csv(file_name)

tweets.head()  

Unnamed: 0,RT @plittooo: #ChowkidaarNahiJawabdarChahiye Tribute to our 1st Prime Minister Pt. Jawaharlal Nehru Ji🙏🙏. Past 5 years of BJP is summed u…,Aazaad_India,Unnamed: 2,['ChowkidaarNahiJawabdarChahiye']
0,RT @abpnewstv: #LokSabhaElections2019 : Chhatt...,AdityaJha93,"New Delhi, India","['LokSabhaElections2019', 'BJP']"
1,RT @kanimozhi: Twitter directly campaigning fo...,v_jai_ho,,['CambridgeAnalytica']
2,"Cong-JDS to campaign jointly, vow to reduce BJ...",State_Times,Jammu | Kashmir | Delhi,[]
3,"RT @OnlyNakedTruth: Ok folks , time to call Ra...",always_salil,,[]
4,"RT @thenglishpost: #EC notice, police complain...",onlynishank,India,"['EC', 'BJP', 'LokSabhaElections2019']"
