Author 1 : Chandan Awasthi (cawasthi@deloitte.com) <br>
Author 2 : Srikanth Reddy Metlakunta (srmetlakunta@deloitte.com)<br>
Description : The script downloads up to the number of tweets given by No_of_tweet in tweet_parm.json and records, for each tweet, its text together with the user's location, screen_name, retweet_count, source of the tweet and created_at (time of the tweet).<br>
Inputs : tweet_parm.json<br>
Output : tweet_hashtag.csv<br>

In [None]:
#importing packages
import json                         # to work with json objects
import os                           # To access os level files and folders
import time                         # To pause between API calls (time.sleep)
from time import gmtime, strftime   # To change time format, datatime related manipulations

import pandas as pd                 # to do data manipulation
import tweepy                       # To access twitter API

In [None]:
### print the current GMT time
print(strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()))

# the parameter file is looked up in the current working directory
json_file_path = os.getcwd()

# reading the parameters from the json file
# os.path.join inserts the separator of the running platform; the previous
# hard-coded "\\" only produced a valid path on Windows.
with open(os.path.join(json_file_path, "tweet_parm.json")) as json_file:
    config = json.load(json_file)

In [None]:
## reading parameters from the loaded json config
## (a missing key raises KeyError, a non-numeric No_of_tweet raises ValueError)

# base path (without suffix) used to build the output file names
file_path = str(config['file_path'])
# maximum number of tweets to fetch
No_of_tweet = int(config['No_of_tweet'])
# consumer key of the Twitter application
consumer_key = str(config['consumer_key'])
# consumer secret of the Twitter application
consumer_secret = str(config['consumer_secret'])
# access token
access_token = str(config['access_token'])
# access token secret
access_token_secret = str(config['access_token_secret'])
# search query (hashtag) passed to the search API
tweeter_hashtag = str(config['tweeter_hashtag'])
# series of twitter handles to read timelines from.
# Wrapping the value in str() collapsed the series into one string, so the
# later "for handle in tweeter_handle" loop walked over single characters.
# Keep it as a list instead; a lone string is treated as one handle.
_raw_handles = config['tweeter_handle']
if isinstance(_raw_handles, str):
    tweeter_handle = [_raw_handles]
else:
    tweeter_handle = [str(_handle) for _handle in _raw_handles]
# start date of the search window (passed as "since")
tweet_start = str(config['tweet_start'])
# end date of the search window (passed as "until", also used in the file name)
tweet_end = str(config['tweet_end'])
# value passed as "count" to the search call; used as stored in the json file
tweet_count = config['tweet_count']

In [None]:
# Create the authenticated API client from the Twitter credentials.
# The consumer key/secret go to the OAuth handler, the access token pair is
# attached afterwards, and the handler is then given to the API object.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(
    auth,
    wait_on_rate_limit=True,
)

In [None]:
# Declare the empty dataframe and the per-column lists that the extraction
# cells append to.
df = pd.DataFrame()
created_at = []
tweet_location = []
Screen_Name = []
Retweet_count = []
Tweet_Source = []
tweet_text = []
# The handle extraction appends to these two lists as well; they were never
# declared, which raised NameError on the first append.
Verified_Handle = []
Fav_count = []

In [None]:
# Extract tweets for a hashtag and write them to "<file_path>_<tweet_end>.csv".
# Columns written: created_at, tweet_location, Screen_Name, Retweet_count,
# Tweet_Source, tweet_text.
print("Reading Tweets")

try:
    ### connecting to twitter API
    # The cursor is consumed by the list comprehension below, so that is
    # where the requests are made; both steps stay inside the try.
    tweets = tweepy.Cursor(api.search,
                           q=tweeter_hashtag,
                           count=tweet_count,
                           lang="en",
                           since=tweet_start,
                           until=tweet_end).items(No_of_tweet)

    # One row per tweet, already in output column order. The text is kept as
    # str: encoding it to bytes here made the CSV contain b'...' literals on
    # Python 3; to_csv(encoding='utf-8') performs the encoding on write.
    hashtag_rows = [[tweet.created_at,
                     tweet.user.location,
                     tweet.user.screen_name,
                     tweet.retweet_count,
                     tweet.source,
                     tweet.text] for tweet in tweets]

except tweepy.RateLimitError as e:
    # Comparing the exception object with a message string was always False,
    # so this branch was unreachable; match on the exception type instead.
    print(e)
    time.sleep(60*5) #Sleep for 5 minutes before anything else calls the API

except tweepy.TweepError as e:
    print(e)

else:
    df = pd.DataFrame(hashtag_rows,
                      columns=['created_at',
                               'tweet_location',
                               'Screen_Name',
                               'Retweet_count',
                               'Tweet_Source',
                               'tweet_text'])

    print("writing tweets to file")
    # Use a separate name: overwriting file_path here leaked the
    # "_<tweet_end>.csv" suffix into every later use of file_path.
    hashtag_file_path = file_path+"_"+tweet_end+".csv"
    print('file path is :-', hashtag_file_path)
    df.to_csv(hashtag_file_path, index=False, encoding='utf-8')

In [None]:
# Extract tweets from each twitter handle and write them to
# "<file_path>_handle.csv".
# Columns written: created_at, tweet_location, Screen_Name, Retweet_count,
# Fav_count, Verified_Handle, tweet_text.
print("Reading Tweets")

# tweeter_handle may be a single handle or a collection of handles; looping
# over a bare string would yield one character at a time.
if isinstance(tweeter_handle, str):
    handles = [tweeter_handle]
else:
    handles = list(tweeter_handle)

# Rows are collected locally so this output never mixes with rows gathered
# elsewhere in the notebook.
handle_rows = []

for handle in handles:
    try:
        # NOTE: an "until" date filter was tried on this call and did not work.
        tweets = tweepy.Cursor(api.user_timeline,
                               id=handle,
                               ).items(No_of_tweet)

        # The cursor is consumed here, so this is where API errors surface;
        # it must sit inside the try. The text is kept as str (encoding to
        # bytes wrote b'...' literals into the CSV on Python 3).
        rows = [[tweet.created_at,
                 tweet.user.location,
                 tweet.user.screen_name,
                 tweet.retweet_count,
                 tweet.favorite_count,
                 tweet.user.verified,
                 tweet.text] for tweet in tweets]

    except tweepy.RateLimitError as e:
        # Matching on the exception type replaces the string comparison,
        # which was always False. This handle is skipped after the pause.
        print(e)
        time.sleep(60*5) #Sleep for 5 minutes

    except tweepy.TweepError as e:
        # Any other API error (e.g. 401 for locked twitter accounts):
        # report it and move on to the next handle.
        print(e)

    else:
        handle_rows.extend(rows)

df = pd.DataFrame(handle_rows,
                  columns=['created_at',
                           'tweet_location',
                           'Screen_Name',
                           'Retweet_count',
                           'Fav_count',
                           'Verified_Handle',
                           'tweet_text'])

print("writing tweets to file")
# Build the name from the configured base path; the file_path variable may
# already carry another output's ".csv" suffix at this point.
handle_file_path = str(config['file_path'])+"_handle"+".csv"
print('file path is :-', handle_file_path)
df.to_csv(handle_file_path, index=False, encoding='utf-8')
print("File is Ready")