In [1]:
import csv
import json
import os
import time
from datetime import datetime

import pandas as pd
import tweepy
from tqdm import tqdm_notebook

In [2]:
# Twitter credentials are read from a local CSV file rather than written in the notebook

df = pd.read_csv('twitter_credencials.csv')

# Row 0 of the file supplies the four OAuth values, one per named column.
consumer_key, consumer_secret, access_token, access_token_secret = (
    df[column][0]
    for column in ('API key', 'API secret key', 'Access token', 'Access token secret')
)


# Build the authenticated client; wait_on_rate_limit=True is passed so tweepy
# handles rate limiting itself.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True)

# Verify connection by requesting an authorization URL from Twitter
try:
    redirect_url = auth.get_authorization_url()
except tweepy.TweepError:
    print('Error! Failed to get request token.')
else:
    print('Connection established!')

Connection established!


In [3]:
# Tweet-collection function: searches Twitter for a query and returns the results as a DataFrame

def _tweet_to_row(tweet):
    '''Flatten one tweepy status object into a list ordered like the output columns.'''
    user = tweet.user

    try:
        # Prefer the text of `retweeted_status` when the attribute exists.
        text = tweet.retweeted_status.full_text
    except AttributeError:  # Not a retweet
        text = tweet.full_text

    return [user.screen_name, user.description, user.location, user.friends_count,
            user.followers_count, user.statuses_count, tweet.retweet_count,
            user.created_at, tweet.created_at, text, tweet.entities['hashtags']]


def twitter_bot(query, date_since, nTweets, nRuns=1, output_file=True, sleep_seconds=600):
    '''Search recent English tweets matching a query and collect them in a DataFrame.

    The search is issued through the notebook-level ``api`` client, so the
    authentication cell must have been run first.

    Parameters
    ----------
    query : str
        Search expression, forwarded to the search call as ``q``.
    date_since : str
        Forwarded to the search call as ``since`` (the notebook passes a
        "YYYY-MM-DD" string).
    nTweets : int
        Maximum number of tweets pulled from the cursor in each run.
    nRuns : int, default 1
        How many times the same search is repeated. Rows from every run are
        appended to the same result; no de-duplication is performed, so
        repeated runs may contain the same tweet more than once.
    output_file : bool, default True
        When true, the result is also written to ``data/<timestamp>.csv``
        (the ``data`` directory is created if it does not exist).
    sleep_seconds : int or float, default 600
        Pause between two consecutive runs (~10 minutes by default). No pause
        is made after the final run.

    Returns
    -------
    pandas.DataFrame
        One row per collected tweet with the columns ``username``,
        ``account_description``, ``location``, ``following``, ``followers``,
        ``total_tweets``, ``total_retweets``, ``user_created``,
        ``tweet_created``, ``text`` and ``hashtags``.
    '''

    columns = ['username', 'account_description', 'location', 'following',
               'followers', 'total_tweets', 'total_retweets',
               'user_created', 'tweet_created', 'text', 'hashtags']

    # Rows are accumulated in a plain list and turned into a DataFrame once at
    # the end; growing a DataFrame one row at a time re-allocates it each time.
    rows = []

    for run_index in tqdm_notebook(range(nRuns)):

        cursor = tweepy.Cursor(api.search, q=query, lang="en", since=date_since,
                               tweet_mode='extended', result_type='recent')
        tweets = list(cursor.items(nTweets))
        number_of_tweets_parsed = len(tweets)

        rows.extend(_tweet_to_row(tweet) for tweet in tweets)

        print('Total number of {} tweets scraped'.format(number_of_tweets_parsed))

        # Pause only *between* runs: sleeping after the last run would just
        # delay returning the result.
        if run_index < nRuns - 1:
            time.sleep(sleep_seconds)

    df = pd.DataFrame(rows, columns=columns)

    if output_file:
        # Make sure the target directory exists so a missing folder cannot
        # discard everything that was just collected.
        os.makedirs('data', exist_ok=True)
        timestamp = datetime.today().strftime('%Y%m%d_%H%M%S')
        filename = 'data/' + timestamp + '.csv'
        df.to_csv(filename, index = False)

    print('Procedure has been completed!')

    return df

In [5]:
# Search parameters for this run.
# An alternative multi-hashtag query would be: "#CoronaVirusUpdate OR #covid19Gr"
query = "#CoronaVirus"
date_since = "2020-03-10"
nTweets = 100

# Collect the tweets in memory only (no CSV is written) and preview the first rows.
df = twitter_bot(query=query, date_since=date_since, nTweets=nTweets, output_file=False)
df.head(10)

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Total number of 100 tweets scraped

Procedure has been completed!


Unnamed: 0,username,account_description,location,following,followers,total_tweets,total_retweets,user_created,tweet_created,text,hashtags
0,FasihAkram11,"When you pray for others, GOD listens to you &...",Lahore,284,205,8131,52,2014-05-19 17:55:09,2020-03-21 16:10:58,"I hope some of us, men, take #coronavirus seri...","[{'text': 'coronavirus', 'indices': [48, 60]}]"
1,kr3at,An independent citizen journalist that aggrega...,,7125,17714,139321,0,2010-05-02 03:52:20,2020-03-21 16:10:58,"USA reports 2,789 new cases bringing total con...","[{'text': 'CoronaVirus', 'indices': [153, 165]..."
2,TakeANumber2325,,,66,6,2620,0,2014-12-03 16:38:57,2020-03-21 16:10:58,The media continues to fail on many levels. No...,"[{'text': 'journalismisdead', 'indices': [62, ..."
3,IprctumbaS,this is iprc tumba student's union account.,,285,155,119,1,2019-09-30 16:32:01,2020-03-21 16:10:58,Hey #RwOT \nhope you are safe and staying hom...,"[{'text': 'RwOT', 'indices': [20, 25]}, {'text..."
4,CarysJAuthor,Author. Irrepressible dreamer. Loves cupcakes ...,www.carys-jones.com,207,1445,14155,53,2009-05-26 10:46:23,2020-03-21 16:10:58,Looking for a couple of #authors with books im...,"[{'text': 'authors', 'indices': [42, 50]}, {'t..."
5,SassyDee12,"Jesus✝️Wife💍Mom👩‍👧‍👧Gram🧑🏻‍🦳,RN🧑🏼‍⚕️vet⚓️proud...",Boston,1864,1833,2338,0,2018-12-08 01:21:04,2020-03-21 16:10:58,#Coronavirus Could Overwhelm U.S. Without Urge...,"[{'text': 'Coronavirus', 'indices': [0, 12]}]"
6,xakduvwi,#standwithHK #科勞手足,,252,215,207,1057,2019-11-13 16:56:09,2020-03-21 16:10:58,#Coronavirus began in China &amp; the Communis...,"[{'text': 'Coronavirus', 'indices': [21, 33]}]"
7,Rana1996P,I Can’t Control My Attitude Because It Is Not ...,,169,51,404,2749,2019-10-20 01:01:34,2020-03-21 16:10:58,The #Coronavirus is a sledgehammer blow to our...,"[{'text': 'Coronavirus', 'indices': [21, 33]}]"
8,pankajpinjarkar,Technology | Management | Economics | Polity |...,India,267,99,454,0,2010-03-28 09:35:52,2020-03-21 16:10:58,@BBCWorld Currently 'Fake news' is more danger...,"[{'text': 'coronavirus', 'indices': [55, 67]},..."
9,jagatinfinity,,,1061,327,42809,3,2014-02-08 04:20:03,2020-03-21 16:10:58,Most scary Thread on #CoronaVirus https://t.co...,"[{'text': 'CoronaVirus', 'indices': [42, 54]}]"
