# Install twitterscraper

Open the Anaconda Prompt and run `pip install twitterscraper`.

# Loading Libraries and Querying Tweets


In [1]:
import datetime as dt
import pandas as pd
import codecs,json
from twitterscraper import query_tweets

# Set up date range for the extraction: 1 Jan 2019 up to and including yesterday
begindate = dt.date(2019,1,1)
enddate = dt.date.today() - dt.timedelta(days=1)

# Querying tweets using twitterscraper's query_tweets.
# Call shape, as shown in the original notebook's reference snippet:
#   query_tweets(query = args.query, limit = args.limit,
#                begindate = args.begindate, enddate = args.enddate,
#                poolsize = args.poolsize, lang = args.lang)

list_of_tweets = query_tweets("cibc", limit = None, begindate = begindate, enddate = enddate, poolsize = 10, lang='en')

# Turn every tweet object into its own plain dict, with the timestamp rendered
# as a 'YYYY-MM-DD HH:MM:SS' string.
# vars(tweet) returns the object's live attribute dict, so it is copied via
# {**...} and the timestamp is overridden in the copy only: the tweet objects in
# list_of_tweets keep their original timestamp values and are not mutated.
# Overriding an existing key keeps its position, so the key order is unchanged.
timestamp_format = '%Y-%m-%d %H:%M:%S'
list_of_encoded_tweets = [
    {**vars(tweet), 'timestamp': tweet.timestamp.strftime(timestamp_format)}
    for tweet in list_of_tweets
]

print('--Querying Ended--')



INFO: {'User-Agent': 'Mozilla/5.0 (compatible, MSIE 11, Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko'}
INFO: queries: ['cibc since:2019-01-01 until:2019-01-26', 'cibc since:2019-01-26 until:2019-02-20', 'cibc since:2019-02-20 until:2019-03-17', 'cibc since:2019-03-17 until:2019-04-12', 'cibc since:2019-04-12 until:2019-05-07', 'cibc since:2019-05-07 until:2019-06-01', 'cibc since:2019-06-01 until:2019-06-27', 'cibc since:2019-06-27 until:2019-07-22', 'cibc since:2019-07-22 until:2019-08-16', 'cibc since:2019-08-16 until:2019-09-11']
INFO: Got 3522 tweets (3522 new).
INFO: Got 7112 tweets (3590 new).
INFO: Got 11015 tweets (3903 new).
INFO: Got 14956 tweets (3941 new).
INFO: Got 18963 tweets (4007 new).
INFO: Got 23028 tweets (4065 new).
INFO: Got 27318 tweets (4290 new).
INFO: Got 32016 tweets (4698 new).
INFO: Got 36852 tweets (4836 new).
INFO: Got 41751 tweets (4899 new).


--Querying Ended--


# Save Output as DataFrame

In [8]:
# save output as df
# Build the DataFrame straight from the list of per-tweet dicts: pandas takes
# the column names from the dict keys, so no separate values/keys lists are
# needed. Unlike indexing list_of_encoded_tweets[0], this also copes with an
# empty result set (it yields an empty DataFrame instead of raising IndexError).
df = pd.DataFrame(list_of_encoded_tweets)

print(df.head())


         username           fullname              user_id  \
0             BMO                BMO            222249603   
1      daveaurkov         daveaurkov  1068479286892523520   
2  JethroOfCanada  JethroTheCanadian           2726878401   
3             BMO                BMO            222249603   
4             BMO                BMO            222249603   

              tweet_id                                   tweet_url  \
0  1162150952390074368             /BMO/status/1162150952390074368   
1  1162150214058356739      /daveaurkov/status/1162150214058356739   
2  1162149984197914630  /JethroOfCanada/status/1162149984197914630   
3  1162142804895850496             /BMO/status/1162142804895850496   
4  1162142734846763009             /BMO/status/1162142734846763009   

             timestamp  timestamp_epochs  replies  retweets  likes  \
0  2019-08-15 23:55:53        1565913353        0         0      0   
1  2019-08-15 23:52:57        1565913177        0         0      1   
2 

# Save Output as CSV

In [10]:
# Write the tweet table to a CSV file, leaving out the DataFrame's row index
csv_filename = 'cibc_tweets.csv'
df.to_csv(csv_filename, index=False)

# Save Output as JSON

In [1]:
# save output as JSON file
# NOTE: this step is disabled; uncomment the two lines below to write
# cibc_tweets.json. The `with` block closes the file once the dump finishes.
#with open('cibc_tweets.json', 'w') as jsonfile:
#    json.dump(list_of_encoded_tweets, jsonfile)

# Using CLI/Anaconda Command

It's easier to save the output as a JSON or CSV file using the twitterscraper command-line interface (CLI).
<br>
For example:
<br>
1) twitterscraper "#cibc" -l 100 -bd 2017-01-01 -ed 2017-06-01 -o cibc_tweets.csv -c
<br>
2) twitterscraper "#cibc OR @cibc" -l 100 -bd 2017-01-01 -ed 2017-06-01 -p 10 -o cibc_tweets.json
<br>
<br>
For more info, go to https://github.com/taspinar/twitterscraper