## Using the Tweepy module in Python to retrieve tweets from the handle "@midasIIITD"

In [None]:
# Importing required modules
import tweepy
from tweepy import Cursor

# Modules for storing in jsonl file
import json
import jsonlines

# For displaying the results in a dataframe
import pandas as pd

# tqdm module for progress bar
from tqdm import tqdm

In [None]:
# Placeholder Twitter credentials -- replace each string with your own value before running.
# Consumer (application) key pair
consumer_key, consumer_secret = 'CONSUMER_KEY', 'CONSUMER_SECRET_KEY'
# Access (user) token pair
access_token, access_token_secret = 'ACCESS_TOKEN', 'ACCESS_TOKEN_SECRET'

In [None]:
# Authentication with the Twitter API
# Build the OAuth handler from the consumer key pair, then attach the access token pair to it.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# API client built on the handler; the timeline query in a later cell goes through this object.
api = tweepy.API(auth)
# NOTE(review): presumably no request has been sent to Twitter at this point, so this
# message does not prove the credentials are valid -- confirm against the tweepy docs.
print('Authorised!')

In [None]:
# Twitter handle whose timeline is fetched (passed as the screen name of the query)
uname = "@midasIIITD"

In [None]:
# Cursor implementation of the Twitter API for pagination

# Query the timeline of the account whose screen name is stored in 'uname'.
# count=200 requests the largest page the user_timeline endpoint allows, so the
# Cursor needs far fewer HTTP requests than with the default page size; the
# tweets yielded are the same.
# NOTE: the endpoint only exposes a user's most recent tweets (about 3,200),
# not the complete history.
# .items() gives a lazy iterator: pages are fetched only as it is consumed.
tweets = Cursor(api.user_timeline, screen_name=uname, count=200).items()

In [None]:
# Reset the jsonl dump before the tweets are appended to it: opening the file in
# write mode creates it (or empties an existing one); nothing is written here and
# the writer is closed again as soon as the block exits.
with jsonlines.open("content_dump.jsonl", "w") as writer:
    pass

In [None]:
# Iterating over all the tweets and storing required content in a dictionary 'store_dict'

def _count_photos(tweet_json):
    """Return the number of photos attached to a tweet.

    tweet_json is the raw tweet payload (a dict). 'extended_entities' is
    preferred because 'entities' lists only the first attachment of a tweet,
    which would cap the count at 1 for multi-photo tweets. 'entities' is the
    fallback when the extended section is absent.
    """
    section = tweet_json.get("extended_entities") or tweet_json.get("entities") or {}
    return sum(1 for media in section.get("media", []) if media.get("type") == "photo")


# Open the dump once in append mode instead of reopening it for every tweet;
# the 'with' block still closes (and flushes) the file if the download fails midway.
with jsonlines.open("content_dump.jsonl", "a") as writer:
    for tweet in tqdm(tweets):
        raw = tweet._json

        # Fetching the number of images in a tweet
        image_count = _count_photos(raw)

        store_dict = {
            'text': raw['text'],
            'date_time': raw['created_at'],
            'favorite_count': raw['favorite_count'],
            'retweet_count': raw['retweet_count'],
            'image_count': image_count,
        }

        # Writing(appending) into the file the values stored in 'store_dict'
        writer.write(store_dict)

In [None]:
# Identifying the keys to feed into a dataframe
keys = ['text', 'date_time', 'favorite_count', 'retweet_count', 'image_count']

print("Reading from jsonl file......")

# The dump is written as UTF-8 by jsonlines, so it is read back with an explicit
# encoding rather than the platform default (which breaks on non-ASCII tweets on
# some systems). The handle gets its own name so it does not overwrite the
# 'tweets' iterator created earlier.
with open("content_dump.jsonl", "r", encoding="utf-8") as dump_file:
    # Using the Reader object to iterate over the tweets; each item is one stored dictionary
    records = list(tqdm(jsonlines.Reader(dump_file)))

# Creating the dataframe in one step from all records. DataFrame.append was
# removed in pandas 2.0, and growing a frame row by row copies it on every
# iteration. An empty dump still yields an empty frame with the expected columns.
display_df = pd.DataFrame(records, columns=keys)

# Displaying only the first 30 records stored in the jsonl file
print(display_df.head(30))