# Fetching tweets from Twitter API using Twarc and Tweepy

Twitter API V2 Official Documentation: Get to know more about parameters <br>
[Link](https://github.com/twitterdev/getting-started-with-the-twitter-api-v2-for-academic-research/blob/main/modules/5-how-to-write-search-queries.md)

In [22]:
# import libraries 
import matplotlib.pyplot as plt 
from twarc import Twarc2,expansions 
import tweepy 
import configparser
import time
import pandas as pd

## Authentication 

### 1. Read Configs 

By using `configparser` library, the authorization files can be stored seperatly without hurting the availibility of sharing working files. 

In [13]:
config = configparser.ConfigParser(interpolation=None)
config. read('config.ini')

api_key = config['twitter']['api_key']
api_key_secret = config['twitter']['api_key_secret']
access_token = config['twitter']['access_token']
access_token_secret = config['twitter']['access_token_secret']
bearer_token = config['twitter']['bearer_token']

In [3]:
print(bearer_token)

AAAAAAAAAAAAAAAAAAAAAOoRhwEAAAAAoltExyr68LLCseYjV%2FP8C14YO%2Fs%3DOhgscT7YtgWhudmS3J1bDYvY3iIE2LCnf7NF62J5QMCUPppwmg


### 2. Authenticate 
Authenticate the account/app to the Twitter API. 

In [19]:
from multiprocessing.connection import wait


auth = tweepy.OAuthHandler(api_key, api_key_secret)
auth.set_access_token(access_token, access_token_secret)

# create the API instance
twarc2_client = tweepy.API(auth)

# or use Tweepy 
client = tweepy.Client(bearer_token = bearer_token, wait_on_rate_limit= True)

## Fetching Data

### 1. user search

In [16]:
user = 'Nike'

# fetch the last 100 tweets 
limit = 300 

# prevent to reach the cap 
tweets = tweepy.Cursor(twarc2_client.user_timeline, 
              screen_name = user, 
              count = 200,
              tweet_mode = 'extended').items(limit)

# tweets = api.user_timeline(
#                             screen_name = user, 
#                             cont = limit,
#                             tweet_mode = 'extended', # prevent the API to truncate only 140 characters
#                             ) 

# create DataFrame
columns = ['user_id','user_location','user_name','text']
data = []

for tweet in tweets: 
    # the tab provides multiple parameter selection 
    data.append([tweet.user.id, 
                tweet.user.location, 
                tweet.user.screen_name, 
                tweet.full_text])

df1 = pd.DataFrame(data, columns = columns)
df1.head()

Unnamed: 0,user_id,user_location,user_name,text
0,415859364,"Beaverton, Oregon",Nike,"@kam_htl Hey, @kam_htl nous sommes navrés de v..."
1,415859364,"Beaverton, Oregon",Nike,"@sarahmaispasla Eh bien, tu as été chanceuse ! 😀"
2,415859364,"Beaverton, Oregon",Nike,"@MeganTo09520759 Hey Megan, sorry to hear. For..."
3,415859364,"Beaverton, Oregon",Nike,"@sarahmaispasla Hello,\nL'offre anniversaire e..."
4,415859364,"Beaverton, Oregon",Nike,"@Skyblues2i Hello,\nL'offre anniversaire est e..."


### 2. keywords or hashtags search 

In [18]:
keywords = 'sneakers'
limit = 500 # why must add limit? 

tweets_keyword_search = tweepy.Cursor(
              twarc2_client.search_tweets, 
              q= keywords, 
              count = 100,
              tweet_mode = 'extended').items(limit)

# create DataFrame
columns = ['author_id','author_name','created_time','location','text']
data = []

for tweet in tweets_keyword_search: 
    data.append([tweet.author.id, 
                 tweet.author.name,
                 # time can be more granualler depending on the need 
                 tweet.created_at,
                 tweet.user.location, 
                 tweet.full_text])

df2 = pd.DataFrame(data, columns = columns)
df2.head()

Unnamed: 0,author_id,author_name,created_time,location,text
0,507865095,nerdom,2022-10-05 21:31:27+00:00,"Space, CA",Nike LeBron 19 Chosen 1 One Allstars Mens Size...
1,3244118543,",,",2022-10-05 21:31:13+00:00,,RT @CaminoTV: La meilleure sneakers Lacoste ju...
2,3270330139,Wakanda Forever.,2022-10-05 21:31:11+00:00,wakanda,"RT @rogxmor: Fuck the snkrs app, fuck the nike..."
3,1470482906325962758,JD,2022-10-05 21:31:10+00:00,,RT @PWCCmarketplace: An unreal view of Kobe Br...
4,1559926366212182021,AY-Tex,2022-10-05 21:31:01+00:00,Abuja,RT @xaynab_jay: Sneakers shopping 🤩\nAvailable...


### 3. Full-Archive Search
Get more than 500 Tweets at a time using `paginator`, which will automately go for the next page.

[Reference](https://www.youtube.com/watch?v=rQEsIs9LERM)

In [35]:
# set query, parameters changable 
input_query = 'sneakers -is:retweet lang:en place_country:US'

def full_archive_search(input_query): 
    result = []

    for response in tweepy.Paginator(client.search_all_tweets, 
                                    # doesn't include the retweet
                                    query = input_query,
                                    user_fields = ['username', 'public_metrics', 'description', 'location'],
                                    tweet_fields = ['created_at', 'geo', 'public_metrics', 'text'],
                                    expansions = 'author_id',
                                    start_time = '2022-08-01T00:00:00Z',
                                    end_time = '2022-09-30T23:59:59Z',
                                    max_results=500, limit = 20):
    # the `search_all_tweets` has per second limit, therefore should wait for a second before the code progresses                              
        time.sleep(1)
        result.append(response)

    return result

tweepy_query = full_archive_search(input_query)

In [37]:
def full_archive_search_df(tweepy_query):

    """
    Function for reformat the function the query into dataframe.
    --------------------
    Input: the tweepy query
    Output: pandas dataframe format of query result

    """
    result = []
    user_dict = {}

    # loop through each response object
    for response in tweepy_query:
        
        # take all of the users, and put them into a dictionary of dictionaries with the info we want to keep
        for user in response.includes['users']:
            user_dict[user.id] = {'username': user.username, 
                                'followers': user.public_metrics['followers_count'],
                                'tweets': user.public_metrics['tweet_count'],
                                'description': user.description,
                                'location': user.location
                                }

        # for each tweet, find the author information                        
        for tweet in response.data:
            author_info = user_dict[tweet.author_id]

            # put all of the information we want to keep in a single dictionary for each tweet 
            result.append({'author_id': tweet.author_id, 
                        'username': author_info['username'],
                        'author_followers': author_info['followers'],
                        'author_tweets': author_info['tweets'],
                        'author_description': author_info['description'],
                        'author_location': author_info['location'],
                        'text': tweet.text,
                        'created_at': tweet.created_at,
                        'retweets': tweet.public_metrics['retweet_count'],
                        'replies': tweet.public_metrics['reply_count'],
                        'likes': tweet.public_metrics['like_count'],
                        'quote_count': tweet.public_metrics['quote_count']
                        })

    tweepy_query_df = pd.DataFrame(result)
    return tweepy_query_df

sneakers_df = full_archive_search_df(tweepy_query)

In [38]:
sneakers_df.shape

(2783, 12)

In [39]:
sneakers_df.head()

Unnamed: 0,author_id,username,author_followers,author_tweets,author_description,author_location,text,created_at,retweets,replies,likes,quote_count
0,1493612427304488965,1KHABYS,127,2163,Be ¥a $elf or Bring ¥a $hooters! 👀🎮 BYS LLC™️ ...,Around da corner⭐️,Privileged never paid over box price for sneakers,2022-09-30 23:28:41+00:00,0,0,0,0
1,29560488,CarrieMae_,344,12286,"the devil works hard, kris jenner works harder","Brooklyn, NY",Trying to have a peaceful evening and at the r...,2022-09-30 23:21:03+00:00,0,0,0,0
2,22680919,MattHalfhill,13056,533,Founder + CEO of @nicekicks. DMs are open. sz 11,"Austin, TX",Drop sneakers at a job fair if you don’t want ...,2022-09-30 23:13:22+00:00,112,46,1197,32
3,997270922976481282,JEFF_SON_334,609,3805,"Husband, Father to a son ,COOL MF in General U...","Montgomery, AL",I hate the fact that Puma ain’t got no sneaker...,2022-09-30 22:36:38+00:00,0,0,0,0
4,101915799,kwamemorgan,847,25441,Follow my IG : @Kwamemorgan,"ÜT: 38.899236,-76.797741",@1_Bundles You know my lil buddies gone geek t...,2022-09-30 22:33:35+00:00,0,1,0,0
