# Fetching tweets from Twitter API using Tweepy

Twitter API V2 Official Documentation: Get to know more about parameters <br>
[Link](https://github.com/twitterdev/getting-started-with-the-twitter-api-v2-for-academic-research/blob/main/modules/5-how-to-write-search-queries.md)

In [1]:
# import libraries 
import matplotlib.pyplot as plt 
from twarc import Twarc2,expansions 
import tweepy 
import configparser
import time
import pandas as pd

## Authentication 

### 1. Read Configs 

By using `configparser` library, the authorization files can be stored seperatly without hurting the availibility of sharing working files. 

In [2]:
config = configparser.ConfigParser(interpolation=None)
config. read('config.ini')

api_key = config['twitter']['api_key']
api_key_secret = config['twitter']['api_key_secret']
access_token = config['twitter']['access_token']
access_token_secret = config['twitter']['access_token_secret']
bearer_token = config['twitter']['bearer_token']

### 2. Authenticate 
Authenticate the account/app to the Twitter API. 

In [3]:
from multiprocessing.connection import wait


auth = tweepy.OAuthHandler(api_key, api_key_secret)
auth.set_access_token(access_token, access_token_secret)

# create the API instance
twarc2_client = tweepy.API(auth)

# or use Tweepy 
client = tweepy.Client(bearer_token = bearer_token, wait_on_rate_limit= True)

## Fetching Data

### 1. user search

In [4]:
user = 'Nike'

# fetch the last 100 tweets 
limit = 300 

# prevent to reach the cap 
tweets = tweepy.Cursor(twarc2_client.user_timeline, 
              screen_name = user, 
              count = 200,
              tweet_mode = 'extended').items(limit)

# tweets = api.user_timeline(
#                             screen_name = user, 
#                             cont = limit,
#                             tweet_mode = 'extended', # prevent the API to truncate only 140 characters
#                             ) 

# create DataFrame
columns = ['user_id','user_location','user_name','text']
data = []

for tweet in tweets: 
    # the tab provides multiple parameter selection 
    data.append([tweet.user.id, 
                tweet.user.location, 
                tweet.user.screen_name, 
                tweet.full_text])

df1 = pd.DataFrame(data, columns = columns)
df1.head()

Unnamed: 0,user_id,user_location,user_name,text
0,415859364,"Beaverton, Oregon",Nike,@Skyblues2i C'est super dans ce cas ! Tr√®s bon...
1,415859364,"Beaverton, Oregon",Nike,"@Moha_lassel Salut, nous sommes navr√©s de voir..."
2,415859364,"Beaverton, Oregon",Nike,"@anissa_zaraoui Bonjour, peux-tu venir en DM e..."
3,415859364,"Beaverton, Oregon",Nike,"@Skyblues2i Bonjour, peux-tu venir en DM et no..."
4,415859364,"Beaverton, Oregon",Nike,@deejaysoulrebel Tell your son Nike says to ke...


### 2. keywords or hashtags search 

In [5]:
keywords = 'sneakers'
limit = 500 # why must add limit? 

tweets_keyword_search = tweepy.Cursor(
              twarc2_client.search_tweets, 
              q= keywords, 
              count = 100,
              tweet_mode = 'extended').items(limit)

# create DataFrame
columns = ['author_id','author_name','created_time','location','text']
data = []

for tweet in tweets_keyword_search: 
    data.append([tweet.author.id, 
                 tweet.author.name,
                 # time can be more granualler depending on the need 
                 tweet.created_at,
                 tweet.user.location, 
                 tweet.full_text])

df2 = pd.DataFrame(data, columns = columns)
df2.head()

Unnamed: 0,author_id,author_name,created_time,location,text
0,1279230639544373248,magnito,2022-10-10 14:54:33+00:00,,RT @zenmagafrica: Sneakers and Tracksuits on S...
1,497299265,Twini,2022-10-10 14:54:28+00:00,,RT @martinicandoll: Les sneakers üò≠ je trouve √ß...
2,1569548409719394304,CozeUnisex,2022-10-10 14:54:21+00:00,,RT @zenmagafrica: Sneakers and Tracksuits on S...
3,2879236727,RA ‚öΩ,2022-10-10 14:54:17+00:00,,RT @zenmagafrica: Sneakers and Tracksuits on S...
4,1314504233207525376,Susmoy Hredoy üî∂,2022-10-10 14:54:14+00:00,"Mymensingh, Bangladesh",RT @amazyio: üöÄ GIVEAWAY 100+ NFT SNEAKERS AMAZ...


### 3. Full-Archive Search
Get more than 500 Tweets at a time using `paginator`, which will automately go for the next page.

[Reference](https://www.youtube.com/watch?v=rQEsIs9LERM)

In [6]:
# set query, parameters changable 
input_query = 'sneakers -is:retweet lang:en place_country:US'

def full_archive_search(input_query): 
    """
    Function for API query with input parameters
    ----------------------
    Input: query 
    Output: API request result

    """
    result = []

    for response in tweepy.Paginator(client.search_all_tweets, 
                                    # doesn't include the retweet
                                    query = input_query,
                                    user_fields = ['username', 'public_metrics', 'description', 'location'],
                                    tweet_fields = ['created_at', 'geo', 'public_metrics', 'text'],
                                    expansions = 'author_id',
                                    start_time = '2022-08-01T00:00:00Z',
                                    end_time = '2022-09-30T23:59:59Z',
                                    max_results=500, limit = 20):
    # the `search_all_tweets` has per second limit, therefore should wait for a second before the code progresses                              
        time.sleep(1)
        result.append(response)

    return result

tweepy_query = full_archive_search(input_query)

In [7]:
def full_archive_search_df(tweepy_query):

    """
    Function for reformat the function the query into dataframe.
    --------------------
    Input: the tweepy query result 
    Output: pandas dataframe format of query result

    """
    result = []
    user_dict = {}

    # loop through each response object
    for response in tweepy_query:
        
        # take all of the users, and put them into a dictionary of dictionaries with the info we want to keep
        for user in response.includes['users']:
            user_dict[user.id] = {'username': user.username, 
                                'followers': user.public_metrics['followers_count'],
                                'tweets': user.public_metrics['tweet_count'],
                                'description': user.description,
                                'location': user.location
                                }

        # for each tweet, find the author information                        
        for tweet in response.data:
            author_info = user_dict[tweet.author_id]

            # put all of the information we want to keep in a single dictionary for each tweet 
            result.append({'author_id': tweet.author_id, 
                        'username': author_info['username'],
                        'author_followers': author_info['followers'],
                        'author_tweets': author_info['tweets'],
                        'author_description': author_info['description'],
                        'author_location': author_info['location'],
                        'text': tweet.text,
                        'created_at': tweet.created_at,
                        'retweets': tweet.public_metrics['retweet_count'],
                        'replies': tweet.public_metrics['reply_count'],
                        'likes': tweet.public_metrics['like_count'],
                        'quote_count': tweet.public_metrics['quote_count']
                        })

    tweepy_query_df = pd.DataFrame(result)
    return tweepy_query_df

sneakers_df = full_archive_search_df(tweepy_query)

In [8]:
sneakers_df.shape

(2786, 12)

In [9]:
sneakers_df.head()

Unnamed: 0,author_id,username,author_followers,author_tweets,author_description,author_location,text,created_at,retweets,replies,likes,quote_count
0,1493612427304488965,1KHABYS,128,2236,Be ¬•a $elf or Bring ¬•a $hooters! üëÄüéÆ BYS LLC‚Ñ¢Ô∏è ...,Around da corner‚≠êÔ∏è,Privileged never paid over box price for sneakers,2022-09-30 23:28:41+00:00,0,0,0,0
1,29560488,CarrieMae_,344,12287,"the devil works hard, kris jenner works harder","Brooklyn, NY",Trying to have a peaceful evening and at the r...,2022-09-30 23:21:03+00:00,0,0,0,0
2,22680919,MattHalfhill,13052,540,Founder + CEO of @nicekicks. Product of Fresno.,"Austin, TX",Drop sneakers at a job fair if you don‚Äôt want ...,2022-09-30 23:13:22+00:00,111,45,1196,32
3,997270922976481282,JEFF_SON_334,614,3830,"Husband, Father to a son ,COOL MF in General U...","Montgomery, AL",I hate the fact that Puma ain‚Äôt got no sneaker...,2022-09-30 22:36:38+00:00,0,0,0,0
4,101915799,kwamemorgan,847,25453,Follow my IG : @Kwamemorgan,"√úT: 38.899236,-76.797741",@1_Bundles You know my lil buddies gone geek t...,2022-09-30 22:33:35+00:00,0,1,0,0


### 4. Full-Archive Search for brands 

#### 4.1 Nike

In [10]:
# Nike 
input_query = 'nike -is:retweet lang:en place_country:US'
nike_query = full_archive_search(input_query)
nike_df = full_archive_search_df(nike_query)

In [11]:
print(nike_df.shape)
nike_df.head()

(7877, 12)


Unnamed: 0,author_id,username,author_followers,author_tweets,author_description,author_location,text,created_at,retweets,replies,likes,quote_count
0,87144412,GarrettKGray,376,12086,Land Economist & Economic Development Speciali...,"Coos Bay, OR",@ShaneDaleAZ Totally. The Nike uniforms since ...,2022-09-30 23:46:59+00:00,0,0,0,0
1,492330913,LockDown_Lopes,470,97876,"@nicekicks, sports, & memes | University of Ar...","Scottsdale, AZ",Hats off to Tom Sachs and the marketing team a...,2022-09-30 23:43:45+00:00,0,0,0,0
2,37706001,RyanGensler,6683,13623,315 Born and Raised: Assistant Basketball Coac...,"Champaign, IL",The look on @makiracook face! üòÇ \n\nThanks @Ni...,2022-09-30 23:38:15+00:00,1,1,27,0
3,17417435,ShellzBoss,564,22093,"#TeamLibra #TeamLesbian Hibernating, should be...","Maryland, Michigan",Check out my new pickup from Nike‚Å† SNKRS: http...,2022-09-30 23:35:28+00:00,0,1,0,0
4,853714067692806144,DJKingJam,395,3774,Jordan Shoe collector || DJ Jamez || Music Pro...,"Seattle, WA",@jameslfreelance @Jumpman23 @Nike @nikestore O...,2022-09-30 23:15:57+00:00,0,0,2,0


In [12]:
nike_df['text'][0]

'@ShaneDaleAZ Totally. The Nike uniforms since have replaced a distinguished/identifiable look to enhance their own brand at the expense of Arizona.'

### 4.2 New Balance

In [13]:
# newbalance 
input_query = 'newbalance -is:retweet lang:en place_country:US'
nb_query = full_archive_search(input_query)
nb_df = full_archive_search_df(nb_query)

In [14]:
print(nb_df.shape)
nb_df['text'][200]

(239, 12)


'The sun drained me, today. But we came out and I got it done. #newbalance #runinrabbit #suunto9 @ Joe B Freeman Park https://t.co/Z6EhN0eLG6'

### 4.3 Adidas

In [15]:
# adidas  
input_query = 'adidas -is:retweet lang:en place_country:US'
adidas_query = full_archive_search(input_query)
adidas_df = full_archive_search_df(adidas_query)

In [16]:
print(adidas_df.shape)
adidas_df['text'][178]

(2347, 12)


'You don‚Äôt need adidas or a zebra to see my stripes @Mr_Camron üå™üî•üî•üî•'

### 4.4 Converse 

In [17]:
# converse  
input_query = 'converse -is:retweet lang:en place_country:US'
converse_query = full_archive_search(input_query)
converse_df = full_archive_search_df(converse_query)

In [18]:
print(converse_df.shape)
converse_df['text'][169]

(942, 12)


'@carizmen @Converse Me encantaron ‚ù§Ô∏è'

### 4.5 Reebok

In [19]:
# reebok  
input_query = 'reebok -is:retweet lang:en place_country:US'
reebok_query = full_archive_search(input_query)
reebok_df = full_archive_search_df(reebok_query)

In [20]:
print(reebok_df.shape)
reebok_df['text'][152]

(243, 12)


'@chinababee Well if you‚Äôre only talking about one song vs the other then sure, but lets talk about his Reebok collection though, his new crackhead appearance, his new bitch, him continuously claiming he‚Äôs #1 worldwide when we all know who is lmaooo I can keep going.'

In [21]:
len(reebok_df)

243

### 4.6 Combine the info 

In [22]:
# text count of each brand 
d = {'brand':['Nike','New Balance','Adidas','Converse','Reebok'],
     'count':[len(nike_df), len(nb_df), len(adidas_df),len(converse_df),len(reebok_df)]}

brand_text_count = pd.DataFrame(columns=['brand','count'], data = d)

brand_text_count.sort_values(by = ['count'],ascending = False)

Unnamed: 0,brand,count
0,Nike,7877
2,Adidas,2347
3,Converse,942
4,Reebok,243
1,New Balance,239


In [50]:
# full dataset 
df = pd.concat([nike_df, adidas_df, nb_df, converse_df, reebok_df], axis = 0 )
df.shape

(11648, 12)

In [51]:
# export the five brands twitter data in the data folder 
os.chdir('C:\\Users\\hs324\\OneDrive\\Desktop\\Class_Files\\06_2022Fall\\04_Practicum\\Quantilope_Core\\data')

df.to_csv('five_brands_text.csv',index=False)