## This notebook fetches Twitter data via the Twitter API

> import libraries

In [1]:
import pandas as pd
import tweepy
from tweepy import OAuthHandler
from tweepy import API  
from tweepy import Cursor
from datetime import datetime, date, time, timedelta
from collections import Counter
import os, sys
import csv

> Load dotenv to expose api keys to the application

In [2]:
from dotenv import load_dotenv
# Load the variables defined in ../.env so the API credentials can be read
# from the environment in the cells below.
load_dotenv('../.env')

True

In [3]:
# Names of the environment variables that hold the four Twitter credentials.
# Only the names are printed here, never the secret values.
(
    API_KEY,
    API_SECRET_KEY,
    ACCESS_TOKEN,
    ACCESS_TOKEN_SECRET,
) = ("API_KEY", "API_SECRET_KEY", "ACCESS_TOKEN", "ACCESS_TOKEN_SECRET")
print(API_KEY, API_SECRET_KEY, ACCESS_TOKEN, ACCESS_TOKEN_SECRET)

API_KEY API_SECRET_KEY ACCESS_TOKEN ACCESS_TOKEN_SECRET


In [4]:
# Look every credential up by its literal environment-variable name.
# The previous form (`API_KEY = os.environ.get(API_KEY)`) used the variable's
# current contents as the lookup key, so running the cell a second time looked
# up the secret value itself and silently replaced each credential with None.
API_KEY = os.getenv("API_KEY")
API_SECRET_KEY = os.getenv("API_SECRET_KEY")
ACCESS_TOKEN = os.getenv("ACCESS_TOKEN")
ACCESS_TOKEN_SECRET = os.getenv("ACCESS_TOKEN_SECRET")

In [5]:
# Sign requests with the application key pair, then attach the user's token.
auth = OAuthHandler(API_KEY, API_SECRET_KEY)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)

# Two clients share the same handler: `auth_api` uses the library defaults,
# while `api` is asked to wait when a rate limit is reached.
auth_api = API(auth)
api = API(auth, wait_on_rate_limit=True)

> Testing the API

In [41]:
search_words = "airquality"
date_since = "2020-03-03"

# Ask the search endpoint for English tweets matching the query from
# `date_since` onwards, requesting the untruncated text, and stop after two.
tweets = Cursor(
    api.search,
    q=search_words,
    lang="en",
    since=date_since,
    tweet_mode='extended',
).items(2)

# Print the full text of each returned tweet.
for tweet in tweets:
    print(tweet.full_text)

RT @IQAir: Cold and flu season are almost here, and air pollution could prolong your illness.  Learn the facts about air quality and protec…
Keep a constant eye on your home’s air quality in this Pandemic with this Gadget.
#topfiveme #airqualitymeters #bestairqualitymeters #airquality #easybreathing #bettersleep #cleanair #coronavirus #COVID-19 #stayhomestaysafe
Source: https://t.co/wVqBcBDpkB https://t.co/TZ7CQQTpIM


In [162]:
# Pull the two most recent timeline entries of one account and print the
# untruncated text of each.
tweets = Cursor(
    api.user_timeline,
    id='WestAfricaAQ',
    count=10,
    lang="en",
    tweet_mode='extended',
).items(2)

for tweet in tweets:
    print(tweet.full_text)

RT @AguGeohealth: Are you a #BlackGeoscientist (anywhere in the world!) who is interested in how our environment and Earth impacts human he…
RT @cleanaironea: 1/n
While this is preliminary, we have tried to firstly test our open source data mining tools plus compare current trend…


> TWITTER API ALL SET UP!

### Data Extraction

In [149]:
# Search terms used to collect air-quality related tweets. Entries are stored
# without surrounding whitespace ('#airquality ' and ' #indoorair' previously
# carried stray spaces) so they can be used directly as queries and dict keys.
hashtags = [
    '#airquality', '#cleanair', '#airpollution', '#pollution',
    '#hvac', '#airpurifier', '#indoorairquality', '#health',
    '#covid', '#air', '#climatechange', '#indoorair',
    '#environment', '#airconditioning', '#coronavirus', '#heating',
    '#mold', '#freshair', '#safety', '#ac', '#airfilter', '#allergies',
    '#hvacservice', '#ventilation', '#wellness', '#delhipollution',
    '#airconditioner', '#airqualityindex', '#bhfyp',
    'particulate matter', 'fine particulate matter', '#pm2_5',
    '#emissions', '#natureishealing', '#nature', '#pollutionfree',
    '#wearethevirus',
]

# Accounts whose timelines are collected.
accounts = ['@GhanaAQ', '@asap_eastafrica', '@WestAfricaAQ']

# City name -> (latitude, longitude) pair; a radius is appended separately
# when a geocode filter is built.
geocodes = {
    'lagos': (6.48937, 3.37709),
    'cape_town': (-33.99268, 18.46654),
    'joburg': (-26.22081, 28.03239),
    'accra': (5.58445, -0.20514),
    'nairobi': (-1.27467, 36.81178),
    'mombasa': (-4.04549, 39.66644),
    'kigali': (-1.95360, 30.09186),
    'kampala': (0.32400, 32.58662),
}

In [127]:
# Extend the Lagos coordinate pair with a third element (65), mirroring how
# get_tag_tweets combines a geocode with a radius.
(*geocodes['lagos'], 65)

(6.48937, 3.37709, 65)

In [13]:
class GetCursor():
    """Load Twitter credentials from a dotenv file and build tweepy clients.

    The class relies on the notebook's top-level imports (``os``,
    ``load_dotenv``, ``tweepy``, ``OAuthHandler``, ``API``). Methods resolve
    names from module scope, so the imports that used to sit in the class body
    had no effect and were removed.
    """

    # Environment variables that must be defined for authentication.
    _CREDENTIAL_NAMES = ("API_KEY", "API_SECRET_KEY",
                         "ACCESS_TOKEN", "ACCESS_TOKEN_SECRET")

    def __init__(self, env_file=None):
        """Load environment variables from ``env_file`` (default ``'../.env'``).

        The return value of ``load_dotenv`` is kept on ``self.env``.
        """
        self.env = load_dotenv('../.env' if env_file is None else env_file)

    def __repr__(self):
        return "Twitter API Auth Object"

    def get_auth(self):
        """Create ``self.api`` and ``self.auth_api`` from the environment.

        Raises:
            RuntimeError: if any of the four credential variables is unset
                or empty.
            tweepy.TweepError: re-raised after printing its reason if tweepy
                rejects the client setup.
        """
        credentials = {name: os.getenv(name) for name in self._CREDENTIAL_NAMES}
        missing = [name for name, value in credentials.items() if not value]
        if missing:
            # Fail loudly instead of building clients that cannot sign requests.
            raise RuntimeError(
                "Missing Twitter credentials in environment: " + ", ".join(missing)
            )

        self.__API_KEY = credentials["API_KEY"]
        self.__API_SECRET_KEY = credentials["API_SECRET_KEY"]
        self.__ACCESS_TOKEN = credentials["ACCESS_TOKEN"]
        self.__ACCESS_TOKEN_SECRET = credentials["ACCESS_TOKEN_SECRET"]

        try:
            # Pass the credential VALUES; the variable names were passed before.
            self.__auth = OAuthHandler(self.__API_KEY, self.__API_SECRET_KEY)
            self.__auth.set_access_token(self.__ACCESS_TOKEN,
                                         self.__ACCESS_TOKEN_SECRET)
            # Use this instance's handler rather than a notebook-global `auth`.
            self.api = API(self.__auth, wait_on_rate_limit=True)
            self.auth_api = API(self.__auth, retry_count=5, retry_delay=5,
                                timeout=60,
                                wait_on_rate_limit=True,
                                wait_on_rate_limit_notify=True)
        except tweepy.TweepError as e:
            # `reason` is an attribute, not a method.
            print(e.reason)
            raise
                    

In [171]:
class GetTweets(GetCursor):
    """Collect tweets for account handles and for hashtags/search terms.

    Relies on the notebook's top-level imports (``tweepy``, ``Cursor``,
    ``datetime``). Methods resolve names from module scope, so the imports
    that used to sit in the class body had no effect and were removed.
    """

    # Accounts younger than this many days (about 6 months) are flagged as bots.
    BOT_ACCOUNT_AGE_DAYS = 180

    def __init__(self, env_file=None):
        """Load credentials and build the API clients via ``get_auth``."""
        super().__init__(env_file)
        self.get_auth()
        # NOTE: this only means the clients were constructed; no request has
        # been sent to Twitter to verify the credentials at this point.
        print('Authentication successful')

    def __repr__(self):
        return "Get tweets from Hashtags -> # & Users -> @"

    # helper functions
    @staticmethod
    def limit_handled(cursor, wait_seconds=15 * 60):
        """Yield items from ``cursor``, sleeping whenever the rate limit is hit.

        Args:
            cursor: object exposing ``next()`` (e.g. a tweepy item iterator).
            wait_seconds: pause after a ``tweepy.RateLimitError``
                (default 15 minutes).
        """
        # The notebook imports `time` from `datetime` (the time-of-day class),
        # which has no sleep(); import the real module under another name.
        import time as time_module

        while True:
            try:
                yield cursor.next()
            except StopIteration:
                # End the generator cleanly; a StopIteration escaping a
                # generator body is turned into RuntimeError by Python 3.7+.
                return
            except tweepy.RateLimitError:
                time_module.sleep(wait_seconds)

    def _flag_bot(self, user) -> bool:
        """Set and return ``self.is_bot`` from the account age of ``user``."""
        created = user.created_at
        # Support both naive (UTC) and timezone-aware creation timestamps.
        if created.tzinfo is None:
            now = datetime.utcnow()
        else:
            now = datetime.now(created.tzinfo)
        account_age_days = (now - created).days
        self.is_bot = account_age_days < self.BOT_ACCOUNT_AGE_DAYS
        return self.is_bot

    def check_is_bot(self, handle) -> bool:
        """Return True when ``handle``'s account is younger than the threshold.

        The result is also stored on ``self.is_bot``. Previously the flag was
        written to a local variable, so the method always returned False.
        """
        return self._flag_bot(self.auth_api.get_user(handle))

    @staticmethod
    def _tweet_record(tweet) -> dict:
        """Reduce a tweet to the two fields this notebook stores."""
        return {'tweet_text': tweet.full_text,
                'tweet_date': tweet._json['created_at']}

    @staticmethod
    def _store(results: dict, key, records: list, keep_all: bool) -> None:
        """Save ``records`` under ``key`` in the shape the caller asked for."""
        if keep_all:
            results[key] = records
        elif records:
            # Historical shape: a single record per key, the last one iterated.
            results[key] = records[-1]

    @staticmethod
    def _format_geocode(geocode, radius=None, radius_unit="km"):
        """Build the ``"lat,long,radius<unit>"`` string for the search call.

        Returns None when ``geocode`` is None and passes strings through
        unchanged. A third tuple element is used as the radius when ``radius``
        is not given.

        Raises:
            ValueError: if fewer than two coordinates or no radius is available.
        """
        if geocode is None:
            return None
        if isinstance(geocode, str):
            return geocode
        parts = tuple(geocode)
        if len(parts) < 2:
            raise ValueError("geocode must contain latitude and longitude")
        if radius is None and len(parts) > 2:
            radius = parts[2]
        if radius is None:
            raise ValueError("a radius is required when geocode is given")
        return f"{parts[0]},{parts[1]},{radius}{radius_unit}"

    def get_handle_tweets(self, handles: list = None, items_count=20,
                          keep_all: bool = False):
        """Collect timeline tweets for each handle.

        Args:
            handles: account handles; None or empty yields an empty dict.
            items_count: maximum number of tweets to read per handle.
            keep_all: when False (default) each handle maps to one
                ``{'tweet_text', 'tweet_date'}`` dict, the last tweet iterated,
                which is the shape existing callers index into. When True each
                handle maps to a list with every tweet read.

        Returns:
            dict keyed by handle. Handles that fail the user lookup or are
            flagged as bots are skipped.
        """
        self.handles = handles if handles is not None else []
        # Created once, so results from earlier handles are not discarded and
        # an empty input no longer ends in a NameError.
        users_tweets = {}

        for handle in self.handles:
            print(f"collecting tweets of -> {handle}")
            # this helps avoid Tweepy errors like suspended users or user not found errors
            try:
                user = self.auth_api.get_user(handle)
            except tweepy.TweepError as e:
                print("found errors!!!", e)
                continue

            # check if handle is a potential bot (reuses the lookup above)
            if self._flag_bot(user):
                print('bot alert!!!, skipping the bad guy :(')
                continue

            # NOTE(review): `lang` is kept from the original call; confirm the
            # timeline endpoint actually honours it.
            timeline = Cursor(self.api.user_timeline, id=handle,
                              tweet_mode='extended',
                              lang="en").items(items_count)
            records = [self._tweet_record(tweet) for tweet in timeline]
            self._store(users_tweets, handle, records, keep_all)

        return users_tweets

    def get_tag_tweets(self, tags: list = None, geocode: tuple = None,
                       radius: int = None,
                       until_date: str = "2020-03-30", no_of_items=10,
                       keep_all: bool = False, radius_unit: str = "km"):
        """Collect English tweets matching each tag or search term.

        Args:
            tags: queries to search; None or empty yields an empty dict.
            geocode: ``(latitude, longitude)`` or
                ``(latitude, longitude, radius)`` tuple, or a ready-made
                ``"lat,long,radius"`` string.
            radius: search radius combined with ``geocode``.
            until_date: formatted as YYYY-MM-DD. Despite its name it is sent
                as the ``since`` argument of the search call; the name is kept
                for backward compatibility.
            no_of_items: maximum number of tweets to read per tag.
            keep_all: when False (default) each tag maps to one
                ``{'tweet_text', 'tweet_date'}`` dict, the last tweet iterated;
                when True each tag maps to a list with every tweet read.
            radius_unit: unit suffix appended to the radius.

        Returns:
            dict keyed by tag.

        Raises:
            ValueError: if ``geocode`` is given without a usable radius.
        """
        self.tags = tags if tags is not None else []
        # Built once, outside the loop: the radius used to be appended to the
        # geocode tuple again for every tag.
        geocode_query = self._format_geocode(geocode, radius, radius_unit)

        tags_tweets = {}
        for tag in self.tags:
            print(f"collecting tweets of -> {tag}")
            matches = tweepy.Cursor(self.api.search,
                                    q=tag, tweet_mode='extended',
                                    lang="en",
                                    since=until_date,
                                    geocode=geocode_query,
                                    ).items(no_of_items)
            records = [self._tweet_record(tweet) for tweet in matches]
            self._store(tags_tweets, tag, records, keep_all)

        return tags_tweets

    
def main():
    """Placeholder entry point; returns the work-in-progress marker."""
    return "wip"


# Run main() only when executed as a script or notebook. The previous guard
# compared __name__ with the return value of main() ("wip").
if __name__ == "__main__":
    main()

In [172]:
# Build the collector with the default env file; its constructor calls get_auth().
get_tweet= GetTweets()

Authentication successful


In [173]:
# Single-tag trial run; more entries from `hashtags` can be added once this works.
trial_tags = ['#airquality']

In [174]:
# Single-account trial run; more entries from `accounts` can be added once this works.
trial_accounts = ['@GhanaAQ']

>> test for tags

In [175]:
# Run the tag search for the trial tags with the method's default arguments.
trial_tags_result  = get_tweet.get_tag_tweets(trial_tags)

collecting tweets of -> #airquality


In [176]:
# Display the dict returned for the trial tags.
trial_tags_result

{'#airquality': {'tweet_text': 'RT @OregonDEQ: Eng: Today, OR DEQ issued an #airquality advisory for South, South Central &amp; parts of Central OR due to smoke from #wildfire…',
  'tweet_date': 'Wed Sep 30 23:29:26 +0000 2020'}}

In [177]:
# Read the stored creation timestamp of the '#airquality' entry.
trial_tags_result['#airquality']['tweet_date']

'Wed Sep 30 23:29:26 +0000 2020'

>> test for accounts 

In [178]:
# Collect timeline tweets for the trial accounts with the default item count.
trial_account_results = get_tweet.get_handle_tweets(trial_accounts)

collecting tweets of -> @GhanaAQ


In [179]:
# Display the dict returned for the trial accounts.
trial_account_results

{'@GhanaAQ': {'tweet_text': 'RT @subu_caps: Nice video of @albertpresto talking about the mobile monitoring and RAMP network in the Pittsburgh area. (Also featuring @Sr…',
  'tweet_date': 'Tue Jan 14 19:47:41 +0000 2020'}}