## This notebook fetches Twitter data via the Twitter API

> Import libraries

In [1]:
import pandas as pd
import tweepy
from tweepy import OAuthHandler
from tweepy import API  
from tweepy import Cursor
from datetime import datetime, date, time, timedelta
from collections import Counter
import os, sys
import csv

> Load the `.env` file with dotenv to expose the API keys to the application

In [2]:
from dotenv import load_dotenv

# Read the project-level .env file so the API keys become available through
# the process environment. The call's return value is the cell output.
load_dotenv(dotenv_path='../.env')

True

In [3]:
# Names of the environment variables that hold the Twitter credentials.
# Only the variable names are printed here, not the secret values.
API_KEY, API_SECRET_KEY = "API_KEY", "API_SECRET_KEY"
ACCESS_TOKEN, ACCESS_TOKEN_SECRET = "ACCESS_TOKEN", "ACCESS_TOKEN_SECRET"
print(API_KEY, API_SECRET_KEY, ACCESS_TOKEN, ACCESS_TOKEN_SECRET)

API_KEY API_SECRET_KEY ACCESS_TOKEN ACCESS_TOKEN_SECRET


In [4]:
# Look each credential up by its literal environment-variable name.
# Using the variables themselves as lookup keys made this cell non-idempotent:
# after one run they hold the secret values, so a second run would look up
# the secrets as variable names and silently get None.
API_KEY = os.getenv("API_KEY")
API_SECRET_KEY = os.getenv("API_SECRET_KEY")
ACCESS_TOKEN = os.getenv("ACCESS_TOKEN")
ACCESS_TOKEN_SECRET = os.getenv("ACCESS_TOKEN_SECRET")

In [5]:
# Build the OAuth handler from the consumer key pair, then attach the
# access token pair to it.
auth = OAuthHandler(API_KEY, API_SECRET_KEY)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)

# Two clients share the same handler: `auth_api` uses the library defaults,
# `api` additionally waits when the rate limit is hit.
auth_api = API(auth)
api = API(auth, wait_on_rate_limit=True)

> Testing the API

In [41]:
# Smoke test for the search endpoint: fetch two English tweets matching the
# search term and print their untruncated text.
search_words = "airquality"
date_since = "2020-03-03"

tweets = Cursor(
    api.search,
    q=search_words,
    tweet_mode='extended',  # needed so that `full_text` is populated
    lang="en",
    since=date_since,
).items(2)

for tweet in tweets:
    print(tweet.full_text)

RT @IQAir: Cold and flu season are almost here, and air pollution could prolong your illness.  Learn the facts about air quality and protec…
Keep a constant eye on your home’s air quality in this Pandemic with this Gadget.
#topfiveme #airqualitymeters #bestairqualitymeters #airquality #easybreathing #bettersleep #cleanair #coronavirus #COVID-19 #stayhomestaysafe
Source: https://t.co/wVqBcBDpkB https://t.co/TZ7CQQTpIM


In [11]:
# Smoke test for the timeline endpoint: print the full text of the first two
# items returned for one known account.
tweets = Cursor(
    api.user_timeline,
    id='WestAfricaAQ',
    tweet_mode='extended',
    lang="en",
    pages=1,
).items(2)

for tweet in tweets:
    print(tweet.full_text)

RT @AguGeohealth: Are you a #BlackGeoscientist (anywhere in the world!) who is interested in how our environment and Earth impacts human he…
RT @cleanaironea: 1/n
While this is preliminary, we have tried to firstly test our open source data mining tools plus compare current trend…


> TWITTER API ALL SET UP!

### Data Extraction

In [12]:
# Search terms used for tag-based collection. Entries are stored without
# surrounding whitespace so they can be used verbatim as query strings and as
# dictionary keys ('#airquality ' and ' #indoorair' previously carried stray
# spaces).
hashtags = [
    '#airquality', '#cleanair', '#airpollution', '#pollution',
    '#hvac', '#airpurifier', '#indoorairquality', '#health',
    '#covid', '#air', '#climatechange', '#indoorair',
    '#environment', '#airconditioning', '#coronavirus', '#heating',
    '#mold', '#freshair', '#safety', '#ac', '#airfilter', '#allergies',
    '#hvacservice', '#ventilation', '#wellness', '#delhipollution',
    '#airconditioner', '#airqualityindex', '#bhfyp',
    'particulate matter', 'fine particulate matter', '#pm2_5',
    '#emissions', '#natureishealing', '#nature', '#pollutionfree',
    '#wearethevirus',
]

# Accounts whose timelines are collected.
accounts = ['@GhanaAQ', '@asap_eastafrica', '@WestAfricaAQ']

In [13]:
class GetCursor():
    """Load Twitter credentials from a dotenv file and build tweepy API clients.

    The class relies on names imported by the notebook's earlier cells:
    ``os``, ``tweepy``, ``OAuthHandler``, ``API`` and ``load_dotenv``.
    (Imports placed in a class body are not visible inside its methods, so
    they were removed from here.)
    """

    # Names of the environment variables that hold the four credentials.
    _CREDENTIAL_VARS = (
        "API_KEY",
        "API_SECRET_KEY",
        "ACCESS_TOKEN",
        "ACCESS_TOKEN_SECRET",
    )

    def __init__(self, env_file=None):
        """Load environment variables from ``env_file``.

        Args:
            env_file: Path to a dotenv file. Defaults to ``'../.env'`` when
                ``None``.
        """
        self.env = load_dotenv('../.env' if env_file is None else env_file)

    def __repr__(self):
        return "Twitter API Auth Object"

    def get_auth(self):
        """Read the credentials from the environment and create the clients.

        On success two attributes are set:

        * ``self.api`` - client that waits when the rate limit is hit.
        * ``self.auth_api`` - same, plus retries (5 attempts, 5 s apart), a
          60 s timeout and a notification while waiting on the rate limit.

        Raises:
            RuntimeError: if any credential variable is unset or empty.
            tweepy.TweepError: re-raised after its reason has been printed.
        """
        credentials = {name: os.getenv(name) for name in self._CREDENTIAL_VARS}
        missing = [name for name, value in credentials.items() if not value]
        if missing:
            raise RuntimeError(
                "Missing Twitter credentials in environment: " + ", ".join(missing)
            )

        self.__API_KEY = credentials["API_KEY"]
        self.__API_SECRET_KEY = credentials["API_SECRET_KEY"]
        self.__ACCESS_TOKEN = credentials["ACCESS_TOKEN"]
        self.__ACCESS_TOKEN_SECRET = credentials["ACCESS_TOKEN_SECRET"]

        try:
            # Pass the credential *values*; the variable names were passed
            # before, which only worked because a notebook-global `auth`
            # object was then used to build the clients.
            self.__auth = OAuthHandler(self.__API_KEY, self.__API_SECRET_KEY)
            self.__auth.set_access_token(
                self.__ACCESS_TOKEN, self.__ACCESS_TOKEN_SECRET
            )
            self.api = API(self.__auth, wait_on_rate_limit=True)
            self.auth_api = API(
                self.__auth,
                retry_count=5,
                retry_delay=5,
                timeout=60,
                wait_on_rate_limit=True,
                wait_on_rate_limit_notify=True,
            )
        except tweepy.TweepError as e:
            # `reason` is an attribute, not a method.
            print(e.reason)
            raise
                    

In [84]:
class GetTweets(GetCursor):
    """Collect tweets for hashtags (``#``) and for user handles (``@``).

    Authentication happens in the constructor through ``GetCursor.get_auth``;
    every request then goes through the instance's own ``self.api`` /
    ``self.auth_api`` clients instead of notebook-level globals.

    Relies on ``tweepy`` and ``datetime`` (the class, from the ``datetime``
    module) being imported by the notebook's import cell.
    """

    def __init__(self, env_file=None):
        """Load the dotenv file, authenticate and report success.

        Args:
            env_file: Path to a dotenv file; ``None`` uses the parent default.
        """
        super().__init__(env_file)
        self.get_auth()
        print('Authentication successful')

    def __repr__(self):
        return "Get tweets from Hashtags -> # & Users -> @"

    # helper functions
    @staticmethod
    def limit_handled(cursor):
        """Yield items from ``cursor``, sleeping 15 minutes on a rate limit.

        Declared static because it takes no ``self``; it can be called on the
        class or on an instance.
        """
        # The notebook imports `time` from `datetime` (the time-of-day class),
        # which has no sleep(); import the real module under a local name.
        import time as time_module

        while True:
            try:
                yield cursor.next()
            except tweepy.RateLimitError:
                time_module.sleep(15 * 60)  # default 15mins
            except StopIteration:
                # Letting StopIteration escape a generator raises
                # RuntimeError (PEP 479); end the generator explicitly.
                return

    def check_is_bot(self, handle) -> bool:
        """Flag ``handle`` as a likely bot when its account is under 180 days old.

        The result is also stored on ``self.is_bot``.

        Args:
            handle: Screen name passed to ``auth_api.get_user``.

        Returns:
            ``True`` if the account was created less than 180 days ago.
        """
        item = self.auth_api.get_user(handle)
        account_age_days = (datetime.utcnow() - item.created_at).days
        self.is_bot = account_age_days < 180  # (6 months)
        return self.is_bot

    def get_handle_tweets(self, handles: list = None):
        """Collect the full text of every timeline tweet for each handle.

        Handles that cannot be looked up (e.g. suspended or unknown users) and
        handles flagged by ``check_is_bot`` are skipped.

        Args:
            handles: Screen names to collect. ``None`` or an empty list
                collects nothing.

        Returns:
            dict mapping each collected handle to a list of tweet texts.
        """
        self.handles = handles if handles is not None else []
        users_tweets = {}

        for handle in self.handles:
            print(f"collecting tweets for -> {handle}")
            # The user lookup inside check_is_bot doubles as the existence
            # check; this avoids tweepy errors like suspended or missing users.
            try:
                is_bot = self.check_is_bot(handle)
            except tweepy.TweepError:
                print("found errors!!!")
                continue

            if is_bot:
                print('bot alert!!!')
                continue

            timeline = tweepy.Cursor(
                self.api.user_timeline,
                id=handle,
                tweet_mode='extended',
                lang="en",
            ).items()

            collected = []
            try:
                for status in timeline:
                    collected.append(status.full_text)
            except tweepy.TweepError as e:
                # Keep whatever was fetched before the failure.
                print(f"stopped collecting for {handle}: {e}")
            users_tweets[handle] = collected

        return users_tweets

    def get_tag_tweets(self, tags: list = None, geocode=None,
                       until_date="2020-03-30", since_date="2020-03-03",
                       max_items=2):
        """Search English tweets for each tag and collect text plus timestamp.

        Args:
            tags: Search terms (hashtags or phrases). ``None`` or an empty
                list collects nothing.
            geocode: Optional geocode filter, forwarded to the search call
                only when given.
            until_date: ``YYYY-MM-DD`` string. Accepted for backward
                compatibility but not sent to the API; the original
                implementation never used it either.
            since_date: ``YYYY-MM-DD`` lower bound passed as ``since``.
                Replaces the notebook-global ``date_since`` and defaults to
                the value that global had.
            max_items: Maximum number of tweets fetched per tag.

        Returns:
            dict mapping each tag to a list of
            ``{'twwet': <full text>, 'date': <created_at string>}`` dicts.
            The ``'twwet'`` spelling is kept so existing consumers of the
            result keep working.
        """
        self.tags = tags if tags is not None else []

        search_kwargs = {
            'tweet_mode': 'extended',
            'lang': "en",
            'since': since_date,
        }
        if geocode is not None:
            search_kwargs['geocode'] = geocode

        tags_tweets = {}
        for tag in self.tags:
            print(f"collecting tweets for -> {tag}")
            current_tag_tweets = tweepy.Cursor(
                self.api.search, q=tag, **search_kwargs
            ).items(max_items)

            # Accumulate every tweet; assigning inside the loop kept only the
            # last one per tag.
            tags_tweets[tag] = [
                {'twwet': tweet.full_text, 'date': tweet._json['created_at']}
                for tweet in current_tag_tweets
            ]

        return tags_tweets

    
def main():
    """Placeholder entry point; currently only reports work-in-progress."""
    return "wip"


# Compare against the literal "__main__". The previous guard compared
# __name__ with the return value of main() ("wip"), so it could never be true.
if __name__ == "__main__":
    main()

In [85]:
# Constructing GetTweets loads the .env file, runs get_auth() and prints a
# confirmation message.
get_tweet = GetTweets()

Authentication successful


In [86]:
# Trial run with a single hashtag. Candidates left out for now:
# '#cleanair', '#airpollution', '#pollution', '#hvac', '#airpurifier'
trial = ['#airquality']

In [87]:
# Trial run with a single account. Left out for now:
# '@asap_eastafrica', '@WestAfricaAQ'
accounts = ['@GhanaAQ']

In [88]:
# Run the tag search for every entry in the trial list.
trial_tags = get_tweet.get_tag_tweets(tags=trial)

collecting tweets for -> #airquality


In [89]:
# Display the result of the trial tag search as the cell output.
trial_tags

{'#airquality': [{'twwet': "RT @BoulderCAST: It has been quite the ride today for air quality due to smoke. Looks like we've turned a corner though (for now)... #COwx…",
   'date': 'Wed Sep 30 21:29:37 +0000 2020'},
  {'twwet': "It has been quite the ride today for air quality due to smoke. Looks like we've turned a corner though (for now)... #COwx #Smoke #AirQuality #Boulderwx\n\nBoulder's latest AQI: https://t.co/5TRScpdzWg https://t.co/4k1NUN8x0O",
   'date': 'Wed Sep 30 21:28:11 +0000 2020'}]}