## This notebook fetches Twitter data via the Twitter API

> Import libraries

In [3]:
import pandas as pd
import tweepy
from tweepy import OAuthHandler
from tweepy import API  
from tweepy import Cursor
from datetime import datetime, date, time, timedelta
from collections import Counter
import os, sys
import csv

> Load the `.env` file with dotenv to expose the API keys to the application

In [4]:
# Load the variables defined in the project's .env file so that the Twitter
# credentials can later be read from the process environment.
# load_dotenv() returns a boolean; as the last expression of the cell it is
# what the notebook displays.
from dotenv import load_dotenv
load_dotenv('../.env')

True

In [5]:
# Names (not values) of the environment variables that hold the Twitter
# credentials. Printing them is safe: only the variable names are shown.
API_KEY, API_SECRET_KEY = "API_KEY", "API_SECRET_KEY"
ACCESS_TOKEN, ACCESS_TOKEN_SECRET = "ACCESS_TOKEN", "ACCESS_TOKEN_SECRET"
print(API_KEY, API_SECRET_KEY, ACCESS_TOKEN, ACCESS_TOKEN_SECRET)

API_KEY API_SECRET_KEY ACCESS_TOKEN ACCESS_TOKEN_SECRET


In [6]:
# Read each credential from the environment by its literal variable name.
# Using the literal names (instead of the variables being overwritten) keeps
# this cell idempotent: re-running it no longer looks up the secret value
# itself as if it were a variable name, which silently produced None.
# A variable that is not set yields None.
API_KEY = os.getenv("API_KEY")
API_SECRET_KEY = os.getenv("API_SECRET_KEY")
ACCESS_TOKEN = os.getenv("ACCESS_TOKEN")
ACCESS_TOKEN_SECRET = os.getenv("ACCESS_TOKEN_SECRET")

In [7]:
# Build the OAuth handler from the consumer key pair and attach the user's
# access token pair to it.
auth = OAuthHandler(API_KEY, API_SECRET_KEY)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)

# Two clients share the same handler: `auth_api` uses tweepy's defaults,
# `api` is configured with wait_on_rate_limit=True.
auth_api = API(auth)
api = API(auth, wait_on_rate_limit=True)

> Testing the API

In [88]:
# Smoke test of the search endpoint: request English tweets matching the
# search term and stop after the first two results.
search_words = "airquality"
date_since = "2020-03-03"

tweets = Cursor(
    api.search,
    q=search_words,
    tweet_mode='extended',
    lang="en",
    since=date_since,
).items(2)

# Print the `full_text` of each returned status
# (the same value is reachable as tweet._json['full_text']).
for tweet in tweets:
    print(tweet.full_text)

RT @ProfJonFairburn: 8th October is Clean Air Day - I will be one of the speakers at 12.30 onwards with community advocates, charities and…
https://t.co/lGDmdnpoue

What a great article.  This is why we LOVE what we do, brining plants to cities to support #pollinators #biodiversity, #airquality #forage #health


In [113]:
# tweets = Cursor(api.user_timeline, id='WestAfricaAQ',
#                tweet_mode='extended',
#               lang="en", pages=1).items()
# for tweet in tweets:
#     print(tweet.full_text) 

> TWITTER API ALL SET UP!

### Data Extraction

In [2]:
# Search terms used for data extraction. Most entries are hashtags; the
# entries without a leading '#' are plain-text phrases.
# Entries carry no surrounding whitespace so each one is sent as an exact
# query term.
hashtags = [
    '#airquality', '#cleanair', '#airpollution', '#pollution',
    '#hvac', '#airpurifier', '#indoorairquality', '#health',
    '#covid', '#air', '#climatechange', '#indoorair',
    '#environment', '#airconditioning', '#coronavirus', '#heating',
    '#mold', '#freshair', '#safety', '#ac', '#airfilter', '#allergies',
    '#hvacservice', '#ventilation', '#wellness', '#delhipollution',
    '#airconditioner', '#airqualityindex', '#bhfyp',
    'particulate matter', 'fine particulate matter', '#pm2_5',
    '#emissions', '#natureishealing', '#nature', '#pollutionfree',
    '#wearethevirus',
]

# Twitter account handles, written with a leading '@'.
# NOTE(review): presumably the input for the handle-based collection below;
# nothing in this file passes the list to it yet.
accounts = ['@GhanaAQ', '@asap_eastafrica', '@WestAfricaAQ']

In [31]:
class GetCursor():
    """Load Twitter credentials from a dotenv file and build tweepy clients.

    The credentials are read from the environment variables ``API_KEY``,
    ``API_SECRET_KEY``, ``ACCESS_TOKEN`` and ``ACCESS_TOKEN_SECRET``.

    The class uses the module-level names ``os``, ``tweepy``,
    ``OAuthHandler``, ``API`` and ``load_dotenv``. Imports are deliberately
    not repeated in the class body: there they would only become class
    attributes and would not be visible as bare names inside the methods.
    """

    # Environment variable names, in the order _load_credentials() returns
    # their values.
    _ENV_NAMES = (
        "API_KEY",
        "API_SECRET_KEY",
        "ACCESS_TOKEN",
        "ACCESS_TOKEN_SECRET",
    )

    def __init__(self, env_file=None):
        """Load the dotenv file.

        Args:
            env_file: Path of the dotenv file; ``'../.env'`` when ``None``.

        The return value of ``load_dotenv`` is stored in ``self.env``.
        """
        self.env = load_dotenv('../.env' if env_file is None else env_file)

    def __repr__(self):
        return "Twitter API Auth Object"

    def _load_credentials(self):
        """Return the four credential values read from the environment.

        Returns:
            A 4-tuple ``(api_key, api_secret_key, access_token,
            access_token_secret)``; an unset variable yields ``None``.
        """
        return tuple(os.getenv(name) for name in self._ENV_NAMES)

    def get_auth(self):
        """Create the OAuth handler and the two API clients.

        On success sets ``self.api`` (waits on rate limits) and
        ``self.auth_api`` (additionally retries 5 times, 5 s apart, with a
        60 s timeout). A ``tweepy.TweepError`` raised while building them is
        reported by printing its reason; in that case the client attributes
        are not set.
        """
        (self.__API_KEY,
         self.__API_SECRET_KEY,
         self.__ACCESS_TOKEN,
         self.__ACCESS_TOKEN_SECRET) = self._load_credentials()

        try:
            # Pass the credential VALUES (not the variable names) and build
            # the clients from this instance's own handler rather than from
            # a notebook-level global.
            self.__auth = OAuthHandler(self.__API_KEY, self.__API_SECRET_KEY)
            self.__auth.set_access_token(self.__ACCESS_TOKEN,
                                         self.__ACCESS_TOKEN_SECRET)
            self.api = API(self.__auth, wait_on_rate_limit=True)
            self.auth_api = API(self.__auth, retry_count=5, retry_delay=5,
                                timeout=60,
                                wait_on_rate_limit=True,
                                wait_on_rate_limit_notify=True)
        except tweepy.TweepError as e:
            # `reason` is an attribute of TweepError, not a method.
            print(e.reason)
                    

In [115]:
class GetTweets(GetCursor):
    """Collect tweet texts for search terms/hashtags and for user handles.

    Builds on ``GetCursor``: the constructor loads the credentials and
    creates the API clients (``self.api`` / ``self.auth_api``) that the
    collection methods use.

    Uses the module-level names ``tweepy``, ``Cursor`` and ``datetime``.
    """

    def __init__(self, env_file=None):
        """Load credentials from *env_file* and create the API clients."""
        super().__init__(env_file)
        self.get_auth()
        # get_auth() reports tweepy errors by printing instead of raising,
        # so only announce success when a client was actually created.
        if getattr(self, "api", None) is not None:
            print('Authentication successful')
        else:
            print('Authentication failed')

    def __repr__(self):
        return "Get tweets from Hashtags -> # & Users -> @"

    # helper functions
    @staticmethod
    def _as_list(values):
        """Return *values* as a list; ``None`` -> ``[]``, a lone str -> ``[str]``."""
        if not values:
            return []
        if isinstance(values, str):
            return [values]
        return list(values)

    @staticmethod
    def _screen_name(handle):
        """Return *handle* without a leading '@' (non-strings pass through)."""
        # Handles in this file are written as '@name'; screen names never
        # contain '@', so stripping it is safe.
        return handle.lstrip('@') if isinstance(handle, str) else handle

    @staticmethod
    def limit_handled(cursor):
        """Yield every item of *cursor*, pausing when the rate limit is hit.

        Args:
            cursor: Any iterator, typically ``Cursor(...).items()``.

        Yields:
            The items of *cursor* in order. On ``tweepy.RateLimitError`` the
            generator sleeps 15 minutes and then retries.
        """
        # The module-level name `time` is datetime.time in this file, which
        # has no sleep(); import the real module under a local name.
        import time as time_module

        while True:
            try:
                item = next(cursor)
            except StopIteration:
                # Must end the generator explicitly: a StopIteration that
                # escapes a generator body becomes RuntimeError (PEP 479).
                return
            except tweepy.RateLimitError:
                time_module.sleep(15 * 60)  # default 15mins
                continue
            yield item

    def check_is_bot(self, handle, min_account_age_days=180) -> bool:
        """Flag an account as a likely bot when it is very new.

        Heuristic only: the account is flagged when it is younger than
        *min_account_age_days* (default 180, about 6 months).

        Args:
            handle: Screen name, with or without a leading '@'.
            min_account_age_days: Minimum account age, in days, below which
                the account is flagged.

        Returns:
            ``True`` when the account is flagged; the value is also stored
            in ``self.is_bot``.

        Raises:
            tweepy.TweepError: Propagated from the user lookup (e.g. unknown
                or suspended user).
        """
        account = self.auth_api.get_user(self._screen_name(handle))
        created_at = account.created_at
        # Compare like with like: a timezone-aware creation date needs an
        # aware "now"; a naive one is compared against naive UTC.
        if created_at.tzinfo is not None:
            now = datetime.now(created_at.tzinfo)
        else:
            now = datetime.utcnow()
        account_age_days = (now - created_at).days
        self.is_bot = account_age_days < min_account_age_days
        return self.is_bot

    def get_handle_tweets(self, handles: list = None):
        """Collect the timeline texts of each handle.

        Args:
            handles: Screen names (with or without a leading '@'); a single
                string is treated as one handle. ``None`` or empty means no
                work is done.

        Returns:
            dict mapping each processed handle (as given) to the list of
            ``full_text`` values from its timeline. Handles whose lookup
            fails, or that ``check_is_bot`` flags, are skipped and absent
            from the result.
        """
        self.handles = self._as_list(handles)
        collected_handle_tweets = {}
        for handle in self.handles:
            print(f"collecting tweets for -> {handle}")
            print("Getting data for " + handle)
            # check_is_bot() performs the user lookup itself, so it doubles
            # as the guard against suspended / not-found users.
            try:
                flagged = self.check_is_bot(handle)
            except tweepy.TweepError as error:
                print(f"skipping {handle} -> {error}")
                continue
            if flagged:
                continue

            timeline = self.limit_handled(
                Cursor(self.api.user_timeline,
                       id=self._screen_name(handle),
                       tweet_mode='extended',
                       lang="en").items())
            collected_handle_tweets[handle] = [
                status.full_text for status in timeline
            ]

        return collected_handle_tweets

    def get_tag_tweets(self, tags: list = None, geocode=None, until_date=None,
                       since_date=None):
        """Collect the texts of tweets matching each search term.

        Args:
            tags: Search terms / hashtags; a single string is treated as one
                term. ``None`` or empty means no work is done.
            geocode: Forwarded unchanged as the ``geocode`` search argument.
            until_date: Upper date bound, formatted as YYYY-MM-DD; forwarded
                as ``until``.
            since_date: Optional lower date bound, formatted as YYYY-MM-DD;
                forwarded as ``since`` only when given. It replaces the
                former dependency on a notebook-level ``date_since`` global.

        Returns:
            dict mapping each tag to the list of ``full_text`` values of the
            tweets found for it.
        """
        self.tags = self._as_list(tags)
        collected_tag_tweets = {}
        for tag in self.tags:
            print(f"collecting tweets for -> {tag}")
            search_kwargs = dict(q=tag,
                                 geocode=geocode,
                                 until=until_date,
                                 truncated=False, lang="en",
                                 tweet_mode='extended')
            if since_date is not None:
                search_kwargs['since'] = since_date

            current_tag_tweets = self.limit_handled(
                Cursor(self.api.search, **search_kwargs).items())
            collected_tag_tweets[tag] = [
                tweet.full_text for tweet in current_tag_tweets
            ]

        return collected_tag_tweets

    def get_geocode_tag_tweets(self, geocode):
        """Placeholder: not implemented yet, always returns ``None``."""
        pass
    
    
def main():
    """Entry-point placeholder; returns the marker string "wip"."""
    return "wip"


# Standard entry guard: run main() only when executed as a script.
# Comparing __name__ to main()'s return value would call main() on every
# import and never match.
if __name__ == "__main__":
    main()