# Initializing Tweepy API Instance

In [None]:
import os
import tweepy

# Twitter credentials come from the environment. os.environ.get returns None
# for a variable that is not set, so a missing credential is not detected here.
(
    CONSUMER_KEY,
    CONSUMER_SECRET,
    ACCESS_TOKEN,
    ACCESS_TOKEN_SECRET,
) = (
    os.environ.get(variable)
    for variable in (
        "CONSUMER_KEY",
        "CONSUMER_SECRET",
        "ACCESS_TOKEN",
        "ACCESS_TOKEN_SECRET",
    )
)

# Build the OAuth handler from the consumer pair, then attach the access token.
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)

# Shared client used by the rest of the notebook.
api = tweepy.API(
    auth,
    wait_on_rate_limit=True,
    wait_on_rate_limit_notify=False,
)

In [None]:
# Search areas per city.
#
# Every entry is a three-element list that createGeocode renders as
# "<p0>,<p1>,<p2>km"; the first two values are presumably latitude and
# longitude, the third is a radius in kilometres.
#
# NOTE(review): the numeric tag kept next to each city name (0 / 2) comes from
# the original notebook; its meaning is not documented here.

# Karachi (tag: 0)
Karachi = [
    [25.021844, 67.290971, 22],
    [25.432985, 67.306322, 9],
    [25.395774, 67.295336, 10],
    [25.311384, 67.292589, 12],
    [25.209543, 67.273363, 17],
    [25.142429, 67.300829, 25],
    [25.003414, 67.057641, 7],
    [24.903400, 66.812640, 5],
    [24.859432, 67.051105, 25],
    [24.923171, 67.381485, 15],
]

# Lahore (tag: 0)
Lahore = [
    [31.558596, 74.376605, 14],
    [31.423887, 74.335050, 14],
    [31.302613, 74.201341, 3],
    [31.376580, 74.197494, 6],
]

# Islamabad (tag: 2)
Islamabad = [
    [33.663429, 72.901708, 8],
    [33.596235, 72.867393, 5],
    [33.709498, 73.074380, 5.5],
    [33.674092, 73.048638, 2.5],
    [33.690704, 73.199413, 10],
    [33.639984, 73.219902, 9],
    [33.512778794168945, 73.19197468523352, 5.3025],
    [33.548753, 73.230600, 5],
    [33.685959, 72.981599, 7.5],
    [33.51963793680349, 73.12640712868851, 2.3632914299404906],
]

# Quetta (tag: 2)
Quetta = [
    [30.080933, 66.960185, 4.5],
    [30.163010, 66.987202, 4.3],
    [30.204778, 67.002995, 5.5],
    [30.227309, 67.055644, 3.8],
    [30.238115, 67.100313, 3.3],
    [30.238997, 67.012762, 7.6],
    [30.267661, 66.931336, 4],
    [30.314606, 66.926487, 2.5],
    [30.271188, 66.915511, 3],
    [30.120738695676636, 66.97116785648417, 3.242853615283123],
]

# Peshawar (tag: 0)
Peshawar = [
    [34.007387, 71.528380, 11],
    [33.930931, 71.507817, 8],
    [34.018612, 71.674968, 7.6],
    [34.074802, 71.614244, 3.2],
    [34.000716, 71.386124, 1.3],
    [34.008504, 71.408680, 1.35],
]

# City name -> list of search areas. The insertion order below is the order in
# which cities are processed when this mapping is iterated.
cities_point_radius = {
    "Quetta": Quetta,
    "Peshawar": Peshawar,
    "Islamabad": Islamabad,
    "Karachi": Karachi,
    "Lahore": Lahore,
}


def createGeocode(point):
    """Format a search area as a ``"<p0>,<p1>,<p2>km"`` geocode string.

    ``point`` is an indexable of at least three values, for example
    ``[30.080933, 66.960185, 4.5]``; only the first three are used.
    """
    first, second, radius = point[0], point[1], point[2]
    return f"{first},{second},{radius}km"

## Tweet Class

In [None]:
class Tweet:
    """Simple record holding the fields of a tweet that get stored."""

    def __init__(self, tweet_id, text, tags, created_at):
        # Values are stored as given; no validation or conversion happens here.
        self.tweet_id, self.text = tweet_id, text
        self.tags, self.created_at = tags, created_at

    def __str__(self):
        """Return a one-line summary; fields are separated by two spaces."""
        parts = (
            f'Tweet ID:{self.tweet_id}',
            f'Tags:{self.tags}',
            f'Created At:{self.created_at}',
            f'Text:{self.text}',
        )
        return '  '.join(parts)

## Methods for processing a status object

In [None]:
import pandas as pd
from datetime import datetime

def processStatus(tweetID)->Tweet:
    """Fetch the status with id ``tweetID`` and wrap its fields in a Tweet.

    The status is requested through the module-level ``api`` client with
    ``tweet_mode="extended"``; text, hashtags and creation date are pulled
    out by the extract* helpers.
    """
    fetched = api.get_status(tweetID, tweet_mode="extended")

    return Tweet(
        tweet_id=tweetID,
        text=extractFullText(fetched),
        tags=extractTags(fetched),
        created_at=extractDate(fetched),
    )
    

def extractFullText(status)->str:
    """Return the full text of a status, preferring the retweeted original.

    If ``status.retweeted_status.full_text`` cannot be read (the status is not
    a retweet), ``status.full_text`` is returned instead.
    """
    notFound = object()
    retweeted = getattr(status, "retweeted_status", None)
    originalText = getattr(retweeted, "full_text", notFound)

    if originalText is notFound:  # Not a Retweet
        return status.full_text
    return originalText
    
def extractTags(status)->str:
    """Return the status's hashtag texts as one space-separated string.

    Hashtags are read from ``status._json['entities']['hashtags']``; a status
    without hashtags yields an empty string.
    """
    hashtags = status._json['entities']['hashtags']
    return " ".join(str(entry['text']) for entry in hashtags).strip()
    

def extractDate(status)->str:
    """Return the ``created_at`` value from the status's raw JSON, unchanged."""
    return status._json['created_at']

## Methods to save tweets to CSV

In [None]:
import pandas as pd

def createDataFrame(tweets:list[Tweet]):
    """Build a DataFrame with one row per tweet.

    Columns, in order: "ID", "Tags", "Created At", "Text".
    """
    # Column header -> name of the Tweet attribute that fills it.
    columnSources = {
        "ID": "tweet_id",
        "Tags": "tags",
        "Created At": "created_at",
        "Text": "text",
    }

    columns = {
        header: [getattr(tweet, attribute) for tweet in tweets]
        for header, attribute in columnSources.items()
    }
    return pd.DataFrame(columns)

def saveDfToCSV(df, ofDate:str, ofCity:str):
    """Write ``df`` to ``Data/<ofCity>/<ofDate>_<ofCity>.csv`` without the index.

    The target directory is created when it does not exist yet, so the first
    save for a new city no longer fails.

    Args:
        df: DataFrame to store.
        ofDate: Date label used as the file name prefix.
        ofCity: City name; used as both sub-directory and file name suffix.
    """
    directory = os.path.join('Data', ofCity)
    # to_csv does not create missing parent directories.
    os.makedirs(directory, exist_ok=True)

    path = os.path.join(directory, f"{ofDate}_{ofCity}.csv")
    df.to_csv(path, index=False)

    print(f'SAVED! Total Tweets: {df.shape[0]} from: {ofCity}  On: {ofDate}')
    

    
def saveTweets(tweets:list[Tweet], ofDate:str, ofCity:str):
    """Turn ``tweets`` into a DataFrame and store it as the CSV for that date and city."""
    saveDfToCSV(createDataFrame(tweets), ofDate, ofCity)
    
    
def doesDataExists(cityName, date):
    """Return True when ``Data/<cityName>/<date>_<cityName>.csv`` exists."""
    filename = f'{date}_{cityName}.csv'
    return os.path.exists(os.path.join('Data', cityName, filename))

## Methods to collect and store tweets from cities

In [None]:
from datetime import timedelta
import os

def nextDay(date):
    """Return ``date`` shifted forward by exactly one day."""
    oneDay = timedelta(days=1)
    return date + oneDay
    
def toYMD(date):
    """Format ``date`` as a zero-padded ``YYYY-MM-DD`` string."""
    return f"{date:%Y-%m-%d}"

In [None]:

import time

def collectTweets(fromDate:datetime.date, toDate:datetime.date):
    """Collect tweets for every day from ``fromDate`` through ``toDate``, both inclusive.

    Days are processed in ascending order; nothing happens when ``fromDate``
    is later than ``toDate``.
    """
    dayCount = (toDate - fromDate).days + 1

    # day is the date for which tweets are currently being collected
    day = fromDate
    for _ in range(dayCount):
        collectTweetsOfDate(day)
        day = nextDay(day)
        

def collectTweetsOfDate(date:datetime.date):
    """Collect and store the tweets of every configured city for one day.

    A city is skipped when its CSV for that day already exists. The date is
    formatted once with toYMD and that same string is used for both the
    existence check and the saved file name, so the two always refer to the
    same file.

    Args:
        date: The day to collect tweets for.
    """
    ymd = toYMD(date)
    print(f"\n\n====================={ymd}=====================\n")

    for cityName in cities_point_radius:
        if doesDataExists(cityName, ymd):
            print(f"\n    City: {cityName} Skipped")
            continue

        tweets = getTweetsOfCity(date, cityName)
        saveTweets(tweets, ymd, cityName)

    print("=========================================================")
    


def getTweetsOfCity(date:datetime.date, cityName:str):
    """Gather the tweets of ``date`` from every search area of ``cityName``.

    The areas are queried in list order. The tweets gathered so far are
    handed to each geocode query so it can skip statuses already collected
    from an earlier area.
    """
    print(f"\n    City: {cityName}")

    cityTweets = []  # everything collected for this city on this date

    for index, point in enumerate(cities_point_radius[cityName]):
        cityTweets.extend(
            getTweetsOfGeocode(cityTweets, date, createGeocode(point), index)
        )

    return cityTweets
        
    
def getTweetsOfGeocode(cityTweets, date:datetime.date, geocode, i, maxRetries=5):
    """Collect the tweets of ``date`` matching the keyword query inside one geocode.

    Search results are consumed until a status created on a different day
    shows up. Statuses whose id is already known, either from ``cityTweets``
    or from this call, are skipped. When the search or the per-status fetch
    raises, the search is resumed below the last stored id instead of
    starting over.

    Args:
        cityTweets: Tweets already collected for the city (used for
            de-duplication only; not modified).
        date: The day to collect tweets for.
        geocode: Geocode string as produced by createGeocode.
        i: Index of the geocode, used only in progress output.
        maxRetries: Number of consecutive failed attempts (attempts that add
            no new tweet) tolerated before the last error is re-raised.

    Returns:
        List of new Tweet objects for this geocode.

    Raises:
        Exception: whatever the last attempt raised, once ``maxRetries``
            consecutive attempts have failed without progress.
    """
    print(f"            Gecode: {i}",end='\r')

    tweets = [] # tweets of geocode:geocode at date:date

    keywords = "covid OR virus OR lockdown"
    until    = toYMD(nextDay(date))

    # Ids already stored for this city; set membership replaces a linear scan
    # per status and also covers tweets added by this call.
    seenIds = {item.tweet_id for item in cityTweets}

    failures = 0

    while True:
        # max_id is inclusive on the search endpoint, so resume strictly below
        # the last stored tweet to avoid fetching it a second time.
        max_id = tweets[-1].tweet_id - 1 if tweets else None
        countBefore = len(tweets)

        try:
            cursor = tweepy.Cursor(api.search, q=keywords, geocode=geocode,
                                   lang="en", until=until, max_id=max_id)

            for status in cursor.items():
                if status.created_at.date() != date:
                    print("break\n")
                    break
                if status.id in seenIds:
                    print("continue\n")
                    continue

                print(f'            Gecode: {i} Count:{len(tweets)}', end='\r')
                # NOTE(review): processStatus issues one extra API request per tweet.
                tweets.append(processStatus(status.id))
                seenIds.add(status.id)

        except Exception as e:
            print(e)
            # Only attempts that made no progress count towards the limit.
            failures = 1 if len(tweets) > countBefore else failures + 1
            if failures >= maxRetries:
                # Give up visibly instead of retrying forever.
                raise
            time.sleep(min(2 ** failures, 30))  # brief back-off before retrying
        else:
            print()
            return tweets

In [None]:
# NOTE: from here on the name `datetime` refers to the module, replacing the
# `datetime` class imported earlier with `from datetime import datetime`.
import datetime

# Example of a fixed collection window:
# fromDate = datetime.date(2021,4,16)
# toDate = datetime.date(2021,4,21)

# Read the current day once so both ends of the window are derived from the
# same "today", even if the clock passes midnight while this cell runs.
today = datetime.datetime.now().date()

# Window: from 8 days ago through yesterday, both inclusive.
fromDate = today - timedelta(days=8)
toDate = today - timedelta(days=1)

collectTweets(fromDate = fromDate, toDate = toDate)


continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue

continue


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=e54c0725-da7e-42c7-b0f5-b4641b95089b' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>