In [10]:
import twitter
import os
from dotenv import load_dotenv
load_dotenv()

# Authentication
# Credentials are read from environment variables (load_dotenv() is called above,
# presumably to populate them from a local .env file) so that no secret is
# hardcoded in the notebook. os.getenv returns None when a variable is unset.
API_KEY = os.getenv("TWITTER_API_KEY")
API_SECRET = os.getenv("TWITTER_API_SECRET")

# NOTE(review): the first two arguments are passed as empty strings; presumably they are
# the user access token and token secret, left blank here — confirm against the `twitter` package docs.
auth = twitter.oauth.OAuth("", "", API_KEY, API_SECRET)

# API client used for every search request in the cells below
twitter_api = twitter.Twitter(auth=auth)

In [35]:
# get tweets related to covid19 in uganda using the search query "COVID19UG"
# The response is indexed like a dict by later cells: the tweets are read from
# 'statuses' and the paging cursor from 'search_metadata'.
ug_covid_tweets = twitter_api.search.tweets(q="COVID19UG")

In [51]:
import json

# pretty print the json returned above. uncomment this to view the tweets
# print(json.dumps(ug_covid_tweets, indent=2))

In [44]:
# ministry of health tweets
# NOTE(review): the "from:" prefix presumably restricts the search to tweets posted
# by the MinofHealthUG account — confirm against the Twitter search operator docs.
min_of_health_tweets = twitter_api.search.tweets(q="from:MinofHealthUG")

In [58]:
# Combine both result sets into a single list: Ministry of Health tweets first,
# followed by the "COVID19UG" search results. Later cells read and extend `statuses`.
statuses = min_of_health_tweets['statuses'] + ug_covid_tweets['statuses']

print("Length of statuses", len(statuses))
# Dump one tweet (index 2) as indented JSON to inspect its structure.
# NOTE(review): `json` is imported in a different cell, which must have been run first.
print("Sample tweet: ", json.dumps(statuses[2], indent=2))

Length of statuses 30
Sample tweet:  {
  "created_at": "Tue May 12 04:18:26 +0000 2020",
  "id": 1260061749090422792,
  "id_str": "1260061749090422792",
  "text": "RT @MorikuJoyce: My visit at @BuliisaDistrict Hospital.\nHandwashing remains the No.1 tip for preventing the spread of Coronavirus (COVID-19\u2026",
  "truncated": false,
  "entities": {
    "hashtags": [],
    "symbols": [],
    "user_mentions": [
      {
        "screen_name": "MorikuJoyce",
        "name": "Dr. Joyce Moriku Kaducu",
        "id": 1089092515893051392,
        "id_str": "1089092515893051392",
        "indices": [
          3,
          15
        ]
      },
      {
        "screen_name": "BuliisaDistrict",
        "name": "Buliisa district",
        "id": 3377196910,
        "id_str": "3377196910",
        "indices": [
          29,
          45
        ]
      }
    ],
    "urls": []
  },
  "metadata": {
    "iso_language_code": "en",
    "result_type": "recent"
  },
  "source": "<a href=\"http://twitter.c

**Fields of a tweet/status**:
- `created_at` : date and time on which the tweet was created
- `id` and `id_str`: id of the tweet as an int and a string respectively
- `text`: the text of the tweet
- `entities`: an object containing some aspects of the tweet such as hashtags, symbols, user mentions, urls
- `metadata`: an object containing the language of the tweet and the result_type
- `source`: the application or client used to post the tweet, given as an HTML-formatted string
- `in_reply_to_status_id`, `in_reply_to_status_id_str`, `in_reply_to_user_id`, `in_reply_to_user_id_str` and `in_reply_to_screen_name`: If the tweet is a reply, these fields contain the status id and user id of the original tweet.
- `user`: object which contains info about the user that tweeted this
- `retweeted_status`: present only when this tweet is itself a retweet; it contains the original tweet, including significant detail about its author
- `geo`, `coordinates` and `place`: contains geographic information about the location of the tweet
- `retweet_count` and `favorite_count`: how many times a tweet has been retweeted and favorited respectively. Helps to tell how popular a tweet is.
- `lang`: the language of the tweet.

Refer to the [documentation of a tweet object](https://developer.twitter.com/en/docs/tweets/data-dictionary/overview/tweet-object) for more info.

In [59]:
tweet = statuses[3]

# View the top-level fields for the tweet
tweet.keys()

dict_keys(['created_at', 'id', 'id_str', 'text', 'truncated', 'entities', 'metadata', 'source', 'in_reply_to_status_id', 'in_reply_to_status_id_str', 'in_reply_to_user_id', 'in_reply_to_user_id_str', 'in_reply_to_screen_name', 'user', 'geo', 'coordinates', 'place', 'contributors', 'retweeted_status', 'is_quote_status', 'retweet_count', 'favorite_count', 'favorited', 'retweeted', 'lang'])

In [60]:
# entities in a tweet
tweet['entities']

{'hashtags': [],
 'symbols': [],
 'user_mentions': [{'screen_name': 'MorikuJoyce',
   'name': 'Dr. Joyce Moriku Kaducu',
   'id': 1089092515893051392,
   'id_str': '1089092515893051392',
   'indices': [3, 15]}],
 'urls': []}

#### Some simple analysis (to view how to retrieve different entities in a tweet)

In [71]:
def get_hashtags(tweets=None):
    """Return the text of every hashtag attached to the given tweets.

    Args:
        tweets: iterable of tweet/status dicts, each holding a list of
            ``{'text': ...}`` dicts under ``['entities']['hashtags']``.
            When omitted, the notebook-level ``statuses`` list is used.

    Returns:
        A flat list of hashtag strings in tweet order; duplicates are kept so
        the result can be fed straight into a frequency count.
    """
    if tweets is None:
        # Backward-compatible default: fall back to the tweets collected by the notebook
        tweets = statuses
    return [hashtag['text'] for status in tweets for hashtag in status['entities']['hashtags']]

def get_words(tweets=None):
    """Return every whitespace-separated word from the text of the given tweets.

    Args:
        tweets: iterable of tweet/status dicts, each with a ``'text'`` string.
            When omitted, the notebook-level ``statuses`` list is used.

    Returns:
        A flat list of words in tweet order; duplicates are kept. Words are
        split on any whitespace and are not lower-cased or stripped of
        punctuation.
    """
    if tweets is None:
        # Backward-compatible default: fall back to the tweets collected by the notebook
        tweets = statuses
    # texts of all the given tweets
    status_texts = [status['text'] for status in tweets]

    return [word for text in status_texts for word in text.split()]

# texts of all the tweets collected so far; this must be defined at cell level
# because the list built inside get_words() is local to that function, so
# printing it below would otherwise raise NameError on a fresh kernel
status_texts = [status['text'] for status in statuses]

# hashtags in the data above
hash_tags = get_hashtags()

# all words from all tweets
words = get_words()

print("Sample tweet text:\n", status_texts[0:5], "\n\n")
print("Sample hash tags:\n", hash_tags[0:5], "\n\n")
print("Sample words:\n", words[0:5])

Sample tweet text:
 ['Today is International Nurses Day held under the theme “A voice to lead, Nursing the world to health” \nNurses signi… https://t.co/JttTijlX8d', '“Hepatitis B Testing, Vaccination and Treatment Services have NOT been affected by lockdown. These services are FRE… https://t.co/SOjii5biZ3', 'RT @MorikuJoyce: My visit at @BuliisaDistrict Hospital.\nHandwashing remains the No.1 tip for preventing the spread of Coronavirus (COVID-19…', 'RT @MorikuJoyce: Paid a courtesy call to Soroti Regional Referral Hospital. I appreciate the hard work and diligence of medical and non-med…', '🔹 Total samples tested today at @UVRIug : 2,854\n#STAYSAFEUG'] 


Sample hash tags:
 ['STAYSAFEUG', 'SRHR', 'SafeAbortion', 'COVID19UG', 'COVID19UG'] 


Sample words:
 ['Today', 'is', 'International', 'Nurses', 'Day']


In [72]:
# frequency analysis: print the 10 most common words and hashtags in the data we have above
from collections import Counter
from prettytable import PrettyTable

def print_frequency_table(label, data, limit=10):
    """Print a two-column table of the most frequent items in ``data``.

    Args:
        label: heading for the first column (e.g. 'Word' or 'Hashtag').
        data: iterable of hashable items to count.
        limit: maximum number of rows to print, most frequent first.
            Defaults to 10; ``None`` prints every distinct item.
    """
    pt = PrettyTable(field_names=[label, 'Count'])
    # most_common(limit) yields only the top (item, count) pairs, so there is no
    # need to sort everything and slice; a plain loop replaces the list
    # comprehension that was used only for its side effect
    for item_and_count in Counter(data).most_common(limit):
        pt.add_row(item_and_count)
    pt.align[label], pt.align['Count'] = 'l', 'r' # left align label column, right align Count column
    print(pt)

print_frequency_table('Word', words)

print_frequency_table('Hashtag', hash_tags)

+------------+-------+
| Word       | Count |
+------------+-------+
| to         |    76 |
| the        |    71 |
| of         |    61 |
| RT         |    59 |
| #COVID19UG |    50 |
| a          |    30 |
| have       |    25 |
| total      |    23 |
| national   |    23 |
| task       |    23 |
+------------+-------+
+------------------------+-------+
| Hashtag                | Count |
+------------------------+-------+
| COVID19UG              |    50 |
| StayHome               |     5 |
| KeepSafe               |     5 |
| COVID19                |     5 |
| HIV                    |     5 |
| Aids                   |     5 |
| socialprotection4Covid |     5 |
| STAYSAFEUG             |     1 |
| SRHR                   |     1 |
| SafeAbortion           |     1 |
+------------------------+-------+


**Note**: By default, each search request to the Twitter API returns only 15 tweets. We can keep fetching more tweets by following the `next_results` field found in the `search_metadata` of each API response.

In [68]:
from urllib.parse import parse_qsl, unquote

# Get up to 5 more batches of results and append them to the statuses list.
# Every response carries its own `next_results` cursor, so the cursor has to be
# read from the most recent response. Reading it from `ug_covid_tweets` on every
# iteration would request the same page each time and append duplicate tweets.
search_results = ug_covid_tweets
for _ in range(5):
    print('Length of statuses:', len(statuses))
    # no more results when next_results doesn't exist
    next_results = search_results.get('search_metadata', {}).get('next_results')
    if not next_results:
        break
    # next_results is a URL query string with a leading '?'. parse_qsl splits the
    # pairs first and percent-decodes afterwards, so a value that contains '=' or
    # an encoded '&' is not broken apart.
    kwargs = dict(parse_qsl(next_results.lstrip('?')))
    search_results = twitter_api.search.tweets(**kwargs)
    statuses += search_results['statuses']

Length of statuses: 30
Length of statuses: 45
Length of statuses: 60
Length of statuses: 75
Length of statuses: 90


In [70]:
print("Length of statuses: ", len(statuses))

Length of statuses:  105


In [73]:
# Find out if there are tweets with location data
num_of_tweets_with_loc_data = 0
for status in statuses:
    if status['geo'] is not None or status['coordinates'] or status['place'] is not None:
        num_of_tweets_with_loc_data += 1
print(num_of_tweets_with_loc_data)

0


In [74]:
my_tweets = twitter_api.search.tweets(q="from:IsaacOwomugisha")

In [75]:
len(my_tweets['statuses'])

5

In [84]:
def find_tweets_with_location_data(tweets):
    """Select the tweets that carry location information.

    A tweet qualifies when its 'geo' field is not None, its 'coordinates'
    field is truthy, or its 'place' field is not None.

    Args:
        tweets: iterable of tweet/status dicts with 'geo', 'coordinates'
            and 'place' keys.

    Returns:
        A ``(count, matching_tweets)`` tuple, where ``matching_tweets`` keeps
        the input order.
    """
    loc_tweets = [
        status
        for status in tweets
        # drop tweets where every location field is empty
        if not (status['geo'] is None and not status['coordinates'] and status['place'] is None)
    ]
    return len(loc_tweets), loc_tweets

find_tweets_with_location_data(my_tweets['statuses'])

(0, [])

In [85]:
kampala_tweets = twitter_api.search.tweets(q="Kampala")
print(len(kampala_tweets['statuses']))
find_tweets_with_location_data(kampala_tweets['statuses'])

15


(1,
 [{'created_at': 'Wed May 13 08:52:50 +0000 2020',
   'id': 1260493190718803974,
   'id_str': '1260493190718803974',
   'text': 'Looking forward to the Weekend #BellJamz #Enjoyments #NTVMixshow @ntvuganda @bell_lager @ Kampala, Uganda https://t.co/mLwWB6rYZO',
   'truncated': False,
   'entities': {'hashtags': [{'text': 'BellJamz', 'indices': [31, 40]},
     {'text': 'Enjoyments', 'indices': [41, 52]},
     {'text': 'NTVMixshow', 'indices': [53, 64]}],
    'symbols': [],
    'user_mentions': [{'screen_name': 'ntvuganda',
      'name': 'NTV UGANDA',
      'id': 109220563,
      'id_str': '109220563',
      'indices': [65, 75]},
     {'screen_name': 'Bell_Lager',
      'name': 'Bell Lager 🍻',
      'id': 378450709,
      'id_str': '378450709',
      'indices': [76, 87]}],
    'urls': [{'url': 'https://t.co/mLwWB6rYZO',
      'expanded_url': 'https://www.instagram.com/p/CAHzzJQnXss/?igshid=u74eh4kypn6h',
      'display_url': 'instagram.com/p/CAHzzJQnXss/…',
      'indices': [106, 129]