In [1]:
import csv
from functools import wraps
import os
import os.path
import re
import time

import tweepy

In [2]:
# Credentials are read from the environment; a missing variable raises KeyError.
twitter_consumer = os.environ["TWITTER_CONSUMER"]
twitter_consumer_secret = os.environ["TWITTER_CONSUMER_SECRET"]
twitter_token = os.environ["TWITTER_TOKEN"]
twitter_token_secret = os.environ["TWITTER_TOKEN_SECRET"]

auth = tweepy.OAuthHandler(twitter_consumer, twitter_consumer_secret)
auth.set_access_token(twitter_token, twitter_token_secret)
api = tweepy.API(auth)


# The search string is the keyword expression followed by two search operators
# (presumably "exclude retweets" and "at least 10 favorites" - confirm against
# the Twitter search operator documentation).
query = "(infosec OR cve OR cybersec OR cybersecurity OR ransomware)"
twitter_filter = "-filter:retweets"
mininum_favorites = "min_faves:10"
search_query = f"{query} {twitter_filter} {mininum_favorites}"

# Matches every "@handle" occurrence inside a tweet text.
pattern = re.compile(r"@\w+", re.UNICODE | re.MULTILINE)


with open("tweets.csv", "w", newline="") as twitter_file:
    header = ["screen_name", "content", "mentions"]
    csv_writer = csv.writer(twitter_file, quoting=csv.QUOTE_ALL)
    csv_writer.writerow(header)
    # Request up to 100 pages, asking for 100 tweets per page.
    tweet_pages = tweepy.Cursor(api.search, q=search_query, count=100).pages(100)
    for page in tweet_pages:
        for status in page:
            text = status.text
            author = f"@{status.user.screen_name}"
            # Newlines are escaped so each tweet stays on one CSV line;
            # mentions are stored as a single "|"-separated field.
            one_line_text = text.replace("\n", "\\n")
            mention_field = "|".join(pattern.findall(text))
            csv_writer.writerow([author, one_line_text, mention_field])

In [3]:
def retry(func=None, wait=900):
    """retry re-invokes the decorated function whenever it hits a RateLimitError.

    It can be applied bare (``@retry``) or with arguments (``@retry(wait=60)``).

    Args:
        func: the function to wrap; None when the decorator is given arguments.
        wait: seconds to sleep after a tweepy.RateLimitError before calling again.

    Returns:
        The wrapped function when ``func`` is given, otherwise the decorator.

    Every exception other than tweepy.RateLimitError propagates to the caller.
    The number of retries is not capped.
    """
    def decorator_retry(func):
        @wraps(func)
        def retrying_call(*args, **kwargs):
            while True:
                try:
                    result = func(*args, **kwargs)
                except tweepy.RateLimitError:
                    # Rate limited: report, wait out the window, then loop again.
                    print(f"sleeping for {wait/60}min")
                    time.sleep(wait)
                else:
                    return result
        return retrying_call

    # Called as @retry(wait=...): hand back the decorator itself.
    if func is None:
        return decorator_retry

    # Called as bare @retry: wrap the function right away.
    return decorator_retry(func)

In [None]:
def log(msg, filename):
    """log appends the message to the given file, one message per line.

    Args:
        msg: text to record; a newline is appended when it does not end in one.
        filename: path of the log file, opened in append mode.
    """
    # The callers in this notebook pass str(exception), which carries no line
    # terminator; without one, successive entries would fuse into a single line.
    if not msg.endswith("\n"):
        msg += "\n"
    with open(filename, "a") as f:
        f.write(msg)

In [4]:
# Collect every screen name seen in tweets.csv: tweet authors plus all mentions.
names = set()
with open("tweets.csv", newline="") as twitter_file:
    csv_reader = csv.DictReader(twitter_file)
    for row in csv_reader:
        names.add(row["screen_name"])
        # "mentions" is a "|"-separated field; an empty field splits to [""].
        names.update(row["mentions"].split("|"))

# Drop the empty screen name produced by rows without mentions. discard() is
# used instead of remove() so the cell does not raise KeyError when every row
# happens to have at least one mention.
names.discard("")

In [5]:
# Maps a screen name to its friend ids. It is created in its own cell so that
# re-running the fetch loop skips names that already have an entry.
friend_list = dict()

In [6]:
@retry
def friends_ids(name):
    """friends_ids fetches the friend ids of one account.

    The API is called with count=5000 and the result is returned as a
    single-entry dict {name: ids} so it can be merged into friend_list.
    """
    ids = api.friends_ids(name, count=5000)
    return {name: ids}

for name in names:
    # Only names without an entry yet are fetched, so the cell can be re-run.
    if name not in friend_list:
        try:
            fetched = friends_ids(name)
        except Exception as e:
            # Best effort: record the failure and move on to the next name.
            log(str(e), "friends_ids.log")
        else:
            friend_list.update(fetched)

, 'message': 'Sorry, that page does not exist.'}]
[{'code': 34, 'message': 'Sorry, that page does not exist.'}]
[{'code': 34, 'message': 'Sorry, that page does not exist.'}]
[{'code': 34, 'message': 'Sorry, that page does not exist.'}]
[{'code': 34, 'message': 'Sorry, that page does not exist.'}]
[{'code': 34, 'message': 'Sorry, that page does not exist.'}]
[{'code': 34, 'message': 'Sorry, that page does not exist.'}]
[{'code': 34, 'message': 'Sorry, that page does not exist.'}]
[{'code': 34, 'message': 'Sorry, that page does not exist.'}]
[{'code': 34, 'message': 'Sorry, that page does not exist.'}]
[{'code': 34, 'message': 'Sorry, that page does not exist.'}]
[{'code': 34, 'message': 'Sorry, that page does not exist.'}]
[{'code': 34, 'message': 'Sorry, that page does not exist.'}]
[{'code': 34, 'message': 'Sorry, that page does not exist.'}]
[{'code': 34, 'message': 'Sorry, that page does not exist.'}]
[{'code': 34, 'message': 'Sorry, that page does not exist.'}]
[{'code': 34, 'messa

KeyboardInterrupt: 

In [7]:
with open("user_and_friends_ids.csv", "w", newline="") as twitter_file:
    header = ["screen_name", "friends_ids"]
    csv_writer = csv.writer(twitter_file, quoting=csv.QUOTE_ALL)
    csv_writer.writerow(header)
    # The loop variable is named friend_ids, not friends_ids, so that it does
    # not rebind the global friends_ids() function defined in the fetch cell.
    for screen_name, friend_ids in friend_list.items():
        # discard twitter profiles with over 5000 friends - no one can have that many friends!
        # (ids were requested with count=5000, so a list of exactly 5000 is treated as "over")
        if len(friend_ids) == 5000:
            continue
        csv_writer.writerow([screen_name, "|".join(str(id_) for id_ in friend_ids)])

In [8]:
# Flatten all friend-id lists into one list with duplicates removed.
unique_friend_ids = list({id_ for ids in friend_list.values() for id_ in ids})

In [None]:
# Maps a user id to its screen name; filled batch by batch by the lookup loop.
screen_names = dict()

In [10]:
@retry
def lookup_users(ids):
    """lookup_users resolves one batch of user ids through api.lookup_users."""
    return api.lookup_users(ids)


# Walk the ids in consecutive batches of 100. The range starts at 0 and each
# slice runs forward from its start index, so the final (possibly shorter)
# batch is included; iterating from 100 and slicing backwards would drop it,
# and would look up nothing at all for 100 ids or fewer.
for start in range(0, len(unique_friend_ids), 100):
    batch = unique_friend_ids[start:start + 100]
    try:
        users = lookup_users(batch)
    except Exception as e:
        # Best effort: record the failure and continue with the next batch.
        log(str(e), "lookup_users.log")
    else:
        screen_names.update({user.id: user.screen_name for user in users})

In [11]:
# Persist the id -> screen name mapping, one fully quoted row per user.
with open("id_to_screen_name.csv", "w", newline="") as twitter_file:
    header = ["id", "screen_name"]
    csv_writer = csv.writer(twitter_file, quoting=csv.QUOTE_ALL)
    csv_writer.writerow(header)
    csv_writer.writerows(
        [user_id, user_name] for user_id, user_name in screen_names.items()
    )

In [15]:
# Translate each account's friend ids into "@screen_name" handles. Ids with no
# entry in screen_names (for example because their lookup failed) are skipped:
# formatting them anyway would write the bogus handle "@None" into the data.
friendly_friend_list = {
    screen_name: [f"@{screen_names[id_]}" for id_ in ids if id_ in screen_names]
    for screen_name, ids in friend_list.items()
}

In [16]:
# Persist screen name -> friend handles; handles share one "|"-separated field.
with open("user_to_friend_screen_names.csv", "w", newline="") as twitter_file:
    header = ["screen_name", "friend_screen_names"]
    csv_writer = csv.writer(twitter_file, quoting=csv.QUOTE_ALL)
    csv_writer.writerow(header)
    csv_writer.writerows(
        [owner, "|".join(handles)] for owner, handles in friendly_friend_list.items()
    )