In [1]:
# Setup credentials

import keys

# Application (consumer) key pair, read from the local `keys` module.
consumer_key, consumer_secret = keys.key, keys.secret
# Bearer token; not referenced by the other cells visible in this notebook.
bearer_token = keys.bearer
# Access token pair handed to `auth.set_access_token` in the next cell.
access_token, access_secret = keys.access_token, keys.access_secret

In [2]:
# Authentication on Twitter API

import tweepy

# Authenticate with the consumer pair plus the user's access token pair.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
# Let tweepy sleep through rate-limit windows (and print a notice) instead of failing.
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
# verify_credentials has to be *called*: the bound method object itself is
# always truthy, so testing it without parentheses reported success even when
# the credentials were wrong and made the else branch unreachable.
if api.verify_credentials():
    print("Auth completed successfuly!")
else:
    print("Issue occoured during authentication")

Auth completed successfuly!


In [5]:
# Import serializer from an external module

from serializer import Serializer

In [7]:
# Get starting users info

# Screen names of the starting users; later cells iterate over this list too.
accounts = ["mizzaro", "damiano10", "Miccighel_", "eglu81", "KevinRoitero"]
for account in accounts:
    # Each starting user gets its own directory: check_data/<screen_name>/
    serializer = Serializer(f'check_data/{account}')
    # Store the `_json` payload carried by the object that get_user returns.
    serializer.serialize_json(
        f'{account}_profile.json',
        api.get_user(account)._json,
    )

Data serialized to path: check_data/mizzaro/mizzaro_profile.json
Data serialized to path: check_data/damiano10/damiano10_profile.json
Data serialized to path: check_data/Miccighel_/Miccighel__profile.json
Data serialized to path: check_data/eglu81/eglu81_profile.json
Data serialized to path: check_data/KevinRoitero/KevinRoitero_profile.json


In [8]:
# Point #1 of the assignment: retrieve starting users followers and followings

for account in accounts:
    print(f"Processing @{account}")
    serializer = Serializer(f'check_data/{account}')

    ### FOLLOWERS

    # count=200 asks for the largest page the endpoint serves (the default is
    # 20), so far fewer requests are needed and the rate limit is hit less often.
    account_followers = [
        item._json
        for item in tweepy.Cursor(
            api.followers,
            screen_name=account,
            skip_status=True,
            include_user_entities=False,
            count=200
        ).items()
    ]

    print(f"Found {len(account_followers)} followers for @{account}")
    # Plural "followers": this is the file name the Points #2/#3 cell reads
    # back; the previous singular "_follower.json" left that cell without input.
    serializer.serialize_json(f"{account}_followers.json", account_followers)

    ### FOLLOWINGS

    account_followings = [
        item._json
        for item in tweepy.Cursor(
            api.friends,
            screen_name=account,
            skip_status=True,
            include_user_entities=False,
            count=200
        ).items()
    ]

    print(f"@{account} follows {len(account_followings)} users")
    serializer.serialize_json(f"{account}_following.json", account_followings)

Processing @mizzaro
Found 158 followers for @mizzaro
Data serialized to path: check_data/mizzaro/mizzaro_followers.json
@mizzaro follows 331 users
Data serialized to path: check_data/mizzaro/mizzaro_following.json
Processing @damiano10
Found 791 followers for @damiano10
Data serialized to path: check_data/damiano10/damiano10_followers.json
@damiano10 follows 840 users
Data serialized to path: check_data/damiano10/damiano10_following.json
Processing @Miccighel_
Found 333 followers for @Miccighel_
Data serialized to path: check_data/Miccighel_/Miccighel__followers.json
@Miccighel_ follows 212 users
Data serialized to path: check_data/Miccighel_/Miccighel__following.json
Processing @eglu81
Found 538 followers for @eglu81
Data serialized to path: check_data/eglu81/eglu81_followers.json
@eglu81 follows 622 users
Data serialized to path: check_data/eglu81/eglu81_following.json
Processing @KevinRoitero
Found 103 followers for @KevinRoitero
Data serialized to path: check_data/KevinRoitero/Kevi

Rate limit reached. Sleeping for: 894
Rate limit reached. Sleeping for: 893
Rate limit reached. Sleeping for: 894
Rate limit reached. Sleeping for: 893
Rate limit reached. Sleeping for: 894
Rate limit reached. Sleeping for: 893
Rate limit reached. Sleeping for: 889
Rate limit reached. Sleeping for: 893
Rate limit reached. Sleeping for: 894


In [9]:
# Points #2 and #3 of the assignment: pick 5 random followers of the starting users,
# retrieve 10 followers each, pick 5 random followings of the starting users
# and retrieve 10 followings each

import random

for account in accounts:
    serializer = Serializer(f'check_data/{account}')

    # Point #2: up to 5 *distinct* followers of the starting user, 10 followers each.
    # random.sample draws without replacement, so the same user cannot be picked
    # twice (which would overwrite its own output file), and min() keeps the
    # draw valid when the list holds fewer than 5 users (including none).
    # The local is not called `json` to avoid shadowing the stdlib module name.
    followers_pool = serializer.read_json(f"{account}_followers.json")
    for random_follower in random.sample(followers_pool, min(5, len(followers_pool))):
        random_follower_screenName = random_follower["screen_name"]
        random_follower_id = random_follower["id"]
        random_follower_followers = [
            item._json
            for item in tweepy.Cursor(
                api.followers,
                screen_name=random_follower_screenName,
                skip_status=True,
                include_user_entities=False
            ).items(10)
        ]
        print(f"Found {len(random_follower_followers)} followers for @{random_follower_screenName}")
        # The id is embedded in the file name; the friendship cell parses it back out.
        serializer.serialize_json(f"random_{random_follower_id}_follower.json", random_follower_followers)

    # Point #3: up to 5 distinct followings of the starting user, 10 followings each.
    followings_pool = serializer.read_json(f"{account}_following.json")
    for random_following in random.sample(followings_pool, min(5, len(followings_pool))):
        random_following_screenName = random_following["screen_name"]
        random_following_id = random_following["id"]
        random_following_followings = [
            item._json
            for item in tweepy.Cursor(
                api.friends,
                screen_name=random_following_screenName,
                skip_status=True,
                include_user_entities=False
            ).items(10)
        ]
        print(f"@{random_following_screenName} follows {len(random_following_followings)} users")
        serializer.serialize_json(f"random_{random_following_id}_following.json", random_following_followings)

Data read from path: check_data/mizzaro/mizzaro_followers.json
Found 10 followers for @airamoigroig
Data serialized to path: check_data/mizzaro/random_269301403_follower.json
Found 10 followers for @MauroLorenzutti
Data serialized to path: check_data/mizzaro/random_332134740_follower.json
Found 10 followers for @fmnardini
Data serialized to path: check_data/mizzaro/random_125483940_follower.json
Found 10 followers for @ridhorei
Data serialized to path: check_data/mizzaro/random_151303125_follower.json
Found 10 followers for @damiano10
Data serialized to path: check_data/mizzaro/random_132646210_follower.json


JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [None]:
# Point #4 of the assignment: retrieve all encountered users' profile
from datetime import datetime
import os

error_count = 0         # Keep track of how many errors occurred during user retrieval (account not found)
duplicate_count = 0     # Keep track of users already encountered

all_users = []
# A set gives constant-time membership checks; the previous list was scanned
# in full for every user in every file.
processed_ids = set()

print(f"Start at {datetime.now()}")
for account in accounts:
    print(
        f'\n\n*************************************\nProcessing {account} and his friends\n*************************************')
    serializer = Serializer(f'check_data/{account}')
    with os.scandir(f'check_data/{account}') as it:
        for entry in it:
            # Only the user-list files are of interest; the starting user's own
            # "<name>_profile.json" holds a single profile, not a list of users.
            if not entry.name.endswith('.json') or entry.name.endswith('profile.json'):
                continue
            print('\n\n******************')
            users_data = serializer.read_json(entry.name)
            print(f'\nProcessing {entry.name}, containing {len(users_data)} users\n******************\n\n')
            for user in users_data:
                if user["id"] in processed_ids:
                    duplicate_count += 1
                    continue
                try:
                    print(f'Processing {user["id"]}, user #{len(all_users) + 1}')
                    user_details = api.get_user(user["id"])._json
                    # Keep only the fields copied below.
                    useful_user_details = {
                        "id": user_details["id"],
                        "name": user_details["name"],
                        "screen_name": user_details["screen_name"],
                        "description": user_details["description"],
                        "followers_count": user_details["followers_count"],
                        "friends_count": user_details["friends_count"],
                        "profile_image_url_https": user_details["profile_image_url_https"]
                    }
                    all_users.append(useful_user_details)
                    # Only successful lookups are marked as processed, so an id
                    # that failed is attempted again if it shows up in another file.
                    processed_ids.add(user_details["id"])
                except tweepy.TweepError:
                    error_count += 1
                    print("Skipped user because of error")
serializer = Serializer('check_data')
print('\n\n*************************************\n')
serializer.serialize_json("all_users.json", all_users)
print('\n*************************************\n\n')
print(f'Found {error_count} errors and {duplicate_count} duplicates')

In [None]:
# As requested, before building the social network it is necessary to check friendships

def get_friendship(sourceid, targetid, api):
    """Classify the relationship between two users as seen from the source.

    Calls ``api.show_friendship`` and inspects the first element of its
    result, reading that element's ``following`` and ``followed_by`` flags.

    Args:
        sourceid: id passed as ``source_id`` to ``api.show_friendship``.
        targetid: id passed as ``target_id`` to ``api.show_friendship``.
        api: object exposing ``show_friendship(source_id=..., target_id=...)``.

    Returns:
        A dict with ``source_id``, ``target_id`` and ``friendship``, where
        ``friendship`` is ``"none"`` (neither flag set), ``"r_l"`` (only
        ``followed_by``), ``"l_r"`` (only ``following``) or ``"bi"`` (both).
    """
    # Keys are (following, followed_by) as booleans.
    labels = {
        (False, False): "none",
        (False, True): "r_l",
        (True, False): "l_r",
        (True, True): "bi",
    }

    source_view = api.show_friendship(source_id=sourceid, target_id=targetid)[0]
    flags = (bool(source_view.following), bool(source_view.followed_by))

    return {
        "source_id": sourceid,
        "target_id": targetid,
        "friendship": labels[flags]
    }

serializer = Serializer('check_data')
users = serializer.read_json("all_users.json")
edges = []
count = 0
for account in accounts:
    serializer = Serializer(f'check_data/{account}')
    account_json = serializer.read_json(f"{account}_profile.json")
    account_id = account_json["id"]
    for user in users:
        # Compare ids by value. `is not` tests object identity, and two equal
        # ints loaded from different JSON files are normally distinct objects,
        # so the starting user was never skipped and got an edge to itself.
        if user["id"] != account_id:
            edges.append(get_friendship(account_id, user["id"], api))
            count += 1
            print(f"Added friendship between {account} and {user['screen_name']} #{count}")
    # There's also the necessity to check friendships between the random picked users at Point #2 and #3 and their followers and followings
    with os.scandir(f'check_data/{account}') as it:
        for entry in it:
            if entry.name.endswith('.json') and entry.name.startswith('random'):
                # File names look like "random_<id>_follower.json": the second
                # underscore-separated field is the randomly picked user's id.
                fileId = int(entry.name.split("_")[1])
                # Not named `json`, to avoid shadowing the stdlib module name.
                picked_user_contacts = serializer.read_json(entry.name)
                for profile in picked_user_contacts:
                    edges.append(get_friendship(fileId, profile["id"], api))
                    count += 1
                    print(f"Added friendship between {fileId} and {profile['screen_name']} #{count}")

serializer = Serializer('check_data')
serializer.serialize_json('all_friendships.json', edges)

In [None]:
# Point #5 of the assignment: build the social network