In [4]:
# Setup credentials
from datetime import datetime
import os
import canta_secret
import tweepy
from serializer import Serializer
from tweepy import TweepError

# Twitter API credentials, all read from the canta_secret module.
consumer_key, consumer_secret = canta_secret.key, canta_secret.secret
bearer_token = canta_secret.bearer
access_token, access_secret = canta_secret.access_token, canta_secret.access_secret

In [5]:
# Authentication on Twitter API
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
# The client is configured to wait on rate limits (with a notification) and to
# time out requests after 120 seconds.
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True, timeout=120)
# verify_credentials has to be *called*: the bare bound method is always truthy,
# which made the failure branch below unreachable.
if api.verify_credentials():
    print("Auth completed successfuly!")
else:
    print("Issue occoured during authentication")

Auth completed successfuly!


In [None]:
# Download the profile of every starting account and store it as
# check_data/<account>/<account>_profile.json

accounts = ["Miccighel_"]
for account in accounts:
    serializer = Serializer(f'check_data/{account}')
    # Only the raw JSON payload of the returned user object is persisted.
    profile_json = api.get_user(account)._json
    serializer.serialize_json(f'{account}_profile.json', profile_json)

In [None]:
# Point #1 of the assignment: retrieve starting users followers and followings
for account in accounts:
    print(f"Processing @{account}")
    serializer = Serializer(f'check_data/{account}')

    # Followers: walk every item the cursor yields and keep its raw JSON.
    followers_cursor = tweepy.Cursor(
        api.followers,
        screen_name=account,
        skip_status=True,
        include_user_entities=False
    )
    account_followers = [follower._json for follower in followers_cursor.items()]

    print(f"Found {len(account_followers)} followers for @{account}")
    serializer.serialize_json(f"{account}_follower.json", account_followers)

    # Followings: same procedure, using the friends endpoint.
    followings_cursor = tweepy.Cursor(
        api.friends,
        screen_name=account,
        skip_status=True,
        include_user_entities=False
    )
    account_followings = [friend._json for friend in followings_cursor.items()]

    print(f"@{account} follows {len(account_followings)} users")
    serializer.serialize_json(f"{account}_following.json", account_followings)

In [None]:
# Points #2 and #3 of the assignment: pick 5 random followers of the starting users,
# retrieve 10 followers each, pick 5 random followings of the starting users
# and retrieve 10 followings each

import random

RANDOM_USERS_PER_ACCOUNT = 5     # followers / followings picked at random per starting account
NEIGHBOURS_PER_RANDOM_USER = 10  # followers / followings retrieved for each picked user

for account in accounts:
    serializer = Serializer(f'check_data/{account}')

    # random.sample draws *distinct* users. Repeated random.choice calls could pick
    # the same user twice, which silently overwrote its output file and left fewer
    # than 5 users. The sample size is clamped so a short (or empty) list does not raise.
    # Assumes read_json returns the list written by the previous cell — TODO confirm.
    followers = serializer.read_json(f"{account}_follower.json")
    for random_follower in random.sample(followers, min(RANDOM_USERS_PER_ACCOUNT, len(followers))):
        random_follower_screenName = random_follower["screen_name"]
        random_follower_id = random_follower["id"]
        random_follower_followers = [
            item._json
            for item in tweepy.Cursor(
                api.followers,
                screen_name=random_follower_screenName,
                skip_status=True,
                include_user_entities=False
            ).items(NEIGHBOURS_PER_RANDOM_USER)
        ]
        print(f"Found {len(random_follower_followers)} followers for @{random_follower_screenName}")
        serializer.serialize_json(f"random_{random_follower_id}_follower.json", random_follower_followers)

    followings = serializer.read_json(f"{account}_following.json")
    for random_following in random.sample(followings, min(RANDOM_USERS_PER_ACCOUNT, len(followings))):
        random_following_screenName = random_following["screen_name"]
        random_following_id = random_following["id"]
        random_following_followings = [
            item._json
            for item in tweepy.Cursor(
                api.friends,
                screen_name=random_following_screenName,
                skip_status=True,
                include_user_entities=False
            ).items(NEIGHBOURS_PER_RANDOM_USER)
        ]
        print(f"@{random_following_screenName} follows {len(random_following_followings)} users")
        serializer.serialize_json(f"random_{random_following_id}_following.json", random_following_followings)

In [None]:
# Checks for the presence of profs in random users' data downloaded.
# It shouldn't find conflicts.
counter = 0
profs = ["Miccighel_", "damiano10", "eglu81", "mizzaro", "KevinRoitero"]
# Iterate over the starting accounts explicitly: the original relied on the
# `account` variable left over from a loop in an earlier cell, so only the last
# account was ever checked (and a fresh kernel could fail with a NameError).
for account in accounts:
    s = Serializer(f"check_data/{account}")
    with os.scandir(f"check_data/{account}") as directory:
        for entry in directory:
            # Only the files of the randomly picked users are inspected.
            if "random" not in entry.name:
                continue
            random_group = s.read_json(entry.name)
            for user in random_group:
                if user["screen_name"] in profs:
                    counter += 1
                    print(f"Conflict found in file {entry.name}")
print(f"Found {counter} conflicts.")

In [None]:
# Point #4 of the assignment: retrieve all encountered users' profile
error_count = 0         # Keep trace of how many errors occurred during user retrieval (account not found)
duplicate_count = 0     # Keep trace of users already encountered

all_users = []
processed_ids = []      # Ids of the downloaded users, in download order
seen_ids = set()        # Same ids as processed_ids; a set keeps the duplicate check O(1)

# Profile fields kept for every downloaded user (order defines the key order of each record)
USEFUL_FIELDS = (
    "id",
    "name",
    "screen_name",
    "description",
    "followers_count",
    "friends_count",
    "profile_image_url_https",
)

print(f"Start at {datetime.now()}")
for account in accounts:
    print(
        f'\n\n*************************************\nProcessing {account} and his friends\n*************************************')
    serializer = Serializer(f'check_data/{account}')
    with os.scandir(f'check_data/{account}') as it:
        for entry in it:
            # Every .json file except the "*profile.json" ones is read as a list of users.
            if not entry.name.endswith('.json') or entry.name.endswith('profile.json'):
                continue
            print('\n\n******************')
            users_data = serializer.read_json(f"{entry.name}")
            print(f'\nProcessing {entry.name}, containing {len(users_data)} users\n******************\n\n')
            for user in users_data:
                if user["id"] in seen_ids:
                    duplicate_count += 1
                    continue
                try:
                    print(f'Processing {user["id"]}, user #{len(all_users) + 1}')
                    user_details = api.get_user(user["id"])._json
                    all_users.append({field: user_details[field] for field in USEFUL_FIELDS})
                    processed_ids.append(user_details["id"])
                    seen_ids.add(user_details["id"])
                except tweepy.TweepError:
                    # A failed user is not marked as seen, so it is retried if it shows up again.
                    error_count += 1
                    print("Skipped user because of error")
# The aggregated list is written in the check_data root, named after the last processed account.
serializer = Serializer('check_data')
print('\n\n*************************************\n')
serializer.serialize_json(f"{account}_all_users.json", all_users)
print('\n*************************************\n\n')
print(f'Found {error_count} errors and {duplicate_count} duplicates')

In [None]:
# As requested, before building the social network it is necessary to check friendships.
# NOTE(review): `accounts` repeats the list defined in the "Get starting users info" cell and
# `os` is already imported in the first cell; presumably both are restated so this cell can
# be run on its own — confirm.
accounts = ["Miccighel_"]
import os
def get_friendship(sourceid, targetid, api):
    """Classify the relationship between two users as an edge record.

    Calls ``api.show_friendship`` for the pair and inspects the ``following``
    and ``followed_by`` flags of the first element of its result
    (presumably the relationship seen from the source user — verify against
    the tweepy documentation).

    Args:
        sourceid: id passed to the API as ``source_id``.
        targetid: id passed to the API as ``target_id``.
        api: client object exposing ``show_friendship(source_id=..., target_id=...)``.

    Returns:
        dict with keys ``source_id``, ``target_id`` and ``friendship``, where
        ``friendship`` is one of:
            "none" - neither flag is set
            "r_l"  - only ``followed_by`` is set
            "l_r"  - only ``following`` is set
            "bi"   - both flags are set
    """
    relation = api.show_friendship(source_id=sourceid, target_id=targetid)[0]

    # Lookup table indexed by (following, followed_by).
    labels = {
        (False, False): "none",
        (False, True): "r_l",
        (True, False): "l_r",
        (True, True): "bi",
    }
    flags = (bool(relation.following), bool(relation.followed_by))

    return {
        "source_id": sourceid,
        "target_id": targetid,
        "friendship": labels[flags]
    }


def caller(count, serializer):
    """Download and store the friendships needed to build the social network.

    For every account in the module-level ``accounts`` list this:
      1. checks the friendship between the account and each user listed in
         ``unique_all_users.json`` (skipping the account itself);
      2. checks the friendship between each randomly picked user (the
         ``random_<id>_*.json`` files) and the users stored in its file.
    Every friendship is handed to ``serializer.serialize_json`` as soon as it
    is computed.

    Args:
        count: number of leading users of ``unique_all_users.json`` to skip;
            it is also used as the running counter printed with each friendship.
        serializer: object providing ``read_json`` and ``serialize_json``;
            paths are given relative to it.

    On a ``TweepError`` the function prints the error and calls itself again
    with the current ``count``. Any other exception is reported and the
    download stops.

    NOTE(review): ``count`` keeps growing while the random users' files are
    processed, so a restart triggered in that phase slices the user list with
    an offset that no longer matches it. A ``TweepError`` that is not
    transient would also make the retry recurse until the recursion limit is
    hit. Both behaviours are inherited from the original code — confirm
    whether they need a redesign.
    """
    try:
        users = serializer.read_json("unique_all_users.json")
        if count > 0:
            users = users[count:]

        for account in accounts:
            account_json = serializer.read_json(f"{account}/{account}_profile.json")
            account_id = account_json["id"]
            for user in users:
                # Compare ids by value: `is not` checks object identity, which is
                # unreliable for integers loaded from two different JSON files.
                if user["id"] != account_id:
                    friendship = get_friendship(account_id, user["id"], api)
                    serializer.serialize_json(f'{account}/{account}_all_friendships.json', friendship)
                    print(f"Added friendship between {account} and {user['screen_name']} #{count}")
                    count += 1

            # There's also the necessity to check friendships between the random picked users
            # at Point #2 and #3 and their followers and followings
            with os.scandir(f'check_data/{account}') as it:
                for entry in it:
                    if entry.name.endswith('.json') and entry.name.startswith('random'):
                        # File names look like random_<user id>_<follower|following>.json
                        fileId = int(entry.name.split("_")[1])
                        profiles = serializer.read_json(f"{account}/{entry.name}")
                        for profile in profiles:
                            friendship = get_friendship(fileId, profile["id"], api)
                            # Same file name as before for "Miccighel_", but no longer hard-coded.
                            # NOTE(review): written next to the account folder, unlike the
                            # friendships above which go inside it — confirm this is intended.
                            serializer.serialize_json(f'{account}_all_friendships.json', friendship)
                            print(f"Added friendship between {fileId} and {profile['screen_name']} #{count}")
                            count += 1
        print("Downloaded finished succesfully")

    except TweepError as te:
        print(f"Download aborted unexpectedly due to {te}")
        print(f"Download will restore from user {count}")
        caller(count, serializer)
    except Exception as e:
        # Format the exception instead of concatenating it: `str + Exception`
        # raises a TypeError that would hide the original error.
        print(f"An error occured, download should be restarted from scratch {e}")

# Start the friendship download from the first user (offset 0), with a Serializer built on 'check_data'.
caller(0, Serializer('check_data'))

In [None]:
# Point #5 of the assignment: build the social network