In [None]:
# Setup credentials

import keys

# Every credential is read from the local `keys` module, so that no secret
# value is written directly in the notebook.
consumer_key, consumer_secret = keys.key, keys.secret
bearer_token = keys.bearer
access_token, access_secret = keys.access_token, keys.access_secret

In [None]:
# Authentication on Twitter API

import tweepy

# Build the OAuth handler from the consumer credentials, attach the user
# access token and create the API client used by the rest of the notebook.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

# verify_credentials has to be *called*: the bound method object itself is
# always truthy, so testing it without parentheses reports success even when
# the credentials are wrong.
try:
    credentials_ok = bool(api.verify_credentials())
except tweepy.TweepError:
    # Any API error raised while verifying is reported as a failed authentication
    credentials_ok = False

if credentials_ok:
    print("Auth completed successfuly!")
else:
    print("Issue occoured during authentication")

In [None]:
# Import serializer from an external module

from serializer import Serializer

# Root data directory: the notebook reads and writes its JSON files here,
# using one sub-directory per starting account (f'{dataDir}/{account}').
dataDir = "data"

In [None]:
# Download the profile of every starting account and hand its raw JSON payload
# to a Serializer bound to that account's directory.
accounts = ["mizzaro", "damiano10", "Miccighel_", "eglu81", "KevinRoitero"]
for account in accounts:
    serializer = Serializer(f'{dataDir}/{account}')
    serializer.serialize_json(f'{account}_profile.json', api.get_user(account)._json)

In [None]:
# Point #1 of the assignment: retrieve starting users followers and followings

for account in accounts:
    print(f"Processing @{account}")
    serializer = Serializer(f'{dataDir}/{account}')

    # Followers: iterate over the whole cursor and keep the raw JSON payload
    # of every returned user.
    followers_cursor = tweepy.Cursor(
        api.followers,
        screen_name=account,
        skip_status=True,
        include_user_entities=False
    )
    account_followers = [follower._json for follower in followers_cursor.items()]

    print(f"Found {len(account_followers)} followers for @{account}")
    serializer.serialize_json(f"{account}_follower.json", account_followers)

    # Followings: same procedure, using the friends endpoint.
    followings_cursor = tweepy.Cursor(
        api.friends,
        screen_name=account,
        skip_status=True,
        include_user_entities=False
    )
    account_followings = [friend._json for friend in followings_cursor.items()]

    print(f"@{account} follows {len(account_followings)} users")
    serializer.serialize_json(f"{account}_following.json", account_followings)

In [None]:
# Points #2 and #3 of the assignment: pick 5 random followers of the starting users,
# retrieve 10 followers each, pick 5 random followings of the starting users
# and retrieve 10 followings each
import random

# How many users are picked at random for each starting account, and how many
# followers/followings are downloaded for each picked user.
picks_per_account = 5
users_per_pick = 10

for account in accounts:
    serializer = Serializer(f'{dataDir}/{account}')

    # Followers of randomly picked followers.
    follower_pool = serializer.read_json(f"{account}_follower.json")
    # random.sample draws WITHOUT replacement, so the same user cannot be picked
    # twice (repeated random.choice calls could pick a duplicate and overwrite its
    # file). min() keeps the call valid when fewer than `picks_per_account` users exist.
    for random_follower in random.sample(follower_pool, min(picks_per_account, len(follower_pool))):
        random_follower_screenName = random_follower["screen_name"]
        random_follower_id = random_follower["id"]
        random_follower_followers = [
            item._json
            for item in tweepy.Cursor(
                api.followers,
                screen_name=random_follower_screenName,
                skip_status=True,
                include_user_entities=False
            ).items(users_per_pick)
        ]
        print(f"Found {len(random_follower_followers)} followers for @{random_follower_screenName}")
        serializer.serialize_json(f"random_{random_follower_id}_follower.json", random_follower_followers)

    # Followings of randomly picked followings.
    following_pool = serializer.read_json(f"{account}_following.json")
    for random_following in random.sample(following_pool, min(picks_per_account, len(following_pool))):
        random_following_screenName = random_following["screen_name"]
        random_following_id = random_following["id"]
        random_following_followings = [
            item._json
            for item in tweepy.Cursor(
                api.friends,
                screen_name=random_following_screenName,
                skip_status=True,
                include_user_entities=False
            ).items(users_per_pick)
        ]
        print(f"@{random_following_screenName} follows {len(random_following_followings)} users")
        serializer.serialize_json(f"random_{random_following_id}_following.json", random_following_followings)

In [None]:
# Point #4 of the assignment: retrieve all encountered users' profile
from datetime import datetime
import os

error_count = 0         # Number of users whose profile lookup raised an API error (e.g. account not found)
duplicate_count = 0     # Number of users skipped because their profile was already retrieved

all_users = []
# A set gives constant-time membership tests; with a list every lookup scans
# all the ids collected so far.
processed_ids = set()

# Profile fields kept for every user, in the order they are stored
useful_fields = (
    "id",
    "name",
    "screen_name",
    "description",
    "followers_count",
    "friends_count",
    "profile_image_url_https",
)

print(f"Start at {datetime.now()}")
for account in accounts:
    print(
        f'\n\n*************************************\nProcessing {account} and his friends\n*************************************')
    serializer = Serializer(f'{dataDir}/{account}')
    with os.scandir(f'{dataDir}/{account}') as it:
        for entry in it:
            # Only the "random_*" files are scanned here; profile files are excluded
            if not entry.name.startswith('random') or entry.name.endswith('profile.json'):
                continue
            print('\n\n******************')
            users_data = serializer.read_json(f"{entry.name}")
            print(f'\nProcessing {entry.name}, containing {len(users_data)} users\n******************\n\n')
            for user in users_data:
                if user["id"] in processed_ids:
                    duplicate_count += 1
                    continue
                print(f'Processing {user["id"]}, user #{len(all_users) + 1}')
                try:
                    # user_id= states explicitly that the value is a numeric id,
                    # not a screen name that happens to be made of digits
                    user_details = api.get_user(user_id=user["id"])._json
                except tweepy.TweepError:
                    error_count += 1
                    print("Skipped user because of error")
                else:
                    all_users.append({field: user_details[field] for field in useful_fields})
                    processed_ids.add(user_details["id"])

# Store the collected profiles in the root data directory
serializer = Serializer(dataDir)
print('\n\n*************************************\n')
serializer.serialize_json("all_users.json", all_users)
print('\n*************************************\n\n')
print(f'Found {error_count} errors and {duplicate_count} duplicates')

In [None]:
# As requested, before building the social network it is necessary to check the friendships

def get_friendship(sourceid, targetid, api):
    """Classify the follow relationship between two users.

    Calls ``api.show_friendship(source_id=..., target_id=...)`` and reads the
    ``following`` and ``followed_by`` flags of the first element of the result
    (presumably the relationship as seen from the source user - confirm
    against the tweepy documentation).

    Args:
        sourceid: value passed as ``source_id``.
        targetid: value passed as ``target_id``.
        api: client object exposing ``show_friendship``.

    Returns:
        A dict with keys ``"source_id"``, ``"target_id"`` and ``"friendship"``,
        where ``"friendship"`` is ``"none"`` (neither flag set), ``"r_l"`` (only
        ``followed_by`` set), ``"l_r"`` (only ``following`` set) or ``"bi"``
        (both flags set).
    """
    source_view = api.show_friendship(source_id=sourceid, target_id=targetid)[0]

    # Lookup key: (following, followed_by), both reduced to plain booleans
    labels = {
        (False, False): "none",
        (False, True): "r_l",
        (True, False): "l_r",
        (True, True): "bi",
    }
    flags = (bool(source_view.following), bool(source_view.followed_by))

    return {
        "source_id": sourceid,
        "target_id": targetid,
        "friendship": labels[flags]
    }

# Load every retrieved profile, then query the relationship between each
# starting account and each of those users.
serializer = Serializer(dataDir)
users = serializer.read_json("all_users.json")
edges = []
count = 0
for account in accounts:
    serializer = Serializer(f'{dataDir}/{account}')
    account_json = serializer.read_json(f"{account}_profile.json")
    account_id = account_json["id"]
    for user in users:
        # Ids must be compared by value: `is not` checks object identity, which is
        # almost always True for two large ints loaded from different files, so the
        # starting account would never be excluded from being paired with itself.
        if user["id"] != account_id:
            edges.append(get_friendship(account_id, user["id"], api))
            count += 1
            print(f"Added friendship between {account} and {user['screen_name']} #{count}")
    # It is also necessary to check the friendships between the users picked at random
    # at Points #2 and #3 and their followers and followings
    with os.scandir(f'{dataDir}/{account}') as it:
        for entry in it:
            if entry.name.endswith('.json') and entry.name.startswith('random'):
                # File names look like random_<id>_<kind>.json: the second
                # "_"-separated token is the id of the picked user
                fileId = int(entry.name.split("_")[1])
                picked_user_contacts = serializer.read_json(entry.name)
                for profile in picked_user_contacts:
                    edges.append(get_friendship(fileId, profile["id"], api))
                    count += 1
                    print(f"Added friendship between {fileId} and {profile['screen_name']} #{count}")

# Store all the collected relationships in the root data directory
serializer = Serializer(dataDir)
serializer.serialize_json('all_friendships.json', edges)

In [1]:
# Point #5 of the assignment: build the social network
def buildFromJson(path, nodeJson, edgeJson):
    """Build a directed NetworkX graph from a users file and a friendships file.

    Args:
        path: directory passed to ``Serializer``; both files are read through it.
        nodeJson: name of the JSON file holding the user profiles. Every profile
            must have an ``"id"`` key; all its keys become node attributes.
        edgeJson: name of the JSON file holding the friendships. Every item must
            have ``"source_id"``, ``"target_id"`` and ``"friendship"`` keys, the
            latter being ``"bi"``, ``"l_r"``, ``"r_l"`` or ``"none"``.

    Returns:
        A ``networkx.DiGraph`` whose nodes carry the profile attributes plus
        ``follower_ing`` (in-degree) and ``following_ing`` (out-degree), and
        whose edges carry a ``type`` attribute with the friendship kind.
    """
    import networkx as nx
    from serializer import  Serializer

    # Create base diGraph. The comma after the first name matters: without it
    # Python joins the two adjacent string literals into a single list item.
    diGraph = nx.DiGraph(students=["Lorenzo Bellina", "Francesco Bombassei De Bona", "Andrea Cantarutti", "Gabriele Dominici"])

    serializer = Serializer(path)

    # Read users from json and add them to the diGraph
    all_nodes = serializer.read_json(nodeJson)
    for profile in all_nodes:
        diGraph.add_node(profile["id"], follower_ing = 0, following_ing = 0, **profile)

    # Read friendships from json and add them to the diGraph
    all_edges = serializer.read_json(edgeJson)
    for friendship in all_edges:
        kind = friendship["friendship"]
        source = friendship["source_id"]
        target = friendship["target_id"]
        if kind == "bi":
            diGraph.add_edge(source, target, type = kind)
            diGraph.add_edge(target, source, type = kind)
            print(f'Added bidirectional edge between {source} and {target}')
        elif kind == "r_l":
            diGraph.add_edge(target, source, type = kind)
            print(f'Added edge from {target} to {source}')
        elif kind == "l_r":
            diGraph.add_edge(source, target, type = kind)
            print(f'Added edge from {source} to {target}')
        # "none" (or any other value) adds no edge

    # Set new attributes for every node to represent number of followers and followings
    for node, in_degree in diGraph.in_degree:
        diGraph.nodes[node]["follower_ing"] = in_degree
    for node, out_degree in diGraph.out_degree:
        diGraph.nodes[node]["following_ing"] = out_degree

    return diGraph

import networkx as nx
# Build the directed graph from the users and friendships files stored under 'data'
diGraph = buildFromJson('data', 'all_users.json', 'all_friendships.json')
# Save the diGraph
# NOTE(review): the "graph" directory is presumably expected to exist already - confirm
nx.write_gpickle(diGraph, "graph/diGraph_networkx.pkl")

Data read from path: data/all_users.json
Data read from path: data/all_friendships.json
Added edge from 1972411447 to 18932422
Added bidirectional edge between 18932422 and 93781753
Added bidirectional edge between 18932422 and 132646210
Added bidirectional edge between 18932422 and 3036907250
Added bidirectional edge between 18932422 and 52059998
Added edge from 18932422 to 190161488
Added edge from 18932422 to 293483941
Added bidirectional edge between 18932422 and 19659370
Added edge from 18932422 to 354847710
Added edge from 18932422 to 41667342
Added edge from 18932422 to 333597222
Added bidirectional edge between 18932422 and 249661913
Added edge from 94732055 to 18932422
Added edge from 116897811 to 18932422
Added edge from 18932422 to 7295362
Added edge from 18932422 to 30339571
Added edge from 18932422 to 91440853
Added bidirectional edge between 18932422 and 384127183
Added edge from 18932422 to 40576466
Added bidirectional edge between 18932422 and 14451127
Added edge from 1

In [3]:
# Point #6 of the assignment: visualize the diGraph created before
def plotGraph(pklPath):
    """Load a pickled NetworkX graph and convert it into a styled pyvis network.

    Every node gets a tooltip (screen name and id), its ``name`` as label and a
    size equal to the length of its adjacency list. Nodes whose screen name is
    listed in ``profs`` are drawn with their profile image; the others get a
    random colour. Every edge takes the random colour generated for its
    ``from`` node.

    Args:
        pklPath: path of the pickle file holding the graph.

    Returns:
        The configured ``pyvis.network.Network``.
    """
    import networkx.readwrite.gpickle as pkl
    from pyvis.network import Network
    import random

    def random_channel():
        # One random colour channel in the 0-255 range
        return random.randint(0, 255)

    # Load the graph
    twitter = pkl.read_gpickle(pklPath)

    # Set up the network, choose the physics model and import the graph
    nt = Network(height="100%", width="100%", bgcolor="#111111", directed=True, font_color="white", heading="Twitter Graph - Final Edition")
    nt.barnes_hut()
    nt.from_nx(twitter)

    # Adjacency list: its length per node is used as node weight
    neighbor_map = nt.get_adj_list()

    profs = ("Miccighel_", "mizzaro", "damiano10", "eglu81", "KevinRoitero")

    # Colour generated for each node id, reused afterwards for its outgoing edges
    colors = {}

    # Node styling
    for node in nt.nodes:
        node_id = node["id"]
        tooltip = "nome utente: " + node['screen_name'] + "<br>" + "id: " + str(node_id)
        weight = len(neighbor_map[node_id])
        node['title'] = tooltip
        node['label'] = node['name']
        node_color = '#{:02X}{:02X}{:02X}'.format(random_channel(), random_channel(), random_channel())
        colors[node_id] = node_color
        if node['screen_name'] not in profs:
            node['color'] = node_color
        else:
            node['shape'] = 'circularImage'
            node['image'] = node['profile_image_url_https']
            node['mass'] = weight
        node['size'] = weight

    # Edge styling
    for edge in nt.edges:
        edge['color'] = colors[edge['from']]

    return nt

# Build the styled network from the pickled directed graph
nt = plotGraph("graph/diGraph_networkx.pkl")
# Save the network to an HTML file (only save_graph is called in this cell)
nt.save_graph("graph/diGraph.html")

In [None]:
# To study various properties of the graph, it is recommended to convert the directed graph into an undirected one

# Undirected version of the directed graph, pickled next to the directed one
graph = diGraph.to_undirected()
nx.write_gpickle(graph, "graph/graph_networkx.pkl")
# NOTE(review): `nt` is not rebuilt in this cell, so this presumably renders the
# network created earlier from the directed graph rather than `graph` - confirm
# that this is intended
nt.show("graph/graph.html")

In [None]:
# Point #7 of the assignment

# Connectivity check
connected = nx.is_connected(graph)
print("The graph IS connected" if connected else "The graph IS NOT connected")

# Bipartiteness check
bipartite = nx.is_bipartite(graph)
print("The graph IS bipartite" if bipartite else "The graph IS NOT bipartite")

In [None]:
# Point #8 of the assignment: find center, diameter and radius

# The eccentricity of every node is computed once and shared by center,
# diameter and radius, which would otherwise each recompute it from scratch.
eccentricity = nx.eccentricity(graph)

# Center
center = nx.center(graph, e=eccentricity)
# join() produces the comma-separated list directly (and yields "[]" for an
# empty center instead of cutting characters off the message)
center_names = ", ".join(graph.nodes[node_id]["screen_name"] for node_id in center)
print(f"Center of the graph is {center} ---> [{center_names}]")

# Diameter
diameter = nx.diameter(graph, e=eccentricity)
print(f"Diameter = {diameter}")

# Radius
radius = nx.radius(graph, e=eccentricity)
print(f"Radius = {radius}")

In [None]:
# Point #9 of the assignment: find various centrality measures

# !! The following properties are printed only when they are NOT equal to zero !!

def _print_nonzero(header, scores):
    """Print `header`, then one tab-indented "node: score" line per non-zero score."""
    print(header)
    for node, score in scores.items():
        if score != 0:
            print(f"\t{node}: {score}")


# Some centrality values can be computed on the undirected graph
bt_centrality = nx.betweenness_centrality(graph)
_print_nonzero("Betweenness centrality:", bt_centrality)

cl_centrality = nx.closeness_centrality(graph)
_print_nonzero("\n\nCloseness centrality:", cl_centrality)

dg_centrality = nx.degree_centrality(graph)
_print_nonzero("\n\nDegree centrality:", dg_centrality)

# Switching to the previously created directed graph makes it possible to compute further measures
in_dg_centrality = nx.in_degree_centrality(diGraph)
_print_nonzero("\n\nIn-degree centrality:", in_dg_centrality)

out_dg_centrality = nx.out_degree_centrality(diGraph)
_print_nonzero("\n\nOut-degree centrality:", out_dg_centrality)

pageRank = nx.pagerank(diGraph)
_print_nonzero("\n\nPage Rank:", pageRank)

# nx.hits returns the two dictionaries in the order (hubs, authorities):
# unpack them by name so that each printed label matches its value.
hits = nx.hits(diGraph, max_iter=500)
hubs, authorities = hits
print("HITS:")
for node, authValue in authorities.items():
    print(f"\n\t{node}\n\t\tauth = {authValue}\n\t\t hub = {hubs[node]}")

In [7]:
# Point #10 of the assignment: generate damiano10 subgraph
import networkx.readwrite.gpickle as pkl

# Node id used as the center of the subgraph (damiano10)
damiano10_id = 132646210
reduced_graph_pickle = 'graph/reducedGraph_networkx.pkl'

# Reload the directed graph and extract the radius-1 ego graph around the center
# NOTE(review): on a directed graph ego_graph presumably follows outgoing edges
# only unless undirected=True is passed - confirm this is the intended neighbourhood
diGraph = pkl.read_gpickle("graph/diGraph_networkx.pkl")
d10_reduced_graph = nx.ego_graph(diGraph, damiano10_id, radius=1, center=True)
pkl.write_gpickle(d10_reduced_graph, reduced_graph_pickle)

# Build the styled network from the pickled subgraph and save it as HTML
nt = plotGraph(reduced_graph_pickle)
nt.save_graph("graph/reducedGraph.html")


In [9]:
# import networkx as nx
#import networkx.readwrite.gpickle as pkl
# # To improve our results we decided to enrich the starting graph
# #betterGraph = buildFromJson('data', 'all_users.json', 'complete_friendships.json')
#betterGraph = pkl.read_gpickle("graph/better_networkx.pkl")
# # nx.write_gpickle(betterGraph, "graph/better_networkx.pkl")
# # nt = plotGraph("graph/better_networkx.pkl")
# # nt.save_graph("graph/betterGraph.html")
#d10_reduced_betterGraph = nx.ego_graph(betterGraph, 132646210 , radius=1, center=True)
#pkl.write_gpickle(d10_reduced_betterGraph, 'graph/reducedBetterGraph_networkx.pkl')
#nt = plotGraph('graph/reducedBetterGraph_networkx.pkl')
# Save and show network
#nt.save_graph("graph/reducedBetterGraph.html")
#

In [4]:
# # Then, we decided to recalculate the previous properties of the graph
# # Is graph connected?
# connected = nx.is_connected(betterGraph.to_undirected())
# if connected:
#     print("The graph IS connected")
# else:
#     print("The graph IS NOT connected")
#
# # Is graph bipartite?
# bipartite = nx.is_bipartite(betterGraph.to_undirected())
# if bipartite:
#     print("The graph IS bipartite")
# else:
#     print("The graph IS NOT bipartite")
#
# undiBetterGraph = betterGraph.to_undirected()
#
# # Center
# center = nx.center(undiBetterGraph)
# string = ""
# string += ("Center of the graph is " + str(center) + " ---> [")
# for id in center:
#     string += (undiBetterGraph.nodes[id]["screen_name"] + ", ")
# string = string[0:-2]
# string += "]"
# print(string)
#
# # Diameter
# diameter = nx.diameter(undiBetterGraph)
# print(f"Diameter = {diameter}")
#
# # Radius
# radius = nx.radius(undiBetterGraph)
# print(f"Radius = {radius}")
#

The graph IS connected
The graph IS NOT bipartite
Center of the graph is [93781753, 132646210, 3036907250, 52059998, 1556357336, 262236415, 813286, 19659370, 44196397, 571202103, 15750573, 15492359] ---> [rmit_csit, damiano10, KevinRoitero, marcopavan83, martysantarossa, Barbe_z, BarackObama, eglu81, elonmusk, Medium, Miccighel_, TEDTalks]
Diameter = 5
Radius = 3


In [None]:
# # It's possible to calculate some centrality values on the undirected graph
# bt_centrality = nx.betweenness_centrality(undiBetterGraph)
# print(f"Betweenness centrality:")
# for key, value in bt_centrality.items():
#     if value != 0:
#         print(f"\t{key}: {value}")
#
# cl_centrality = nx.closeness_centrality(undiBetterGraph)
# print(f"\n\nCloseness centrality:")
# for key, value in cl_centrality.items():
#     if value != 0:
#         print(f"\t{key}: {value}")
#
# dg_centrality = nx.degree_centrality(undiBetterGraph)
# print(f"\n\nDegree centrality:")
# for key, value in dg_centrality.items():
#     if value != 0:
#         print(f"\t{key}: {value}")
#
# # Passing to the previously created directed graph is possible to calculate some more properties
# in_dg_centrality = nx.in_degree_centrality(betterGraph)
# print(f"\n\nIn-degree centrality:")
# for key, value in in_dg_centrality.items():
#     if value != 0:
#         print(f"\t{key}: {value}")
#
# out_dg_centrality = nx.out_degree_centrality(betterGraph)
# print(f"\n\nOut-degree centrality:")
# for key, value in out_dg_centrality.items():
#     if value != 0:
#         print(f"\t{key}: {value}")
#
# pageRank = nx.pagerank(betterGraph)
# print(f"\n\nPage Rank:")
# for key, value in pageRank.items():
#     if value != 0:
#         print(f"\t{key}: {value}")
#
# hits = nx.hits(betterGraph, max_iter=500)
# print("HITS:")
# for node, authValue in hits[0].items():
#     print(f"\n\t{node}\n\t\tauth = {authValue}\n\t\t hub = {hits[1][node]}")

