In [None]:
# Setup credentials

import keys

consumer_key = keys.key
consumer_secret = keys.secret
bearer_token = keys.bearer
access_token = keys.access_token
access_secret = keys.access_secret

In [None]:
# Authentication on Twitter API

import tweepy

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
if api.verify_credentials:
    print("Auth completed successfuly!")
else:
    print("Issue occoured during authentication")

In [None]:
# Import serializer from an external module

from serializer import Serializer

# Define data directory
dataDir = "data"

In [None]:
# Get starting users info
accounts = ["mizzaro", "damiano10", "Miccighel_", "eglu81", "KevinRoitero"]
for account in accounts:
    serializer = Serializer(f'{dataDir}/{account}')
    profile = api.get_user(account)
    profile_json = profile._json
    serializer.serialize_json(f'{account}_profile.json', profile_json)

In [None]:
# Point #1 of the assignment: retrieve starting users followers and followings

for account in accounts:
    print(f"Processing @{account}")
    serializer = Serializer(f'{dataDir}/{account}')

### FOLLOWERS

    account_followers = []
    for item in tweepy.Cursor(
            api.followers,
            screen_name=account,
            skip_status=True,
            include_user_entities=False
    ).items():
        found_follower = item._json
        account_followers.append(found_follower)

    print(f"Found {len(account_followers)} followers for @{account}")
    serializer.serialize_json(f"{account}_follower.json", account_followers)

### FOLLOWINGS

    account_followings = []
    for item in tweepy.Cursor(
            api.friends,
            screen_name=account,
            skip_status=True,
            include_user_entities=False
    ).items():
        found_followings = item._json
        account_followings.append(found_followings)

    print(f"@{account} follows {len(account_followings)} users")
    serializer.serialize_json(f"{account}_following.json", account_followings)

In [None]:
# Points #2 and #3 of the assignment: pick 5 random followers of the starting users,
# retrieve 10 followers each, pick 5 random followings of the starting users
# and retrieve 10 followings each
import random

for account in accounts:
    serializer = Serializer(f'{dataDir}/{account}')
    json = serializer.read_json(f"{account}_follower.json")
    for count in range(0, 5):
        random_follower = random.choice(json)
        random_follower_screenName = random_follower["screen_name"]
        random_follower_id = random_follower["id"]
        random_follower_followers = []
        for item in tweepy.Cursor(
                api.followers,
                screen_name=random_follower_screenName,
                skip_status=True,
                include_user_entities=False
        ).items(10):
            found_follower = item._json
            random_follower_followers.append(found_follower)
        print(f"Found {len(random_follower_followers)} followers for @{random_follower_screenName}")
        serializer.serialize_json(f"random_{random_follower_id}_follower.json", random_follower_followers)

    json = serializer.read_json(f"{account}_following.json")
    for count in range(0, 5):
        random_following = random.choice(json)
        random_following_screenName = random_following["screen_name"]
        random_following_id = random_following["id"]
        random_following_followings = []
        for item in tweepy.Cursor(
                api.friends,
                screen_name=random_following_screenName,
                skip_status=True,
                include_user_entities=False
        ).items(10):
            found_friend = item._json
            random_following_followings.append(found_friend)
        print(f"@{random_following_screenName} follows {len(random_following_followings)} users")
        serializer.serialize_json(f"random_{random_following_id}_following.json", random_following_followings)

In [None]:
# Point #4 of the assignment: retrieve all encountered users' profile
from datetime import datetime
import os

error_count = 0         # Keep trace of how many errors occurred during user retrieval (account not found)
duplicate_count = 0     # Keep trace of users already encountered

all_users = []
processed_ids = []

print(f"Start at {datetime.now()}")
for account in accounts:
    print(
        f'\n\n*************************************\nProcessing {account} and his friends\n*************************************')
    serializer = Serializer(f'{dataDir}/{account}')
    with os.scandir(f'{dataDir}/{account}') as it:
        for entry in it:
            if entry.name.startswith('random') and not entry.name.endswith('profile.json'):
                print('\n\n******************')
                users_data = serializer.read_json(f"{entry.name}")
                print(f'\nProcessing {entry.name}, containing {len(users_data)} users\n******************\n\n')
                for user in users_data:
                    if user["id"] not in processed_ids:
                        try:
                            print(f'Processing {user["id"]}, user #{len(all_users) + 1}')
                            user_details = api.get_user(user["id"])._json
                            useful_user_details = {
                                "id": user_details["id"],
                                "name": user_details["name"],
                                "screen_name": user_details["screen_name"],
                                "description": user_details["description"],
                                "followers_count": user_details["followers_count"],
                                "friends_count": user_details["friends_count"],
                                "profile_image_url_https": user_details["profile_image_url_https"]
                            }
                            all_users.append(useful_user_details)
                            processed_ids.append(user_details["id"])
                        except tweepy.TweepError:
                            error_count += 1
                            print("Skipped user because of error")
                    else:
                        duplicate_count += 1
serializer = Serializer(dataDir)
print('\n\n*************************************\n')
serializer.serialize_json("all_users.json", all_users)
print('\n*************************************\n\n')
print(f'Found {error_count} errors and {duplicate_count} duplicates')

In [None]:
# As requested, before building the social network is necessary to check friendships

def get_friendship(sourceid, targetid, api):
    kind = ""

    friendship = api.show_friendship(source_id=sourceid, target_id=targetid)

    if not friendship[0].following and not friendship[0].followed_by:
        kind = "none"
    elif not friendship[0].following and friendship[0].followed_by:
        kind = "r_l"
    elif friendship[0].following and not friendship[0].followed_by:
        kind = "l_r"
    else:
        kind = "bi"

    return {
        "source_id": sourceid,
        "target_id": targetid,
        "friendship": kind
    }

serializer = Serializer(dataDir)
users = serializer.read_json("all_users.json")
edges = []
count = 0
for account in accounts:
    serializer = Serializer(f'{dataDir}/{account}')
    account_json = serializer.read_json(f"{account}_profile.json")
    account_id = account_json["id"]
    for user in users:
        if user["id"] is not account_id:
            edges.append(get_friendship(account_id, user["id"], api))
            count += 1
            print(f"Added friendship between {account} and {user['screen_name']} #{count}")
    # There'serializer also the necessity to check friendships between the random picked users at Point #2 and #3 and their followers and followings
    with os.scandir(f'{dataDir}/{account}') as it:
        for entry in it:
            if entry.name.endswith('.json') and entry.name.startswith('random'):
                fileId = int(entry.name.split("_")[1])
                json = serializer.read_json(entry.name)
                for profile in json:
                    edges.append(get_friendship(fileId, profile["id"], api))
                    count += 1
                    print(f"Added friendship between {fileId} and {profile['screen_name']} #{count}")

serializer = Serializer(dataDir)
serializer.serialize_json(f'all_friendships.json', edges)

In [1]:
# Point #5 of the assignment: build the social network
def buildFromJson(path, nodeJson, edgeJson):
    import networkx as nx
    from serializer import  Serializer

    # Create base diGraph
    diGraph = nx.DiGraph(students=["Lorenzo Bellina" "Francesco Bombassei De Bona", "Andrea Cantarutti", "Gabriele Dominici"])

    serializer = Serializer(path)

    # Read users from json and add them to the diGraph
    all_nodes = serializer.read_json(nodeJson)
    for profile in all_nodes:
        diGraph.add_node(profile["id"], follower_ing = 0, following_ing = 0, **profile)

    # Read friendships from json and add them to the diGraph
    all_edges = serializer.read_json(edgeJson)
    for friendship in all_edges:
        if friendship["friendship"] != "none":
            if friendship["friendship"] == "bi":
                diGraph.add_edge(friendship["source_id"], friendship["target_id"], type = friendship["friendship"])
                diGraph.add_edge(friendship["target_id"], friendship["source_id"], type = friendship["friendship"])
                print(f'Added bidirectional edge between {friendship["source_id"]} and {friendship["target_id"]}')
            elif friendship["friendship"] == "r_l":
                diGraph.add_edge(friendship["target_id"], friendship["source_id"], type = friendship["friendship"])
                print(f'Added edge from {friendship["target_id"]} to {friendship["source_id"]}')
            elif friendship["friendship"] == "l_r":
                diGraph.add_edge(friendship["source_id"], friendship["target_id"], type = friendship["friendship"])
                print(f'Added edge from {friendship["source_id"]} to {friendship["target_id"]}')

    # Set new attributes for every node to represent number of followers and followings
    for degree in diGraph.in_degree:
        diGraph.nodes[degree[0]]["follower_ing"] = degree[1]
    for degree in diGraph.out_degree:
        diGraph.nodes[degree[0]]["following_ing"] = degree[1]

    return diGraph

import networkx as nx
diGraph = buildFromJson('data', 'all_users.json', 'all_friendships.json')
# Save the diGraph
nx.write_gpickle(diGraph, "graph/diGraph_networkx.pkl")

Data read from path: data/all_users.json
Data read from path: data/all_friendships.json
Added edge from 1972411447 to 18932422
Added bidirectional edge between 18932422 and 93781753
Added bidirectional edge between 18932422 and 132646210
Added bidirectional edge between 18932422 and 3036907250
Added bidirectional edge between 18932422 and 52059998
Added edge from 18932422 to 190161488
Added edge from 18932422 to 293483941
Added bidirectional edge between 18932422 and 19659370
Added edge from 18932422 to 354847710
Added edge from 18932422 to 41667342
Added edge from 18932422 to 333597222
Added bidirectional edge between 18932422 and 249661913
Added edge from 94732055 to 18932422
Added edge from 116897811 to 18932422
Added edge from 18932422 to 7295362
Added edge from 18932422 to 30339571
Added edge from 18932422 to 91440853
Added bidirectional edge between 18932422 and 384127183
Added edge from 18932422 to 40576466
Added bidirectional edge between 18932422 and 14451127
Added edge from 1

In [20]:
# Point #6 of the assignment: visualize the diGraph created before
def plotGraph(pklPath):
    import networkx.readwrite.gpickle as pkl
    from pyvis.network import Network
    import random
    r = lambda : random.randint(0, 255)

    # Load the diGraph
    twitter = pkl.read_gpickle(pklPath)

    # Setup network
    nt = Network(height="100%", width="100%", bgcolor="#111111", directed=True, font_color="white", heading="Twitter Graph - Final Edition")

    # Model particles physic
    nt.barnes_hut()

    # Convert from NetworkX
    nt.from_nx(twitter)

    # Retrieve nodes weight
    neighbor_map = nt.get_adj_list()

    profs = ["Miccighel_", "mizzaro", "damiano10", "eglu81", "KevinRoitero"]

    colors = {}

    # Building nodes
    for node in nt.nodes:
        info = "nome utente: " + node['screen_name'] + "<br>" + "id: " + str(node['id'])
        map_length = len(neighbor_map[node["id"]])
        node['title'] = info
        node['label'] = node['name']
        color = '#%02X%02X%02X' % (r(),r(),r())
        colors[node["id"]] = color
        if node['screen_name'] in profs:
            node['shape'] = 'circularImage'
            node['image'] = node['profile_image_url_https']
            node['mass'] = map_length
        else:
            node['color'] = color
        node['size'] = map_length


    # Edges color
    for edge in nt.edges:
        color = colors[edge['from']]
        edge['color'] = color

    return nt

nt = plotGraph("graph/diGraph_networkx.pkl")
# Save and show network
nt.save_graph("graph/diGraph.html")

In [24]:
# To study various properties of the graph is recommended to convert the directed graph in an undirected graph

graph = diGraph.to_undirected()
nx.write_gpickle(graph, "graph/graph_networkx.pkl")
nt.show("graph/graph.html")

In [None]:
# Point #7 of the assignment

# Is graph connected?
connected = nx.is_connected(graph)
if connected:
    print("The graph IS connected")
else:
    print("The graph IS NOT connected")

# Is graph bipartite?
bipartite = nx.is_bipartite(graph)
if bipartite:
    print("The graph IS bipartite")
else:
    print("The graph IS NOT bipartite")

In [None]:
# Point #8 of the assignment: find center, diameter and radius

# Center
center = nx.center(graph)
string = ""
string += ("Center of the graph is " + str(center) + " ---> [")
for id in center:
    string += (graph.nodes[id]["screen_name"] + ", ")
string = string[0:-2]
string += "]"
print(string)

# Diameter
diameter = nx.diameter(graph)
print(f"Diameter = {diameter}")

# Radius
radius = nx.radius(graph)
print(f"Radius = {radius}")

In [30]:
# Point #9 of the assignment: find various centrality measures

# !! The following properties will be printed only if NOT equals to zero !!
# It's possible to calculate some centrality values on the undirected graph
bt_centrality = nx.betweenness_centrality(graph)
print(f"Betweenness centrality:")
for key, value in bt_centrality.items():
    if value != 0:
        print(f"\t{key}: {value}")

cl_centrality = nx.closeness_centrality(graph)
print(f"\n\nCloseness centrality:")
for key, value in cl_centrality.items():
    if value != 0:
        print(f"\t{key}: {value}")

dg_centrality = nx.degree_centrality(graph)
print(f"\n\nDegree centrality:")
for key, value in dg_centrality.items():
    if value != 0:
        print(f"\t{key}: {value}")

# Passing to the previously created directed graph is possible to calculate some more properties
in_dg_centrality = nx.in_degree_centrality(diGraph)
print(f"\n\nIn-degree centrality:")
for key, value in in_dg_centrality.items():
    if value != 0:
        print(f"\t{key}: {value}")

out_dg_centrality = nx.out_degree_centrality(diGraph)
print(f"\n\nOut-degree centrality:")
for key, value in out_dg_centrality.items():
    if value != 0:
        print(f"\t{key}: {value}")

pageRank = nx.pagerank(diGraph)
print(f"\n\nPage Rank:")
for key, value in pageRank.items():
    if value != 0:
        print(f"\t{key}: {value}")

hits = nx.hits(diGraph, max_iter=500)
print("HITS:")
for node, authValue in hits[0].items():
    print(f"\n\t{node}\n\t\tauth = {authValue}\n\t\t hub = {hits[1][node]}")

Betweenness centrality:
	1972411447: 0.0016341296676322958
	93781753: 0.0019537588879844597
	132646210: 0.588940492724657
	3036907250: 0.1474736827095397
	52059998: 0.0016341296676322958
	2176025565: 0.006364343584751821
	944443345: 0.005728823497501905
	904174368: 0.006364343584751825
	220017829: 0.006364343584751825
	190161488: 0.0016341296676322958
	150346051: 0.005093201824560291
	293483941: 0.0016341296676322958
	417189950: 0.005148893014620762
	130667410: 0.006364343584751825
	59913797: 0.006364343584751825
	5869132: 0.006364343584751825
	19659370: 0.446222586885579
	354847710: 0.0016341296676322958
	433226744: 0.006364343584751825
	41667342: 0.0016341296676322958
	15750573: 0.3095135722871332
	333597222: 0.0028767078098715882
	3102522680: 0.0006416297409933419
	301178769: 4.571356126318151e-06
	1174325520294715392: 0.006364343584751825
	7295362: 0.005197973195173693
	1155634482629791746: 0.0004381943520736603
	1148895885729947648: 0.006364343584751825
	384127183: 0.0057377342557

	105925630: 0.37308286767328497
	372830899: 0.37308286767328497
	190778398: 0.37308286767328497
	272623675: 0.37308286767328497
	199298537: 0.37308286767328497
	195658424: 0.37308286767328497
	19204539: 0.37308286767328497
	43652382: 0.37308286767328497
	93494670: 0.38319697154719745
	174072879: 0.37308286767328497
	9571292: 0.37308286767328497
	60832858: 0.37308286767328497
	18843675: 0.37308286767328497
	111373125: 0.37308286767328497
	16948477: 0.37308286767328497
	442245081: 0.37308286767328497
	399089660: 0.37308286767328497
	419071399: 0.37308286767328497
	118263124: 0.38319697154719745
	393652166: 0.38319697154719745
	250059685: 0.37308286767328497
	51215183: 0.37308286767328497
	403731744: 0.37308286767328497
	106418930: 0.37308286767328497
	131498466: 0.37308286767328497
	20167623: 0.37308286767328497
	352053266: 0.38319697154719745
	391247062: 0.37308286767328497
	382393: 0.37308286767328497
	22749856: 0.37308286767328497
	36515907: 0.37308286767328497
	295344317: 0.373082867

	80269062: 0.00031867431485022306
	14103970: 0.00031867431485022306
	994886035086761984: 0.0006373486297004461
	933679231220850688: 0.00031867431485022306
	458399248: 0.0006373486297004461
	77436536: 0.00031867431485022306
	14575354: 0.00031867431485022306
	450842550: 0.0006373486297004461
	920700510306529280: 0.0015933715742511153
	977250301: 0.00031867431485022306
	44860754: 0.00031867431485022306
	64732883: 0.00031867431485022306
	949493208702099456: 0.00031867431485022306
	45827605: 0.00031867431485022306
	941478255789457408: 0.00031867431485022306
	188756842: 0.00031867431485022306
	885944485615738881: 0.0006373486297004461
	934851466082357249: 0.0009560229445506692
	917421341359464450: 0.00031867431485022306
	1851621: 0.0006373486297004461
	911454651417088000: 0.00031867431485022306
	918158086858211329: 0.00031867431485022306
	264103867: 0.00031867431485022306
	923649456981135367: 0.0009560229445506692
	917259707236208640: 0.00031867431485022306
	143187995: 0.00031867431485022306

	514189512: 0.00031867431485022306
	485447824: 0.00031867431485022306
	73705954: 0.003505417463352454
	113339417: 0.00031867431485022306
	492216731: 0.00031867431485022306
	274134534: 0.00031867431485022306
	467852343: 0.00031867431485022306
	289284899: 0.00031867431485022306
	423567471: 0.00031867431485022306
	431192409: 0.00031867431485022306
	69218199: 0.00031867431485022306
	408986233: 0.00031867431485022306
	136088911: 0.00031867431485022306
	95630269: 0.00031867431485022306
	253587073: 0.00031867431485022306
	226298062: 0.00031867431485022306
	16102444: 0.00031867431485022306
	11649702: 0.00031867431485022306
	202740551: 0.00031867431485022306
	65605637: 0.00031867431485022306
	46932973: 0.00031867431485022306
	11069462: 0.00031867431485022306
	6015842: 0.00031867431485022306
	23999540: 0.00031867431485022306
	17278474: 0.00031867431485022306
	209100025: 0.0006373486297004461
	201854523: 0.00031867431485022306
	151208743: 0.00031867431485022306
	191728278: 0.00031867431485022306


	3232640646: 0.00031867431485022306
	1149491203: 0.0006373486297004461
	3775401076: 0.00031867431485022306
	846672306378326016: 0.00031867431485022306
	880523990883848192: 0.00031867431485022306
	3406278903: 0.00031867431485022306
	822431809: 0.00031867431485022306
	446719282: 0.00031867431485022306
	804701761017671680: 0.0009560229445506692
	342578839: 0.00031867431485022306
	4863093417: 0.0009560229445506692
	850170113504325632: 0.00031867431485022306
	4027105402: 0.0006373486297004461
	26240101: 0.0006373486297004461
	801059640951836672: 0.00031867431485022306
	839149376643686401: 0.00031867431485022306
	1929876336: 0.00031867431485022306
	9430302: 0.0006373486297004461
	832267168276475904: 0.00031867431485022306
	833967951447486465: 0.00031867431485022306
	33284619: 0.00031867431485022306
	16851370: 0.00031867431485022306
	821666958680719361: 0.00031867431485022306
	3482836634: 0.00031867431485022306
	16319797: 0.00031867431485022306
	29735775: 0.00031867431485022306
	1072910977: 0

	17839109: 0.00031867431485022306
	1617603914: 0.00031867431485022306
	289222668: 0.00031867431485022306
	125483940: 0.0025493945188017845
	568542838: 0.003505417463352454
	25481069: 0.0009560229445506692
	153506820: 0.00031867431485022306
	217377296: 0.0009560229445506692
	2178067878: 0.00031867431485022306
	58880930: 0.0006373486297004461
	16868154: 0.0012746972594008922
	606265290: 0.0006373486297004461
	36417099: 0.00031867431485022306
	1365711606: 0.0006373486297004461
	1074540169: 0.00031867431485022306
	14049302: 0.00031867431485022306
	17743823: 0.0009560229445506692
	91168371: 0.00031867431485022306
	2475224005: 0.0009560229445506692
	54875928: 0.0009560229445506692
	1339637532: 0.0009560229445506692
	3332231: 0.0006373486297004461
	25134469: 0.0009560229445506692
	19677430: 0.0006373486297004461
	130671142: 0.0012746972594008922
	439572655: 0.00031867431485022306
	547871798: 0.00031867431485022306
	1354848524: 0.00031867431485022306
	203250712: 0.00031867431485022306
	5853906



Page Rank:
	2572511777: 0.00012251516596495393
	14616323: 0.00012251516596495393
	1972411447: 0.00012251516596495393
	369085369: 0.00012251516596495393
	1320653768346816512: 0.00012251516596495393
	1218287414965350401: 0.00012251516596495393
	93781753: 0.0003981458690139903
	700064764: 0.0003258135304692023
	778856886: 0.00044133128392738507
	132646210: 0.11408437335157305
	1064203950717583360: 0.00012251516596495393
	1084900963306098689: 0.00012251516596495393
	1037864733360226304: 0.00012251516596495393
	1035807940719112192: 0.00012251516596495393
	1023655925742268426: 0.00012251516596495393
	1401748656: 0.0003258135304692023
	3036907250: 0.012660779498548318
	959849838212014081: 0.00012251516596495393
	636089336: 0.00012251516596495393
	390380093: 0.00012251516596495393
	844218954088435712: 0.00012251516596495393
	194989299: 0.0003258135304692023
	948483885209477121: 0.00012251516596495393
	898161154169679873: 0.00012251516596495393
	71502653: 0.00012251516596495393
	9187980402820

	114045446: 0.0002380329194231367
	135801478: 0.0002380329194231367
	91367103: 0.0002934600305363596
	10759032: 0.0002934600305363596
	42399260: 0.0002380329194231367
	783214: 0.0002802288448241309
	297091554: 0.0002380329194231367
	125792630: 0.0002380329194231367
	10627282: 0.0002380329194231367
	197313522: 0.0002380329194231367
	633: 0.0002380329194231367
	181720081: 0.0002380329194231367
	9320372: 0.0002380329194231367
	249695469: 0.0002380329194231367
	271909348: 0.0002380329194231367
	23973296: 0.0002380329194231367
	37622738: 0.0002380329194231367
	126718539: 0.0002380329194231367
	11595422: 0.0002380329194231367
	12755532: 0.0002380329194231367
	41259586: 0.0002380329194231367
	47602120: 0.0002934600305363596
	166499569: 0.0002380329194231367
	135047672: 0.0002380329194231367
	8077952: 0.0002380329194231367
	21457289: 0.0003356559559373538
	52869134: 0.0002380329194231367
	112415325: 0.0002380329194231367
	45598113: 0.0002380329194231367
	36659063: 0.0002380329194231367
	139377

HITS:

	2572511777
		auth = 0.0001333528128764136
		 hub = 0.0

	14616323
		auth = 0.000785515535999838
		 hub = 0.0

	1972411447
		auth = 0.0003653453313707706
		 hub = 0.0

	369085369
		auth = 0.0001333528128764136
		 hub = 0.0

	1320653768346816512
		auth = 0.0001333528128764136
		 hub = 0.0

	1218287414965350401
		auth = 0.0001333528128764136
		 hub = 0.0

	93781753
		auth = 0.0011299787923272726
		 hub = 0.0011694717551780764

	700064764
		auth = 0.0001333528128764136
		 hub = 2.314454642696147e-05

	778856886
		auth = 0.000785515535999838
		 hub = 0.0007432496698403093

	132646210
		auth = 0.06967031425543092
		 hub = 0.006849969846635

	1064203950717583360
		auth = 0.0001333528128764136
		 hub = 0.0

	1084900963306098689
		auth = 0.0001333528128764136
		 hub = 0.0

	1037864733360226304
		auth = 0.0001333528128764136
		 hub = 0.0

	1035807940719112192
		auth = 0.0001333528128764136
		 hub = 0.0

	1023655925742268426
		auth = 0.0001333528128764136
		 hub = 0.0

	1401748656
		auth 


	847801154473996288
		auth = 0.0
		 hub = 8.741879879715093e-06

	2314055840
		auth = 0.0
		 hub = 8.741879879715093e-06

	194985826
		auth = 6.869399692184339e-05
		 hub = 8.741879879715093e-06

	596420812
		auth = 0.0
		 hub = 8.741879879715093e-06

	65416867
		auth = 0.0
		 hub = 8.741879879715093e-06

	65973777
		auth = 7.144914955698451e-05
		 hub = 9.557424777427175e-06

	2165499508
		auth = 0.0
		 hub = 9.557424777427175e-06

	1316453529058328576
		auth = 0.0
		 hub = 9.557424777427175e-06

	241204643
		auth = 0.0
		 hub = 9.557424777427175e-06

	57371662
		auth = 0.0
		 hub = 9.557424777427175e-06

	255812611
		auth = 0.0
		 hub = 9.557424777427175e-06

	80061912
		auth = 0.0
		 hub = 9.557424777427175e-06

	1062346772016742403
		auth = 7.144914955698451e-05
		 hub = 9.557424777427175e-06

	980310324
		auth = 7.144914955698451e-05
		 hub = 9.557424777427175e-06

	4889039829
		auth = 6.917141152037639e-05
		 hub = 0.0

	563658239
		auth = 6.917141152037639e-05
		 hub = 0.0

	11

In [2]:
# Point #10 of the assignment: generate damiano10 subgraph
import networkx.readwrite.gpickle as pkl
import networkx as nx
diGraph = pkl.read_gpickle("graph/diGraph_networkx.pkl")
d10_reduced_graph = nx.ego_graph(diGraph, 132646210 , radius=1, center=True)
pkl.write_gpickle(d10_reduced_graph, 'graph/reducedGraph_networkx.pkl')


nt = plotGraph('graph/reducedGraph_networkx.pkl')
# Save and show network
nt.save_graph("graph/reducedGraph.html")

NameError: name 'plotGraph' is not defined

In [19]:
import networkx.algorithms.approximation as alg

max_clique = alg.max_clique(d10_reduced_graph)
print(max_clique)

large_clique_size = alg.large_clique_size(d10_reduced_graph)
print(large_clique_size)

{132646210, 21192987, 19659370, 18932422}
5


In [28]:
# Point #11 of the assignment: generate the minimum edge cover of the graph

min_edge_cover = nx.min_edge_cover(graph)

min_tree_cover = nx.from_edgelist(min_edge_cover)
pkl.write_gpickle(min_tree_cover, 'graph/min_tree_cover.pkl')

def plotGraph(pklPath):
    import networkx.readwrite.gpickle as pkl
    from pyvis.network import Network
    import random
    r = lambda : random.randint(0, 255)

    # Load the diGraph
    twitter = pkl.read_gpickle(pklPath)

    # Setup network
    nt = Network(height="100%", width="100%", bgcolor="#111111", directed=True, font_color="white", heading="Twitter Graph - Final Edition")

    # Model particles physic
    nt.barnes_hut()

    # Convert from NetworkX
    nt.from_nx(twitter)

    # Retrieve nodes weight
    neighbor_map = nt.get_adj_list()

    profs = ["Miccighel_", "mizzaro", "damiano10", "eglu81", "KevinRoitero"]

    colors = {}

    # Building nodes
    for node in nt.nodes:
        #info = "nome utente: " + node['screen_name'] + "<br>" + "id: " + str(node['id'])
        map_length = len(neighbor_map[node["id"]])
        #node['title'] = info
        #node['label'] = node['name']
        color = '#%02X%02X%02X' % (r(),r(),r())
        colors[node["id"]] = color
        node['color'] = color
        node['size'] = map_length


    # Edges color
    for edge in nt.edges:
        color = colors[edge['from']]
        edge['color'] = color

    return nt


nt = plotGraph('graph/min_tree_cover.pkl')
# Save and show network
nt.save_graph("graph/min_tree_cover.html")

In [None]:
# Point #12 of the assignment: calculate omega and sigma of the graph

omega = nx.omega(G, niter=10, nrand=2) 
print(omega)

sigma = nx.sigma(G, niter=10, nrand=2) 
print(sigma)

In [36]:
# Point #13 of the assignment: calculate Pearson and Kendall correlation coefficient of centrality measures
from scipy.stats import pearsonr, kendalltau
measures = {"bt_centrality": bt_centrality, "cl_centrality": cl_centrality, "dg_centrality": dg_centrality,
            "in_dg_centrality": in_dg_centrality, "out_dg_centrality": out_dg_centrality,
            "pageRank": pageRank, "hits":hits}
results = {}
for i in measures.keys():
    for j in measures.keys():
        if i != j:
            results[f"pearson-{i}-{j}"], _ = pearsonr(measures[i], measures[j])
            results[f"kendall-{i}-{j}"], _ = kendalltau(measures[i], measures[j])

IndexError: too many indices for array

In [9]:
# import networkx as nx
#import networkx.readwrite.gpickle as pkl
# # To improve our results we decided to enrich the starting graph
# #betterGraph = buildFromJson('data', 'all_users.json', 'complete_friendships.json')
#betterGraph = pkl.read_gpickle("graph/better_networkx.pkl")
# # nx.write_gpickle(betterGraph, "graph/better_networkx.pkl")
# # nt = plotGraph("graph/better_networkx.pkl")
# # nt.save_graph("graph/betterGraph.html")
#d10_reduced_betterGraph = nx.ego_graph(betterGraph, 132646210 , radius=1, center=True)
#pkl.write_gpickle(d10_reduced_betterGraph, 'graph/reducedBetterGraph_networkx.pkl')
#nt = plotGraph('graph/reducedBetterGraph_networkx.pkl')
# Save and show network
#nt.save_graph("graph/reducedBetterGraph.html")
#

In [4]:
# # Then, we decided to recalculate the previous properties of the graph
# # Is graph connected?
# connected = nx.is_connected(betterGraph.to_undirected())
# if connected:
#     print("The graph IS connected")
# else:
#     print("The graph IS NOT connected")
#
# # Is graph bipartite?
# bipartite = nx.is_bipartite(betterGraph.to_undirected())
# if bipartite:
#     print("The graph IS bipartite")
# else:
#     print("The graph IS NOT bipartite")
#
# undiBetterGraph = betterGraph.to_undirected()
#
# # Center
# center = nx.center(undiBetterGraph)
# string = ""
# string += ("Center of the graph is " + str(center) + " ---> [")
# for id in center:
#     string += (undiBetterGraph.nodes[id]["screen_name"] + ", ")
# string = string[0:-2]
# string += "]"
# print(string)
#
# # Diameter
# diameter = nx.diameter(undiBetterGraph)
# print(f"Diameter = {diameter}")
#
# # Radius
# radius = nx.radius(undiBetterGraph)
# print(f"Radius = {radius}")
#

The graph IS connected
The graph IS NOT bipartite
Center of the graph is [93781753, 132646210, 3036907250, 52059998, 1556357336, 262236415, 813286, 19659370, 44196397, 571202103, 15750573, 15492359] ---> [rmit_csit, damiano10, KevinRoitero, marcopavan83, martysantarossa, Barbe_z, BarackObama, eglu81, elonmusk, Medium, Miccighel_, TEDTalks]
Diameter = 5
Radius = 3


In [None]:
# # It's possible to calculate some centrality values on the undirected graph
# bt_centrality = nx.betweenness_centrality(undiBetterGraph)
# print(f"Betweenness centrality:")
# for key, value in bt_centrality.items():
#     if value != 0:
#         print(f"\t{key}: {value}")
#
# cl_centrality = nx.closeness_centrality(undiBetterGraph)
# print(f"\n\nCloseness centrality:")
# for key, value in cl_centrality.items():
#     if value != 0:
#         print(f"\t{key}: {value}")
#
# dg_centrality = nx.degree_centrality(undiBetterGraph)
# print(f"\n\nDegree centrality:")
# for key, value in dg_centrality.items():
#     if value != 0:
#         print(f"\t{key}: {value}")
#
# # Passing to the previously created directed graph is possible to calculate some more properties
# in_dg_centrality = nx.in_degree_centrality(betterGraph)
# print(f"\n\nIn-degree centrality:")
# for key, value in in_dg_centrality.items():
#     if value != 0:
#         print(f"\t{key}: {value}")
#
# out_dg_centrality = nx.out_degree_centrality(betterGraph)
# print(f"\n\nOut-degree centrality:")
# for key, value in out_dg_centrality.items():
#     if value != 0:
#         print(f"\t{key}: {value}")
#
# pageRank = nx.pagerank(betterGraph)
# print(f"\n\nPage Rank:")
# for key, value in pageRank.items():
#     if value != 0:
#         print(f"\t{key}: {value}")
#
# hits = nx.hits(betterGraph, max_iter=500)
# print("HITS:")
# for node, authValue in hits[0].items():
#     print(f"\n\t{node}\n\t\tauth = {authValue}\n\t\t hub = {hits[1][node]}")

