In [1]:
import warnings; warnings.simplefilter('ignore')
import matplotlib.pyplot as plt
%matplotlib inline
import networkx as nx
import pandas as pd
import numpy as np
import infomap
import json
from TweetMiner import TweetMiner

In [2]:
# Note
# In this part we use the pre-collected dataset to generate the 1.5-degree ego graph and 2-degree ego graph

In [3]:
# Inherit from TweetMiner to reuse its JSON loading helper (load_json)

class Graph_Generation(TweetMiner):
    """Thin TweetMiner subclass; this notebook only calls its inherited ``load_json``."""

    def __init__(self):
        """Intentionally do nothing, so ``TweetMiner.__init__`` is not run.

        NOTE(review): presumably the parent initializer sets up state that is
        not needed for offline graph building - confirm against TweetMiner.
        """
    

In [4]:
# Loader instance; Graph_Generation.__init__ takes no arguments
GG = Graph_Generation()

In [5]:
# Load the three pre-collected JSON datasets, in this order:
#   friends_2_id_dict : relational data, first-layer friend id (str) -> list of that friend's friend ids
#   WQ                : attribute data, looked up by screen name when weighting nodes and edges
#   screen_name_dict  : user id (str) -> screen name
friends_2_id_dict, WQ, screen_name_dict = (
    GG.load_json(json_name)
    for json_name in (
        'Friends_2_id_dict.json',
        'Weight_quantity.json',
        'Friends_1_screen_name.json',
    )
)

In [6]:
# Create the two empty directed graphs:
#   G_1_5 -> 1.5-degree ego graph (nodes are screen names)
#   G_2   -> 2-degree ego graph (nodes are user id strings)
G_1_5, G_2 = nx.DiGraph(), nx.DiGraph()


In [7]:
# Collect the first-layer friend ids (the keys of friends_2_id_dict) as ints,
# both in dictionary order (list) and for fast membership tests (set)

friend_1_list = [int(friend_key) for friend_key in friends_2_id_dict]
friend_1_set = set(friend_1_list)

In [8]:
# Register the root (ego) user alongside the first-layer friends so that
# edges pointing back to the root are kept in the 1.5-degree graph

root_user_id = 2775998016
root_screen_name = 'HuXijin_GT'

friend_1_set.add(root_user_id)
screen_name_dict[str(root_user_id)] = root_screen_name

In [9]:
# Add one edge from the root user to every first-layer friend:
# by screen name in G_1_5, by id string in G_2

for friend_id in friend_1_list:
    friend_key = str(friend_id)
    G_1_5.add_edge('HuXijin_GT', screen_name_dict[friend_key])
    G_2.add_edge('2775998016', friend_key)

In [10]:
# Add an edge from each first-layer friend to each of their own friends.
# G_2 receives every such edge; G_1_5 only those whose target is itself in
# friend_1_set (first-layer friends plus the root user).

for source_key, target_ids in friends_2_id_dict.items():
    for target_id in target_ids:
        target_key = str(target_id)
        G_2.add_edge(source_key, target_key)
        if target_id not in friend_1_set:
            continue
        G_1_5.add_edge(screen_name_dict[source_key], screen_name_dict[target_key])

In [11]:
# Snapshot the node and edge lists of both graphs and print a short summary.
# The lists are reused by the weighting cells further down.

Nodes_list_1_5, Edges_list_1_5 = list(G_1_5.nodes), list(G_1_5.edges)
Nodes_list_2, Edges_list_2 = list(G_2.nodes), list(G_2.edges)

# First five nodes of each graph
print("Nodes_1.5: ", Nodes_list_1_5[:5])
print("Nodes_2: ", Nodes_list_2[:5])
print("")

# Size of the 1.5-degree graph
print("Nodes_1.5 Number: ", len(Nodes_list_1_5))
print("Edges_1.5 Number: ", len(Edges_list_1_5))
print("")

# Size of the 2-degree graph
print("Nodes_2 Number: ", len(Nodes_list_2))
print("Edges_2 Number: ", len(Edges_list_2))

Nodes_1.5:  ['HuXijin_GT', 'KellyCNBC', 'howardfineman', 'suhasinih', 'Tom_Winter']
Nodes_2:  ['2775998016', '1164674447506386944', '14962779', '25966181', '27101698']

Nodes_1.5 Number:  492
Edges_1.5 Number:  28148

Nodes_2 Number:  233083
Edges_2 Number:  641365


In [12]:
# ********** Add nodes weight **********
# Node weight = status_number (read from WQ by screen name) scaled by W_node

W_node = 1
for screen_name in Nodes_list_1_5:
    status_count = WQ[screen_name]['status_number']
    G_1_5.nodes[screen_name]['weight'] = status_count * W_node

In [13]:
# ********** Add edges weight **********
#
# Edge_weight_ab = (1 + (w_1 * RT_ab) + (w_2 * QT_ab) + (w_3 * RP_ab))
#                  * (1 + w_4 * Rff_a_factor)
#                  * (1 + w_5 * (FL_a - FL_b) / (FL_a + FL_b))
# (the leading 1 is the initial weight assigned to every edge below)
# RT_ab = number of A retweets of B
# QT_ab = number of A quotes of B
# RP_ab = number of A replies to B
# Rff_a = follower-to-following ratio of A
# Rff_a_factor = step value looked up from Rff_a in RFF_FACTOR_STEPS
# FL_a = follower count of A
# FL_b = follower count of B

# 0-4000  (NOTE(review): original annotation; its meaning is not evident from this cell)
w_1 = 1
w_2 = 2
w_3 = 4

w_4 = 1
w_5 = 0.4

# (exclusive upper bound on Rff_a, factor) pairs, checked in order;
# a ratio at or above the last bound gets RFF_FACTOR_MAX.
RFF_FACTOR_STEPS = (
    (1, 0),
    (10, 0.1),
    (100, 0.2),
    (1000, 0.4),
    (10000, 0.8),
    (100000, 1.6),
)
RFF_FACTOR_MAX = 3.2

# Initialize edge weights
for edge in Edges_list_1_5:
    G_1_5.edges[edge]['weight'] = 1

for node in Nodes_list_1_5:
    # Materialize the successors once; the set is for membership tests,
    # the list for ordered iteration.
    Suc_list = list(G_1_5.successors(node))
    Suc_set = set(Suc_list)
    FL_a = WQ[node]['followers_number']
    # The misspelled key is the one the dataset is read with; do not "correct" it here.
    FR_a = WQ[node]['freinds_number']
    # An account that follows nobody would divide by zero; treat it as
    # following one account. Unchanged for any FR_a >= 1.
    Rff_a = FL_a / max(FR_a, 1)

    # Interaction terms: each occurrence of a successor in A's retweet /
    # quoted / reply list adds the matching weight to edge A -> successor.
    for interaction_key, interaction_weight in (
        ('retweet', w_1),   # w_1 * RT_ab
        ('quoted', w_2),    # w_2 * QT_ab
        ('reply', w_3),     # w_3 * RP_ab
    ):
        for target in WQ[node][interaction_key]:
            if target in Suc_set:
                G_1_5.edges[(node, target)]['weight'] += interaction_weight

    # (1 + w_4 * Rff_a_factor): depends only on A, so compute it once per
    # node instead of once per outgoing edge.
    Rff_a_factor = next(
        (factor for upper_bound, factor in RFF_FACTOR_STEPS if Rff_a < upper_bound),
        RFF_FACTOR_MAX,
    )
    rff_multiplier = 1 + w_4 * Rff_a_factor

    for friend in Suc_list:
        FL_b = WQ[friend]['followers_number']
        follower_total = FL_a + FL_b
        # (1 + w_5 * (FL_a - FL_b) / (FL_a + FL_b)); when both accounts have
        # zero followers there is no imbalance, so the term is 0 rather than
        # a division by zero.
        if follower_total:
            follower_balance = w_5 * (FL_a - FL_b) / follower_total
        else:
            follower_balance = 0

        # Same per-edge multiplication order as the formula above.
        edge_attrs = G_1_5.edges[(node, friend)]
        edge_attrs['weight'] *= rff_multiplier
        edge_attrs['weight'] *= (1 + follower_balance)

In [14]:
# ********** Check the Results **********
# Print the distinct edge weights (rounded to 2 decimals) in ascending order

weight_number = sorted(
    {round(G_1_5.edges[edge_key]['weight'], 2) for edge_key in G_1_5.edges}
)
print(weight_number)

[0.6, 0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69, 0.7, 0.71, 0.72, 0.73, 0.74, 0.75, 0.76, 0.77, 0.78, 0.79, 0.8, 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87, 0.88, 0.89, 0.9, 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1.0, 1.01, 1.02, 1.03, 1.04, 1.05, 1.06, 1.07, 1.08, 1.09, 1.1, 1.11, 1.12, 1.13, 1.14, 1.15, 1.16, 1.17, 1.18, 1.19, 1.2, 1.21, 1.22, 1.23, 1.24, 1.25, 1.26, 1.27, 1.28, 1.29, 1.3, 1.31, 1.32, 1.33, 1.34, 1.35, 1.36, 1.37, 1.38, 1.39, 1.4, 1.41, 1.42, 1.43, 1.44, 1.45, 1.46, 1.47, 1.48, 1.49, 1.5, 1.51, 1.52, 1.53, 1.54, 1.55, 1.56, 1.57, 1.58, 1.59, 1.6, 1.61, 1.62, 1.63, 1.64, 1.65, 1.66, 1.67, 1.68, 1.69, 1.7, 1.71, 1.72, 1.73, 1.74, 1.75, 1.76, 1.77, 1.78, 1.79, 1.8, 1.81, 1.82, 1.83, 1.84, 1.85, 1.86, 1.87, 1.88, 1.89, 1.9, 1.91, 1.92, 1.93, 1.94, 1.95, 1.96, 1.97, 1.98, 1.99, 2.0, 2.01, 2.02, 2.03, 2.04, 2.05, 2.06, 2.07, 2.08, 2.09, 2.1, 2.11, 2.12, 2.13, 2.14, 2.15, 2.16, 2.17, 2.18, 2.19, 2.2, 2.21, 2.22, 2.23, 2.24, 2.25, 2.26, 2.27, 2.28, 2.

In [15]:
# Export both graphs in GEXF format, which keeps the node and edge attributes

for graph_to_save, gexf_path in (
    (G_1_5, "Ego_1.5_Graph_HuXijin_GT.gexf"),
    (G_2, "Ego_2_Graph_HuXijin_GT.gexf"),
):
    nx.write_gexf(graph_to_save, gexf_path)