In [125]:
import pandas as pd
import networkx as nx
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import normalize

In [91]:
# Read the four input tables. The tweets table keeps pandas' default integer
# index; the three attribute tables use their first CSV column as the index.
tweets = pd.read_csv("tweets_new.csv")
followers, linked, retweets = [
    pd.read_csv(csv_path, index_col=0)
    for csv_path in ('followers.csv', 'linked.csv', 'retweets.csv')
]

In [92]:
# Build an undirected graph with one edge per row of `tweets`, linking the
# value in 'Column1' to the value in 'Column2'. add_edges_from creates any
# endpoint node that does not exist yet, so no separate node pass is needed.
# Zipping the two columns replaces the per-row `.iloc` lookups (each of which
# builds a temporary Series) and the Python-2-only `xrange`.
G = nx.Graph()
G.add_edges_from(zip(tweets['Column1'], tweets['Column2']))

In [93]:
# Per-node graph metrics used as influence features.
# list(...) / dict(...) turn the results into plain containers on every
# networkx release: 1.x already returns a list and a dict, whereas 2.x returns
# NodeView / DegreeView objects, and a DegreeView has no .values() method.
nodes = list(G.nodes())
degress = dict(G.degree())                  # node -> degree (name kept: the DataFrame cell reads `degress`)
closeness = nx.closeness_centrality(G)      # node -> closeness centrality
betweenness = nx.betweenness_centrality(G)  # node -> betweenness centrality

In [126]:
# Create the per-node feature table.
# Every metric is looked up by node, so rows stay aligned even when the metric
# containers iterate in different orders; taking .values() of separate dicts
# only lines up when their iteration orders happen to coincide.
node_names = list(nodes)
degree_of = dict(degress)  # accepts a plain dict or a networkx DegreeView
nodes_metrics = pd.DataFrame(
    {'node': node_names,
     'degree': [degree_of[n] for n in node_names],
     'closeness': [closeness[n] for n in node_names],
     'betweenness': [betweenness[n] for n in node_names]},
    # Pin the column order instead of relying on how pandas orders dict keys
    # (alphabetical in old releases, insertion order in newer ones); the
    # scaling cell relabels columns by position.
    columns=['betweenness', 'closeness', 'degree', 'node'])

# Left-join the three attribute tables on the node name, discard the join-key
# columns the merges leave behind, and keep one row per node.
nodes_metrics = (
    nodes_metrics
    .merge(followers, left_on='node', right_on='index', how='left')
    .merge(linked, left_on='node', right_on='index', how='left')
    .merge(retweets, left_on='node', right_on='index', how='left')
    .drop(['index_x', 'index_y', 'index'], axis=1)
    .drop_duplicates('node')
    # Nodes missing from an attribute table get 0 for that attribute.
    .fillna(0)
)

In [127]:
# Standardization and normalization.
# The three graph metrics are selected by name and the remaining (merged)
# attribute columns follow in their existing order, so the labels assigned
# below match the data however pandas ordered the columns of nodes_metrics.
graph_cols = ['betweenness', 'closeness', 'degree']
attribute_cols = [col for col in nodes_metrics.columns
                  if col != 'node' and col not in graph_cols]
features = nodes_metrics[graph_cols + attribute_cols]

# StandardScaler standardizes each column; normalize() (with its defaults)
# then rescales each row to unit L2 norm.
scaler = StandardScaler()
scaled_metrics = pd.DataFrame(
    normalize(scaler.fit_transform(features)),
    index=nodes_metrics.node,
    # Attribute columns are relabeled by position -- presumably one column
    # each from followers, linked and retweets, in merge order.
    columns=graph_cols + ['followers', 'listed_count', 'retweets']
).reset_index()

From Part I, we know that the weights are [0.22816118, 0.2526551, 0.23967392, 0.27950979] for retweets, listed_count, follower_count, and the sum of betweenness, closeness, and degree, respectively.

In [129]:
# Weighted influence score; the four weights are the ones quoted from Part I.
w_retweets, w_listed, w_followers, w_network = 0.22816118, 0.2526551, 0.23967392, 0.27950979

# The three graph metrics share a single weight, so add them up first.
network_position = (scaled_metrics['betweenness']
                    + scaled_metrics['degree']
                    + scaled_metrics['closeness'])

scaled_metrics['influence_score'] = (
    w_retweets * scaled_metrics['retweets']
    + w_listed * scaled_metrics['listed_count']
    + w_followers * scaled_metrics['followers']
    + w_network * network_position
)

These are the top 50 influencers:

In [132]:
# Show the 50 nodes with the highest influence score, best first.
(
    scaled_metrics[['node', 'influence_score']]
    .sort_values('influence_score', ascending=False)
    .head(50)
)

Unnamed: 0,node,influence_score
1140,Annan26,0.482334
1056,membranesoundin,0.482193
1580,indiumpick,0.477887
482,evelinadinolfo1,0.477318
2923,Natgeo127Ctk,0.465243
2852,toisfigexic1977,0.451346
687,TheJasonJenkins,0.450878
2704,BubbaOller,0.44936
993,_Alex_Il,0.447502
504,paynmaxx411,0.446088
