In [2]:
import csv
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

## Data Import

In [3]:
df = pd.read_csv('name_name.csv', dtype={"reps": np.int64, "comm" : str, "auth" : str})


In [4]:
df.head()

Unnamed: 0,reps,auth,comm
0,26,Maria Shaldibina,Stev Witzel
1,17,Eduardo Valentin,Daniel
2,24,Eric W. Biederman,geiti94
3,519,Paul Jakma,Paul Jakma
4,122,Laxman Dewangan,Stephen Warren


## Data Cleaning

In [5]:
df.isnull().values.sum()

345

In [6]:
nan_rows = df[df.isnull().any(1)]
nan_rows

Unnamed: 0,reps,auth,comm
2295,225,,root
5867,24,,mark
12673,18,,舒志凌
13649,55,,Jeff Garzik
18747,21,John Axel Eriksson,
23358,21,Fish,
25165,22,,黄志伟
27935,477,,git-darcs-import
29367,18,Lars van de Kerkhof,
29690,27,,Ease


In [7]:
df = df.dropna()
df.isnull().values.sum()

0

## Sample Graph

In [9]:
dfn = df.sample(1000)
del df
df = dfn

In [10]:
graph = nx.from_pandas_edgelist(df, source = 'comm', target = 'auth', edge_attr = 'reps',create_using = nx.DiGraph())

In [8]:
#del dfn

In [11]:
number_of_nodes = nx.number_of_nodes(graph)

In [12]:
print("Eigenvector centrality")
pg = nx.pagerank(graph, alpha=0.85, personalization=None, max_iter=100, tol=1e-06, nstart=None, weight='reps', dangling=None)

print("HITS")
hub, auths = nx.hits(graph, max_iter=100, tol=1e-08, nstart=None, normalized=True)

Eigenvector centrality
HITS


In [13]:
x = 5
from heapq import nlargest
spg = nlargest(x, pg, key=pg.get)
shubs = nlargest(x, hub, key=hub.get)
sauths = nlargest(x, auths, key=auths.get)

In [14]:
print("Eigen", spg)
print("Hubs", shubs)
print("Authorities", sauths)

Eigen ['mexisme', 'Justin Reagor', 'Kris Nuttycombe', 'DFW', 'Timbertoes']
Hubs ['GitHub', 'Weblate', 'Greg Kroah-Hartman', 'Quyen', 'Ricardo Cerqueira']
Authorities ['Steven Haggerty', 'Kaszás Dániel', 'Jesse Harlin', 'luoqiaoen', 'kanishk2509']


## Distance

In [15]:
len(graph.nodes())

1640

In [16]:
len(graph.edges())


1000

In [50]:
[n for n in nx.neighbors(graph, "curiousNoob")]

[]

In [41]:
def check_neighbourhood(graph, node, radius):
    level = [n for n in nx.neighbors(graph, node)]
    if radius == 1:
        return level
    total_list = []
    total_list.extend(level)
    next_level = []
    radius = radius - 1;
    for i in range(radius):
        for item in level:
            m = [n for n in nx.neighbors(graph, item)]
            next_level.extend(m)
        del level[:]
        level.extend(next_level)
        total_list.extend(next_level)
        del next_level[:]
    return list(set(total_list))


In [42]:
check_neighbourhood(graph, "Quyen", 1)

['Tony Prisk', 'flar2', 'Wei Yongjun']

In [43]:
[n for n in graph.neighbors("Tony Prisk")]
[n for n in graph.neighbors("flar2")]
[n for n in graph.neighbors("Wei Yongjun")]

[]

In [51]:
check_neighbourhood(graph, "curiousNoob", 3)

[]

In [45]:
gu = graph.to_undirected()
check_neighbourhood(gu, "Quyen", 1)

['Tony Prisk', 'flar2', 'Wei Yongjun']

In [52]:
check_neighbourhood(gu, "curiousNoob", 3)

['itsyouranmol',
 'pengelsman7',
 'Steve Crabb',
 'Catherine-Lawrey',
 'George Ornbo',
 'Adomas Ven',
 'dreampet',
 'Aleks C. Barragan',
 'Pavel Feldman',
 'Andy Pieters',
 'AnamikaD',
 'Shawn Hartsell',
 'Giacomo Tanganelli',
 'JacobGunther12',
 'Trevor Watson',
 'Steven Haggerty',
 'Brent Yorgey',
 'Kaszás Dániel',
 'liuyuan',
 'chester_lee',
 'geeksctrl',
 'Jesse Harlin',
 'Archie',
 'jibux',
 'ErikSkoda',
 'ShotaOd',
 'lydiasbitya',
 'Dmitriy Filyushin',
 'Jérémie Parisel',
 'Antonio Mora',
 'Mike Kittridge',
 'CaioHAndradeLima',
 'mdzio',
 'XBPIG',
 'jshcs',
 'omniprog',
 'Beer-BearDevTech',
 'luoqiaoen',
 'Matthias Müller',
 'Brad Simantel',
 'kanishk2509',
 'Jiongyan Zhang',
 'aorek',
 'Aliona',
 'Robert\xa0Hunter',
 'tangwei12',
 'callum-improbable',
 '10448129',
 'Ted B',
 'Bence Molnár',
 'varkor',
 'Stephen J Newhouse',
 'SharpeRAD',
 'SirGrizzlyBear',
 'wyczisk',
 'zengqingyang1306410141',
 'Griffin1110',
 'Holanda Junior',
 'Boris Egorov',
 'dongfix',
 'sn0wday',
 'Jianjia

## Score neighbours

In [None]:
def score_neighbours(graph, node, radius):
    neighbours = check_neighbourhood(graph, node, radius)
    for item in neigbours