In [11]:
import json
import networkx as nx

In [6]:
# Load profile.json; its 'items' entries carry the 'member_id' values read by
# make_year_graph. The context manager closes the file handle right after
# parsing instead of leaving it open until garbage collection.
with open("profile.json") as profile_file:
    profile = json.load(profile_file)

In [7]:
# Load papers.json; its 'papers' entries carry the 'date' and
# 'collaborator_ids' fields read by make_year_graph. The context manager
# closes the file handle right after parsing instead of leaving it open.
with open("papers.json") as papers_file:
    papers = json.load(papers_file)

In [76]:
# Helpers that build the co-authorship graph for a range of years

def make_year_graph(profile, papers, year_0, year_1, full):
    """Build the undirected co-authorship graph for papers in a year window.

    Parameters
    ----------
    profile : dict
        Only read when ``full`` is true; each entry of ``profile['items']``
        contributes its ``'member_id'`` as a node.
    papers : dict
        Each entry of ``papers['papers']`` provides ``'date'`` (converted
        with ``int`` and compared as a year) and ``'collaborator_ids'``.
    year_0, year_1 : int
        Inclusive bounds of the window.
    full : bool
        When true, every member id is added as a node up front, so members
        without any paper in the window still appear in the graph.

    Returns
    -------
    networkx.Graph
        One edge for every pair of distinct collaborator ids that share at
        least one paper inside the window.
    """
    graph = nx.Graph()

    if full:
        for entry in profile['items']:
            graph.add_node(entry['member_id'])

    for record in papers['papers']:
        # Papers outside the inclusive [year_0, year_1] window are ignored.
        if not (year_0 <= int(record['date']) <= year_1):
            continue
        graph.add_edges_from(get_pairs(record['collaborator_ids']))

    return graph

def get_pairs(l):
    """Return all tuples ``(a, b)`` of elements of ``l`` with ``a < b``.

    Every ordered combination of two elements is examined, so each unordered
    pair of distinct values is reported once, smaller value first. Equal
    values never pair with each other, and a value that occurs several times
    in ``l`` produces repeated tuples. The result follows the outer-then-inner
    iteration order over ``l``.
    """
    return [(first, second) for first in l for second in l if first < second]

### Number of authors who collaborated with gear members

In [47]:
for year_1 in range(2011, 2017):
    g = make_year_graph(profile, papers, 2011, year_1, False)
    print "From 2011 to", year_1, "| Number of authors who collaborated with gear members:", len(g.nodes())

From 2011 to 2011 | Number of authors who collaborated with gear members: 66
From 2011 to 2012 | Number of authors who collaborated with gear members: 106
From 2011 to 2013 | Number of authors who collaborated with gear members: 131
From 2011 to 2014 | Number of authors who collaborated with gear members: 164
From 2011 to 2015 | Number of authors who collaborated with gear members: 206
From 2011 to 2016 | Number of authors who collaborated with gear members: 231


### Mean distance between gear members

mean(dist(u, v)) over all pairs of nodes (u, v) for which there exists a path between u and v

In [77]:
def mean_dist_year(starting_yr, ending_yr):
    """Mean shortest-path length over the connected pairs of graph nodes.

    Builds the co-authorship graph for papers dated within the inclusive
    window [starting_yr, ending_yr] (``full=False``, so ids without a paper
    in the window are not nodes) and averages the hop distance over every
    unordered pair of distinct nodes joined by a path. Pairs with no
    connecting path are left out of the mean.

    NOTE(review): the nodes are all ids found in ``collaborator_ids``, which
    may include authors who are not listed in ``profile`` -- confirm that
    this is the intended population for the reported figure.

    Parameters
    ----------
    starting_yr, ending_yr : int
        Inclusive bounds of the window.

    Returns
    -------
    float
        The mean distance rounded to two decimals, or NaN when no two nodes
        are connected (for example, no multi-author paper in the window).
    """
    g = make_year_graph(profile, papers, starting_yr, ending_yr, False)

    total = 0
    count = 0
    for u in g.nodes():
        # One single-source search per node returns the distance to every
        # node reachable from u. Unreachable nodes are simply absent, so no
        # exception has to be swallowed, and the work is one search per node
        # rather than one per pair. dict() also accepts an iterable of
        # (node, length) pairs should the installed networkx return one.
        reachable = dict(nx.shortest_path_length(g, source=u))
        for v, dist in reachable.items():
            # u < v counts each unordered pair once and skips u itself.
            if u < v:
                total += dist
                count += 1

    if count == 0:
        # Nothing to average; report NaN instead of dividing by zero.
        return float('nan')
    return round(float(total) / count, 2)

In [78]:
for year_1 in range(2011, 2017):
    d = mean_dist_year(2011, year_1)
    print "From 2011 to", year_1, "| Mean distance between gear members", d

From 2011 to 2011 | Mean distance between gear members 1.49
From 2011 to 2012 | Mean distance between gear members 3.41
From 2011 to 2013 | Mean distance between gear members 4.04
From 2011 to 2014 | Mean distance between gear members 5.22
From 2011 to 2015 | Mean distance between gear members 5.32
From 2011 to 2016 | Mean distance between gear members 5.97


### Clustering coefficient

definition:

https://networkx.github.io/documentation/networkx-1.10/reference/generated/networkx.algorithms.cluster.average_clustering.html#networkx.algorithms.cluster.average_clustering

In [79]:
def clustering_coefficient_year(starting_yr, ending_yr):
    """Average clustering coefficient of the graph for a year window.

    The graph is built with ``full=True``, so every member id listed in
    ``profile`` is a node even when it has no paper dated within the
    inclusive window [starting_yr, ending_yr].

    Returns whatever ``nx.average_clustering`` reports for that graph.
    """
    return nx.average_clustering(
        make_year_graph(profile, papers, starting_yr, ending_yr, True)
    )

In [80]:
for year_1 in range(2011, 2017):
    d = clustering_coefficient_year(2011, year_1)
    print "From 2011 to", year_1, "| Clustering coefficient", d

From 2011 to 2011 | Clustering coefficient 0.0409921671018
From 2011 to 2012 | Clustering coefficient 0.0788263085913
From 2011 to 2013 | Clustering coefficient 0.089553121668
From 2011 to 2014 | Clustering coefficient 0.0961400653307
From 2011 to 2015 | Clustering coefficient 0.107679410813
From 2011 to 2016 | Clustering coefficient 0.144948026257
