In [163]:
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter, OrderedDict
import operator
import pandas as pd
import math

In [164]:
# Load the investment network as a directed graph from a tab-separated edge
# list; data=True asks networkx to also read the edge attributes stored on
# each line.
g = nx.read_edgelist("max_wcc_graph.edgelist",
                     delimiter="\t",
                     create_using=nx.DiGraph(),
                     data=True)

# Coerce every stored "weight" attribute to float so that the weighted-degree
# sums computed later operate on numbers.  A comprehension replaces the
# tuple-unpacking lambda, which is a syntax error on Python 3.
temp = [(edge, float(weight))
        for edge, weight in nx.get_edge_attributes(g, "weight").items()]

# Pass name/values by keyword: the positional order of these two arguments
# differs between networkx 1.x (G, name, values) and 2.x (G, values, name).
nx.set_edge_attributes(g, name="weight", values=dict(temp))


In [165]:
# Check for bipartiteness.
# Let a = number of nodes with at least one incoming edge and
#     b = number of nodes with at least one outgoing edge.
# Then n - a - b = (#nodes with no edges) - (#nodes with both kinds of edge),
# so a result of 0 means no node is both a source and a target, provided the
# graph has no isolated nodes (expected for a connected component -- TODO confirm).
num_nodes = g.number_of_nodes()

# NOTE: despite the "no_" prefix, these are the counts of nodes whose in-/out-
# degree is NON-zero.  dict(...) accepts both a plain dict and a degree view,
# and counting with a generator avoids len() on a lazy filter object.
no_in_degree = sum(1 for degree in dict(g.in_degree()).values() if degree != 0)
no_out_degree = sum(1 for degree in dict(g.out_degree()).values() if degree != 0)
num_nodes - no_in_degree - no_out_degree

0

0

In [166]:
# Use seaborn's "white" style preset for the figures drawn below.
sns.set_style(style="white")

In [167]:
# Degree distribution: for every non-zero in-/out-degree, how many nodes have it.
# dict(...) accepts both a plain dict and a degree view, whichever the
# installed networkx returns.
in_counts = list(
    Counter(d for d in dict(g.in_degree()).values() if d != 0).items())
out_counts = list(
    Counter(d for d in dict(g.out_degree()).values() if d != 0).items())

# Sort the (degree, count) pairs by degree and transpose them into
# [degrees, counts].  list(...) keeps the result subscriptable on Python 3,
# where zip() is lazy; the plotting cells index it with [0] and [1].
in_degree_hist = list(zip(*sorted(in_counts, key=operator.itemgetter(0))))
out_degree_hist = list(zip(*sorted(out_counts, key=operator.itemgetter(0))))

In [168]:
# Figure 1: number of nodes per in-/out-degree on linear axes.
fig, ax = plt.subplots()
ax.plot(in_degree_hist[0], in_degree_hist[1], label="In Degree")
ax.plot(out_degree_hist[0], out_degree_hist[1], label="Out Degree")
ax.set_xlabel('Degree')
ax.set_ylabel('Number of Companies')
ax.set_title("Degree Graph: Number of Incoming/Outgoing Investments, 2005-2015")
ax.legend()
fig.savefig("../../graphs/fig1-DegreeMaxWcc.png", bbox_inches="tight")

In [169]:
# Figure 2: the same degree distribution with both axes on a log scale.
fig, ax = plt.subplots()
ax.loglog(in_degree_hist[0], in_degree_hist[1], label="Log In Degree")
ax.loglog(out_degree_hist[0], out_degree_hist[1], label="Log Out Degree")
ax.set_xlabel('Log Degree')
ax.set_ylabel('Log Number of Companies')
ax.set_title("Degree Graph: Number of Incoming/Outgoing Investments, 2005-2015 (log-log scale)")
ax.legend()
fig.savefig("../../graphs/fig2-DegreeMaxWccLogLog.png", bbox_inches="tight")

In [170]:
# Investors are the nodes with at least one outgoing edge; startups are the
# nodes with at least one incoming edge.  Lists (rather than lazy map/filter
# objects) are built so the sequences can be read more than once, and
# dict(...) accepts both a plain dict and a degree view from networkx.
investors = [node
             for node, degree in dict(g.out_degree()).items()
             if degree != 0]

startups = [node
            for node, degree in dict(g.in_degree()).items()
            if degree != 0]

# Weighted in-degree of each startup = sum of the "weight" attribute over its
# incoming edges.  Zero totals are dropped because math.log(0) is undefined.
funds_raised = [amount
                for amount in dict(g.in_degree(startups, weight='weight')).values()
                if amount != 0]
log_funds_raised = [math.log(amount) for amount in funds_raised]

# Weighted out-degree of each investor = sum of the "weight" attribute over
# its outgoing edges, filtered and log-transformed the same way.
funds_invested = [amount
                  for amount in dict(g.out_degree(investors, weight='weight')).values()
                  if amount != 0]
log_funds_invested = [math.log(amount) for amount in funds_invested]

In [171]:
# Figure 3: kernel density estimates of the weighted in-degree (funds raised)
# and weighted out-degree (funds invested), drawn on the same axes.
fig, ax = plt.subplots()
sns.distplot(funds_raised, hist=False, rug=False,
             label="In-Weight (Funds Raised)", ax=ax)
sns.distplot(funds_invested, hist=False, rug=False,
             label="Out-Weight (Funds Invested)", ax=ax)
ax.set_xlabel("Funding, USD")
ax.set_ylabel("Density")
ax.set_title("Kernel Density Estimate of In-Weight / Out-Weight (Funds Raised/Invested)")
# Draw the legend explicitly, as the degree plots do, so the two curve labels
# are shown whether or not seaborn adds a legend on its own.
ax.legend()
fig.savefig("../../graphs/fig3-DistFunds.png", bbox_inches="tight")

In [172]:
# Figure 4: kernel density estimates of the natural log of funds raised and
# funds invested, drawn on the same axes.
fig, ax = plt.subplots()
sns.distplot(log_funds_raised, hist=False, rug=False,
             label="Log In-Weight (Funds Raised)", ax=ax)
sns.distplot(log_funds_invested, hist=False, rug=False,
             label="Log Out-Weight (Funds Invested)", ax=ax)
ax.set_xlabel("Log Funding, USD")
ax.set_ylabel("Density")
ax.set_title("Kernel Density Estimate of Log Funds Raised (Log Edge Weight Distribution)")
# Draw the legend explicitly, as the degree plots do, so the two curve labels
# are shown whether or not seaborn adds a legend on its own.
ax.legend()
fig.savefig("../../graphs/fig4-DistFundsLog.png", bbox_inches="tight")

491335428.4384941

491335428.4384941