In [1]:
import numpy as np
import snap
from tqdm import tqdm

We begin by loading in 4 folded graphs:

(1) A graph of companies wherein edges exist between companies that share a common investor
(2) A graph of investors wherein edges exist between investors that share a common company/investment
(3) A graph of companies wherein edges exist between companies in a common region
(4) A graph of companies wherein edges exist between companies in a common industry

Since a company can only belong to one region and one industry, we recognize that (3) and (4) just consist of giant cliques

In [2]:
# Load the serialized SNAP graph of companies folded on their investors.
graph_name = "../../graphs/investors_to_companies_directed/investors_to_companies_directed_folded.graph"
FIn = snap.TFIn(graph_name)
companies_folded_by_investors = snap.TUNGraph.Load(FIn)

# Sanity check: report the size of the graph that was just loaded.
# (A single parenthesised argument prints identically under the Python 2 print statement.)
print("Nodes: " + str(companies_folded_by_investors.GetNodes()))
print("Edges: " + str(companies_folded_by_investors.GetEdges()))

Nodes: 11572
Edges: 768063


In [3]:
# Load the serialized SNAP graph of investors folded on their companies
# (the "reverse order" folding of the investors-to-companies graph).
graph_name = "../../graphs/investors_to_companies_directed/investors_to_companies_directed_folded_reverse_order.graph"
FIn = snap.TFIn(graph_name)
investors_folded_by_companies = snap.TUNGraph.Load(FIn)

# Sanity check: report the size of the graph that was just loaded.
# (A single parenthesised argument prints identically under the Python 2 print statement.)
print("Nodes: " + str(investors_folded_by_companies.GetNodes()))
print("Edges: " + str(investors_folded_by_companies.GetEdges()))

Nodes: 10465
Edges: 33053


In [4]:
# Load the serialized SNAP graph of companies folded on their region.
graph_name = "../../graphs/region_to_company_directed/region_to_company_directed_folded.graph"
FIn = snap.TFIn(graph_name)
companies_folded_by_region = snap.TUNGraph.Load(FIn)

# Sanity check: report the size of the graph that was just loaded.
# (A single parenthesised argument prints identically under the Python 2 print statement.)
print("Nodes: " + str(companies_folded_by_region.GetNodes()))
print("Edges: " + str(companies_folded_by_region.GetEdges()))

Nodes: 11573
Edges: 9061049


In [5]:
# Load the serialized SNAP graph of companies folded on their industry (category).
graph_name = "../../graphs/categories_to_companies_directed/categories_to_companies_directed_folded.graph"
FIn = snap.TFIn(graph_name)
companies_folded_by_industry = snap.TUNGraph.Load(FIn)

# Sanity check: report the size of the graph that was just loaded.
# (A single parenthesised argument prints identically under the Python 2 print statement.)
print("Nodes: " + str(companies_folded_by_industry.GetNodes()))
print("Edges: " + str(companies_folded_by_industry.GetEdges()))

Nodes: 11318
Edges: 4329444


We now address the clique issue mentioned above by creating the following combined graphs, each of which takes the union of the edges of two or more of the folded graphs:

(1) A graph of companies wherein there's an edge between two companies if they are in the same region or industry
(2) A graph of companies wherein there's an edge between two companies if they share an investor or are in the same region
(3) A graph of companies wherein there's an edge between two companies if they share an investor or are in the same industry
(4) A graph of companies wherein there's an edge between two companies if they are in the same region or industry or share a common investor

In [10]:
# Graph of companies wherein there's an edge between two companies if they are in the same region or industry
common_region_or_industry = snap.TUNGraph.New()

# Node pass: copy node ids from the region graph first, then from the industry
# graph, skipping any id that is already present in the combined graph.
for source_graph in (companies_folded_by_region, companies_folded_by_industry):
    for NI in source_graph.Nodes():
        node_id = NI.GetId()
        if common_region_or_industry.IsNode(node_id):
            continue
        common_region_or_industry.AddNode(node_id)

# Edge pass: copy edges in the same order (region, then industry), adding each
# (src, dest) pair only once.
for source_graph in (companies_folded_by_region, companies_folded_by_industry):
    for EI in source_graph.Edges():
        src = EI.GetSrcNId()
        dest = EI.GetDstNId()

        # Defensive guard: make sure both endpoints exist before adding the edge.
        for endpoint in (src, dest):
            if not common_region_or_industry.IsNode(endpoint):
                common_region_or_industry.AddNode(endpoint)

        if not common_region_or_industry.IsEdge(src, dest):
            common_region_or_industry.AddEdge(src, dest)

# Sanity check: report the size of the combined graph.
print("Nodes: " + str(common_region_or_industry.GetNodes()))
print("Edges: " + str(common_region_or_industry.GetEdges()))

herre
Nodes: 12080
Edges: 12858171


In [None]:
# Graph of companies wherein there's an edge between two companies if they are in the same region or have a common investor
#
# Built as the union of `companies_folded_by_region` and
# `companies_folded_by_investors`.
# NOTE(review): assumes a given node id refers to the same company in both
# source graphs -- confirm against the code that produced the folded graphs.
common_region_or_investor = snap.TUNGraph.New()

# Node pass: add every company id that appears in either source graph,
# skipping ids that have already been added.
for source_graph in (companies_folded_by_region, companies_folded_by_investors):
    for NI in source_graph.Nodes():
        if not common_region_or_investor.IsNode(NI.GetId()):
            common_region_or_investor.AddNode(NI.GetId())

# Edge pass: add each (src, dest) pair once. Both endpoints are guaranteed to
# exist here because every node of both source graphs was added above.
for source_graph in (companies_folded_by_region, companies_folded_by_investors):
    for EI in source_graph.Edges():
        src = EI.GetSrcNId()
        dest = EI.GetDstNId()

        if not common_region_or_investor.IsEdge(src, dest):
            common_region_or_investor.AddEdge(src, dest)

# Sanity check: report the size of the combined graph.
print("Nodes: " + str(common_region_or_investor.GetNodes()))
print("Edges: " + str(common_region_or_investor.GetEdges()))

In [None]:
# Graph of companies wherein there's an edge between two companies if they share an investor or are in the same industry
#
# Built as the union of `companies_folded_by_investors` and
# `companies_folded_by_industry`. The result is bound to its own name so the
# previously built `common_region_or_industry` graph is not overwritten.
# NOTE(review): assumes a given node id refers to the same company in both
# source graphs -- confirm against the code that produced the folded graphs.
common_investor_or_industry = snap.TUNGraph.New()

# Node pass: add every company id that appears in either source graph,
# skipping ids that have already been added.
for source_graph in (companies_folded_by_investors, companies_folded_by_industry):
    for NI in source_graph.Nodes():
        if not common_investor_or_industry.IsNode(NI.GetId()):
            common_investor_or_industry.AddNode(NI.GetId())

# Edge pass: add each (src, dest) pair once. Both endpoints are guaranteed to
# exist here because every node of both source graphs was added above.
for source_graph in (companies_folded_by_investors, companies_folded_by_industry):
    for EI in source_graph.Edges():
        src = EI.GetSrcNId()
        dest = EI.GetDstNId()

        if not common_investor_or_industry.IsEdge(src, dest):
            common_investor_or_industry.AddEdge(src, dest)

# Sanity check: report the size of the combined graph.
print("Nodes: " + str(common_investor_or_industry.GetNodes()))
print("Edges: " + str(common_investor_or_industry.GetEdges()))

In [None]:
# Graph of companies wherein there's an edge between two companies if they are in the same region or industry or share a common investor
#
# Built directly as the union of the three loaded company graphs
# (`companies_folded_by_region`, `companies_folded_by_industry`,
# `companies_folded_by_investors`). The result is bound to its own name so the
# previously built `common_region_or_industry` graph is not overwritten.
# NOTE(review): assumes a given node id refers to the same company in all three
# source graphs -- confirm against the code that produced the folded graphs.
common_region_or_industry_or_investor = snap.TUNGraph.New()

# Node pass: add every company id that appears in any source graph,
# skipping ids that have already been added.
for source_graph in (companies_folded_by_region,
                     companies_folded_by_industry,
                     companies_folded_by_investors):
    for NI in source_graph.Nodes():
        if not common_region_or_industry_or_investor.IsNode(NI.GetId()):
            common_region_or_industry_or_investor.AddNode(NI.GetId())

# Edge pass: add each (src, dest) pair once. Both endpoints are guaranteed to
# exist here because every node of all source graphs was added above.
for source_graph in (companies_folded_by_region,
                     companies_folded_by_industry,
                     companies_folded_by_investors):
    for EI in source_graph.Edges():
        src = EI.GetSrcNId()
        dest = EI.GetDstNId()

        if not common_region_or_industry_or_investor.IsEdge(src, dest):
            common_region_or_industry_or_investor.AddEdge(src, dest)

# Sanity check: report the size of the combined graph.
print("Nodes: " + str(common_region_or_industry_or_investor.GetNodes()))
print("Edges: " + str(common_region_or_industry_or_investor.GetEdges()))