In [6]:
import numpy as np
from gensim.models import Word2Vec
import snap
import operator
import snap

In [130]:
def getNumAcquired(sim_rankings, acquired_companies_in_graph_by_id, k):
    """Count how many of the first k ranked entries are acquired companies.

    sim_rankings: sequence of tuples whose first element is a company id in
        any form int() accepts; only the first k entries are inspected.
    acquired_companies_in_graph_by_id: container of integer company ids that
        each ranked id is tested against with `in`.
    k: number of leading entries of sim_rankings to look at.

    Returns the number of matching entries as an int (0 when nothing matches).
    """
    return sum(
        1
        for entry in sim_rankings[:k]
        if int(entry[0]) in acquired_companies_in_graph_by_id
    )

In [129]:
def getAcquiredToNumAcquired(k, node_to_sorted_L2_similarities, acquired_companies_in_graph_by_id):
    """Map each acquired company id to the number of acquired companies among its k nearest nodes.

    k: how many of the closest entries to inspect for each company.
    node_to_sorted_L2_similarities: dict keyed by the string form of a node id,
        whose values are ranking lists of tuples with the node id first.
    acquired_companies_in_graph_by_id: iterable of hashable company ids.

    Returns a dict {company_id: count}.
    Raises KeyError if str(company_id) has no entry in node_to_sorted_L2_similarities.
    """
    # Build the membership set once. The helper performs k membership tests per
    # company, and testing against the original list would make each of those
    # tests a linear scan.
    acquired_ids = frozenset(acquired_companies_in_graph_by_id)

    acquired_to_num_acquired = {}
    for company_id in acquired_companies_in_graph_by_id:
        # The similarity map is keyed by strings, the id collection holds ints.
        sim_rankings = node_to_sorted_L2_similarities[str(company_id)]
        acquired_to_num_acquired[company_id] = getNumAcquired(sim_rankings, acquired_ids, k)

    return acquired_to_num_acquired

In [7]:
def calculateSortedL2Similarity(reference_node, model):
    """Rank every other vocabulary entry by its L2 distance to reference_node.

    reference_node: key of the node to measure from; it is excluded from the result.
    model: object exposing `model[key]` (the embedding vector) and `model.vocab`
        (an iterable of keys).

    Returns a list of (key, distance) tuples sorted from the smallest L2
    distance to the largest. Despite the function name the score is a
    distance, so a smaller value means a more similar node.
    """
    anchor = model[reference_node]
    distance_by_key = {
        other: np.linalg.norm(anchor - model[other])
        for other in model.vocab
        if other != reference_node
    }
    return sorted(distance_by_key.items(), key=lambda pair: pair[1])

In [8]:
def createSortedSimilaritesMap(model):
    """Build {key: ranking} for every key in model.vocab.

    Each ranking is the list returned by calculateSortedL2Similarity for that
    key, i.e. all other vocabulary entries ordered by ascending L2 distance.
    Every key is compared against every other key, so the work grows
    quadratically with the vocabulary size.
    """
    return {node: calculateSortedL2Similarity(node, model) for node in model.vocab}

In [9]:
# Load the three saved node2vec embedding files from disk.
# NOTE(review): the file names suggest node2vec runs with p=1 and q in {100, 1, 0.01};
# the BFS / neutral / DFS variable names presumably describe the walk behaviour each
# q value produces -- confirm against the script that generated the .emd files.
BFS_model = Word2Vec.load_word2vec_format("../node2vec_embeddings/company_embeddings_p1_q100.emd")
neutral_model = Word2Vec.load_word2vec_format("../node2vec_embeddings/company_embeddings_p1_q1.emd")
DFS_model = Word2Vec.load_word2vec_format("../node2vec_embeddings/company_embeddings_p1_q0_01.emd")

In [10]:
# Build the full node -> sorted-L2-ranking map for each embedding variant.
# Each call compares every vocabulary entry with every other one, so this cell is slow.
# print(...) with a single argument emits the same text on Python 2 and Python 3,
# whereas the bare print statement is a SyntaxError on Python 3.
BFS_nodeToSortedL2Similarities = createSortedSimilaritesMap(BFS_model)
print("Completed BFS model")
neutral_nodeToSortedL2Similarities = createSortedSimilaritesMap(neutral_model)
print("Completed neutral model")
DFS_nodeToSortedL2Similarities = createSortedSimilaritesMap(DFS_model)
print("Completed DFS model")

Completed BFS model
Completed neutral model
Completed DFS model


In [15]:
# One-off persistence of the three similarity maps. The np.save calls are wrapped in a
# string literal, so executing this cell does not run them.
# NOTE(review): presumably disabled to avoid overwriting the saved files -- confirm before re-enabling.
'''
DO NOT RUN THIS AGAIN!

np.save("../node2vec_embeddings/nodeToSortedL2Similarities_p1_q100", BFS_nodeToSortedL2Similarities)
np.save("../node2vec_embeddings/nodeToSortedL2Similarities_p1_q1", neutral_nodeToSortedL2Similarities)
np.save("../node2vec_embeddings/nodeToSortedL2Similarities_p1_q0_01", DFS_nodeToSortedL2Similarities)
'''

In [35]:
# These are all of the acquired companies we know about, loaded from a saved NumPy
# array and collected into a set so they can be intersected with the graph's companies.
acquired_companies = set(np.load("../acquired_companies.npy"))

4563


In [114]:
# Now we find the overlap between companies in our graph and the set of acquired companies
# node_id_to_value was saved as a dict inside a 0-d array; .item() unwraps it back into the dict.
node_id_to_value = np.load("../../graphs/investors_to_companies_directed/node_id_to_value.npy").item()
FIn = snap.TFIn("../../graphs/investors_to_companies_directed/investors_to_companies_directed_folded_reverse_order.graph")
G = snap.TUNGraph.Load(FIn)

# Collect the value of every node that is an endpoint of at least one edge;
# nodes without any edge are therefore not included.
companies_in_graph = set()
for EI in G.Edges():
    curr_src_id = EI.GetSrcNId()
    curr_dst_id = EI.GetDstNId()
    companies_in_graph.add(node_id_to_value[curr_src_id])
    companies_in_graph.add(node_id_to_value[curr_dst_id])

# These are the acquired companies in our folded graph
acquired_companies_in_graph = companies_in_graph.intersection(acquired_companies)

# Invert the id -> value mapping to translate the overlap back into node ids.
# .items() is used instead of the Python-2-only .iteritems() so the cell runs on both versions.
# NOTE(review): the inversion assumes the values are unique; a duplicated value would
# silently keep only one of its node ids -- confirm against the data.
value_to_node_id = {v: k for k, v in node_id_to_value.items()}
acquired_companies_in_graph_by_id = [
    value_to_node_id[company] for company in acquired_companies_in_graph
]

In [133]:
# Number of closest nodes inspected for each acquired company.
k = 100
# For each embedding variant, map every acquired company id to the number of
# acquired companies found among its k closest nodes.
acquired_to_num_acquired_BFS = getAcquiredToNumAcquired(k, BFS_nodeToSortedL2Similarities, acquired_companies_in_graph_by_id)
acquired_to_num_acquired_neutral = getAcquiredToNumAcquired(k, neutral_nodeToSortedL2Similarities, acquired_companies_in_graph_by_id)
acquired_to_num_acquired_DFS = getAcquiredToNumAcquired(k, DFS_nodeToSortedL2Similarities, acquired_companies_in_graph_by_id)

# NOTE(review): getNullModel is not defined in the cells visible here; the baseline call stays disabled.
#null_model = getNullModel()

{4098: 16, 3: 19, 2055: 22, 10248: 27, 343: 24, 8208: 15, 8212: 27, 2076: 24, 6055: 15, 30: 17, 6177: 27, 6178: 20, 6181: 19, 6185: 15, 10282: 27, 2093: 19, 10286: 27, 5469: 19, 10288: 19, 8243: 30, 15385: 24, 8247: 15, 4152: 17, 8249: 29, 8252: 21, 2059: 24, 16453: 30, 2120: 17, 3753: 14, 6219: 14, 6220: 18, 6444: 23, 10325: 13, 86: 22, 88: 25, 11620: 24, 12379: 13, 6238: 23, 2665: 14, 10344: 22, 8298: 25, 8299: 23, 8300: 18, 8301: 19, 8303: 20, 16497: 30, 10354: 19, 4212: 26, 2165: 23, 8310: 24, 4215: 15, 8312: 22, 6278: 13, 5141: 17, 4224: 17, 10369: 22, 10370: 18, 134: 11, 10381: 22, 4238: 23, 4239: 27, 11403: 25, 4241: 27, 5661: 21, 8339: 22, 8340: 21, 8341: 29, 4246: 28, 5145: 26, 4251: 29, 157: 19, 159: 20, 160: 23, 161: 21, 12451: 21, 4260: 21, 5830: 24, 2417: 25, 6172: 26, 10410: 24, 4268: 18, 173: 20, 4270: 24, 2223: 27, 2224: 25, 10417: 22, 178: 21, 9587: 16, 4276: 19, 4279: 16, 2232: 13, 4281: 21, 8379: 19, 10428: 19, 7882: 10, 4287: 26, 3670: 23, 6337: 23, 2242: 30, 9112: 