In [2]:
import networkx as nx
import random

In [3]:
nx.__version__

'2.2'

## 生成ER随机图

In [4]:
G = nx.fast_gnp_random_graph(50, 0.6)

In [5]:
G.nodes()

NodeView((0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49))

In [6]:
nx.is_connected(G)

True

In [7]:
nx.write_adjlist(G, "mygraph.adjlist")

In [8]:
G = nx.read_adjlist("mygraph.adjlist", nodetype=str)

> **Word2Vec** 模型要求顶点为字符串，因此重新读入图时使用 `nodetype=str`

In [9]:
G.nodes()

NodeView(('0', '3', '4', '5', '6', '7', '10', '11', '12', '13', '14', '16', '17', '18', '21', '22', '24', '25', '27', '29', '31', '33', '34', '37', '38', '39', '41', '45', '46', '48', '1', '2', '19', '28', '32', '40', '43', '44', '49', '9', '15', '20', '26', '30', '35', '36', '47', '23', '42', '8'))

## 图上的随机游走，给定随机游走的长度和起始点
Ref: 
1. [清华大学自然语言处理Lab:OpenNE](https://github.com/thunlp/OpenNE/blob/master/src/openne/walker.py#L11) 
2. [作者源代码库:deepwalk](https://github.com/phanein/deepwalk/blob/master/deepwalk/graph.py#L122)


In [10]:
def random_walk(G, walk_length, start_point, rand=None):
    """Generate one uniform random walk on ``G`` starting at ``start_point``.

    At every step the next node is drawn uniformly from the neighbors of the
    current node. The walk stops early if the current node has no neighbors.

    Parameters
    ----------
    G : graph-like
        Any object exposing ``neighbors(node)`` that returns an iterable of
        the nodes adjacent to ``node``.
    walk_length : int
        Maximum number of nodes in the returned walk, start node included.
    start_point : node
        Node the walk begins at; always the first element of the result.
    rand : random.Random, optional
        Source of randomness used to choose the next node. When omitted, the
        module-level ``random`` functions (global state) are used, exactly as
        before. Pass ``random.Random(seed)`` to get a reproducible walk
        without touching the global random state.

    Returns
    -------
    list
        The visited nodes in order. Its length is ``walk_length`` unless a
        node without neighbors is reached first. The start node is always
        included, so the result has at least one element even when
        ``walk_length`` is less than 1.
    """
    # Both the ``random`` module and ``random.Random`` instances offer ``choice``.
    chooser = random if rand is None else rand

    walk = [start_point]
    while len(walk) < walk_length:
        cur_nbrs = list(G.neighbors(walk[-1]))
        if not cur_nbrs:
            # Dead end (isolated node): return the truncated walk.
            break
        walk.append(chooser.choice(cur_nbrs))
    return walk

In [11]:
random_walk(G, walk_length=10, start_point='0')

['0', '5', '1', '46', '31', '39', '0', '27', '46', '29']

## 建立随机游走词库

In [12]:
def build_deepwalk_corpus(G, num_walks, walk_length):
    """Build the random-walk corpus for a graph.

    Makes ``num_walks`` passes over the graph. In each pass the node order is
    reshuffled and one random walk of at most ``walk_length`` nodes is started
    from every node.

    Parameters
    ----------
    G : graph-like
        Graph exposing ``nodes()``; it is also handed to ``random_walk``.
    num_walks : int
        Number of passes over the node set.
    walk_length : int
        Length limit forwarded to ``random_walk``.

    Returns
    -------
    list
        One walk per (pass, node) pair, in generation order.
    """
    corpus = []
    vertices = list(G.nodes())

    for _ in range(num_walks):
        # Reshuffle in place so each pass visits the start nodes in a new order.
        random.shuffle(vertices)
        corpus.extend(
            random_walk(G, walk_length, start_point=vertex) for vertex in vertices
        )

    return corpus

In [13]:
walk_corpus = build_deepwalk_corpus(G, num_walks=5, walk_length=10)

In [14]:
walk_corpus[:10]

[['33', '39', '40', '4', '45', '40', '23', '8', '21', '31'],
 ['15', '21', '33', '48', '16', '7', '1', '41', '10', '42'],
 ['19', '29', '47', '4', '41', '5', '18', '16', '31', '14'],
 ['17', '24', '0', '27', '29', '25', '33', '0', '27', '45'],
 ['37', '32', '49', '41', '2', '38', '44', '28', '29', '2'],
 ['47', '21', '47', '44', '21', '35', '26', '13', '37', '24'],
 ['36', '3', '47', '9', '21', '29', '24', '15', '47', '49'],
 ['1', '5', '35', '6', '32', '22', '0', '5', '16', '32'],
 ['9', '39', '44', '19', '46', '16', '17', '39', '34', '10'],
 ['11', '37', '7', '12', '36', '2', '48', '43', '36', '13']]

## 将随机游走词库看成自然语言词库代入Skip-Gram模型(Word2Vec)生成嵌入向量

In [20]:
from gensim.models import Word2Vec

In [16]:
# Train a 2-dimensional skip-gram (sg=1) model on the walk corpus.
# min_count=0 keeps every vertex in the vocabulary; with gensim's default
# (min_count=5) a vertex that occurs fewer than 5 times in the corpus would be
# dropped and the later `vect.wv[word]` lookup would raise KeyError.
vect = Word2Vec(walk_corpus, size=2, sg=1, min_count=0)

In [17]:
vect.wv['14'].tolist()

[0.41066470742225647, 0.3059128522872925]

In [18]:
# Collect the learned embedding of every vertex into a plain dict:
# node label -> embedding converted to a Python list via .tolist().
vectors = {}
for word in G.nodes():
    vectors[word] = vect.wv[word].tolist()

In [19]:
vectors

{'0': [0.1841091513633728, 0.2956933081150055],
 '3': [-0.0765131413936615, 0.2146182507276535],
 '4': [0.1806582808494568, 0.3519364297389984],
 '5': [0.01922767609357834, 0.012294016778469086],
 '6': [-0.09284552931785583, -0.06729608029127121],
 '7': [0.05694431811571121, 0.15262654423713684],
 '10': [0.2879209816455841, -0.02413460612297058],
 '11': [0.037030600011348724, 0.09687264263629913],
 '12': [-0.032191354781389236, -0.009465262293815613],
 '13': [-0.09966184198856354, 0.20574405789375305],
 '14': [0.41066470742225647, 0.3059128522872925],
 '16': [0.07503832876682281, 0.2188539206981659],
 '17': [0.14746424555778503, 0.35743406414985657],
 '18': [0.05164521187543869, 0.2634373605251312],
 '21': [0.22302907705307007, 0.08671806752681732],
 '22': [0.18896447122097015, 0.27202534675598145],
 '24': [0.17848733067512512, 0.13115401566028595],
 '25': [0.27120471000671387, 0.3310478627681732],
 '27': [0.40336599946022034, 0.152100071310997],
 '29': [0.12082358449697495, -0.0878492