In [1]:
import networkx as nx
import random

In [2]:
nx.__version__

'2.2'

## 生成ER随机图

In [3]:
# Sample an Erdos-Renyi G(n, p) random graph: 50 nodes, each possible edge
# included with probability 0.6. No seed is passed, so the graph (and every
# output below) differs from run to run.
G = nx.fast_gnp_random_graph(50, 0.6)

In [4]:
G.nodes()

NodeView((0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49))

In [5]:
nx.is_connected(G)

True

In [6]:
nx.write_adjlist(G, "mygraph.adjlist")

In [8]:
# Reload the graph from the adjacency-list file, rebinding G. nodetype=str
# makes every node label a string ('0', '1', ...) rather than the integer
# labels of the generated graph.
G = nx.read_adjlist("mygraph.adjlist", nodetype=str)

> **Word2Vec** 模型要求顶点为字符串，因此上面读取邻接表时指定了 `nodetype=str`。

In [9]:
G.nodes()

NodeView(('0', '2', '3', '5', '7', '9', '11', '13', '17', '19', '20', '21', '22', '23', '26', '27', '28', '29', '30', '34', '35', '37', '38', '39', '42', '43', '44', '46', '49', '1', '4', '6', '14', '15', '16', '32', '45', '48', '8', '12', '18', '25', '33', '41', '47', '10', '24', '36', '31', '40'))

## 图上的随机游走，给定随机游走的长度和起始点
Ref: 
1. [清华大学自然语言处理Lab:OpenNE](https://github.com/thunlp/OpenNE/blob/master/src/openne/walker.py#L11) 
2. [作者源代码库:deepwalk](https://github.com/phanein/deepwalk/blob/master/deepwalk/graph.py#L122)


In [10]:
def random_walk(G, walk_length, start_point, rand=None):
    """Run one uniform random walk on ``G`` beginning at ``start_point``.

    At every step the walk moves to a neighbor of the current node chosen
    uniformly at random, so nodes may be revisited.

    Args:
        G: Graph object exposing ``neighbors(node)`` (e.g. a networkx graph).
        walk_length: Maximum number of nodes in the returned walk, counting
            ``start_point`` itself.
        start_point: Node at which the walk starts.
        rand: Optional random source providing ``choice(seq)``, such as a
            seeded ``random.Random`` instance for reproducible walks.
            Defaults to the module-level ``random`` generator.

    Returns:
        List of visited nodes, beginning with ``start_point``. It is shorter
        than ``walk_length`` when the walk reaches a node without neighbors.
        The start node is always included, so the list has at least one
        element even when ``walk_length`` is less than 1.
    """
    if rand is None:
        rand = random

    walk = [start_point]
    while len(walk) < walk_length:
        cur_nbrs = list(G.neighbors(walk[-1]))
        if not cur_nbrs:
            # Dead end: the current node has no neighbors to step to.
            break
        walk.append(rand.choice(cur_nbrs))
    return walk

In [12]:
random_walk(G, walk_length=10, start_point='0')

['0', '26', '20', '26', '9', '16', '6', '21', '34', '32']

## 建立随机游走词库

In [13]:
def build_deepwalk_corpus(G, num_walks, walk_length):
    """Collect random walks over the whole graph to form a walk corpus.

    The graph is swept ``num_walks`` times. In each sweep the node order is
    reshuffled and one random walk is started from every node.

    Args:
        G: Graph object exposing ``nodes()``; it is also passed through to
            ``random_walk``.
        num_walks: Number of sweeps over all nodes.
        walk_length: Length argument forwarded to ``random_walk``.

    Returns:
        List of walks, one per (sweep, node) pair, in generation order.
    """
    corpus = []
    start_nodes = list(G.nodes())

    for _ in range(num_walks):
        # A fresh start order on every sweep.
        random.shuffle(start_nodes)
        corpus.extend(
            random_walk(G, walk_length, start_point=origin)
            for origin in start_nodes
        )

    return corpus

In [26]:
walk_corpus = build_deepwalk_corpus(G, num_walks=5, walk_length=10)

In [27]:
walk_corpus

[['37', '42', '31', '12', '9', '19', '24', '10', '9', '19'],
 ['32', '27', '18', '19', '3', '46', '7', '13', '29', '19'],
 ['6', '1', '37', '17', '33', '3', '19', '9', '29', '5'],
 ['27', '13', '37', '25', '35', '16', '6', '4', '2', '19'],
 ['19', '38', '5', '34', '21', '36', '21', '1', '35', '27'],
 ['49', '23', '49', '0', '28', '48', '4', '12', '40', '12'],
 ['45', '28', '0', '28', '6', '28', '12', '2', '7', '49'],
 ['41', '13', '14', '32', '3', '13', '25', '13', '26', '43'],
 ['33', '4', '47', '32', '30', '43', '23', '8', '44', '38'],
 ['8', '36', '8', '17', '48', '24', '6', '30', '25', '10'],
 ['34', '36', '10', '30', '49', '16', '45', '44', '18', '15'],
 ['48', '23', '11', '33', '42', '25', '6', '32', '4', '29'],
 ['46', '40', '19', '2', '41', '2', '19', '4', '45', '19'],
 ['23', '8', '17', '4', '11', '34', '22', '9', '45', '21'],
 ['15', '49', '8', '43', '47', '20', '42', '16', '25', '2'],
 ['10', '36', '5', '40', '13', '0', '38', '29', '25', '43'],
 ['17', '22', '28', '12', '47'

## 将随机游走词库看成自然语言词库代入Skip-Gram模型（Word2Vec）生成嵌入向量

In [17]:
from gensim.models import Word2Vec

In [28]:
# Train Word2Vec on the walk corpus, treating each walk as a sentence whose
# tokens are node labels. sg=1 selects the skip-gram architecture; size=2
# requests 2-dimensional embedding vectors.
# NOTE(review): gensim >= 4.0 renamed `size` to `vector_size` -- confirm the
# installed gensim version before re-running.
vect = Word2Vec(walk_corpus, size=2, sg=1)

In [34]:
vect.wv['14'].tolist()

[0.1581534743309021, -0.4580100178718567]

In [36]:
# Map every node label to its learned embedding as a plain Python list.
vectors = {node: vect.wv[node].tolist() for node in G.nodes()}

In [37]:
vectors

{'0': [-0.17068098485469818, -0.6104867458343506],
 '2': [0.2536482512950897, -0.46589457988739014],
 '3': [0.16776493191719055, -0.4422425627708435],
 '5': [0.14379219710826874, -0.26075980067253113],
 '7': [-0.05263641104102135, -0.10770130902528763],
 '9': [0.08839749544858932, -0.5125434994697571],
 '11': [0.11371975392103195, -0.24136832356452942],
 '13': [-0.16243813931941986, -0.4793795049190521],
 '17': [-0.17913202941417694, -0.4007255434989929],
 '19': [0.10413634777069092, -0.4970078766345978],
 '20': [-0.03200548142194748, -0.12307611852884293],
 '21': [0.11845149099826813, -0.4528867304325104],
 '22': [-0.17508842051029205, -0.3172272741794586],
 '23': [-0.02030903473496437, -0.5033435225486755],
 '26': [-0.12583209574222565, -0.17159435153007507],
 '27': [0.14024657011032104, -0.3057872951030731],
 '28': [-0.08604558557271957, -0.4803437888622284],
 '29': [0.21540670096874237, -0.4638909101486206],
 '30': [0.12585826218128204, -0.14997683465480804],
 '34': [0.042796734720