# pytorch实现NS的Word2Vec

In [1]:
import torch
import torch.nn.functional as F
import torch.utils.data as DataUtil
import collections
from collections import Counter
import numpy as np
import random
import math
import time

# 文件读取与预处理
参考：

[word2vec CPP代码](https://github.com/tmikolov/word2vec/blob/master/word2vec.c)

[wor2vec.ipynb](https://localhost:8888/notebooks/mm_note.git/0010-%E6%9C%BA%E5%99%A8%E5%AD%A6%E4%B9%A0/%E4%B8%9A%E5%8A%A1%E7%AE%97%E6%B3%95/word2vec.ipynb)

## 读取并生成词表

In [2]:
# 文件已经是切词过的，读取数据,去掉\ufeff字节顺序
# 按行读取
data_path = "../../../learnMLData/三国演义_cut.txt"
text_toks = []
with open(data_path, encoding='UTF-8-sig') as fp:
    for line in fp.readlines():
        toks = line.strip().lower().split()
        text_toks.append(toks)

# 过滤低频词建立索引
以下部分可以套用到其他应用：如商品推荐

## 低频词过滤

In [3]:
# 过滤额掉低频词
mini_filter = 5
vocab_dict =  dict(filter(lambda kv:kv[1] > mini_filter, Counter([tok for toks in text_toks for tok in toks]).most_common()))

## 建立索引

In [4]:
idx2word = list(vocab_dict.keys())
word2idx = {word: idx for idx, word in enumerate(idx2word)}

In [5]:
# 词按索引
text_toks2idx_data = [
    [word2idx[tok] for tok in raw_toks if tok in vocab_dict] for raw_toks in text_toks
]

In [6]:
text_toks2idx_data[0:3]

[[],
 [278, 2333, 1976, 1703, 318, 2621, 75, 922, 432, 3310, 1704],
 [1506, 1292, 432, 3026]]

# 下采样
解决稀有词和常见词之间的不平衡问题，使用了一种简单的下采样方法：训练集中的每个词$w_i$被丢弃，其概率由公式计算,实际计算做了修正,$f(w_i)$为词的频率,t为阈值，通常是$1e-5$

$$
P(w_i) = 1 - \sqrt{\frac{t}{f(w_i)}}
$$实际计算做了修正
c代码

```c
real ran = (sqrt(vocab[word].cn / (sample * train_words)) + 1) * (sample * train_words) / vocab[word].cn;
next_random = next_random * (unsigned long long)25214903917 + 11;
if (ran < (next_random & 0xFFFF) / (real)65536) continue;

```



In [7]:
train_words = sum(len(raw_toks) for raw_toks in text_toks)

In [8]:
def sub_sample(idx):
    word_freq = vocab_dict[idx2word[idx]] / train_words
    ran = 1 - math.sqrt(1e-5 / word_freq)
    #print(ran, random.uniform(0, 1), vocab_dict[idx2word[idx]])
    return random.uniform(0, 1) < ran

对数据集下采样

In [9]:
subsampled_text_toks_idx = [[tok for tok in raw_data if not sub_sample(tok)] for raw_data in text_toks2idx_data] 

In [10]:
text_toks2idx_data[0:2], subsampled_text_toks_idx[0:2]

([[], [278, 2333, 1976, 1703, 318, 2621, 75, 922, 432, 3310, 1704]],
 [[], [3310, 1704]])

# 实现Dataset
按顺序读取token，并返回这个中心词token与相关的上下文词。

## 中心词与context词抽取

In [11]:
# 按行生成上下文
def get_context_list(raw_data, window):
    # 此时，raw_data 长度最少为2
    context_list = []
    for i in range(len(raw_data)):
        start_i = max(0, i - window)
        end_i = min(i + 1 + window, len(raw_data))
        index_list = list(range(start_i, end_i))
        index_list.remove(i)
        context_list.append([raw_data[j] for j in index_list])
    # print("context_list=", context_list)
    return context_list

# 生成一一映射的数据(中心词， [上下文])
def gen_cc_data(dataset, window):
    center_list, context_list = [], []
    for raw_data in dataset:
        if len(raw_data) < 2:
            continue
        center_list += raw_data
        context_list.extend(get_context_list(raw_data, window))
    return center_list, context_list

In [12]:
#tiny_dataset = [list(range(7)), list(range(7, 10))]
#tiny_dataset
#gen_cc_data(tiny_dataset, 2)

In [13]:
g_window = 5

In [14]:
center_list, context_list = gen_cc_data(subsampled_text_toks_idx, g_window)

## Negative sample：必须采集K个
词频的0.75次方占比

In [15]:
weighted_freq = np.array([vocab_dict[word] ** 0.75 for word in idx2word], dtype=np.float32)
weighted_freq = weighted_freq / np.sum(weighted_freq)

In [16]:
idx2word_index = list(range(len(idx2word)))
idx2word_index[0:19]

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]

In [17]:
start_t = time.time()
np.random.choice(idx2word_index, 5, False, weighted_freq)
print(time.time() - start_t)

0.00817728042602539


五万数据抽样耗时36m，后续优化：减少抽样，一次性抽足够多数据（确保内存不爆）

以下负采样用于CBOW模型，所以每次只需要抽取k个就可以（训练时是输入的平均），skipgram则需要抽取与context上下文相同量（误差平均，每个context也是一个正样例）


In [18]:
def gen_ns_data(word2idx, vocab_dict, center_list, context_list, k = 5):
    weighted_freq = np.array([vocab_dict[word] ** 0.75 for word in idx2word], dtype=np.float32)
    weighted_freq = weighted_freq / np.sum(weighted_freq)
    
    idx2word_index = list(range(len(idx2word)))
    ret = []
    for i in range(len(center_list)):
        # 不放回抽样
        # 防止抽样数据全部都是context, k*2
        # 最小抽样数
        #min_sample_num = min(len(context_list[i]), k)
        ns_data = []
        ns_pre = np.random.choice(idx2word_index, k * 4, False, weighted_freq)
        for ns in ns_pre:
            if ns in ns_data:
                continue
            #if len(ns_data) < min_sample_num:
            if len(ns_data) < k:
                ns_data.append(ns)
        ret.append(ns_data)
    return ret

In [19]:
start_t = time.time()
ns_data = gen_ns_data(word2idx, vocab_dict, center_list, context_list, k = g_window)
print(time.time() - start_t)

25.43799901008606


In [20]:
ns_data[0:10]

[[905, 1450, 1023, 6245, 394],
 [2733, 2718, 3285, 974, 106],
 [176, 31, 552, 76, 641],
 [104, 704, 1884, 4349, 2906],
 [4470, 3256, 34, 2921, 1819],
 [481, 3255, 421, 90, 4928],
 [190, 2071, 138, 6094, 1503],
 [2216, 5742, 175, 35, 5154],
 [203, 6021, 4835, 210, 2742],
 [119, 371, 1086, 328, 1192]]

# 自定义Dataset与Loader算法

## 方法1：自定义简单dataset与DataLoader的collate_fn方法

核心是将Dataset中`__getitem__`实现的复杂功能移植到collate_fn中，只是此函数输入是batch_data

## 方法2：直接在自定义的dataset中实现
将getitem中的功能移到`__getitem__(self, i)`中

In [21]:
g_max_conpect = g_window * 2
g_common_label = torch.tensor([1] + [0] * g_window)

class W2VDataset2(torch.utils.data.Dataset):
    def __init__(self, center_list, context_list, ns_list):

        super(W2VDataset2, self).__init__()
        # 三者长度必须一致
        assert len(center_list) == len(context_list) == len(ns_list)
        self.center_list = torch.LongTensor(center_list)
        self.context_list = context_list
        self.ns_list = ns_list
           
    def __len__(self):
        return len(self.center_list)  

    def __getitem__(self, i):
        center = self.center_list[i]
        context = self.context_list[i]
        negative = self.ns_list[i]
        
        real_context_len = len(context)
        
        context_list = context + [0] * (g_max_conpect - real_context_len)
        
        # 长度为负采样参数k=g_window
        center_negative = [center] + negative
        
        label = g_common_label
        
        # 生成CBOW输出层index
        center_negative_list = [center] + negative
        return torch.LongTensor([center]), torch.LongTensor(context_list), torch.LongTensor(center_negative), real_context_len, label

In [22]:
dataset2 = W2VDataset2(center_list, context_list, ns_data)
dataloader2 = torch.utils.data.DataLoader(dataset2, 32, shuffle=True, drop_last = True)

In [23]:
next(iter(dataset2))

(tensor([3310]),
 tensor([1704,    0,    0,    0,    0,    0,    0,    0,    0,    0]),
 tensor([3310,  905, 1450, 1023, 6245,  394]),
 1,
 tensor([1, 0, 0, 0, 0, 0]))

In [24]:
for i, (center_batch, context_batch, center_ns_batch, context_len_batch, label_batch) in enumerate(dataloader2):
    if i == 1:
        break
    print("i=%d" % (i))
    print(center_batch)
    print(context_batch)
    print(center_ns_batch)
    print(context_len_batch)
    
    print(label_batch)
    
    

i=0
tensor([[ 714],
        [2002],
        [3687],
        [ 765],
        [ 841],
        [5749],
        [1423],
        [5416],
        [4703],
        [1865],
        [ 368],
        [1538],
        [1630],
        [1672],
        [3466],
        [ 328],
        [3827],
        [3767],
        [ 661],
        [ 525],
        [  40],
        [6313],
        [5029],
        [2418],
        [ 295],
        [ 390],
        [2769],
        [1502],
        [4034],
        [  57],
        [ 372],
        [2862]])
tensor([[5593, 2580,   64,  337, 6394,  983,  136,  983,  690, 1839],
        [ 838, 2324,  231, 1187, 2317, 3439, 6275, 3802,    0,    0],
        [3147, 5135, 1605, 1418,  601, 1573, 1418,  196, 5720,    8],
        [   4,  904, 2276, 1929,   26,  682, 4406, 1542,   53, 1186],
        [ 205,   56, 1342, 3745, 5537, 2169, 1800, 2824, 3513,  106],
        [ 849,  560, 3406, 3404, 3404, 1312, 4507,  550, 3407,  385],
        [1127, 6013, 1924, 1220, 1220, 2934, 1526,  670,  138, 

# 自定义model

In [25]:
EMBEDDING_SIZE = 100

定义一个函数，去处理非空

In [26]:
in_embed = torch.nn.Embedding(len(vocab_dict), 3)

out_embed = torch.nn.Embedding(len(vocab_dict), 3)


In [27]:
idx_tensor = torch.tensor([[1],[2], [3]])

In [28]:
in_embed(idx_tensor), in_embed(torch.tensor([1,2,3])),in_embed(torch.tensor([1,2,3])).unsqueeze(2),in_embed(torch.tensor([1,2,3])).unsqueeze(2).shape

(tensor([[[-1.6210,  1.7269,  1.4544]],
 
         [[-2.0192, -0.0111,  0.1607]],
 
         [[-0.5709,  1.1892,  1.3197]]], grad_fn=<EmbeddingBackward>),
 tensor([[-1.6210,  1.7269,  1.4544],
         [-2.0192, -0.0111,  0.1607],
         [-0.5709,  1.1892,  1.3197]], grad_fn=<EmbeddingBackward>),
 tensor([[[-1.6210],
          [ 1.7269],
          [ 1.4544]],
 
         [[-2.0192],
          [-0.0111],
          [ 0.1607]],
 
         [[-0.5709],
          [ 1.1892],
          [ 1.3197]]], grad_fn=<UnsqueezeBackward0>),
 torch.Size([3, 3, 1]))

In [29]:
x = torch.tensor([[1, 2, 3, 4],[5,6,7,8]])
print(x) 
print(torch.unsqueeze(x, 1))

tensor([[1, 2, 3, 4],
        [5, 6, 7, 8]])
tensor([[[1, 2, 3, 4]],

        [[5, 6, 7, 8]]])


In [30]:
in_embed(torch.tensor([[1,2,3], [7,19,36]])), in_embed(torch.tensor([[1,2,3], [7,19,36]])).permute(0, 2, 1)

(tensor([[[-1.6210,  1.7269,  1.4544],
          [-2.0192, -0.0111,  0.1607],
          [-0.5709,  1.1892,  1.3197]],
 
         [[-1.0274,  1.0965,  1.3659],
          [-0.0862,  0.1579, -0.1276],
          [ 0.3759, -0.8672, -0.4328]]], grad_fn=<EmbeddingBackward>),
 tensor([[[-1.6210, -2.0192, -0.5709],
          [ 1.7269, -0.0111,  1.1892],
          [ 1.4544,  0.1607,  1.3197]],
 
         [[-1.0274, -0.0862,  0.3759],
          [ 1.0965,  0.1579, -0.8672],
          [ 1.3659, -0.1276, -0.4328]]], grad_fn=<PermuteBackward>))

In [31]:
def pre_h2(in_embed, in_batch_idx, in_batch_sz):
    h_list = []
    for i in range(len(in_batch_idx)):
        #print(i, in_batch_sz[i])
        #print(in_embed(in_batch_idx[i][0: in_batch_sz[i]]))
        
        t = in_embed(in_batch_idx[i][0: in_batch_sz[i]]).sum(0) / in_batch_sz[i]
        #print("sum=",t)
        #print("sum as numpy=", t.detach().numpy())
        h_list.append([t.detach().numpy().tolist()])
    #print("h_list",h_list)
    return torch.tensor(h_list)

In [32]:
def pre_h(in_embed, in_batch_idx, in_batch_sz):
    h_list = None
    for i in range(len(in_batch_idx)):
        #print(i, in_batch_sz[i])
        #print(in_embed(in_batch_idx[i][0: in_batch_sz[i]]))
        
        t = in_embed(in_batch_idx[i][0: in_batch_sz[i]]).sum(0) / in_batch_sz[i]
        #print("t1=",t)
        t = torch.unsqueeze(t, 0)
        t = torch.unsqueeze(t, 0)
        #print("t2=",t)
        if h_list is None:
            h_list = t
        else:
            h_list = torch.cat((h_list, t), 0)
            #print("h_list=", h_list)
    return h_list

In [33]:
for i, (center_batch, context_batch, center_ns_batch, context_len_batch, label_batch) in enumerate(dataloader2):
    
    print("i=%d" % (i))
    #print(center_batch)
    #print(context_batch)
    #print(ns_batch)
    #print(context_len_batch)
    
    #print(ns_len_batch)
    #print("pre_h") 
    #h = pre_h2(in_embed, context_batch, context_len_batch)
    #print("h=",h)
    
    h = pre_h(in_embed, context_batch, context_len_batch)
    #print("h=",h)
    #if i == 0:
    #    break
    
    # test: 
    out_neg_w = out_embed(center_ns_batch)
    
    print(out_neg_w)
    
    print(out_neg_w.shape)
    
    ret = torch.matmul(h, out_neg_w.permute(0, 2, 1))
    print("ret=", ret)
    
    
    print(label_batch)
    print(ret.shape, label_batch.shape)
    print("ret.view=", ret.view(label_batch.shape))
    
    print("label=", label_batch)
    
    
    out_cent = out_embed(center_batch)
    
    print("h_list",h)
    
    print("out_emb",out_cent)
    
    #print("out_emb trans",out_cent.permute(1, 0))
    
    # 2维可以
    #print(out_cent.t())

    print("h * out mul",torch.mul(h, out_cent))
    
    print("h * out mul sum(0)",torch.mul(h, out_cent).sum(0))
    # 按行求和
    print("h * out mul sum(1)：按行求和=",torch.mul(h, out_cent).sum(1))
    
    #print("h * out matmul",torch.matmul(h, out_cent.permute(1, 0)))
    
    
    
    if i == 0:
        break

i=0
tensor([[[ 0.1500,  0.2027,  0.1143],
         [ 0.5773, -0.8480,  1.9527],
         [-1.2520,  1.3017,  1.2613],
         [-1.0445,  1.8441, -0.0325],
         [ 0.1798, -0.2022, -2.1779],
         [ 0.8415,  0.6424,  0.2772]],

        [[-1.4583,  1.5947,  0.3225],
         [ 0.0705, -0.3765,  0.8216],
         [-0.5176,  1.6101,  1.9239],
         [ 0.6472,  0.3272,  1.3222],
         [ 0.2636,  1.0815,  1.5993],
         [-1.1916,  0.0665, -0.2726]],

        [[-1.2224, -0.5033, -0.6743],
         [ 0.1567, -0.9533,  0.4476],
         [-0.6395, -1.8632, -1.0285],
         [-0.4598, -1.3224,  0.7423],
         [ 0.1798, -0.2022, -2.1779],
         [ 2.3015, -1.2651,  0.5135]],

        [[-1.8098,  0.7424,  0.6207],
         [-0.6699,  0.5513, -0.7802],
         [ 2.1077,  0.4353, -0.6296],
         [ 0.2539, -0.2458, -1.6580],
         [-0.3685, -0.9273,  0.7757],
         [-0.3273, -0.0182, -0.7305]],

        [[-0.6289, -2.4330,  1.3897],
         [-0.1171, -1.0449, -0.3222],


In [34]:
k = torch.tensor([[1, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0]])
k.unsqueeze(2
           )

tensor([[[1],
         [0],
         [0],
         [0],
         [0],
         [0]],

        [[1],
         [0],
         [0],
         [0],
         [0],
         [0]]])

In [35]:

a = np.array([ 0.5038, -0.1116, -0.1281])
b = np.array([-0.3235, -0.1992,  0.5401])

c = np.array([-0.6835,  1.9188, -0.1909])
np.dot(b, c)

-0.2642178

In [36]:
tensor = torch.rand(4, 4, 3)

In [37]:
torch.matmul(tensor,tensor.permute(0, 2, 1))

tensor([[[0.6938, 0.4322, 0.1372, 0.5597],
         [0.4322, 1.0403, 0.2153, 0.4649],
         [0.1372, 0.2153, 0.1716, 0.0940],
         [0.5597, 0.4649, 0.0940, 0.4797]],

        [[0.9739, 0.8339, 1.2277, 0.5855],
         [0.8339, 1.0014, 1.2593, 0.7542],
         [1.2277, 1.2593, 1.7990, 0.8382],
         [0.5855, 0.7542, 0.8382, 0.6427]],

        [[0.5103, 0.4979, 0.7008, 0.8078],
         [0.4979, 0.5169, 0.6957, 0.7483],
         [0.7008, 0.6957, 1.0871, 1.1403],
         [0.8078, 0.7483, 1.1403, 1.3479]],

        [[1.4820, 1.1996, 0.6819, 1.0098],
         [1.1996, 1.7690, 0.6960, 1.0845],
         [0.6819, 0.6960, 0.8283, 0.2715],
         [1.0098, 1.0845, 0.2715, 0.8967]]])

In [38]:
a = torch.tensor([[0.5365, 0.4129, 0.6282],
          [0.7699, 0.1266, 0.2930],
          [0.8473, 0.5407, 0.7543],
          [0.8037, 0.3583, 0.7042]])
b = torch.tensor([[0.5365, 0.7699, 0.8473, 0.8037],
          [0.4129, 0.1266, 0.5407, 0.3583],
          [0.6282, 0.2930, 0.7543, 0.7042]])

In [39]:
torch.matmul(a,b)

tensor([[0.8530, 0.6494, 1.1517, 1.0215],
        [0.6494, 0.6946, 0.9418, 0.8705],
        [1.1517, 0.9418, 1.5792, 1.4059],
        [1.0215, 0.8705, 1.4059, 1.2702]])

In [40]:
0.5365 * 0.5365 + 0.4129 * 0.4129 + 0.6282 * 0.6282

0.8529538999999999

## 模型定义

In [41]:
class W2VModel(torch.nn.Module):
    def __init__(self, vocab_size, embed_size):
        super(W2VModel, self).__init__()
        
        self.vocab_size = vocab_size
        self.embed_size = embed_size
        
        self.in_embed = torch.nn.Embedding(self.vocab_size, self.embed_size)
        self.out_embed = torch.nn.Embedding(self.vocab_size, self.embed_size)
        
    def forward(self, centers, contexts, center_negatives, real_context_len, labels):
        ''' 
            forward最后一层只需要计算抽样的正负例
            其中中心词对应label=1,neg对应负例
        '''
        
        # 调整batch数据
        h_list = None
        # 处理batch数据:因为长度不一致，无法使用sum(0)去按列求和函数
        for i in range(len(centers)):
            h = self.in_embed(contexts[i][0: real_context_len[i]]).sum(0) / real_context_len[i]
            h = torch.unsqueeze(h, 0)
            h = torch.unsqueeze(h, 0)
            #print("i=", i)
            #print("h=", h)
            if h_list is None:
                h_list = h
            else:
                h_list = torch.cat((h_list, h), 0)
        
        #labels = torch.unsqueeze(labels, 0)
        #print("h_list=", h_list)
        out_neg_w = self.out_embed(center_negatives)

        ret = torch.matmul(h_list, out_neg_w.permute(0, 2, 1))
        #print("label size=", label_batch.shape)
        
        #print("ret size=", ret.shape)
        ret = ret.squeeze()
        #print("ret size=", ret.shape)
        #print("ret", ret)
        #print("label", labels)
        ret = ret.view(label_batch.shape)
        
        
        
        loss = torch.nn.functional.binary_cross_entropy_with_logits(ret.float(), labels.float(), reduction="none")
       
        return loss.sum(dim=1).mean()
net = W2VModel(len(idx2word), 3)

In [42]:
    
def train(net, lr, num_epochs):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print("train on", device)
    net = net.to(device)
    optimizer = torch.optim.Adam(net.parameters(), lr=lr)
    for epoch in range(num_epochs):
        start, l_sum, n = time.time(), 0.0, 0
        for i, batch in enumerate(dataloader2):
            center_batch, context_batch, center_ns_batch, context_len_batch, label_batch = [d.to(device) for d in batch]
            
            loss = net(center_batch, context_batch, center_ns_batch, context_len_batch, label_batch)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            l_sum += loss.cpu().item()
            n += 1
        print('epoch %d, loss %.2f, time %.2fs'
              % (epoch + 1, l_sum / n, time.time() - start)) 


In [43]:
train(net, 0.01, 100)

train on cuda
epoch 1, loss 4.04, time 10.92s
epoch 2, loss 2.79, time 10.78s
epoch 3, loss 2.61, time 10.74s
epoch 4, loss 2.55, time 10.65s
epoch 5, loss 2.49, time 10.67s
epoch 6, loss 2.44, time 10.80s
epoch 7, loss 2.40, time 10.67s
epoch 8, loss 2.37, time 10.66s
epoch 9, loss 2.34, time 10.58s
epoch 10, loss 2.31, time 10.41s
epoch 11, loss 2.29, time 10.53s
epoch 12, loss 2.27, time 10.83s
epoch 13, loss 2.25, time 10.75s
epoch 14, loss 2.23, time 10.75s
epoch 15, loss 2.22, time 10.61s
epoch 16, loss 2.21, time 10.71s
epoch 17, loss 2.19, time 10.72s
epoch 18, loss 2.18, time 10.67s
epoch 19, loss 2.17, time 10.80s
epoch 20, loss 2.16, time 10.73s
epoch 21, loss 2.15, time 10.65s
epoch 22, loss 2.14, time 10.79s
epoch 23, loss 2.13, time 11.35s
epoch 24, loss 2.12, time 11.41s
epoch 25, loss 2.12, time 10.82s
epoch 26, loss 2.11, time 10.61s
epoch 27, loss 2.10, time 10.73s
epoch 28, loss 2.10, time 10.75s
epoch 29, loss 2.09, time 10.82s
epoch 30, loss 2.09, time 10.80s
epoch

## 测试

In [44]:
def get_similar_tokens(query_token, k, embed):
    W = embed.weight.data
    x = W[word2idx[query_token]]
    # 添加的1e-9是为了数值稳定性
    cos = torch.matmul(W, x) / (torch.sum(W * W, dim=1) * torch.sum(x * x) + 1e-9).sqrt()
    _, topk = torch.topk(cos, k=k+1)
    topk = topk.cpu().numpy()
    for i in topk[1:]:  # 除去输入词
        print('cosine sim=%.3f: %s' % (cos[i], (idx2word[i])))
        
get_similar_tokens('云长', 10, net.in_embed)

cosine sim=1.000: 岂知
cosine sim=1.000: 曹爽
cosine sim=1.000: 孙仲谋
cosine sim=1.000: 想
cosine sim=1.000: 发落
cosine sim=1.000: 除非
cosine sim=1.000: 昏绝
cosine sim=0.999: 性
cosine sim=0.999: 米
cosine sim=0.999: 相迎
