In [2]:
import numpy as np
import torch
import torch.nn.utils.rnn as rnn_utils
import torch.nn.functional as F
import gensim

In [3]:
# Load the pretrained word vectors (word2vec text format).
w2v_model = gensim.models.KeyedVectors.load_word2vec_format('./GloVe-master/word2vec_model.txt',binary=False, encoding='utf-8')
# gensim >= 4.0 removed `KeyedVectors.vocab`; `key_to_index` is its replacement.
# Try the new attribute first and fall back so the cell runs on either version.
try:
    vocab_list = list(w2v_model.key_to_index)
except AttributeError:
    vocab_list = list(w2v_model.vocab.keys())
# Word -> row-index lookup, e.g. {'the': 0, 'a': 1, ...}
word_index = {word: index for index, word in enumerate(vocab_list)}

# Path to the test-set file; every line becomes one entry of `test_list`.
filepath = './test.txt'

# Punctuation marks that are surrounded with spaces so they become separate tokens.
change_word = ['.', '!', ',' , ':', '?', '(', ')', '/']


def encode_line(line, word_index, punctuation=change_word, unk_index=0):
    """Turn one raw text line into a list of vocabulary indices.

    Args:
        line: Raw text of one sample (may still carry its trailing newline).
        word_index: Mapping from token string to integer index.
        punctuation: Strings that are padded with a space on both sides.
        unk_index: Index used for tokens missing from ``word_index``.

    Returns:
        A list of ints, one per token produced by ``line.split(" ")``.

    NOTE(review): the tokenisation is intentionally unchanged. Because the
    line is split on a single space, runs of spaces yield empty tokens and the
    trailing newline stays attached to the last token; any such token that is
    not a key of ``word_index`` is mapped to ``unk_index``. The default
    ``unk_index`` of 0 is also the index of the first vocabulary word.
    Presumably the saved model was trained with the same preprocessing --
    confirm before changing either behaviour.
    """
    line = line.replace('<br /><br />','')
    for mark in punctuation:
        line = line.replace(mark, ' '+mark+' ')
    line = line.replace('  ',' ')
    return [word_index.get(token, unk_index) for token in line.split(" ")]


test_list = []
# Context manager closes the file even if encoding a line fails; the encoding
# is explicit (matching the embedding file) instead of locale-dependent.
with open(filepath, encoding='utf-8') as test_file:
    for line in test_file:
        words = encode_line(line, word_index, change_word)
        # Build the index tensor directly as int64 instead of via float32.
        test_list.append(torch.tensor(words, dtype=torch.long))

In [4]:
class TextCNN(torch.nn.Module):
    """Convolutional text classifier with a frozen and a trainable embedding copy.

    Token indices are embedded twice (both tables start from the same
    pretrained vectors), the two embeddings are concatenated along the feature
    axis, passed through one ``Conv1d`` per kernel size, ReLU, global max
    pooling over the sequence, dropout (p=0.5) and a final linear layer.

    Args:
        input_size: Number of rows in the embedding tables (vocabulary size).
        hidden_size: Embedding dimension.
        output_size: Number of output classes.
        embeding_vector: NumPy array of shape ``(input_size, hidden_size)``
            holding the pretrained vectors.
        kernel_sizes: Kernel width of each convolution.
        num_channels: Output channels of each convolution, paired with
            ``kernel_sizes`` by position.

    Note:
        Attribute names are kept as they are because they are the keys of the
        module's ``state_dict``. Despite its name, ``constant_embedding`` is the
        *trainable* table; ``embedding`` is the frozen one.
    """

    def __init__(self, input_size, hidden_size, output_size, embeding_vector, kernel_sizes, num_channels):
        super().__init__()
        self.hidden_size = hidden_size
        pretrained = torch.from_numpy(embeding_vector)
        # Embedding table that does NOT take part in training (frozen).
        self.embedding = torch.nn.Embedding(num_embeddings=input_size, embedding_dim=hidden_size)
        # Embedding table that DOES take part in training.
        self.constant_embedding = torch.nn.Embedding(num_embeddings=input_size, embedding_dim=hidden_size)
        # Initialise both tables from the pretrained word vectors.
        with torch.no_grad():
            self.embedding.weight.copy_(pretrained)
            self.constant_embedding.weight.copy_(pretrained)
        self.embedding.weight.requires_grad = False
        self.dropout = torch.nn.Dropout(0.5)
        self.out_linear = torch.nn.Linear(sum(num_channels), output_size)
        self.pool = GlobalMaxPool1d()
        self.convs = torch.nn.ModuleList()
        for c, k in zip(num_channels, kernel_sizes):
            # Input channels are doubled because the two embeddings are concatenated.
            self.convs.append(torch.nn.Conv1d(in_channels=2*hidden_size, out_channels=c, kernel_size=k))

    def forward(self, x):
        """Compute class scores for a batch of index sequences.

        Args:
            x: Integer tensor of shape ``(batch, seq_len)``.

        Returns:
            Tensor of shape ``(batch, output_size)`` with unnormalised scores.
        """
        # Conv1d raises when the sequence is shorter than its kernel, so
        # right-pad short inputs with index 0 up to the widest kernel.
        # Index 0 is also what this notebook substitutes for unknown words.
        widest_kernel = max((conv.kernel_size[0] for conv in self.convs), default=0)
        missing = widest_kernel - x.shape[1]
        if missing > 0:
            x = F.pad(x, (0, missing), value=0)
        # (batch, seq, 2*hidden) -> (batch, 2*hidden, seq), the layout Conv1d expects.
        embeddings = torch.cat((self.embedding(x), self.constant_embedding(x)), dim=2).permute(0,2,1)
        # One pooled feature vector per convolution, concatenated along the channel axis.
        out = torch.cat([self.pool(F.relu(conv(embeddings))).squeeze(-1) for conv in self.convs], dim=1)
        out = self.out_linear(self.dropout(out))
        return out

class GlobalMaxPool1d(torch.nn.Module):
    """Max-pool over the whole last axis of a ``(batch, channels, length)`` tensor."""

    def forward(self, x):
        """Return the per-channel maximum, shaped ``(batch, channels, 1)``."""
        # The pooling window spans the full length, so one value per channel remains.
        full_length = x.size(2)
        return F.max_pool1d(x, full_length)

In [10]:
# Use the first GPU when available; otherwise fall back to the CPU so the
# cell still runs on machines without CUDA.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
# Initialise the embedding layers from the pretrained Word2Vec weights.
embedding_matrix = w2v_model.vectors
input_size = embedding_matrix.shape[0]   # vocabulary size (49339 in the original run)
hidden_size = embedding_matrix.shape[1]  # embedding dimension (50 in the original run)
kernel_size = [3, 4, 5]
nums_channels = [100, 100, 100]
model = TextCNN(input_size, hidden_size, 2, embedding_matrix, kernel_size, nums_channels).to(device)
# map_location lets a checkpoint that was saved from GPU tensors load on CPU too.
model.load_state_dict(torch.load('./model_save/TextCNN_save_2.pt', map_location=device))
# Inference mode (disables dropout); set once rather than on every sample.
model.eval()

# The context manager closes result.txt even if a prediction raises;
# no_grad() skips autograd bookkeeping for the whole loop.
with open('result.txt','w') as result_file, torch.no_grad():
    for data_x in test_list:
        # Add a batch dimension of 1: (seq_len,) -> (1, seq_len).
        out = model(data_x.unsqueeze(0).to(device))
        prediction = out.argmax(dim=1).item()
        # Class 0 is written as negative, any other class as positive.
        if prediction == 0:
            result_file.write('0(negative)\n')
        else:
            result_file.write('1(positive)\n')

25000
