RCNN.py (forked from prakashpandey9/Text-Classification-Pytorch)
# _*_ coding: utf-8 _*_
import torch
import torch.nn as nn
from torch.nn import functional as F


class RCNN(nn.Module):
    def __init__(self, batch_size, output_size, hidden_size, vocab_size, embedding_length, weights):
        super(RCNN, self).__init__()

        """
        Arguments
        ---------
        batch_size : Size of the batch, same as the batch_size of the data returned by the TorchText BucketIterator
        output_size : 2 = (pos, neg)
        hidden_size : Size of the hidden state of the LSTM
        vocab_size : Size of the vocabulary containing unique words
        embedding_length : Embedding dimension of the GloVe word embeddings
        weights : Pre-trained GloVe word embeddings used to initialize the word_embedding look-up table
        """

        self.batch_size = batch_size
        self.output_size = output_size
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.embedding_length = embedding_length

        self.word_embeddings = nn.Embedding(vocab_size, embedding_length)  # Initializing the look-up table.
        self.word_embeddings.weight = nn.Parameter(weights, requires_grad=False)  # Assigning the pre-trained GloVe embeddings to the look-up table (frozen).
        self.dropout = 0.8
        # Note: nn.LSTM only applies `dropout` between stacked layers, so with a single
        # layer this setting has no effect and PyTorch emits a warning.
        self.lstm = nn.LSTM(embedding_length, hidden_size, dropout=self.dropout, bidirectional=True)
        self.W2 = nn.Linear(2 * hidden_size + embedding_length, hidden_size)
        self.label = nn.Linear(hidden_size, output_size)
    def forward(self, input_sentence, batch_size=None):
        """
        Parameters
        ----------
        input_sentence : input_sentence of shape = (batch_size, num_sequences)
        batch_size : default = None. Used only for prediction on a single sentence after training (batch_size = 1)

        Returns
        -------
        Output of the linear layer containing logits for the positive & negative classes, computed from the
        max-pooled RCNN encoding.
        final_output.shape = (batch_size, output_size)
        """

        """
        The idea of the paper "Recurrent Convolutional Neural Networks for Text Classification" is that we pass the embedding vectors
        of the text sequence through a bidirectional LSTM, and then for each token the final representation is the concatenation of
        its own GloVe embedding and its left and right contextual embeddings, which in a bidirectional LSTM are the corresponding hidden
        states. This concatenated representation is passed through a linear layer that maps it back down to a hidden_size vector.
        After this step, a max-pooling layer is applied across all time steps, which converts any variable-length text into a
        fixed-size tensor of shape (batch_size, hidden_size), and finally this is mapped to the output layer.
        """
        input = self.word_embeddings(input_sentence)  # embedded input of shape = (batch_size, num_sequences, embedding_length)
        input = input.permute(1, 0, 2)  # input.size() = (num_sequences, batch_size, embedding_length)

        if batch_size is None:
            batch_size = self.batch_size
        # Initial hidden and cell states of the bidirectional LSTM: (num_layers * num_directions, batch_size, hidden_size).
        # Created on the input's device so the model runs on CPU or GPU without hard-coding .cuda().
        h_0 = torch.zeros(2, batch_size, self.hidden_size, device=input.device)
        c_0 = torch.zeros(2, batch_size, self.hidden_size, device=input.device)

        output, (final_hidden_state, final_cell_state) = self.lstm(input, (h_0, c_0))
        # output.size() = (num_sequences, batch_size, 2 * hidden_size); concatenating it with the embeddings
        # gives each token its left context, its own embedding, and its right context.
        final_encoding = torch.cat((output, input), 2).permute(1, 0, 2)

        y = self.W2(final_encoding)  # y.size() = (batch_size, num_sequences, hidden_size)
        y = y.permute(0, 2, 1)  # y.size() = (batch_size, hidden_size, num_sequences)
        y = F.max_pool1d(y, y.size(2))  # y.size() = (batch_size, hidden_size, 1)
        y = y.squeeze(2)
        logits = self.label(y)

        return logits
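

if __name__ == "__main__":
    # Minimal smoke test (not part of the original file): a sketch that builds the RCNN with a random
    # stand-in for the pre-trained GloVe matrix and pushes a dummy batch through it, to illustrate the
    # shape flow described in the forward() docstring. All sizes below are illustrative assumptions.
    vocab_size, embedding_length, hidden_size, output_size, batch_size = 100, 300, 256, 2, 4
    weights = torch.randn(vocab_size, embedding_length)  # stand-in for the GloVe embedding matrix

    model = RCNN(batch_size, output_size, hidden_size, vocab_size, embedding_length, weights)

    dummy_batch = torch.randint(0, vocab_size, (batch_size, 10))  # (batch_size, num_sequences)
    logits = model(dummy_batch)
    print(logits.shape)  # expected: torch.Size([4, 2])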