### Recurrent Convolutional Neural Networks (RCNN)
Recurrent Convolutional Neural Networks (RCNN) are also used for text classification. The main idea of this technique is to capture contextual information with a recurrent structure and to construct the representation of the text with a convolutional neural network. The architecture combines an RNN and a CNN so that a single model can use the advantages of both techniques.

### Import Packages

In [1]:
# import libraries
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
from sklearn import metrics
import pandas as pd
import torch
import torch.nn.functional as F

import torch.nn as nn
import torch.optim as optim
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from torch.utils.data import DataLoader
import pytorch_lightning as pl
import torch.utils.data as data_utils
from pytorch_lightning import Trainer

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


### Tokenize Text Using Glove

In [2]:
# build a data tokenizer
def loadData_Tokenizer(X_train, X_test, MAX_NB_WORDS=179210,
                       MAX_SEQUENCE_LENGTH=500,
                       GLOVE_PATH="glove.6B.50d.txt", EMBEDDING_DIM=50):
    '''
    Tokenize the train and test texts with a shared vocabulary, pad them to
    a common length and build a GloVe embedding matrix for that vocabulary.

    Parameters
    ----------
    X_train : sequence of str
        Raw documents used for training the model.
    X_test : sequence of str
        Raw documents used for testing the model.
    MAX_NB_WORDS : int
        Value passed to the Keras Tokenizer as ``num_words``.
    MAX_SEQUENCE_LENGTH : int
        Length every token sequence is padded/truncated to.
    GLOVE_PATH : str
        Path of the GloVe text file (one word followed by its vector
        components per line).
    EMBEDDING_DIM : int
        Number of components of each GloVe vector in ``GLOVE_PATH``.

    Returns
    -------
    (X_train, X_test, embedding_matrix)
        X_train and X_test are integer arrays with MAX_SEQUENCE_LENGTH
        columns; embedding_matrix has ``len(word_index) + 1`` rows and
        EMBEDDING_DIM columns. Rows of words missing from the GloVe file
        (and row 0) stay all-zero.
    '''
    # set a random seed for reproducibility
    np.random.seed(7)

    # remember the split point before the names are rebound below
    n_train = len(X_train)

    # combine train and test to build a shared vocabulary; a plain list
    # avoids the fixed-width unicode array np.concatenate would allocate
    # (every row as wide as the longest document)
    text = list(X_train) + list(X_test)

    # initiate tokenizer and fit it on all texts
    tokenizer = Tokenizer(num_words=MAX_NB_WORDS)
    tokenizer.fit_on_texts(text)

    # build sequences
    sequences = tokenizer.texts_to_sequences(text)

    # dictionary for total vocabulary
    word_index = tokenizer.word_index

    # pad sequences to make them of equal lengths
    text = pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH)

    # total unique words in vocab
    print('Found %s unique tokens.' % len(word_index))

    # split tokenized text back into train and test sets
    X_train = text[:n_train]
    X_test = text[n_train:]

    # read the GloVe vectors; the context manager closes the file even if
    # parsing raises
    embeddings_index = {}
    with open(GLOVE_PATH, encoding="utf8") as f:
        for line in f:
            values = line.split()
            if not values:
                # blank line: nothing to parse
                continue
            try:
                coefs = np.asarray(values[1:], dtype='float32')
            except ValueError:
                # malformed line: skip it instead of silently storing the
                # previous word's vector under this word
                continue
            embeddings_index[values[0]] = coefs

    # print total words in embedding
    print('Total %s word vectors.' % len(embeddings_index))

    # create embedding matrix; index 0 is left as zeros because
    # word_index starts numbering at 1 (hence the + 1)
    embedding_matrix = np.zeros((len(word_index) + 1, EMBEDDING_DIM))
    for word, i in word_index.items():
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector

    # return tokenized train, test and the embedding matrix
    return (X_train, X_test, embedding_matrix)

### Define model class using Pytorch Lightning

Pytorch Lightning provides a standard wrapper to load data, define and train deep learning models.

In this codeblock we define:
1. Model
2. Training/Validation/Test Steps
3. Optimizer settings
4. Train/Validation/Test Data Loader

In [83]:
# loss shared by the training and validation steps
criterion = nn.CrossEntropyLoss()


class CoolSystem(pl.LightningModule):
    '''
    RCNN text classifier: frozen pretrained embeddings, four
    Conv1d/ReLU/MaxPool1d stages, four LSTMs applied one after another and
    a two-layer fully connected head producing one score per class.
    '''

    def __init__(self, embedding_matrix, nclasses):
        '''
        Build the layers of the network.

        Parameters
        ----------
        embedding_matrix: 2-D array of pretrained word vectors, one row per
        vocabulary index; its second dimension sets the input channels of
        the first convolution
        nclasses: the dimensions of output layer
        '''
        super(CoolSystem, self).__init__()

        self.nclasses = nclasses

        ## Embedding Layer, initialised from the pretrained matrix and frozen
        et = torch.tensor(embedding_matrix, dtype=torch.float32)
        embedding_dim = et.shape[1]
        self.embedding = nn.Embedding.from_pretrained(et, freeze=True)
        # NOTE: defined (and listed in the model summary) but not applied
        # in forward()
        self.embedding_dropout = nn.Dropout2d(0.25)

        # convolutional feature extractor over the token axis
        self.feature = nn.Sequential(
            nn.Conv1d(embedding_dim, 256, kernel_size=2),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.Conv1d(256, 256, kernel_size=2),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.Conv1d(256, 256, kernel_size=2),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.Conv1d(256, 256, kernel_size=2),
            nn.ReLU(),
            nn.MaxPool1d(2),
        )

        # NOTE(review): each LSTM has a single layer, and nn.LSTM applies
        # `dropout` only between stacked layers, so dropout=0.2 looks like
        # it has no effect here -- confirm against the nn.LSTM docs.
        self.RNNfeature1 = nn.Sequential(
            nn.LSTM(256, 256, batch_first=True, dropout=0.2))
        self.RNNfeature2 = nn.Sequential(
            nn.LSTM(256, 256, batch_first=True, dropout=0.2))
        self.RNNfeature3 = nn.Sequential(
            nn.LSTM(256, 256, batch_first=True, dropout=0.2))
        self.RNNfeature4 = nn.Sequential(
            nn.LSTM(256, 256, batch_first=True, dropout=0.2))

        # classification head
        self.feature1 = nn.Sequential(
            nn.Linear(256, 1024),
            nn.ReLU(),
            nn.Linear(1024, nclasses)
        )

    def forward(self, x):
        '''
        Passes a batch of token-index sequences through the network and
        returns one row of class scores per sequence.

        Parameters
        ----------
        x: integer tensor of token indices, (batch, sequence length); the
        sequence must be at least 31 tokens long for the four conv/pool
        stages to leave one timestep
        '''
        x = self.embedding(x)
        # Conv1d expects channels before the sequence axis
        x = x.permute(0, 2, 1)
        x = self.feature(x)
        # back to (batch, steps, channels) for the batch_first LSTMs
        x = x.permute(0, 2, 1)
        # each Sequential wraps one LSTM, so it returns (output, state)
        x, _ = self.RNNfeature1(x)
        x, _ = self.RNNfeature2(x)
        x, _ = self.RNNfeature3(x)
        x, _ = self.RNNfeature4(x)
        # keep the last timestep; for 500-token input this is index 29,
        # but -1 stays correct for any other sequence length
        x = x[:, -1, :]
        x = self.feature1(x)

        return x

    def training_step(self, batch, batch_nb):
        '''
        Training step, takes the training batch and pass it forward
        through network

        Parameters
        ----------
        batch: (inputs, targets) pair
        batch_nb: batch number
        '''
        x, y = batch
        y_hat = self.forward(x)
        return {'loss': criterion(y_hat, y)}

    def validation_step(self, batch, batch_nb):
        '''
        Validation step, takes the validation batch and pass it forward
        through network

        Parameters
        ----------
        batch: (inputs, targets) pair
        batch_nb: batch number
        '''
        x, y = batch
        y_hat = self.forward(x)
        return {'val_loss': criterion(y_hat, y)}

    def validation_end(self, outputs):
        '''
        Averages the validation losses of all validation batches.
        Early stop can also be defined here

        Parameters
        ----------
        outputs: list of the dicts returned by validation_step
        '''
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        return {'avg_val_loss': avg_loss}

    def configure_optimizers(self):
        '''
        Optimizer for the network: Adam with its default settings

        '''
        return torch.optim.Adam(self.parameters())

    @pl.data_loader
    def tng_dataloader(self):
        '''
        Training data loader, takes input directly from global environment
        (X_train_Glove, y_train). Preprocessing can also be defined here.

        '''
        dataset = data_utils.TensorDataset(torch.LongTensor(X_train_Glove),
                                           torch.LongTensor(y_train))
        return DataLoader(dataset, batch_size=128)

    @pl.data_loader
    def val_dataloader(self):
        '''
        Validation data loader, takes input directly from global environment
        (X_test_Glove, y_test). Preprocessing can also be defined here.

        '''
        dataset = data_utils.TensorDataset(torch.LongTensor(X_test_Glove),
                                           torch.LongTensor(y_test))
        return DataLoader(dataset, batch_size=128)

    @pl.data_loader
    def test_dataloader(self):
        '''
        Test data loader, takes input directly from global environment
        (X_test_Glove, y_test). Preprocessing can also be defined here.

        '''
        dataset = data_utils.TensorDataset(torch.LongTensor(X_test_Glove),
                                           torch.LongTensor(y_test))
        return DataLoader(dataset, batch_size=128)

### Load text dataset (20newsgroups)

In [81]:
# Fetch the train and test splits of the 20 Newsgroups corpus
newsgroups_train, newsgroups_test = (
    fetch_20newsgroups(subset=split) for split in ('train', 'test')
)

# raw documents (x) and their integer class labels (y)
X_train, y_train = newsgroups_train.data, newsgroups_train.target
X_test, y_test = newsgroups_test.data, newsgroups_test.target

# turn the documents into padded token sequences and build the
# matching embedding matrix
X_train_Glove, X_test_Glove, embedding_matrix = loadData_Tokenizer(
    X_train, X_test)

Found 179209 unique tokens.
Total 400001 word vectors.


### Train Model using Pytorch Lightning

In [84]:
# build the classifier from the embedding matrix, with 20 output classes
model = CoolSystem(embedding_matrix=embedding_matrix, nclasses=20)

# default trainer settings, limited to 15 epochs
trainer = Trainer(max_nb_epochs=15)

# run training on the data loaders defined by the model
trainer.fit(model)

  0%|          | 0/5 [00:00<?, ?it/s]

gpu available: False, used: False
                 Name        Type   Params
0           embedding   Embedding  8960500
1   embedding_dropout   Dropout2d        0
2             feature  Sequential   419840
3           feature.0      Conv1d    25856
4           feature.1        ReLU        0
5           feature.2   MaxPool1d        0
6           feature.3      Conv1d   131328
7           feature.4        ReLU        0
8           feature.5   MaxPool1d        0
9           feature.6      Conv1d   131328
10          feature.7        ReLU        0
11          feature.8   MaxPool1d        0
12          feature.9      Conv1d   131328
13         feature.10        ReLU        0
14         feature.11   MaxPool1d        0
15        RNNfeature1  Sequential   526336
16      RNNfeature1.0        LSTM   526336
17        RNNfeature2  Sequential   526336
18      RNNfeature2.0        LSTM   526336
19        RNNfeature3  Sequential   526336
20      RNNfeature3.0        LSTM   526336
21        RNNfeature

100%|██████████| 148/148 [09:32<00:00,  1.85s/it, avg_val_loss=1.36, batch_nb=88, epoch=14, loss=0.575]

1

### Evaluate Results on Test Set

In [85]:
# switch to evaluation mode, then score the whole test set in one pass
# with gradient tracking turned off
model.eval()
with torch.no_grad():
    test_tokens = torch.LongTensor(X_test_Glove)
    predicted = model.forward(test_tokens)

# convert the scores to NumPy and report per-class metrics, taking the
# highest-scoring class of each row as the prediction
predicted = predicted.detach().numpy()
print(metrics.classification_report(y_test, predicted.argmax(axis=1)))

              precision    recall  f1-score   support

           0       0.48      0.28      0.35       319
           1       0.51      0.46      0.48       389
           2       0.61      0.57      0.59       394
           3       0.39      0.45      0.42       392
           4       0.27      0.38      0.31       385
           5       0.55      0.29      0.38       395
           6       0.50      0.78      0.61       390
           7       0.86      0.63      0.73       396
           8       0.71      0.79      0.75       398
           9       0.81      0.80      0.81       397
          10       0.80      0.96      0.87       399
          11       0.93      0.70      0.80       396
          12       0.58      0.44      0.50       393
          13       0.85      0.76      0.80       396
          14       0.86      0.80      0.83       394
          15       0.55      0.88      0.68       398
          16       0.67      0.68      0.68       364
          17       0.95    