In [3]:
import torch
import torchtext
from torch.utils.data import DataLoader

train_dataset, test_dataset  = torchtext.datasets.AG_NEWS()

In [4]:
from torchtext.data import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator

tokenizer = get_tokenizer("basic_english")

def build_vocabulary(datasets):
    for dataset in datasets:
        for _, text in dataset:
            yield tokenizer(text)

vocab = build_vocab_from_iterator(build_vocabulary([train_dataset, test_dataset]), min_freq=1, specials=["<UNK>"])

vocab.set_default_index(vocab["<UNK>"])

In [5]:
len(vocab)

98635

In [6]:
from torch.utils.data import DataLoader
from torchtext.data.functional import to_map_style_dataset

train_dataset, test_dataset  = torchtext.datasets.AG_NEWS()
train_dataset, test_dataset  = to_map_style_dataset(train_dataset), to_map_style_dataset(test_dataset)

target_classes = ["World", "Sports", "Business", "Sci/Tech"]

max_words = 25

def vectorize_batch(batch):
    Y, X = list(zip(*batch))
    X = [vocab(tokenizer(text)) for text in X]
    X = [tokens+([0]* (max_words-len(tokens))) if len(tokens)<max_words else tokens[:max_words] for tokens in X] ## Bringing all samples to max_words length.

    return torch.tensor(X, dtype=torch.int32), torch.tensor(Y) - 1 ## We have deducted 1 from target names to get them in range [0,1,2,3] from [1,2,3,4]


train_loader = DataLoader(train_dataset, batch_size=1024, collate_fn=vectorize_batch, shuffle=True)
test_loader  = DataLoader(test_dataset , batch_size=1024, collate_fn=vectorize_batch)

In [7]:
for X, Y in train_loader:
    print(X.shape, Y.shape)
    break

torch.Size([1024, 25]) torch.Size([1024])


In [8]:
from torch import nn
from torch.nn import functional as F

embed_len = 50
hidden_dim = 50
n_layers=1

class RNNClassifier(nn.Module):
    def __init__(self):
        super(RNNClassifier, self).__init__()
        self.embedding_layer = nn.Embedding(num_embeddings=len(vocab), embedding_dim=embed_len)
        self.rnn = nn.RNN(input_size=embed_len, hidden_size=hidden_dim, num_layers=n_layers, batch_first=True)
        self.linear = nn.Linear(hidden_dim, len(target_classes))

    def forward(self, X_batch):
        embeddings = self.embedding_layer(X_batch)
        output, hidden = self.rnn(embeddings, torch.randn(n_layers, len(X_batch), hidden_dim))
        return self.linear(output[:,-1])

In [9]:
rnn_classifier = RNNClassifier()

rnn_classifier

RNNClassifier(
  (embedding_layer): Embedding(98635, 50)
  (rnn): RNN(50, 50, batch_first=True)
  (linear): Linear(in_features=50, out_features=4, bias=True)
)

In [15]:
from tqdm import tqdm
from sklearn.metrics import accuracy_score
import gc

def CalcValLossAndAccuracy(model, loss_fn, val_loader):
    with torch.no_grad():
        Y_shuffled, Y_preds, losses = [],[],[]
        for X, Y in val_loader:
            preds = model(X)
            loss = loss_fn(preds, Y)
            losses.append(loss.item())

            Y_shuffled.append(Y)
            Y_preds.append(preds.argmax(dim=-1))

        Y_shuffled = torch.cat(Y_shuffled)
        Y_preds = torch.cat(Y_preds)

        print("Valid Loss : {:.3f}".format(torch.tensor(losses).mean()))
        print("Valid Acc  : {:.3f}".format(accuracy_score(Y_shuffled.detach().numpy(), Y_preds.detach().numpy())))


def TrainModel(model, loss_fn, optimizer, train_loader, val_loader, epochs=10):
    for i in range(1, epochs+1):
        losses = []
        for X, Y in tqdm(train_loader):
            Y_preds = model(X)
            print("Y pred:",Y_preds.shape)
            print("=============")
            print("Y", Y.shape)
            break
            loss = loss_fn(Y_preds, Y)
            losses.append(loss.item())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print("Train Loss : {:.3f}".format(torch.tensor(losses).mean()))
        CalcValLossAndAccuracy(model, loss_fn, val_loader)

In [16]:
from torch.optim import Adam

epochs = 15
learning_rate = 1e-3

loss_fn = nn.CrossEntropyLoss()
rnn_classifier = RNNClassifier()
optimizer = Adam(rnn_classifier.parameters(), lr=learning_rate)

TrainModel(rnn_classifier, loss_fn, optimizer, train_loader, test_loader, epochs)

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]


Y pred: torch.Size([1024, 4])
Y torch.Size([1024])
Train Loss : nan
Valid Loss : 1.418
Valid Acc  : 0.250


  0%|          | 0/118 [00:00<?, ?it/s]

Y pred: torch.Size([1024, 4])
Y torch.Size([1024])
Train Loss : nan





Valid Loss : 1.418
Valid Acc  : 0.250


  0%|          | 0/118 [00:00<?, ?it/s]

Y pred: torch.Size([1024, 4])
Y torch.Size([1024])
Train Loss : nan





Valid Loss : 1.418
Valid Acc  : 0.250


  0%|          | 0/118 [00:00<?, ?it/s]

Y pred: torch.Size([1024, 4])
Y torch.Size([1024])
Train Loss : nan





Valid Loss : 1.418
Valid Acc  : 0.250


  0%|          | 0/118 [00:00<?, ?it/s]

Y pred: torch.Size([1024, 4])
Y torch.Size([1024])
Train Loss : nan





Valid Loss : 1.418
Valid Acc  : 0.250


  0%|          | 0/118 [00:00<?, ?it/s]

Y pred: torch.Size([1024, 4])
Y torch.Size([1024])
Train Loss : nan





Valid Loss : 1.418
Valid Acc  : 0.250


  0%|          | 0/118 [00:00<?, ?it/s]

Y pred: torch.Size([1024, 4])
Y torch.Size([1024])
Train Loss : nan





Valid Loss : 1.418
Valid Acc  : 0.250


  0%|          | 0/118 [00:00<?, ?it/s]

Y pred: torch.Size([1024, 4])
Y torch.Size([1024])
Train Loss : nan





Valid Loss : 1.418
Valid Acc  : 0.250


  0%|          | 0/118 [00:00<?, ?it/s]

Y pred: torch.Size([1024, 4])
Y torch.Size([1024])
Train Loss : nan





Valid Loss : 1.418
Valid Acc  : 0.250


  0%|          | 0/118 [00:00<?, ?it/s]

Y pred: torch.Size([1024, 4])
Y torch.Size([1024])
Train Loss : nan





Valid Loss : 1.418
Valid Acc  : 0.250


  0%|          | 0/118 [00:00<?, ?it/s]

Y pred: torch.Size([1024, 4])
Y torch.Size([1024])
Train Loss : nan





Valid Loss : 1.418
Valid Acc  : 0.250


  0%|          | 0/118 [00:00<?, ?it/s]

Y pred: torch.Size([1024, 4])
Y torch.Size([1024])
Train Loss : nan





Valid Loss : 1.418
Valid Acc  : 0.250


  0%|          | 0/118 [00:00<?, ?it/s]

Y pred: torch.Size([1024, 4])
Y torch.Size([1024])
Train Loss : nan





Valid Loss : 1.418
Valid Acc  : 0.250


  0%|          | 0/118 [00:00<?, ?it/s]

Y pred: torch.Size([1024, 4])
Y torch.Size([1024])
Train Loss : nan





Valid Loss : 1.418
Valid Acc  : 0.250


  0%|          | 0/118 [00:00<?, ?it/s]

Y pred: torch.Size([1024, 4])
Y torch.Size([1024])
Train Loss : nan





Valid Loss : 1.418
Valid Acc  : 0.250


In [None]:
def MakePredictions(model, loader):
    Y_shuffled, Y_preds = [], []
    for X, Y in loader:
        preds = model(X)
        Y_preds.append(preds)
        Y_shuffled.append(Y)
    gc.collect()
    Y_preds, Y_shuffled = torch.cat(Y_preds), torch.cat(Y_shuffled)

    return Y_shuffled.detach().numpy(), F.softmax(Y_preds, dim=-1).argmax(dim=-1).detach().numpy()

Y_actual, Y_preds = MakePredictions(rnn_classifier, test_loader)

In [None]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

print("Test Accuracy : {}".format(accuracy_score(Y_actual, Y_preds)))
print("\nClassification Report : ")
print(classification_report(Y_actual, Y_preds, target_names=target_classes))
print("\nConfusion Matrix : ")
print(confusion_matrix(Y_actual, Y_preds))

Test Accuracy : 0.8710526315789474

Classification Report : 
              precision    recall  f1-score   support

       World       0.89      0.87      0.88      1900
      Sports       0.92      0.95      0.94      1900
    Business       0.83      0.83      0.83      1900
    Sci/Tech       0.85      0.83      0.84      1900

    accuracy                           0.87      7600
   macro avg       0.87      0.87      0.87      7600
weighted avg       0.87      0.87      0.87      7600


Confusion Matrix : 
[[1649   88  100   63]
 [  43 1814   32   11]
 [  77   41 1575  207]
 [  93   23  202 1582]]
