# CS918: Introduction to PyTorch


## Use PyTorch for classifying news


#### Import the required packages

In [None]:
# Import packages
import numpy as np
import torch

print("My Pytorch version: " + torch.__version__)
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Set the device to perform the computation.
# Batches and the model are placed on it via `.to(DEVICE)` further down.
DEVICE = torch.device('cpu')

# Set a fixed seed for reproducibility
def setup_seed(seed):
    """Seed every random number generator this notebook can draw from.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and all CUDA
    devices) and configures cuDNN for repeatable behaviour.

    Args:
        seed: Integer seed shared by all generators.
    """
    # Imported locally so the helper does not rely on a file-level import.
    import random

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Request deterministic cuDNN kernels and switch off the auto-tuner,
    # which may otherwise pick different algorithms from run to run.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [None]:
import torchtext

print("Torch Text Version : {}".format(torchtext.__version__))

**Preparing the Data**

In [None]:
from torch.utils.data import DataLoader

# Load the AG News dataset
train_dataset, test_dataset  = torchtext.datasets.AG_NEWS()

# AG News classes, listed in label order (dataset label k -> index k - 1)
target_classes = ["World", "Sports", "Business", "Sci/Tech"]

In [None]:
next(iter(train_dataset))

#### Build the dictionary

In [None]:
# Use the torchtext tokenizer to build a vocabulary from the training and test sets
from torchtext.data import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator

tokenizer = get_tokenizer("basic_english")

def build_vocab(datasets):
    """Lazily yield the token list of every text found in ``datasets``.

    Args:
        datasets: Iterable of datasets; each one yields two-item records
            whose second item is the raw text.

    Yields:
        ``tokenizer(text)`` for each record, one dataset after the other.
    """
    for corpus in datasets:
        yield from (tokenizer(document) for _label, document in corpus)

# Build the vocabulary from the tokens of both splits; "<UNK>" is registered
# as a special token.
vocab = build_vocab_from_iterator(build_vocab([train_dataset, test_dataset]), specials=["<UNK>"])

# Default index for unknown words: lookups of missing tokens return the "<UNK>" index
vocab.set_default_index(vocab["<UNK>"])

In [None]:
# Size of dictionary
len(vocab.get_itos())

In [None]:
# Print the word-to-index mapping
vocab.get_stoi()

#### Convert the documents into bag-of-words (BoW) representations

In [None]:
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from torch.utils.data import DataLoader
from torchtext.data.functional import to_map_style_dataset

# Convert into Bag-of-Words (BoW) representation.
# The vocabulary is supplied up front and the torchtext tokenizer is reused;
# `transform` is then called on each batch without a prior `fit`.
vectorizer = CountVectorizer(vocabulary=vocab.get_itos(), tokenizer=tokenizer)


def vectorize_batch(batch):
    """Collate a list of samples into bag-of-words tensors.

    Args:
        batch: Sequence of two-item samples, label first and raw text second.

    Returns:
        A pair ``(X, Y)`` placed on ``DEVICE``: ``X`` is a float32 tensor of
        token counts with one row per sample, ``Y`` holds the labels shifted
        down by one.
    """
    labels, texts = zip(*batch)
    # `toarray()` yields a plain ndarray; `todense()` would return the legacy
    # `numpy.matrix` type, which NumPy no longer recommends.
    counts = vectorizer.transform(texts).toarray()
    features = torch.tensor(counts, dtype=torch.float32).to(DEVICE)
    # We deduct 1 from target names to get them in range [0,1,2,3] from [1,2,3,4]
    targets = torch.tensor(labels).to(DEVICE) - 1
    return features, targets


# Reload both splits and convert them to map-style datasets, which the
# DataLoader can index (a prerequisite for shuffling).
train_dataset, test_dataset = torchtext.datasets.AG_NEWS()
train_dataset = to_map_style_dataset(train_dataset)
test_dataset = to_map_style_dataset(test_dataset)

# Shuffle the training samples every epoch so mini-batches are not always
# drawn in file order; the test loader keeps its order for evaluation.
train_loader = DataLoader(
    train_dataset, batch_size=256, shuffle=True, collate_fn=vectorize_batch
)
test_loader = DataLoader(test_dataset, batch_size=256, collate_fn=vectorize_batch)

In [None]:
# Example - 1 batch
for X, Y in train_loader:
    print(X.shape, Y.shape)
    break

In [None]:
print(len(vocab))

###  Define the model's architecture

In [None]:
from torch import nn
from torch.nn import functional as F

setup_seed(42)

class TextClassifier(nn.Module):
    """Feed-forward classifier over bag-of-words vectors.

    Architecture: ``Linear(len(vocab), 128) -> Sigmoid -> Linear(128, 64) ->
    Sigmoid -> Linear(64, 4)``. The output of the last linear layer is
    returned as-is.
    """

    def __init__(self):
        super().__init__()

        # First layer: one input feature per vocabulary entry, 128 outputs.
        self.linear1 = nn.Linear(len(vocab), 128)

        print("Weights of Linear Layer 1: ", self.linear1.weight.shape)
        print("PyTorch will transpose the weights before the multiplication with the input.\n")

        # NOTE: despite its name, this attribute holds a sigmoid activation;
        # assign nn.ReLU() here instead to try the alternative.
        self.relu = nn.Sigmoid()

        # Hidden layer: 128 -> 64.
        self.linear3 = nn.Linear(128, 64)

        # Output layer: 4 scores per sample.
        self.linear4 = nn.Linear(64, 4)

    def forward(self, X_batch):
        """Return the 4 output scores for each row of ``X_batch``."""
        hidden = self.relu(self.linear1(X_batch))
        hidden = self.relu(self.linear3(hidden))
        return self.linear4(hidden)

#### Visualize and check the first batch

In [None]:
# Instantiate the classifier and place it on the chosen device
text_classifier = TextClassifier().to(DEVICE)

# Sanity check: feed only the first training batch through the model
# and print the shape of its output, then stop.
for X, Y in train_loader:
    print("X: ", X)
    Y_preds = text_classifier(X)
    print(Y_preds.shape)
    break

#### Training and Validation Accuracy

In [None]:
import gc

from sklearn.metrics import accuracy_score
from tqdm import tqdm


def CalcValLossAndAccuracy(model, loss_fn, val_loader):
    """Print the mean batch loss and the accuracy of ``model`` on ``val_loader``.

    The model is evaluated in eval mode with gradients disabled; the mode it
    was in before the call is restored on exit.

    Args:
        model: ``nn.Module`` mapping a batch ``X`` to per-class scores.
        loss_fn: Callable ``loss_fn(preds, Y)`` returning a scalar tensor.
        val_loader: Iterable of ``(X, Y)`` batches.
    """
    was_training = model.training
    # Eval mode switches off training-only behaviour such as dropout.
    model.eval()
    try:
        # -- Disable the gradient --
        with torch.no_grad():
            targets, predictions, losses = [], [], []
            for X, Y in val_loader:
                preds = model(X)
                losses.append(loss_fn(preds, Y).item())
                targets.append(Y)
                predictions.append(preds.argmax(dim=-1))

            if not losses:
                # torch.cat() rejects an empty list, so report and stop here.
                print("Validation loader is empty - nothing to evaluate.")
                return

            targets = torch.cat(targets)
            predictions = torch.cat(predictions)

            # Count matches with tensor ops so the result does not depend on
            # Tensor.numpy(), which only works for tensors stored on the CPU.
            correct = (predictions == targets).sum().item()
            accuracy = correct / targets.numel()

            print("Valid Loss : {:.3f}".format(torch.tensor(losses).mean()))
            print("Valid Acc  : {:.3f}".format(accuracy))
    finally:
        # Hand the model back in whatever mode the caller had it in.
        model.train(was_training)

#### Define the training loop

In [None]:
# Training Loop
def TrainingLoop(model, loss_fn, optimizer, train_loader, val_loader, epochs=10):
    """Train ``model`` for ``epochs`` passes, validating after each pass.

    After every epoch the mean training loss is printed and
    ``CalcValLossAndAccuracy`` is run on ``val_loader``.

    Args:
        model: ``nn.Module`` to optimise.
        loss_fn: Callable ``loss_fn(preds, Y)`` returning a scalar tensor.
        optimizer: Optimizer built over ``model``'s parameters.
        train_loader: Iterable of ``(X, Y)`` training mini-batches.
        val_loader: Iterable of ``(X, Y)`` validation mini-batches.
        epochs: Number of passes over ``train_loader``.
    """
    for _epoch in range(1, epochs + 1):
        # (Re-)enter training mode at the start of every epoch in case the
        # model was left in eval mode.
        model.train()
        losses = []

        # Cycle over the training examples (using minibatches)
        # X are the examples, Y are the associated labels
        for X, Y in tqdm(train_loader):
            # Forward pass and loss
            Y_preds = model(X)
            loss = loss_fn(Y_preds, Y)
            losses.append(loss.item())

            # Clear stale gradients, back-propagate, then update the weights
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print("Train Loss : {:.3f}".format(torch.tensor(losses).mean()))
        CalcValLossAndAccuracy(model, loss_fn, val_loader)

In [None]:
from torch.optim import Adam

# Number of passes over the training set and the optimizer's step size
epochs        = 2
learning_rate = 1e-4

# Loss Function: cross-entropy between the model outputs and the class labels
loss_fn = nn.CrossEntropyLoss()

# Optimizer: Adam over all parameters of the classifier
# (an SGD-with-momentum alternative is kept below, commented out)
#optimizer = optim.SGD(text_classifier.parameters(), lr=0.01, momentum=0.9)
optimizer = Adam(text_classifier.parameters(), lr=learning_rate)

#### Train the model

In [None]:
### Training Loop ###
TrainingLoop(text_classifier.to(DEVICE), loss_fn, optimizer, train_loader, test_loader, epochs)

## Evaluate on the Test Set

In [None]:
# Evaluate on the Test Set - Compute the statistics for the Confusion Matrix
def MakePredictions(model, loader):
    """Run ``model`` over ``loader`` and collect labels and predicted classes.

    The model is evaluated in eval mode with gradients disabled; the mode it
    was in before the call is restored on exit.

    Args:
        model: ``nn.Module`` mapping a batch ``X`` to per-class scores.
        loader: Iterable of ``(X, Y)`` batches.

    Returns:
        Tuple ``(Y_actual, Y_preds)`` of NumPy arrays: the labels taken from
        the loader and the argmax class predicted for each sample. Both are
        empty when ``loader`` yields no batches.
    """
    was_training = model.training
    model.eval()
    targets, predictions = [], []
    try:
        # Inference only: without no_grad() every batch would keep its
        # autograd graph alive for as long as its output is stored.
        with torch.no_grad():
            for X, Y in loader:
                # softmax is monotonic, so the argmax over the raw scores
                # selects the same class; only the class index is kept.
                predictions.append(model(X).argmax(dim=-1))
                targets.append(Y)
    finally:
        model.train(was_training)

    if not targets:
        # torch.cat() rejects an empty list.
        return (
            torch.empty(0, dtype=torch.long).numpy(),
            torch.empty(0, dtype=torch.long).numpy(),
        )

    # .cpu() keeps the NumPy conversion valid whatever device the batches are on.
    Y_actual = torch.cat(targets).detach().cpu().numpy()
    Y_pred = torch.cat(predictions).detach().cpu().numpy()
    return Y_actual, Y_pred

Y_actual, Y_preds = MakePredictions(text_classifier, test_loader)

#### Print Confusion matrix

In [None]:
from sklearn.metrics import (accuracy_score, classification_report,
                             confusion_matrix)

# Overall Test Accuracy
print("Test Accuracy : {}".format(accuracy_score(Y_actual, Y_preds)))

# Report for each class; the class names are passed via target_classes
print("\nClassification Report : ")
print(classification_report(Y_actual, Y_preds, target_names=target_classes))

# Confusion Matrix computed from the actual and the predicted class indices
print("\nConfusion Matrix : ")
print(confusion_matrix(Y_actual, Y_preds))

#### Plot Confusion matrix

In [None]:
import matplotlib.pyplot as plt
import scikitplot as skplt

# Map the class indices back to their names before plotting, so the plot is
# built from the class names rather than the integer indices.
skplt.metrics.plot_confusion_matrix([target_classes[i] for i in Y_actual], [target_classes[i] for i in Y_preds],
                                    normalize=True,
                                    title="Confusion Matrix",
                                    cmap="Reds",
                                    hide_zeros=True,
                                    figsize=(5,5)
                                    );
# Rotate the x-axis tick labels by 90 degrees
plt.xticks(rotation=90);