# **WE DID NOT ADD THE WEIGHT OF THE TASK3 TO WEIGHT FOLDER IN ZIP FILE DUE TO SIZE CONSTRAINTS. IF YOU NEED IT YOU CAN ASK FOR IT, OTHERWISE TRAIN THE MODEL**

In [81]:
!pip install https://github.com/pytorch/text/archive/master.zip
!pip install inscriptis

Collecting https://github.com/pytorch/text/archive/master.zip
  Using cached https://github.com/pytorch/text/archive/master.zip
Building wheels for collected packages: torchtext
  Building wheel for torchtext (setup.py) ... [?25l[?25hdone
  Created wheel for torchtext: filename=torchtext-0.5.1-cp36-none-any.whl size=64241 sha256=9b9130bf16ac4936a3b54274e6950fea0c1d82c7e3beeddfdaa94bc51e0df2de
  Stored in directory: /tmp/pip-ephem-wheel-cache-46u8m37b/wheels/5a/86/3d/30ae7dfdfeb1748bb11b3da173fb9634141fbb39e9e9847317
Successfully built torchtext


In [0]:
import torch    # root package
import numpy as np
from torch.utils.data import Dataset, DataLoader 

In [83]:
"""
Load the AG_NEWS dataset in bi-gram features format.
"""
# !pip install https://github.com/pytorch/text/archive/master.zip
import torch
import torchtext
from torchtext.datasets import text_classification
import os

NGRAMS = 2

if not os.path.isdir('./.data'):
    os.mkdir('./.data')

train_dataset, test_dataset = text_classification.DATASETS['AG_NEWS'](
    root='./.data', ngrams=NGRAMS, vocab=None)

BATCH_SIZE = 16

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

120000lines [00:07, 16533.90lines/s]
120000lines [00:14, 8120.77lines/s]
7600lines [00:00, 8682.63lines/s]


In [0]:
# TODO: Import the necessary libraries
import torch
import torch.nn as nn
import torch.nn.functional as F

BATCH_SIZE = 32
loss_name = "BCELoss"

In [0]:
class LSTMmodel(nn.Module):
  def __init__(self, vocab_size, hidden_dim, dim_embed, num_classes, dropout_gate = 0): 
    super(LSTMmodel, self).__init__()

    self.dropout_gate = dropout_gate
    self.hidden_dim = hidden_dim
    self.embeddings = nn.EmbeddingBag(vocab_size, dim_embed)

    # convelution with kernel of [1,5], the in_channels = 1, the out_channels = 1
    self.conv =nn.Conv2d(1, 1, [1,5])  

    # After Convolution the input_size = (dim_embed - 5 + 1)
    self.lstm = nn.LSTM((dim_embed-4), hidden_dim)

    self.dropout = nn.Dropout(p=0.85)
    self.linear = nn.Linear(hidden_dim, num_classes)

    # one hot encoded vectors
    self.softmax = nn.Softmax(dim=1)

    self.initialize_weights()

  def initialize_weights(self):
    num_range = 0.5
    self.embeddings.weight.data.uniform_(-num_range, num_range)
    self.linear.weight.data.uniform_(-num_range, num_range)
    self.linear.bias.data.zero_()


  def forward(self, x, offset):
    embedding = self.embeddings(x,offset)   #[Batch_size (B), Dim_embed (D)]
    sq_embed = embedding.unsqueeze(0).unsqueeze(1)    #[1, 1, B, D]
    conv_out = self.conv(sq_embed)    #[1, 1, B, D-5+1]
    relu_conv = F.relu(conv_out).squeeze()  #[B, D-5+1]
    lstm_out, _ = self.lstm(relu_conv.view(len(relu_conv),1,-1)) 
    if (self.dropout_gate):
      lstm_out = self.dropout(lstm_out)
    out = self.linear(lstm_out.view(len(lstm_out),-1))
    return self.softmax(out)

In [167]:
VOCAB_SIZE = len(train_dataset.get_vocab())
EMBED_DIM = 32    #dimension of embedding
HIDDEN_DIM = 16   #The number of features in the hidden state
NUM_CLASS = len(train_dataset.get_labels())  #number of classes

# making training model
model = LSTMmodel(VOCAB_SIZE, HIDDEN_DIM, EMBED_DIM, NUM_CLASS, dropout_gate=0)
model.cuda()

LSTMmodel(
  (embeddings): EmbeddingBag(1308844, 32, mode=mean)
  (conv): Conv2d(1, 1, kernel_size=[1, 5], stride=(1, 1))
  (lstm): LSTM(28, 16)
  (dropout): Dropout(p=0.85, inplace=False)
  (linear): Linear(in_features=16, out_features=4, bias=True)
  (softmax): Softmax(dim=1)
)

In [0]:
def generate_batch(batch):
    
    label = torch.tensor([entry[0] for entry in batch])
    text = [entry[1] for entry in batch]
    offsets = [0] + [len(entry) for entry in text]

    offsets = torch.tensor(offsets[:-1]).cumsum(dim=0)
    text = torch.cat(text)
    
    return text, offsets, label

In [0]:
# function for one hot encoded vectors
def convert_labels (x):
  labels = np.zeros((x.size()[0], len(train_dataset.get_labels())))
  for val in range(x.size()[0]):
     labels[val][x[val]] = 1
     
  return torch.from_numpy(labels).type(torch.FloatTensor)

In [0]:
def train(train_data, model):
    
    model.train()

    # Initial values of training loss and training accuracy
    train_loss = 0
    train_acc = 0

    data = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True, collate_fn=generate_batch)
    
    for i, (text, offsets, cls) in enumerate(data):
        
        optimizer.zero_grad()
        text, offsets, cls = text.to(device), offsets.to(device), cls.to(device)

        # if criterion is BCELoss use one hot encoded vectors for labels
        if (loss_name == "BCELoss"):
          cls = convert_labels(cls).to(device)

        output = model(text, offsets)
        
        loss = criterion(output, cls)
        train_loss += loss.item()
        loss.backward()
        optimizer.step()
        
        if (loss_name == "CrossEntropy"):
          train_acc += (output.argmax(1) == cls).sum().item()
        else:
          train_acc += (output.argmax(1) == cls.argmax(1)).sum().item()

    scheduler.step()
    
    return train_loss / len(train_data), train_acc / len (train_data)

In [0]:
def test(test_data, model):
  
    model.eval()
    # Initial values of test loss and test accuracy
    
    loss = 0
    acc = 0
    
    data = DataLoader(test_data, batch_size = BATCH_SIZE, collate_fn = generate_batch)
    
    for text, offsets, cls in data:

        text, offsets, cls = text.to(device), offsets.to(device), cls.to(device)
        if (loss_name == "BCELoss"):
          cls = convert_labels(cls).to(device)

        # because during test we dont need to compute the grdient of any tensor
        with torch.no_grad():
          output = model(text, offsets)              

          loss = criterion(output, cls)
          loss += loss.item()
          
          if (loss_name == "CrossEntropy"):
            acc += (output.argmax(1) == cls).sum().item()
          else:
            acc += (output.argmax(1) == cls.argmax(1)).sum().item()

    return loss / len(test_data), acc / len(test_data)

In [91]:
# mount drive (needed for my own training)
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [27]:
# loading pre-trained weights to the model
# given path for pretrained model (for fine tuning, DONT RUN IF YOU DO NOT WANT TO FINE TUNE)
model.load_state_dict(torch.load('/content/gdrive/My Drive/weights_NN/finetune2_0.9325_.pt'))
# for name, param in model.named_parameters():
#   if "embedding" in name:
#     param.requires_grad = False

<All keys matched successfully>

In [92]:
# Print parameters of the model and the name
for name, param in model.named_parameters():
  print (name, param.requires_grad)

embeddings.weight True
conv.weight True
conv.bias True
lstm.weight_ih_l0 True
lstm.weight_hh_l0 True
lstm.bias_ih_l0 True
lstm.bias_hh_l0 True
linear.weight True
linear.bias True


In [0]:
import time
import torchvision
import torch
from torch.utils.data import DataLoader
from torch.utils.data.dataset import random_split

# Hyper parametre selection

N_EPOCHS = 15
LEARNING_RATE = 1e-2
TRAIN_RATIO = 0.9

valid_loss = float('inf')

# Use the appropriate loss function
if loss_name == "BCELoss":
  criterion = torch.nn.BCELoss().to(device)
else:
  criterion = torch.nn.CrossEntropyLoss().to(device) 

optimizer = torch.optim.Adam(model.parameters(), lr= LEARNING_RATE, weight_decay=1e-5)

scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.6)
valid_acc_dummy = 1e-3

# TODO: Split the data into train and validation sets using random_split()
train_len = int(len(train_dataset) * TRAIN_RATIO)
valid_len = len(train_dataset) - train_len
train_split_dataset , valid_split_dataset = random_split(train_dataset, [train_len, valid_len])

In [173]:
# Training Code

# logs for graphs
valid_loss_array = []
train_loss_array = []
train_acc_array = []
valid_acc_array = []

for epoch in range(N_EPOCHS):

    start_time = time.time()
    train_loss, train_acc = train(train_split_dataset, model)
    # Log of accuracy and error
    train_loss_array.append(train_loss)
    train_acc_array.append(train_acc)

    # test on validation set to compute the error and accuracy on validation set
    valid_loss, valid_acc = test(valid_split_dataset, model)
    
    valid_loss_array.append(valid_loss)
    valid_acc_array.append(valid_acc)
    # save model of the best accuracy
    if (valid_acc > valid_acc_dummy):
      # for your own path change the path
      #torch.save(model.state_dict(), '/content/gdrive/My Drive/weights_NN/'+ 'CNN_LSTM2_' + str(valid_acc) + "_.pt")
      valid_acc_dummy = valid_acc

    secs = int(time.time() - start_time)
    mins = secs / 60
    secs = secs % 60

    print('Epoch: %d' %(epoch + 1), " | time in %d minutes, %d seconds" %(mins, secs))
    print(f'\tLoss: {train_loss:.4f}(train)\t|\tAcc: {train_acc * 100:.1f}%(train)')
    print(f'\tLoss: {valid_loss:.4f}(valid)\t|\tAcc: {valid_acc * 100:.1f}%(valid)')

Epoch: 1  | time in 2 minutes, 3 seconds
	Loss: 0.0050(train)	|	Acc: 87.3%(train)
	Loss: 0.0000(valid)	|	Acc: 90.1%(valid)
Epoch: 2  | time in 2 minutes, 3 seconds
	Loss: 0.0034(train)	|	Acc: 92.0%(train)
	Loss: 0.0000(valid)	|	Acc: 91.4%(valid)
Epoch: 3  | time in 2 minutes, 3 seconds
	Loss: 0.0026(train)	|	Acc: 94.0%(train)
	Loss: 0.0000(valid)	|	Acc: 92.1%(valid)
Epoch: 4  | time in 2 minutes, 3 seconds
	Loss: 0.0018(train)	|	Acc: 95.8%(train)
	Loss: 0.0000(valid)	|	Acc: 92.5%(valid)
Epoch: 5  | time in 2 minutes, 3 seconds
	Loss: 0.0012(train)	|	Acc: 97.5%(train)
	Loss: 0.0000(valid)	|	Acc: 92.7%(valid)
Epoch: 6  | time in 2 minutes, 2 seconds
	Loss: 0.0007(train)	|	Acc: 98.6%(train)
	Loss: 0.0000(valid)	|	Acc: 92.3%(valid)
Epoch: 7  | time in 2 minutes, 2 seconds
	Loss: 0.0004(train)	|	Acc: 99.2%(train)
	Loss: 0.0000(valid)	|	Acc: 92.3%(valid)
Epoch: 8  | time in 2 minutes, 3 seconds
	Loss: 0.0003(train)	|	Acc: 99.5%(train)
	Loss: 0.0000(valid)	|	Acc: 92.1%(valid)
Epoch: 9  | time

In [108]:
print('Building test model and loading the saved model...')

VOCAB_SIZE_TEST = len(test_dataset.get_vocab())
EMBED_DIM_TEST = 32
HIDDEN_DIM_TEST = 16
NUM_CLASS_TEST = len(test_dataset.get_labels())

model_test = LSTMmodel(VOCAB_SIZE_TEST,HIDDEN_DIM_TEST,EMBED_DIM_TEST,NUM_CLASS_TEST)
# load trained model 
# path given according to the directory
# Change directory according to your system to make it work
model_test.load_state_dict(torch.load('/content/gdrive/My Drive/weights_NN/iter2_0.9329166666666666_.pt'))
model_test.cuda().eval()

Building test model and loading the saved model...


LSTMmodel(
  (embeddings): EmbeddingBag(1308844, 32, mode=mean)
  (conv): Conv2d(1, 1, kernel_size=[1, 5], stride=(1, 1))
  (lstm): LSTM(28, 16)
  (dropout): Dropout(p=0.85, inplace=False)
  (linear): Linear(in_features=16, out_features=4, bias=True)
  (softmax): Softmax(dim=1)
)

In [109]:
# Testing error calculation

test_loss, test_acc = test(test_dataset, model_test)
print(f'\tLoss: {test_loss:.4f}(test)\t|\tAcc: {test_acc * 100:.1f}%(test)')

	Loss: 0.0001(test)	|	Acc: 92.3%(test)
