<a href="https://colab.research.google.com/github/GauravDesai85/Deep-Learning-Projects/blob/master/Pytorch_Text_Classification_Quora_Questions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Run once per session: install the dependencies, then restart the runtime.
import os
!pip install -q feather-format kornia pyarrow wandb nbdev fastprogress fastai2 fastcore --upgrade 
# torch is pinned to a known version.
!pip install torch==1.3.1
#!pip install torchvision==0.4.2
# NOTE(review): torchtext is unpinned, while the notebook relies on the legacy
# `torchtext.data.Field` / `BucketIterator` API -- consider pinning a release
# that matches torch 1.3.1 (TODO confirm the exact version).
!pip install torchtext
#!pip install Pillow==6.2.1 --upgrade
# Hard-exit the interpreter process; presumably so Colab restarts the kernel
# and picks up the freshly installed packages.
os._exit(00)

In [0]:
#deal with tensors
import torch   

#handling text data
from torchtext import data  

In [0]:
# Seed reused for torch's RNG and for the train/validation split.
SEED = 2020

In [3]:
# Seed torch's random number generator (the call returns the Generator object).
torch.manual_seed(SEED)

<torch._C.Generator at 0x7f1e6b976a70>

In [0]:
# Ask cuDNN to pick deterministic algorithms so GPU runs are repeatable.
torch.backends.cudnn.deterministic = True 

In [0]:
# Text field: spaCy tokenization, batch-first tensors; include_lengths=True makes
# each batch's `text` a (tokens, lengths) pair, which the train/evaluate loops unpack.
TEXT  = data.Field(tokenize = 'spacy',batch_first=True,include_lengths=True)
# Label field: labels are produced as float tensors (the loss used later is BCELoss).
LABEL = data.LabelField(dtype = torch.float,batch_first =True)

In [0]:
# Column mapping for the CSV, in column order: (None, None) skips the first
# column, the next two are loaded as `text` and `label`.
fields = [(None, None), ('text',TEXT),('label', LABEL)]

In [7]:
# Show the working directory; 'quora.csv' is loaded relative to it.
!pwd

/content


In [8]:
#loading custom dataset
# 'quora.csv' is resolved relative to the working directory; the header row is
# skipped and the remaining columns are mapped through `fields`.
training_data=data.TabularDataset(path = 'quora.csv',format = 'csv',fields = fields,skip_header = True)

#print preprocessed text
# vars() exposes the first example's attributes (its token list and raw label).
print(vars(training_data.examples[0]))

{'text': ['Why', 'are', 'most', 'indian', 'parents', 'against', 'even', 'liking', 'someone', '?'], 'label': '1'}


In [0]:
# Split Data
import random

# random.seed() returns None, so passing its result as `random_state` hands
# split() no state at all. Seed the generator explicitly, then pass the state
# object (the value of random.getstate()) that split() expects.
random.seed(SEED)
# NOTE(review): split_ratio is the fraction assigned to the FIRST (training)
# split, so 0.3 trains on 30% and validates on 70% -- confirm this is intended.
train_data,val_data = training_data.split(split_ratio =0.3,random_state = random.getstate())

In [0]:
# Prepare Input and Output sequences
# Build the vocabularies from the training split only. No `vectors=` argument is
# passed, so no pre-trained embeddings are attached here.
# min_freq = 3: tokens seen fewer than 3 times are left out of the text vocab.
TEXT.build_vocab(train_data,min_freq =3)
LABEL.build_vocab(train_data)

In [11]:
#No of unique tokens in the text
print(" Size of text vocab:",len(TEXT.vocab))

# No of unique tokens in Label
# NOTE(review): the recorded output reports 22 label classes, but the model
# ends in a single sigmoid trained with BCELoss (a binary setup) -- verify that
# the label column of the CSV parses to 0/1 only.
print(" Size of Label Vocab:",len(LABEL.vocab))

#Commonly used words
print(TEXT.vocab.freqs.most_common(10))

#Word Dictionary
# NOTE(review): this prints the whole token -> index mapping (one entry per
# vocab item); consider printing only a small sample.

print(TEXT.vocab.stoi)

 Size of text vocab: 10404
 Size of Label Vocab: 22
[('?', 32720), ('the', 17057), ('to', 11317), ('a', 9539), (',', 8790), ('of', 8429), ('in', 8045), ('and', 7999), ('is', 7560), ('Why', 7481)]


In [0]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [39]:
# Display which device was selected.
device

device(type='cuda')

In [0]:
# Number of examples per batch.
Batch_Size = 64

# Load Iterator
# BucketIterator groups examples of similar length (per sort_key) to limit padding.
# sort_within_batch = True orders each batch by sort_key, which the model's
# pack_padded_sequence call relies on (it expects length-sorted input by default).

train_iterator,valid_iterator = data.BucketIterator.splits(
    (train_data,val_data),
    batch_size = Batch_Size,
    sort_key = lambda x:len(x.text),
    sort_within_batch = True,

    device = device
)


In [0]:
# Creating the NN architecture
import torch.nn as nn

In [0]:
class my_classifier(nn.Module):
  """Embedding -> (bi)LSTM -> linear -> sigmoid classifier for padded token batches.

  Args:
    vocab_size: number of rows in the embedding table.
    embedding_dim: size of each token embedding.
    hidden_dim: LSTM hidden size per direction.
    output_dim: number of output units of the final linear layer.
    n_layers: number of stacked LSTM layers.
    bidirectional: whether the LSTM also reads the sequence right-to-left.
    dropout: dropout applied between stacked LSTM layers.
  """

  # initialise the model
  def __init__(self,vocab_size,embedding_dim,hidden_dim,output_dim,n_layers,bidirectional,dropout):
    #constructor
    super().__init__()

    # remembered so forward() knows how many final hidden states to combine
    self.bidirectional = bidirectional

    #embedding layer
    self.embedding = nn.Embedding(vocab_size,embedding_dim)

    #LSTM Layer
    # Use the `n_layers` argument: the previous code read a global `num_layers`,
    # which silently ignored the parameter and failed on a fresh kernel.
    self.lstm = nn.LSTM(embedding_dim,
                        hidden_dim,
                        num_layers = n_layers,
                        bidirectional = bidirectional,
                        dropout = dropout,
                        batch_first = True
                        )

    #dense Layer
    # a bidirectional LSTM yields one final hidden state per direction
    num_directions = 2 if bidirectional else 1
    self.fc = nn.Linear(hidden_dim*num_directions,output_dim)

    # activation function
    self.act = nn.Sigmoid()

  # forward pass
  def forward(self,text,text_lengths):
    """Return sigmoid outputs of shape (batch, output_dim).

    Args:
      text: token-index tensor of shape (batch, seq_len) (the LSTM is batch_first).
      text_lengths: unpadded length of each sequence, sorted in decreasing
        order (pack_padded_sequence's default requirement).
    """
    embedded = self.embedding(text)

    # pack_padded_sequence wants the lengths on the CPU in recent PyTorch
    # releases; .cpu() is a no-op when they already are.
    if torch.is_tensor(text_lengths):
      text_lengths = text_lengths.cpu()

    #packed sequence
    packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded,text_lengths,batch_first=True)

    packed_output,(hidden,cell) = self.lstm(packed_embedded)

    # hidden is (num_layers * num_directions, batch, hidden_dim); the last
    # entries belong to the top layer.
    if self.bidirectional:
      #concat final forward and backward hidden state
      hidden = torch.cat((hidden[-2,:,:],hidden[-1,:,:]),dim =1)
    else:
      hidden = hidden[-1,:,:]

    dense_outputs = self.fc(hidden)

    outputs = self.act(dense_outputs)

    return outputs

In [0]:
#define hyperparameters
size_of_vocab = len(TEXT.vocab)  # embedding rows: one per vocab entry
embedding_dim = 100              # size of each token embedding
num_hidden_nodes = 32            # LSTM hidden size (per direction)
num_output_nodes = 1             # single output unit
num_layers = 2                   # stacked LSTM layers
bidirection = True               # bidirectional-LSTM switch
dropout = 0.2                    # dropout passed to the LSTM

In [0]:
#Instantiate the model
# Every hyperparameter is passed by keyword so the call cannot drift from the
# constructor's positional order, and the `bidirection` setting from the
# hyperparameter cell is used instead of a hard-coded True.
model = my_classifier(vocab_size = size_of_vocab,
                      embedding_dim = embedding_dim,
                      hidden_dim = num_hidden_nodes,
                      output_dim = num_output_nodes,
                      n_layers = num_layers,
                      bidirectional = bidirection,
                      dropout = dropout)

In [31]:
# architecture: print the module tree (layers and their sizes)
print(model)

my_classifier(
  (embedding): Embedding(10404, 100)
  (lstm): LSTM(100, 32, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True)
  (fc): Linear(in_features=64, out_features=1, bias=True)
  (act): Sigmoid()
)


In [32]:
#No. of trainable parameters
def count_parameters(model):
    """Return the total element count of all parameters of `model` that require gradients."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total
    
# Report the trainable-parameter total with thousands separators.
print(f'The model has {count_parameters(model):,} trainable parameters')

# Disabled: copying pre-trained vectors into the embedding layer.
# NOTE(review): TEXT.vocab.vectors is only populated when build_vocab is given
# `vectors=...`, which the visible build_vocab call does not do.
#Initialize the pretrained embedding
#pretrained_embeddings = TEXT.vocab.vectors
#model.embedding.weight.data.copy_(pretrained_embeddings)

#print(pretrained_embeddings.shape)

The model has 1,099,857 trainable parameters


In [0]:
import torch.optim as optim

In [0]:
# define criterion,loss and metric
# Adam with its default settings over all model parameters.
optimizer = optim.Adam(model.parameters())
# Binary cross-entropy on probabilities (the model applies the Sigmoid itself).
# NOTE(review): the recorded training log shows NEGATIVE train loss, which
# BCELoss cannot produce for targets in [0, 1]; together with the 22-entry
# label vocab this suggests some targets are > 1 -- verify the labels.
criterion = nn.BCELoss()

# metric
def binary_accuracy(preds,y):
  """Return the fraction of entries where `preds`, rounded to the nearest integer, equals `y`."""
  hits = torch.eq(torch.round(preds), y).float()
  return hits.sum() / len(hits)

# push to the selected device (CUDA when available, otherwise the CPU)

model = model.to(device)
criterion =criterion.to(device)

In [0]:
# train the model

def train(model,iterator,optimizer,criterion):
  """Run one training epoch and return (mean batch loss, mean batch accuracy).

  Args:
    model: called as model(text, text_lengths); expected to return a
      (batch, 1) tensor of probabilities.
    iterator: yields batches exposing `.text` as a (tokens, lengths) pair and
      `.label` as the target tensor.
    optimizer: optimizer stepping the model's parameters.
    criterion: loss called as criterion(predictions, labels).
  """

  #initialize for every epoch
  epoch_loss = 0
  epoch_acc = 0

  # set the model in training phase
  model.train()

  for batch in iterator:

    optimizer.zero_grad() # reset gradient after every batch

    # retrieve text and no. of words from batch
    text, text_lengths = batch.text

    # convert to 1D tensor
    # Squeeze only the output dimension: a bare squeeze() also collapses a
    # final batch of size 1 to a 0-d tensor, which no longer matches the
    # label's shape (a warning or an error, depending on the PyTorch version).
    predictions = model(text,text_lengths).squeeze(1)

    # calc loss
    loss = criterion(predictions,batch.label)

    # calc accuracy
    acc = binary_accuracy(predictions,batch.label)

    #backpropagate loss
    loss.backward()

    #update the weights
    optimizer.step()

    epoch_loss += loss.item()
    epoch_acc += acc.item()

  # averages are per batch, not per example
  return epoch_loss/len(iterator),epoch_acc/len(iterator)

In [0]:
# evaluate the model
def evaluate(model,iterator,criterion):
  """Evaluate without gradient tracking and return (mean batch loss, mean batch accuracy).

  Args:
    model: called as model(text, text_lengths); expected to return a
      (batch, 1) tensor of probabilities.
    iterator: yields batches exposing `.text` as a (tokens, lengths) pair and
      `.label` as the target tensor.
    criterion: loss called as criterion(predictions, labels).
  """
  epoch_loss = 0
  epoch_acc = 0

  # evaluation mode (e.g. disables dropout)
  model.eval()

  with torch.no_grad():
    for batch in iterator:
      text,text_lengths = batch.text
      # Squeeze only the output dimension so a batch of size 1 stays 1-D and
      # keeps matching the label's shape.
      predictions = model(text,text_lengths).squeeze(1)
      loss = criterion(predictions,batch.label)
      acc = binary_accuracy(predictions,batch.label)
      epoch_loss += loss.item()
      epoch_acc += acc.item()

  # averages are per batch, not per example
  return epoch_loss/len(iterator),epoch_acc/len(iterator)


In [47]:
# Number of full passes over the training iterator.
N_Epochs = 10
# Lowest validation loss seen so far; +inf guarantees the first epoch checkpoints.
best_valid_loss =float('inf')

for epoch in range(N_Epochs):

  #train the model
  train_loss ,train_acc =train(model,train_iterator,optimizer,criterion)

  #evaluate the model
  val_loss ,val_acc = evaluate(model,valid_iterator,criterion)

  # Checkpoint only when validation loss improves, so 'saved_weights.pt' always
  # holds the weights of the best epoch so far.
  if val_loss < best_valid_loss :
    best_valid_loss = val_loss
    torch.save(model.state_dict(),'saved_weights.pt')

  # Per-epoch summary (loss to 3 decimals, accuracy as a percentage).
  print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
  print(f'\t Val. Loss: {val_loss:.3f} |  Val. Acc: {val_acc*100:.2f}%')


	Train Loss: -0.098 | Train Acc: 97.09%
	 Val. Loss: 0.517 |  Val. Acc: 85.22%
	Train Loss: -0.116 | Train Acc: 97.68%
	 Val. Loss: 0.620 |  Val. Acc: 84.93%
	Train Loss: -0.128 | Train Acc: 98.07%
	 Val. Loss: 0.610 |  Val. Acc: 84.43%
	Train Loss: -0.140 | Train Acc: 98.40%
	 Val. Loss: 0.696 |  Val. Acc: 84.81%
	Train Loss: -0.150 | Train Acc: 98.70%
	 Val. Loss: 0.816 |  Val. Acc: 84.77%
	Train Loss: -0.154 | Train Acc: 98.85%
	 Val. Loss: 0.806 |  Val. Acc: 84.67%
	Train Loss: -0.162 | Train Acc: 99.01%
	 Val. Loss: 0.826 |  Val. Acc: 84.57%
	Train Loss: -0.162 | Train Acc: 99.01%
	 Val. Loss: 0.820 |  Val. Acc: 84.51%
	Train Loss: -0.165 | Train Acc: 99.12%
	 Val. Loss: 0.826 |  Val. Acc: 84.38%
	Train Loss: -0.169 | Train Acc: 99.26%
	 Val. Loss: 0.988 |  Val. Acc: 84.90%
