In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'
%autosave 10

Autosaving every 10 seconds


In [2]:
import os

import torch
from torchtext import data,datasets

In [3]:
### Seed for Randomness
SEED = 123
torch.manual_seed(SEED)

<torch._C.Generator at 0x17528935490>

In [4]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Coda Running in {device}')
!nvidia-smi

Coda Running in cuda
Thu Jun 16 11:30:32 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 512.95       Driver Version: 512.95       CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ... WDDM  | 00000000:01:00.0 Off |                  N/A |
| N/A   52C    P0    34W /  N/A |      0MiB / 16384MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+------------------------------------------------------------------

In [5]:
TEXT = data.Field(tokenize='spacy',tokenizer_language='en_core_web_sm')
LABEL = data.LabelField(dtype = torch.float)

In [6]:
train_data,test_data=datasets.IMDB.splits(TEXT,LABEL,root = './')

In [7]:
len(train_data)
len(test_data)

25000

25000

In [8]:
train_data,valid_data = train_data.split(split_ratio=0.7)

In [9]:
len(train_data)
len(valid_data)

17500

7500

### Let's Build the Vocab from the Train data we got by help of Spacy TEXT

In [10]:
TEXT.build_vocab(train_data,max_size = 25_000)
LABEL.build_vocab(train_data)

In [11]:
len(TEXT.vocab)
len(LABEL.vocab)

25002

2

In [12]:
TEXT.vocab.itos[:10]
LABEL.vocab.itos[:4]

['<unk>', '<pad>', 'the', ',', '.', 'and', 'a', 'of', 'to', 'is']

['neg', 'pos']

In [13]:
train_iter,valid_iter,test_iter = data.BucketIterator.splits((train_data,valid_data,test_data),batch_size=64,device = device)

In [14]:
import torch.nn as nn

In [15]:
class RNN_model(nn.Module):
    
    def __init__(self,input_dim,embedding_dim,hidden_dim,out_dim):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=input_dim,embedding_dim = embedding_dim,)
        self.rnn = nn.RNN(input_size = embedding_dim,hidden_size = hidden_dim,num_layers = 1,bias = True)
        self.fc = nn.Linear(in_features=hidden_dim,out_features=out_dim)
        
    def forward(self,text):
        embedding = self.embedding(text)
        output,hidden = self.rnn(embedding)
        
        # assert 
        return self.fc(hidden.squeeze(0))

In [16]:
len(TEXT.vocab)

25002

In [17]:
Input_dim = len(TEXT.vocab)
Embedding_dim = 100
Hidden_dim = 256
Output_dim = 1

In [18]:
model = RNN_model(input_dim=Input_dim,embedding_dim=Embedding_dim,hidden_dim=Hidden_dim,out_dim=Output_dim)

In [19]:
model.to(device)

RNN_model(
  (embedding): Embedding(25002, 100)
  (rnn): RNN(100, 256)
  (fc): Linear(in_features=256, out_features=1, bias=True)
)

In [20]:
for p in model.parameters():
    print (p.numel())

2500200
25600
65536
256
256
256
1


In [21]:
from torch.optim import SGD

In [22]:
optimizer  = SGD(model.parameters(),lr = 0.001,momentum = 0.9)

In [23]:
criterion = nn.BCEWithLogitsLoss()
criterion.to(device)

BCEWithLogitsLoss()

In [24]:
from tqdm import tqdm

In [25]:
def accuracy(pred,target):
    rounded_pred = torch.round(torch.sigmoid(pred))
    correct  = (rounded_pred==target).float()
    return correct.sum()/len(correct)

In [35]:
def train(model,train_loader,criterion,optimizer,epoch):
    train_loss = []
    train_accuracy = []
    model.train()
    pbar = tqdm(train_loader)
    for index,(data,target) in enumerate(pbar):
        data,targe= data.to(device),target.to(device)
        optimizer.zero_grad()
        prediction = model(data).squeeze()
        loss = criterion(prediction,target)
        acc = accuracy(prediction,target)
        loss.backward()
        optimizer.step()
        pbar.set_description(f"Epohs: {epoch} ,Batch {index} , loss {loss} accuracy {acc}")
    train_loss.append(loss)
    train_accuracy.append(acc)
    return None

In [36]:
def test(model,test_loader,criterion,epoch):
    model.eval()
    with torch.no_grad():
        for batch_id, (data,target) in enumerate(test_loader):
            data,target = data.to(device),target.to(device)
            prediction = model(data).squeeze(1)
            loss = criterion(prediction,target)
            acc =  accuracy(prediction,target)
        print(f'Epoch {epoch},loss {loss} acc = {acc}')
        return None

In [37]:
epochs = 10
for epoch in range(epochs):
    train(model = model,train_loader=train_iter,criterion=criterion,optimizer=optimizer,epoch= epoch)
    test(model,test_iter,criterion,epoch = epoch)

Epohs: 0 ,Batch 273 , loss 0.6979926824569702 accuracy 0.453125: 100%|█| 274/274 [00:15<00:00, 17.56i


Epoch 0,loss 0.6844062209129333 acc = 0.6000000238418579


Epohs: 1 ,Batch 273 , loss 0.6944852471351624 accuracy 0.46875: 100%|█| 274/274 [00:15<00:00, 17.76it


Epoch 1,loss 0.6894698739051819 acc = 0.6000000238418579


Epohs: 2 ,Batch 273 , loss 0.693769097328186 accuracy 0.5: 100%|███| 274/274 [00:15<00:00, 17.83it/s]


Epoch 2,loss 0.6841009259223938 acc = 0.6000000238418579


Epohs: 3 ,Batch 273 , loss 0.6870142221450806 accuracy 0.5625: 100%|█| 274/274 [00:15<00:00, 17.85it/


Epoch 3,loss 0.6791644096374512 acc = 0.6000000238418579


Epohs: 4 ,Batch 273 , loss 0.6920653581619263 accuracy 0.53125: 100%|█| 274/274 [00:15<00:00, 17.77it


Epoch 4,loss 0.6852030158042908 acc = 0.6000000238418579


Epohs: 5 ,Batch 273 , loss 0.6898888349533081 accuracy 0.59375: 100%|█| 274/274 [00:15<00:00, 17.99it


Epoch 5,loss 0.6876097917556763 acc = 0.6000000238418579


Epohs: 6 ,Batch 273 , loss 0.6940447092056274 accuracy 0.421875: 100%|█| 274/274 [00:15<00:00, 18.10i


Epoch 6,loss 0.6895114183425903 acc = 0.42500001192092896


Epohs: 7 ,Batch 273 , loss 0.685721755027771 accuracy 0.59375: 100%|█| 274/274 [00:15<00:00, 17.92it/


Epoch 7,loss 0.6965799331665039 acc = 0.42500001192092896


Epohs: 8 ,Batch 273 , loss 0.6928951740264893 accuracy 0.515625: 100%|█| 274/274 [00:15<00:00, 17.99i


Epoch 8,loss 0.6890819072723389 acc = 0.6000000238418579


Epohs: 9 ,Batch 273 , loss 0.6956332921981812 accuracy 0.46875: 100%|█| 274/274 [00:15<00:00, 17.87it


Epoch 9,loss 0.6959691047668457 acc = 0.42500001192092896
