In [1]:
!pip3 install torch torchvision torchaudio



In [2]:
# Imports
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch.nn.utils.rnn import pad_sequence
from torchtext.vocab import GloVe
from spacy.tokenizer import Tokenizer
from sklearn.model_selection import train_test_split
import spacy
import pandas as pd
import numpy as np
import os
import re
from nltk.corpus import stopwords 
import random
from tqdm import tqdm
from transformers import BertTokenizer, BertModel, BertConfig



In [3]:
# Tokenizer using spacy
nlp = spacy.load("en_core_web_sm")
tokenizer = Tokenizer(nlp.vocab)

In [4]:
# Add data from files into dataframe for easier access
def create_dataframe(source_text_path,target_text_path):
    txt_files_source = [file for file in os.listdir(source_text_path) if file.endswith('.txt')]
    txt_files_target = [file for file in os.listdir(target_text_path) if file.endswith('.txt')]
    df = pd.DataFrame(columns=['headlines','text'])
    for source,target in zip(txt_files_source,txt_files_target):
        assert source==target
        source_file_path = os.path.join(source_text_path, source)
        target_file_path = os.path.join(target_text_path, target)
        # Read the content of the file
        with open(source_file_path,'r',encoding='latin-1') as file:
            source_text = file.read()
        with open(target_file_path,'r',encoding='latin-1') as file:
            target_text = file.read()
        df.loc[len(df.index)] = [source_text,target_text]
    return df

In [5]:
# Check accuracy function
def check_accuracy(output,labels):
    _ , predpos = output.max(1)
    num_samples=len(labels)
    num_correct=(predpos==labels).sum()
    return (num_correct/num_samples)*100

# Save checkpoint
def save_checkpoint(state,filename='weights.pth.tar'):
    print('Saving weights-->')
    torch.save(state,filename)

# Load checkpoint
def load_checkpoint(checkpoint,model,optim):
    print('Loading weights-->')
    model.load_state_dict(checkpoint['state_dict'])
    optim.load_state_dict(checkpoint['optimizer'])

In [6]:
df1 = create_dataframe("/kaggle/input/bbc-news-summary/BBC News Summary/News Articles/business","/kaggle/input/bbc-news-summary/BBC News Summary/Summaries/business")
df2 = create_dataframe("/kaggle/input/bbc-news-summary/BBC News Summary/News Articles/entertainment","/kaggle/input/bbc-news-summary/BBC News Summary/Summaries/entertainment")
df3 = create_dataframe("/kaggle/input/bbc-news-summary/BBC News Summary/News Articles/politics","/kaggle/input/bbc-news-summary/BBC News Summary/Summaries/politics")
df4 = create_dataframe("/kaggle/input/bbc-news-summary/BBC News Summary/News Articles/sport","/kaggle/input/bbc-news-summary/BBC News Summary/Summaries/sport")
df5 = create_dataframe("/kaggle/input/bbc-news-summary/BBC News Summary/News Articles/tech","/kaggle/input/bbc-news-summary/BBC News Summary/Summaries/tech")

In [7]:
df = pd.concat([df1, df2, df3, df4, df5], ignore_index=True)

In [8]:
# Split into train and test sets
df = df.rename(columns = {"headlines":"source_text","text":"summary_text"})
X,Y = df["source_text"],df["summary_text"]
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
train_df = pd.DataFrame({'source_text': X_train, 'summary_text': Y_train})
test_df = pd.DataFrame({'source_text': X_test, 'summary_text': Y_test})

In [9]:
contraction_mapping = {"ain't": "is not", "aren't": "are not","can't": "cannot", "'cause": "because", "could've": "could have", "couldn't": "could not",

                           "didn't": "did not", "doesn't": "does not", "don't": "do not", "hadn't": "had not", "hasn't": "has not", "haven't": "have not",

                           "he'd": "he would","he'll": "he will", "he's": "he is", "how'd": "how did", "how'd'y": "how do you", "how'll": "how will", "how's": "how is",

                           "I'd": "I would", "I'd've": "I would have", "I'll": "I will", "I'll've": "I will have","I'm": "I am", "I've": "I have", "i'd": "i would",

                           "i'd've": "i would have", "i'll": "i will",  "i'll've": "i will have","i'm": "i am", "i've": "i have", "isn't": "is not", "it'd": "it would",

                           "it'd've": "it would have", "it'll": "it will", "it'll've": "it will have","it's": "it is", "let's": "let us", "ma'am": "madam",

                           "mayn't": "may not", "might've": "might have","mightn't": "might not","mightn't've": "might not have", "must've": "must have",

                           "mustn't": "must not", "mustn't've": "must not have", "needn't": "need not", "needn't've": "need not have","o'clock": "of the clock",

                           "oughtn't": "ought not", "oughtn't've": "ought not have", "shan't": "shall not", "sha'n't": "shall not", "shan't've": "shall not have",

                           "she'd": "she would", "she'd've": "she would have", "she'll": "she will", "she'll've": "she will have", "she's": "she is",

                           "should've": "should have", "shouldn't": "should not", "shouldn't've": "should not have", "so've": "so have","so's": "so as",

                           "this's": "this is","that'd": "that would", "that'd've": "that would have", "that's": "that is", "there'd": "there would",

                           "there'd've": "there would have", "there's": "there is", "here's": "here is","they'd": "they would", "they'd've": "they would have",

                           "they'll": "they will", "they'll've": "they will have", "they're": "they are", "they've": "they have", "to've": "to have",

                           "wasn't": "was not", "we'd": "we would", "we'd've": "we would have", "we'll": "we will", "we'll've": "we will have", "we're": "we are",

                           "we've": "we have", "weren't": "were not", "what'll": "what will", "what'll've": "what will have", "what're": "what are",

                           "what's": "what is", "what've": "what have", "when's": "when is", "when've": "when have", "where'd": "where did", "where's": "where is",

                           "where've": "where have", "who'll": "who will", "who'll've": "who will have", "who's": "who is", "who've": "who have",

                           "why's": "why is", "why've": "why have", "will've": "will have", "won't": "will not", "won't've": "will not have",

                           "would've": "would have", "wouldn't": "would not", "wouldn't've": "would not have", "y'all": "you all",

                           "y'all'd": "you all would","y'all'd've": "you all would have","y'all're": "you all are","y'all've": "you all have",

                           "you'd": "you would", "you'd've": "you would have", "you'll": "you will", "you'll've": "you will have",

                           "you're": "you are", "you've": "you have"}


stop_words = set(stopwords.words('english'))

In [10]:
def text_cleaner(text):
    newString = text.lower()
    newString = newString.replace('"', "'")
    newString = re.sub(r'\([^)]*\)', '', newString)
    newString = re.sub('"','', newString)
    newString = ' '.join([contraction_mapping[t] if t in contraction_mapping else t for t in newString.split(" ")])    
    newString = re.sub(r"'s\b","",newString)
    newString = re.sub("[^a-zA-Z]", " ", newString) 
    tokens = [w for w in newString.split() if not w in stop_words]
    return " ".join(tokens)

In [11]:
# Tokenize and lowercase text using spacy
train_df['source_text'] = train_df['source_text'].apply(lambda x: [token.text.lower() for token in tokenizer(text_cleaner(x))])
train_df['summary_text'] = train_df['summary_text'].apply(lambda x: [token.text.lower() for token in tokenizer(text_cleaner(x))])

test_df['source_text'] = test_df['source_text'].apply(lambda x: [token.text.lower() for token in tokenizer(text_cleaner(x))])
test_df['summary_text'] = test_df['summary_text'].apply(lambda x: [token.text.lower() for token in tokenizer(text_cleaner(x))])

In [12]:
# Add START AND END tokens to summary
train_df['source_text'] = train_df['source_text'].apply(lambda x : ['_START_']+ x + ['_END_'])
train_df['summary_text'] = train_df['summary_text'].apply(lambda x : ['_START_']+ x + ['_END_'])

test_df['source_text'] = test_df['source_text'].apply(lambda x : ['_START_']+ x + ['_END_'])
test_df['summary_text'] = test_df['summary_text'].apply(lambda x : ['_START_']+ x + ['_END_'])

In [13]:
train_df.head()

Unnamed: 0,source_text,summary_text
1490,"[_START_, ferguson, fears, milan, cutting, edg...","[_START_, loss, could, worse, quality, bring, ..."
2001,"[_START_, ask, jeeves, joins, web, log, market...","[_START_, jim, lanzone, vice, president, searc..."
1572,"[_START_, safin, cool, wimbledon, newly, crown...","[_START_, expect, sampras, favourite, pressure..."
1840,"[_START_, mobiles, rack, years, use, mobile, p...","[_START_, cellnet, vodafone, mobile, phone, op..."
610,"[_START_, eminem, secret, gig, venue, revealed...","[_START_, fourth, album, rap, star, sale, two,..."


In [14]:
# Build vocabularies - each word has an index, note : words sorted in ascending order
all_tokens = train_df['source_text'].tolist() + train_df['summary_text'].tolist() + test_df['source_text'].tolist() + test_df['summary_text'].tolist()
stoi = {actual_word: idx for idx, (word_num, actual_word) in enumerate(sorted(enumerate(set(token for tokens in all_tokens for token in tokens)), key=lambda x: x[1]))}
itos = {idx: actual_word for idx, (word_num, actual_word) in enumerate(sorted(enumerate(set(token for tokens in all_tokens for token in tokens)), key=lambda x: x[1]))}

In [15]:
# Init Bert Tokenizer and Bert Model for Embeddings
bert_tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
bert_model = BertModel.from_pretrained("bert-base-uncased")

tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

In [16]:
sentence = "Hello, today is a thursday, I hope you have a great day Sanju Kutti"
encodings = bert_tokenizer.encode_plus(bert_tokenizer.tokenize(sentence),max_length=512)
sentence = torch.tensor(encodings["input_ids"]).unsqueeze(0)
print(sentence.shape)
with torch.no_grad():
    output = bert_model(sentence).last_hidden_state
print(output.shape)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


torch.Size([1, 20])
torch.Size([1, 20, 768])


In [17]:
# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using",device)

Using cuda


In [18]:
print(bert_model.config.vocab_size)

30522


In [19]:
print(list(stoi.items())[500:505])
print(list(itos.items())[500:505])

[('aggregator', 500), ('aggregators', 501), ('aggression', 502), ('aggressive', 503), ('aggressively', 504)]
[(500, 'aggregator'), (501, 'aggregators'), (502, 'aggression'), (503, 'aggressive'), (504, 'aggressively')]


In [20]:
# Define a custom dataset class
class CustomDataset(Dataset):
    def __init__(self, source_texts, target_summaries, bert_tokenizer):
        self.source_texts = source_texts
        self.target_summaries = target_summaries
        self.bert_tokenizer = bert_tokenizer

    def __len__(self):
        return len(self.source_texts)

    def __getitem__(self, idx):
        source_text = ' '.join([word for word in self.source_texts[idx]])
        target_summary = ' '.join([word for word in self.target_summaries[idx]])
        with torch.no_grad():
            source_text = self.bert_tokenizer.encode_plus(self.bert_tokenizer.tokenize(source_text),max_length=512)
            target_summary = self.bert_tokenizer.encode_plus(self.bert_tokenizer.tokenize(target_summary),max_length=512)
        return torch.tensor(source_text['input_ids']), torch.tensor(target_summary['input_ids'])

In [21]:
# Create custom datasets
train_dataset = CustomDataset(train_df['source_text'].tolist(), train_df['summary_text'].tolist(),bert_tokenizer)
test_dataset = CustomDataset(test_df['source_text'].tolist(), test_df['summary_text'].tolist(),bert_tokenizer)

In [22]:
'''
Note : 
In PyTorch, the `collate_fn` parameter in the `DataLoader` can be either a function or an object of a class. Both approaches are valid, and the choice depends on your preference and the complexity of your collation logic.

1. Function as `collate_fn`:
def my_collate_fn(batch):
    # Your custom collation logic here
    return processed_batch
# Use the function with DataLoader
train_loader = DataLoader(dataset, batch_size=64, collate_fn=my_collate_fn)

2. Class as `collate_fn`:
class MyCollateClass:
    def __call__(self, batch):
        # Your custom collation logic here
        return processed_batch
# Instantiate the class and use it with DataLoader
my_collate_instance = MyCollateClass()
train_loader = DataLoader(dataset, batch_size=64, collate_fn=my_collate_instance)

Using a class allows you to maintain state between batches if needed, as the class instance retains its state between calls. This can be beneficial if your collation logic requires some persistent information.

The key point is that the `collate_fn` parameter should be a callable (a function or an object with a `__call__` method) that takes a list of batch data and returns the processed batch. The processing typically involves padding sequences, converting data types, or any other necessary steps to prepare the batch for the model.
'''

'\nNote : \nIn PyTorch, the `collate_fn` parameter in the `DataLoader` can be either a function or an object of a class. Both approaches are valid, and the choice depends on your preference and the complexity of your collation logic.\n\n1. Function as `collate_fn`:\ndef my_collate_fn(batch):\n    # Your custom collation logic here\n    return processed_batch\n# Use the function with DataLoader\ntrain_loader = DataLoader(dataset, batch_size=64, collate_fn=my_collate_fn)\n\n2. Class as `collate_fn`:\nclass MyCollateClass:\n    def __call__(self, batch):\n        # Your custom collation logic here\n        return processed_batch\n# Instantiate the class and use it with DataLoader\nmy_collate_instance = MyCollateClass()\ntrain_loader = DataLoader(dataset, batch_size=64, collate_fn=my_collate_instance)\n\nUsing a class allows you to maintain state between batches if needed, as the class instance retains its state between calls. This can be beneficial if your collation logic requires some pe

In [23]:
# Define collate function for DataLoader
def collate_fn(batch):
    sources, targets = zip(*batch)
    padded_sources = pad_sequence(sources, batch_first=True)
    padded_targets = pad_sequence(targets, batch_first=True)
    return padded_sources, padded_targets

In [24]:
# Define the Encoder Architecture using LSTM
class Encoder(nn.Module):
    def __init__(self, bert_model, embedding_dim, hidden_dim, n_layers, dropout):
        super(Encoder, self).__init__()
        self.bert_model = bert_model
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers, bidirectional=True, dropout=dropout, batch_first=True)

    def forward(self, X):
        # X shape = [Batch_Size X Sequence_Len X 1]
        assert X.shape[1] <= 512 # max size sequences that BERT can handle
        with torch.no_grad():
            X = self.bert_model(X).last_hidden_state
        # X shape = [Batch_Size X Sequence_Len X embedding_dim]
        X,(hidden_state,cell_state) = self.lstm(X)
        # X shape = [Batch_Size X Seq_Len X Hidden_Dim] , Hidden_State_Shape = Cell_State_Shape = [Num_Layers X Batch_Size X Hidden_Dim]
        return hidden_state,cell_state

# Define the Decoder Architecture using LSTM
class Decoder(nn.Module):
    def __init__(self, bert_model, target_vocab_size, embedding_dim, hidden_dim, n_layers, dropout):
        super(Decoder, self).__init__()
        self.hidden_dim = hidden_dim
        self.target_vocab_size = target_vocab_size
        self.bert_model = bert_model
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers, bidirectional=True, dropout=dropout, batch_first=True)
        self.fc = nn.Linear(hidden_dim*2,target_vocab_size) # bidrectional hence 

    def forward(self, hidden_state, cell_state, Y, force_correction=0.5):
        # Hidden_State_Shape = Cell_State_Shape = [Num_Layers X Batch_Size X Hidden_Dim]
        # Y Shape = [Batch_Size X Sequence_Len]
        
        batch_size,seq_len = Y.shape[0],Y.shape[1]
        outputs = torch.zeros(seq_len,batch_size,self.target_vocab_size).to(device) # [Batch_Size X Sequence_Len]
        
        X = Y[:,1]
        # X shape = [Batch_Size X 1]
        for i in range(seq_len):
            X = X.unsqueeze(1) 
            # X shape = [Batch_Size X 1 X 1]
            with torch.no_grad():
                decoder_input = self.bert_model(X).last_hidden_state
            # decoder_input_shape = [Batch_Size X 1 X Embedding_Dim]
            assert decoder_input.shape[0]>0 and decoder_input.shape[1]>0
            decoder_output,(hidden_state,cell_state) = self.lstm(decoder_input,(hidden_state,cell_state))
            # Decoder_Output_Shape = [Batch_Size X 1 X hidden_dim]
            decoder_output = self.fc(decoder_output)
            # Decoder_Output_Shape = [Batch_Size X 1 X target_vocab_size]
            # Store output
            outputs[i] = decoder_output.permute(1,0,2).squeeze(0)
            _ , indexes = decoder_output.max(dim=2)
            # indexes shape = [Batch_Size X 1]
            indexes = indexes.squeeze(1)
            # use indexes as next input or correct it
            X = indexes if random.random() < 0.5 else Y[:,i]
            # indexes shape = X shape = [Batch_Size]
            
        # Output Shape = [Seq_Len X Batch_Size X Target_Vocab_Size]
        outputs = outputs.permute(1,0,2)
        outputs = outputs.reshape(-1,self.target_vocab_size)
        # Output Shape = [Batch_Size X Seq_Len X Target_Vocab_Size]
        return outputs

In [25]:
class EncDecLSTM(nn.Module):
    def __init__(self,enc,dec):
        super(EncDecLSTM,self).__init__()
        self.enc = enc
        self.dec = dec
        
    def forward(self,X,Y):
        hidden_state,cell_state = self.enc(X)
        output = self.dec(hidden_state,cell_state,Y)
        return output

In [26]:
# Instantiate the model
output_dim = bert_model.config.vocab_size
learning_rate = 0.001
embedding_dim = 768  
hidden_dim = 512
n_layers = 2
dropout = 0.2
num_epochs = 20
num_workers = 3

encoder = Encoder(bert_model, embedding_dim, hidden_dim, n_layers, dropout)
decoder = Decoder(bert_model, output_dim, embedding_dim, hidden_dim, n_layers, dropout)
model = EncDecLSTM(encoder,decoder)
print(model)

EncDecLSTM(
  (enc): Encoder(
    (bert_model): BertModel(
      (embeddings): BertEmbeddings(
        (word_embeddings): Embedding(30522, 768, padding_idx=0)
        (position_embeddings): Embedding(512, 768)
        (token_type_embeddings): Embedding(2, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): BertEncoder(
        (layer): ModuleList(
          (0-11): 12 x BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
          

In [27]:
trainable_params = sum(p.numel() for p in encoder.parameters() if p.requires_grad)
print(trainable_params)

121032960


In [28]:
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(trainable_params)

163868730


In [29]:
# Specify optimizer and loss function
optimizer = optim.Adam(model.parameters(),lr=learning_rate)
loss_fun = nn.CrossEntropyLoss()

In [30]:
# Create dataloaders
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=collate_fn, num_workers=num_workers)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False, collate_fn=collate_fn, num_workers=num_workers)

In [31]:
source_dummy,target_dummy = next(iter(train_loader))

In [32]:
print(source_dummy.shape,target_dummy.shape)

torch.Size([8, 443]) torch.Size([8, 224])


In [33]:
print(torch.min(target_dummy),torch.max(target_dummy))

tensor(0) tensor(29577)


In [34]:
y_pred = model(source_dummy,target_dummy)
print(y_pred.shape,target_dummy.shape)

We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.


torch.Size([1792, 30522]) torch.Size([8, 224])


In [35]:
def train_loop(model,dataloader,loss_fun,optimizer,device):
    model.train()
    model.to(device)
    min_loss = None
    for epoch in range(num_epochs):
        losses = []
        accuracies = []
        loop = tqdm(enumerate(dataloader), total=len(dataloader), leave=True)
        for batch,(x,y) in loop:
            # put on cuda
            x = x.to(device)
            y = y.to(device)
    
            # forward pass
            y_pred = model(x,y)
            
            # calculate loss & accuracy
            loss = loss_fun(y_pred,y.reshape(-1))
            losses.append(loss.detach().item())
            
            accuracy = check_accuracy(y_pred,y.reshape(-1))
            accuracies.append(accuracy.detach().item())
            
            # zero out prior gradients
            optimizer.zero_grad()
            
            # backprop
            loss.backward()
            
            # update weights
            optimizer.step()
            
            # Update TQDM progress bar
            loop.set_description(f"Epoch [{epoch}/{num_epochs}] ")
            loop.set_postfix(loss=loss.detach().item(), accuracy=accuracy.detach().item())

        moving_loss = sum(losses) / len(losses)
        moving_accuracy = sum(accuracies) / len(accuracies)
        checkpoint = {'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()}
        # Save check point
        if min_loss == None:
            min_loss = moving_loss
            save_checkpoint(checkpoint)
        elif moving_loss < min_loss:
            min_loss = moving_loss
            save_checkpoint(checkpoint)
        print('Epoch {0} : Loss = {1} , Training Accuracy={2}'.format(epoch, moving_loss, moving_accuracy))

In [36]:
train_loop(model,train_loader,loss_fun,optimizer,device)

Epoch [0/20] : 100%|██████████| 223/223 [18:41<00:00,  5.03s/it, accuracy=24.5, loss=6.66]


Saving weights-->
Epoch 0 : Loss = 4.989009191102511 , Training Accuracy=44.063086676490684


Epoch [1/20] : 100%|██████████| 223/223 [18:26<00:00,  4.96s/it, accuracy=37.4, loss=5.45]


Saving weights-->
Epoch 1 : Loss = 4.746741835846494 , Training Accuracy=44.27731052107875


Epoch [2/20] : 100%|██████████| 223/223 [18:16<00:00,  4.92s/it, accuracy=58.5, loss=3.4] 


Saving weights-->
Epoch 2 : Loss = 4.644630534766501 , Training Accuracy=45.06716356149169


Epoch [3/20] : 100%|██████████| 223/223 [18:02<00:00,  4.85s/it, accuracy=44.7, loss=4.63]

Epoch 3 : Loss = 4.663947915817055 , Training Accuracy=44.71359029692919



Epoch [4/20] : 100%|██████████| 223/223 [18:23<00:00,  4.95s/it, accuracy=47.6, loss=4.44]


Saving weights-->
Epoch 4 : Loss = 4.572192267986691 , Training Accuracy=45.53478465914192


Epoch [5/20] : 100%|██████████| 223/223 [18:24<00:00,  4.95s/it, accuracy=24.6, loss=6.5] 


Saving weights-->
Epoch 5 : Loss = 4.551498611946277 , Training Accuracy=45.45389306277972


Epoch [6/20] : 100%|██████████| 223/223 [18:19<00:00,  4.93s/it, accuracy=39.7, loss=4.87]

Epoch 6 : Loss = 4.563818603353115 , Training Accuracy=44.84078839220808



Epoch [7/20] : 100%|██████████| 223/223 [18:21<00:00,  4.94s/it, accuracy=39.5, loss=5.01]


Saving weights-->
Epoch 7 : Loss = 4.440578717287345 , Training Accuracy=45.858709070180026


Epoch [8/20] : 100%|██████████| 223/223 [18:16<00:00,  4.92s/it, accuracy=18.9, loss=6.63]

Epoch 8 : Loss = 4.457912401233553 , Training Accuracy=45.1254554286666



Epoch [9/20] : 100%|██████████| 223/223 [18:32<00:00,  4.99s/it, accuracy=24.2, loss=6.32]


Saving weights-->
Epoch 9 : Loss = 4.384627342758692 , Training Accuracy=45.35863589812822


Epoch [10/20] : 100%|██████████| 223/223 [18:18<00:00,  4.93s/it, accuracy=17.3, loss=6.45]


Saving weights-->
Epoch 10 : Loss = 4.338240559325625 , Training Accuracy=45.13006490014594


Epoch [11/20] : 100%|██████████| 223/223 [18:13<00:00,  4.90s/it, accuracy=22.8, loss=6.27]


Saving weights-->
Epoch 11 : Loss = 4.308947736372327 , Training Accuracy=44.84712798285378


Epoch [12/20] : 100%|██████████| 223/223 [18:09<00:00,  4.89s/it, accuracy=31.4, loss=5.49]


Saving weights-->
Epoch 12 : Loss = 4.246689762235222 , Training Accuracy=45.129745098507456


Epoch [13/20] : 100%|██████████| 223/223 [18:26<00:00,  4.96s/it, accuracy=48.9, loss=3.93]


Saving weights-->
Epoch 13 : Loss = 4.189839440610911 , Training Accuracy=45.00220037896537


Epoch [14/20] : 100%|██████████| 223/223 [18:14<00:00,  4.91s/it, accuracy=25.8, loss=5.43]


Saving weights-->
Epoch 14 : Loss = 4.1783909284480485 , Training Accuracy=44.40167400548276


Epoch [15/20] : 100%|██████████| 223/223 [18:11<00:00,  4.90s/it, accuracy=30.9, loss=5.21]


Saving weights-->
Epoch 15 : Loss = 4.09980836149823 , Training Accuracy=44.66537544866314


Epoch [16/20] : 100%|██████████| 223/223 [18:27<00:00,  4.97s/it, accuracy=23.5, loss=5.69]


Saving weights-->
Epoch 16 : Loss = 3.968940021211256 , Training Accuracy=45.52538209752652


Epoch [17/20] : 100%|██████████| 223/223 [18:26<00:00,  4.96s/it, accuracy=35.8, loss=4.57]


Saving weights-->
Epoch 17 : Loss = 3.9371425431940055 , Training Accuracy=45.18788345405339


Epoch [18/20] : 100%|██████████| 223/223 [18:38<00:00,  5.01s/it, accuracy=27.1, loss=5.8] 


Saving weights-->
Epoch 18 : Loss = 3.8534800717648903 , Training Accuracy=45.54622865685433


Epoch [19/20] : 100%|██████████| 223/223 [18:18<00:00,  4.93s/it, accuracy=28.2, loss=4.97]


Saving weights-->
Epoch 19 : Loss = 3.833954502114266 , Training Accuracy=45.190733366482995


In [37]:
def test_loop(model,dataloader,loss_fun,device):
    model.eval()
    model.to(device)
    losses = []
    samples,correct = 0,0
    loop = tqdm(enumerate(dataloader), total=len(dataloader), leave=True)
    with torch.no_grad():
        for batch,(x,y) in loop:
            # put on cuda
            x = x.to(device)
            y = y.to(device)

            # forward pass
            y_pred = model(x,y)
            
            # caclulate test loss
            loss = loss_fun(y_pred,y.reshape(-1))
            losses.append(loss.item())

            # accuracy over entire dataset
            _,predpos=y_pred.max(1)
            samples+=len(y.reshape(-1))
            correct+=(predpos==y.reshape(-1)).sum().item()
            
            # Update TQDM progress bar
            loop.set_postfix(loss=loss.item())

    print("Final Test Accuracy = ",100 * (correct/samples))

In [38]:
test_loop(model,test_loader,loss_fun,device)

100%|██████████| 56/56 [03:03<00:00,  3.28s/it, loss=5.18]

Final Test Accuracy =  48.537226904289014



