In [2]:
!pip3 install torch torchvision torchaudio



In [3]:
# Imports
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch.nn.utils.rnn import pad_sequence
from torchtext.vocab import GloVe
from spacy.tokenizer import Tokenizer
from sklearn.model_selection import train_test_split
import spacy
import pandas as pd
import numpy as np
import os
import re
from nltk.corpus import stopwords 
import random
from tqdm import tqdm



In [4]:
# Tokenizer using spacy
nlp = spacy.load("en_core_web_sm")
tokenizer = Tokenizer(nlp.vocab)

In [5]:
# Add data from files into dataframe for easier access
def create_dataframe(source_text_path,target_text_path):
    txt_files_source = [file for file in os.listdir(source_text_path) if file.endswith('.txt')]
    txt_files_target = [file for file in os.listdir(target_text_path) if file.endswith('.txt')]
    df = pd.DataFrame(columns=['headlines','text'])
    for source,target in zip(txt_files_source,txt_files_target):
        assert source==target
        source_file_path = os.path.join(source_text_path, source)
        target_file_path = os.path.join(target_text_path, target)
        # Read the content of the file
        with open(source_file_path,'r',encoding='latin-1') as file:
            source_text = file.read()
        with open(target_file_path,'r',encoding='latin-1') as file:
            target_text = file.read()
        df.loc[len(df.index)] = [source_text,target_text]
    return df

In [6]:
# Check accuracy function
def check_accuracy(output,labels):
    _ , predpos = output.max(1)
    num_samples=len(labels)
    num_correct=(predpos==labels).sum()
    return (num_correct/num_samples)*100

# Save checkpoint
def save_checkpoint(state,filename='weights.pth.tar'):
    print('Saving weights-->')
    torch.save(state,filename)

# Load checkpoint
def load_checkpoint(checkpoint,model,optim):
    print('Loading weights-->')
    model.load_state_dict(checkpoint['state_dict'])
    optim.load_state_dict(checkpoint['optimizer'])

In [7]:
df1 = create_dataframe("/kaggle/input/bbc-news-summary/BBC News Summary/News Articles/business","/kaggle/input/bbc-news-summary/BBC News Summary/Summaries/business")
df2 = create_dataframe("/kaggle/input/bbc-news-summary/BBC News Summary/News Articles/entertainment","/kaggle/input/bbc-news-summary/BBC News Summary/Summaries/entertainment")
df3 = create_dataframe("/kaggle/input/bbc-news-summary/BBC News Summary/News Articles/politics","/kaggle/input/bbc-news-summary/BBC News Summary/Summaries/politics")
df4 = create_dataframe("/kaggle/input/bbc-news-summary/BBC News Summary/News Articles/sport","/kaggle/input/bbc-news-summary/BBC News Summary/Summaries/sport")
df5 = create_dataframe("/kaggle/input/bbc-news-summary/BBC News Summary/News Articles/tech","/kaggle/input/bbc-news-summary/BBC News Summary/Summaries/tech")

In [8]:
df = pd.concat([df1, df2, df3, df4, df5], ignore_index=True)

In [9]:
# Split into train and test sets
df = df.rename(columns = {"headlines":"source_text","text":"summary_text"})
X,Y = df["source_text"],df["summary_text"]
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
train_df = pd.DataFrame({'source_text': X_train, 'summary_text': Y_train})
test_df = pd.DataFrame({'source_text': X_test, 'summary_text': Y_test})

In [10]:
contraction_mapping = {"ain't": "is not", "aren't": "are not","can't": "cannot", "'cause": "because", "could've": "could have", "couldn't": "could not",

                           "didn't": "did not", "doesn't": "does not", "don't": "do not", "hadn't": "had not", "hasn't": "has not", "haven't": "have not",

                           "he'd": "he would","he'll": "he will", "he's": "he is", "how'd": "how did", "how'd'y": "how do you", "how'll": "how will", "how's": "how is",

                           "I'd": "I would", "I'd've": "I would have", "I'll": "I will", "I'll've": "I will have","I'm": "I am", "I've": "I have", "i'd": "i would",

                           "i'd've": "i would have", "i'll": "i will",  "i'll've": "i will have","i'm": "i am", "i've": "i have", "isn't": "is not", "it'd": "it would",

                           "it'd've": "it would have", "it'll": "it will", "it'll've": "it will have","it's": "it is", "let's": "let us", "ma'am": "madam",

                           "mayn't": "may not", "might've": "might have","mightn't": "might not","mightn't've": "might not have", "must've": "must have",

                           "mustn't": "must not", "mustn't've": "must not have", "needn't": "need not", "needn't've": "need not have","o'clock": "of the clock",

                           "oughtn't": "ought not", "oughtn't've": "ought not have", "shan't": "shall not", "sha'n't": "shall not", "shan't've": "shall not have",

                           "she'd": "she would", "she'd've": "she would have", "she'll": "she will", "she'll've": "she will have", "she's": "she is",

                           "should've": "should have", "shouldn't": "should not", "shouldn't've": "should not have", "so've": "so have","so's": "so as",

                           "this's": "this is","that'd": "that would", "that'd've": "that would have", "that's": "that is", "there'd": "there would",

                           "there'd've": "there would have", "there's": "there is", "here's": "here is","they'd": "they would", "they'd've": "they would have",

                           "they'll": "they will", "they'll've": "they will have", "they're": "they are", "they've": "they have", "to've": "to have",

                           "wasn't": "was not", "we'd": "we would", "we'd've": "we would have", "we'll": "we will", "we'll've": "we will have", "we're": "we are",

                           "we've": "we have", "weren't": "were not", "what'll": "what will", "what'll've": "what will have", "what're": "what are",

                           "what's": "what is", "what've": "what have", "when's": "when is", "when've": "when have", "where'd": "where did", "where's": "where is",

                           "where've": "where have", "who'll": "who will", "who'll've": "who will have", "who's": "who is", "who've": "who have",

                           "why's": "why is", "why've": "why have", "will've": "will have", "won't": "will not", "won't've": "will not have",

                           "would've": "would have", "wouldn't": "would not", "wouldn't've": "would not have", "y'all": "you all",

                           "y'all'd": "you all would","y'all'd've": "you all would have","y'all're": "you all are","y'all've": "you all have",

                           "you'd": "you would", "you'd've": "you would have", "you'll": "you will", "you'll've": "you will have",

                           "you're": "you are", "you've": "you have"}


stop_words = set(stopwords.words('english'))

In [11]:
def text_cleaner(text):
    newString = text.lower()
    newString = re.sub(r'\([^)]*\)', '', newString)
    newString = re.sub('"','', newString)
    newString = ' '.join([contraction_mapping[t] if t in contraction_mapping else t for t in newString.split(" ")])    
    newString = re.sub(r"'s\b","",newString)
    newString = re.sub("[^a-zA-Z]", " ", newString) 
    tokens = [w for w in newString.split() if not w in stop_words]
    return " ".join(tokens)

In [12]:
# Tokenize and lowercase text using spacy
train_df['source_text'] = train_df['source_text'].apply(lambda x: [token.text.lower() for token in tokenizer(text_cleaner(x))])
train_df['summary_text'] = train_df['summary_text'].apply(lambda x: [token.text.lower() for token in tokenizer(text_cleaner(x))])

test_df['source_text'] = test_df['source_text'].apply(lambda x: [token.text.lower() for token in tokenizer(text_cleaner(x))])
test_df['summary_text'] = test_df['summary_text'].apply(lambda x: [token.text.lower() for token in tokenizer(text_cleaner(x))])

In [13]:
# Add START AND END tokens to summary
train_df['source_text'] = train_df['source_text'].apply(lambda x : ['_START_']+ x + ['_END_'])
train_df['summary_text'] = train_df['summary_text'].apply(lambda x : ['_START_']+ x + ['_END_'])

test_df['source_text'] = test_df['source_text'].apply(lambda x : ['_START_']+ x + ['_END_'])
test_df['summary_text'] = test_df['summary_text'].apply(lambda x : ['_START_']+ x + ['_END_'])

In [14]:
train_df.head()

Unnamed: 0,source_text,summary_text
1490,"[_START_, ferguson, fears, milan, cutting, edg...","[_START_, loss, could, worse, quality, bring, ..."
2001,"[_START_, ask, jeeves, joins, web, log, market...","[_START_, jim, lanzone, vice, president, searc..."
1572,"[_START_, safin, cool, wimbledon, newly, crown...","[_START_, expect, sampras, favourite, pressure..."
1840,"[_START_, mobiles, rack, years, use, mobile, p...","[_START_, cellnet, vodafone, mobile, phone, op..."
610,"[_START_, eminem, secret, gig, venue, revealed...","[_START_, fourth, album, rap, star, sale, two,..."


In [15]:
# Build vocabularies - each word has an index, note : words sorted in ascending order
all_tokens = train_df['source_text'].tolist() + train_df['summary_text'].tolist() + test_df['source_text'].tolist() + test_df['summary_text'].tolist()
source_vocab = {actual_word: idx for idx, (word_num, actual_word) in enumerate(sorted(enumerate(set(token for tokens in all_tokens for token in tokens)), key=lambda x: x[1]))}
target_vocab = {actual_word: idx for idx, (word_num, actual_word) in enumerate(sorted(enumerate(set(token for tokens in all_tokens for token in tokens)), key=lambda x: x[1]))}

In [16]:
print(all_tokens[10])

['_START_', 'watchdog', 'probes', 'e', 'mail', 'deletions', 'information', 'commissioner', 'says', 'urgently', 'asking', 'details', 'cabinet', 'office', 'orders', 'telling', 'staff', 'delete', 'e', 'mails', 'three', 'months', 'old', 'richard', 'thomas', 'totally', 'condemned', 'deletion', 'e', 'mails', 'prevent', 'disclosure', 'freedom', 'information', 'laws', 'coming', 'force', 'january', 'government', 'guidance', 'said', 'e', 'mails', 'deleted', 'served', 'current', 'purpose', 'mr', 'thomas', 'said', 'tories', 'lib', 'dems', 'questioned', 'timing', 'new', 'rules', 'tory', 'leader', 'michael', 'howard', 'written', 'tony', 'blair', 'demanding', 'explanation', 'new', 'rules', 'e', 'mail', 'retention', 'monday', 'lib', 'dem', 'constitutional', 'affairs', 'committee', 'chairman', 'alan', 'beith', 'warned', 'deletion', 'millions', 'government', 'e', 'mails', 'could', 'harm', 'ability', 'key', 'probes', 'like', 'hutton', 'inquiry', 'timing', 'new', 'rules', 'freedom', 'information', 'act', 

In [17]:
len(source_vocab)

27638

In [18]:
source_vocab == target_vocab

True

In [19]:
temp = list(sorted(source_vocab.items()))
for word, idx in temp[-5:]:
    print(word,idx)

zuluaga 27633
zurich 27634
zutons 27635
zvonareva 27636
zvyagintsev 27637


In [20]:
# Load pretrained GloVe embeddings
global_vectors = GloVe(name='6B', dim=300)

.vector_cache/glove.6B.zip: 862MB [02:39, 5.39MB/s]                               
100%|█████████▉| 399999/400000 [01:05<00:00, 6068.57it/s]


In [21]:
source_vectors = torch.stack([global_vectors.get_vecs_by_tokens(word) for word, idx in sorted(source_vocab.items(), key=lambda x: x[1])])
print(type(source_vectors), source_vectors.shape)

<class 'torch.Tensor'> torch.Size([27638, 300])


In [22]:
'''
The source_vectors is the predefined word to vector mapping we have created from pretrained Glove Embeddings.
We use this as input to the Embedding Layer, which will not be trained from scratch.
Lets say a sentence is passed as input Eg : [658930, 9289283, 2624242, 89798, 53424]
The Embedding layer performs a lookup operation for every word in sentence using the source_vectors. 
and this input of size [1,5] gets converted to [1,5,100] 
'''

'\nThe source_vectors is the predefined word to vector mapping we have created from pretrained Glove Embeddings.\nWe use this as input to the Embedding Layer, which will not be trained from scratch.\nLets say a sentence is passed as input Eg : [658930, 9289283, 2624242, 89798, 53424]\nThe Embedding layer performs a lookup operation for every word in sentence using the source_vectors. \nand this input of size [1,5] gets converted to [1,5,100] \n'

In [23]:
# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [24]:
# Define a custom dataset class
class CustomDataset(Dataset):
    def __init__(self, source_texts, target_summaries, source_vocab, target_vocab):
        self.source_texts = source_texts
        self.target_summaries = target_summaries
        self.source_vocab = source_vocab
        self.target_vocab = target_vocab

    def __len__(self):
        return len(self.source_texts)

    def __getitem__(self, idx):
        source_text = [self.source_vocab[word] for word in self.source_texts[idx]]
        target_summary = [self.target_vocab[word] for word in self.target_summaries[idx]]
        return torch.tensor(source_text), torch.tensor(target_summary)

In [25]:
# Create custom datasets
train_dataset = CustomDataset(train_df['source_text'].tolist(), train_df['summary_text'].tolist(), source_vocab, target_vocab)
test_dataset = CustomDataset(test_df['source_text'].tolist(), test_df['summary_text'].tolist(), source_vocab, target_vocab)

In [26]:
'''
Note : 
In PyTorch, the `collate_fn` parameter in the `DataLoader` can be either a function or an object of a class. Both approaches are valid, and the choice depends on your preference and the complexity of your collation logic.

1. Function as `collate_fn`:
def my_collate_fn(batch):
    # Your custom collation logic here
    return processed_batch
# Use the function with DataLoader
train_loader = DataLoader(dataset, batch_size=64, collate_fn=my_collate_fn)

2. Class as `collate_fn`:
class MyCollateClass:
    def __call__(self, batch):
        # Your custom collation logic here
        return processed_batch
# Instantiate the class and use it with DataLoader
my_collate_instance = MyCollateClass()
train_loader = DataLoader(dataset, batch_size=64, collate_fn=my_collate_instance)

Using a class allows you to maintain state between batches if needed, as the class instance retains its state between calls. This can be beneficial if your collation logic requires some persistent information.

The key point is that the `collate_fn` parameter should be a callable (a function or an object with a `__call__` method) that takes a list of batch data and returns the processed batch. The processing typically involves padding sequences, converting data types, or any other necessary steps to prepare the batch for the model.
'''

'\nNote : \nIn PyTorch, the `collate_fn` parameter in the `DataLoader` can be either a function or an object of a class. Both approaches are valid, and the choice depends on your preference and the complexity of your collation logic.\n\n1. Function as `collate_fn`:\ndef my_collate_fn(batch):\n    # Your custom collation logic here\n    return processed_batch\n# Use the function with DataLoader\ntrain_loader = DataLoader(dataset, batch_size=64, collate_fn=my_collate_fn)\n\n2. Class as `collate_fn`:\nclass MyCollateClass:\n    def __call__(self, batch):\n        # Your custom collation logic here\n        return processed_batch\n# Instantiate the class and use it with DataLoader\nmy_collate_instance = MyCollateClass()\ntrain_loader = DataLoader(dataset, batch_size=64, collate_fn=my_collate_instance)\n\nUsing a class allows you to maintain state between batches if needed, as the class instance retains its state between calls. This can be beneficial if your collation logic requires some pe

In [27]:
# Define collate function for DataLoader
def collate_fn(batch):
    sources, targets = zip(*batch)
    padded_sources = pad_sequence(sources, batch_first=True)
    padded_targets = pad_sequence(targets, batch_first=True)
    return padded_sources, padded_targets

In [28]:
# Define the Encoder Architecture using LSTM
class Encoder(nn.Module):
    def __init__(self, source_vectors, embedding_dim, hidden_dim, n_layers, dropout):
        super(Encoder, self).__init__()
        self.embedding = nn.Embedding.from_pretrained(source_vectors, freeze=False)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers, bidirectional=True, dropout=dropout, batch_first=True)

    def forward(self, X):
        # X shape = [Batch_Size X Sequence_Len X 1]
        X = self.embedding(X)
        # X shape = [Batch_Size X Sequence_Len X Embedding_Dim]
        assert X.shape[0]>0 and X.shape[1]>0
        X,(hidden_state,cell_state) = self.lstm(X)
        # X shape = [Batch_Size X Seq_Len X Hidden_Dim] , Hidden_State_Shape = Cell_State_Shape = [Num_Layers X Batch_Size X Hidden_Dim]
        return hidden_state,cell_state

# Define the Decoder Architecture using LSTM
class Decoder(nn.Module):
    def __init__(self, source_vectors, target_vocab_size, embedding_dim, hidden_dim, n_layers, dropout):
        super(Decoder, self).__init__()
        self.hidden_dim = hidden_dim
        self.target_vocab_size = target_vocab_size
        self.embedding = nn.Embedding.from_pretrained(source_vectors, freeze=False)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers, bidirectional=True, dropout=dropout, batch_first=True)
        self.fc = nn.Linear(hidden_dim*2,target_vocab_size) # bidrectional hence 

    def forward(self, hidden_state, cell_state, Y, force_correction=0.5):
        # Hidden_State_Shape = Cell_State_Shape = [Num_Layers X Batch_Size X Hidden_Dim]
        # Y Shape = [Batch_Size X Sequence_Len]
        
        batch_size,seq_len = Y.shape[0],Y.shape[1]
        outputs = torch.zeros(seq_len,batch_size,self.target_vocab_size,requires_grad=True).to(device) # [Batch_Size X Sequence_Len]
        
        X = Y[:,1]
        # X shape = [Batch_Size X 1]
        for i in range(seq_len):
            X = X.unsqueeze(1) 
            # X shape = [Batch_Size X 1 X 1]
            decoder_input = self.embedding(X)
            # decoder_input_shape = [Batch_Size X 1 X Embedding_Dim]
            assert decoder_input.shape[0]>0 and decoder_input.shape[1]>0
            decoder_output,(hidden_state,cell_state) = self.lstm(decoder_input,(hidden_state,cell_state))
            # Decoder_Output_Shape = [Batch_Size X 1 X Target_Vocab_Size]
            decoder_output = self.fc(decoder_output)
            # Store output
            outputs[i] = decoder_output.permute(1,0,2).squeeze(0)
            _ , indexes = decoder_output.max(dim=2)
            # indexes shape = [Batch_Size X 1]
            indexes = indexes.squeeze(1)
            # use indexes as next input or correct it
            X = indexes if random.random() < 0.5 else Y[:,i]
            # indexes shape = X shape = [Batch_Size]
            
        # Output Shape = [Seq_Len X Batch_Size X Target_Vocab_Size]
        outputs = outputs.permute(1,0,2)
        outputs = outputs.reshape(-1,self.target_vocab_size)
        # Output Shape = [Batch_Size X Seq_Len X Target_Vocab_Size]
        return outputs

In [29]:
class EncDecLSTM(nn.Module):
    def __init__(self,enc,dec):
        super(EncDecLSTM,self).__init__()
        self.enc = enc
        self.dec = dec
        
    def forward(self,X,Y):
        hidden_state,cell_state = self.enc(X)
        output = self.dec(hidden_state,cell_state,Y)
        return output

In [30]:
# Instantiate the model
input_dim = len(source_vocab)
output_dim = len(target_vocab)
learning_rate = 0.001
embedding_dim = 300  
hidden_dim = 512
n_layers = 2
dropout = 0.2
num_epochs = 40
num_workers = 3

encoder = Encoder(source_vectors, embedding_dim, hidden_dim, n_layers, dropout)
decoder = Decoder(source_vectors, output_dim, embedding_dim, hidden_dim, n_layers, dropout)
model = EncDecLSTM(encoder,decoder)
print(model)

EncDecLSTM(
  (enc): Encoder(
    (embedding): Embedding(27638, 300)
    (lstm): LSTM(300, 512, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True)
  )
  (dec): Decoder(
    (embedding): Embedding(27638, 300)
    (lstm): LSTM(300, 512, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True)
    (fc): Linear(in_features=1024, out_features=27638, bias=True)
  )
)


In [31]:
trainable_params = sum(p.numel() for p in encoder.parameters() if p.requires_grad)
print(trainable_params)

17925192


In [32]:
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(trainable_params)

64179334


In [33]:
# Specify optimizer and loss function
optimizer = optim.Adam(model.parameters(),lr=learning_rate)
loss_fun = nn.CrossEntropyLoss()

In [34]:
# Create dataloaders
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=collate_fn, num_workers=num_workers)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False, collate_fn=collate_fn, num_workers=num_workers)

In [35]:
source_dummy,target_dummy = next(iter(train_loader))

In [36]:
print(source_dummy.shape,target_dummy.shape)

torch.Size([8, 293]) torch.Size([8, 123])


In [37]:
y_pred = model(source_dummy,target_dummy)
print(y_pred.shape,target_dummy.shape)

torch.Size([984, 27638]) torch.Size([8, 123])


In [38]:
def train_loop(model,dataloader,loss_fun,optimizer,device):
    model.train()
    model.to(device)
    min_loss = None
    for epoch in range(num_epochs):
        losses = []
        accuracies = []
        loop = tqdm(enumerate(dataloader), total=len(dataloader), leave=True)
        for batch,(x,y) in loop:
            # put on cuda
            x = x.to(device)
            y = y.to(device)
    
            # forward pass
            y_pred = model(x,y)
            
            # calculate loss & accuracy
            loss = loss_fun(y_pred,y.reshape(-1))
            losses.append(loss.detach().item())
            
            accuracy = check_accuracy(y_pred,y.reshape(-1))
            accuracies.append(accuracy.detach().item())
            
            # zero out prior gradients
            optimizer.zero_grad()
            
            # backprop
            loss.backward()
            
            # update weights
            optimizer.step()
            
            # Update TQDM progress bar
            loop.set_description(f"Epoch [{epoch}/{num_epochs}] ")
            loop.set_postfix(loss=loss.detach().item(), accuracy=accuracy.detach().item())

        moving_loss = sum(losses) / len(losses)
        moving_accuracy = sum(accuracies) / len(accuracies)
        checkpoint = {'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()}
        # Save check point
        if min_loss == None:
            min_loss = moving_loss
            save_checkpoint(checkpoint)
        elif moving_loss < min_loss:
            min_loss = moving_loss
            save_checkpoint(checkpoint)
        print('Epoch {0} : Loss = {1} , Training Accuracy={2}'.format(epoch, moving_loss, moving_accuracy))

In [39]:
train_loop(model,train_loader,loss_fun,optimizer,device)

Epoch [0/40] : 100%|██████████| 223/223 [06:34<00:00,  1.77s/it, accuracy=37, loss=5.52]  


Saving weights-->
Epoch 0 : Loss = 5.170321042762209 , Training Accuracy=43.97506107450066


Epoch [1/40] : 100%|██████████| 223/223 [06:30<00:00,  1.75s/it, accuracy=22.7, loss=6.44]


Saving weights-->
Epoch 1 : Loss = 4.849225168805486 , Training Accuracy=43.63764037572749


Epoch [2/40] : 100%|██████████| 223/223 [06:37<00:00,  1.78s/it, accuracy=41.8, loss=4.97]


Saving weights-->
Epoch 2 : Loss = 4.632201997688533 , Training Accuracy=44.765816957961285


Epoch [3/40] : 100%|██████████| 223/223 [06:32<00:00,  1.76s/it, accuracy=35.4, loss=4.95]


Saving weights-->
Epoch 3 : Loss = 4.5436811730466085 , Training Accuracy=44.25196308939981


Epoch [4/40] : 100%|██████████| 223/223 [06:33<00:00,  1.76s/it, accuracy=19.1, loss=6.61]


Saving weights-->
Epoch 4 : Loss = 4.368399988375437 , Training Accuracy=44.61642882000705


Epoch [5/40] : 100%|██████████| 223/223 [06:36<00:00,  1.78s/it, accuracy=35.7, loss=4.94]


Saving weights-->
Epoch 5 : Loss = 4.226710896855512 , Training Accuracy=45.05840121042568


Epoch [6/40] : 100%|██████████| 223/223 [06:36<00:00,  1.78s/it, accuracy=32.4, loss=5.13]


Saving weights-->
Epoch 6 : Loss = 4.08849782954417 , Training Accuracy=45.490531151604756


Epoch [7/40] : 100%|██████████| 223/223 [06:35<00:00,  1.77s/it, accuracy=37.6, loss=4.5] 


Saving weights-->
Epoch 7 : Loss = 4.009016660296863 , Training Accuracy=45.30049902227427


Epoch [8/40] : 100%|██████████| 223/223 [06:39<00:00,  1.79s/it, accuracy=48.7, loss=3.58]


Saving weights-->
Epoch 8 : Loss = 3.850891118626958 , Training Accuracy=45.94830972303724


Epoch [9/40] : 100%|██████████| 223/223 [06:31<00:00,  1.76s/it, accuracy=36.9, loss=4.81]


Saving weights-->
Epoch 9 : Loss = 3.7660788639778513 , Training Accuracy=45.8383421790974


Epoch [10/40] : 100%|██████████| 223/223 [06:35<00:00,  1.77s/it, accuracy=37.3, loss=4.1] 


Saving weights-->
Epoch 10 : Loss = 3.6013749597318503 , Training Accuracy=46.79680235824243


Epoch [11/40] : 100%|██████████| 223/223 [06:30<00:00,  1.75s/it, accuracy=34.6, loss=4.51]


Saving weights-->
Epoch 11 : Loss = 3.502199738014974 , Training Accuracy=46.942338943481445


Epoch [12/40] : 100%|██████████| 223/223 [06:37<00:00,  1.78s/it, accuracy=37.6, loss=3.9] 


Saving weights-->
Epoch 12 : Loss = 3.315505473603048 , Training Accuracy=48.544774162395115


Epoch [13/40] : 100%|██████████| 223/223 [06:35<00:00,  1.77s/it, accuracy=51.9, loss=3.21]


Saving weights-->
Epoch 13 : Loss = 3.211355582481008 , Training Accuracy=49.20275252816923


Epoch [14/40] : 100%|██████████| 223/223 [06:33<00:00,  1.76s/it, accuracy=18, loss=5.28]  


Saving weights-->
Epoch 14 : Loss = 3.083590590365799 , Training Accuracy=50.22189776672911


Epoch [15/40] : 100%|██████████| 223/223 [06:41<00:00,  1.80s/it, accuracy=28.9, loss=4.03]


Saving weights-->
Epoch 15 : Loss = 2.8759956461431733 , Training Accuracy=52.47948619175385


Epoch [16/40] : 100%|██████████| 223/223 [06:33<00:00,  1.76s/it, accuracy=38.9, loss=3.64]


Saving weights-->
Epoch 16 : Loss = 2.7932772989230306 , Training Accuracy=52.8579196502275


Epoch [17/40] : 100%|██████████| 223/223 [06:35<00:00,  1.77s/it, accuracy=39.7, loss=3.6] 


Saving weights-->
Epoch 17 : Loss = 2.629020556740697 , Training Accuracy=54.76301617472696


Epoch [18/40] : 100%|██████████| 223/223 [06:32<00:00,  1.76s/it, accuracy=43.6, loss=3.27]


Saving weights-->
Epoch 18 : Loss = 2.5090404294531443 , Training Accuracy=56.10496746798802


Epoch [19/40] : 100%|██████████| 223/223 [06:34<00:00,  1.77s/it, accuracy=33.7, loss=4.06]


Saving weights-->
Epoch 19 : Loss = 2.384683176006437 , Training Accuracy=57.53212344592997


Epoch [20/40] : 100%|██████████| 223/223 [06:33<00:00,  1.76s/it, accuracy=44.3, loss=3.18]


Saving weights-->
Epoch 20 : Loss = 2.2596387109414344 , Training Accuracy=59.085018551403095


Epoch [21/40] : 100%|██████████| 223/223 [06:36<00:00,  1.78s/it, accuracy=63.8, loss=1.97]


Saving weights-->
Epoch 21 : Loss = 2.1207772447923907 , Training Accuracy=61.06597166531824


Epoch [22/40] : 100%|██████████| 223/223 [06:36<00:00,  1.78s/it, accuracy=71, loss=1.51]  


Saving weights-->
Epoch 22 : Loss = 1.9910062578226952 , Training Accuracy=62.91295060983153


Epoch [23/40] : 100%|██████████| 223/223 [06:30<00:00,  1.75s/it, accuracy=70.7, loss=1.51]


Saving weights-->
Epoch 23 : Loss = 1.9163068928526121 , Training Accuracy=63.903144015325026


Epoch [24/40] : 100%|██████████| 223/223 [06:36<00:00,  1.78s/it, accuracy=59.4, loss=2.09]


Saving weights-->
Epoch 24 : Loss = 1.77123946753318 , Training Accuracy=66.36614737061642


Epoch [25/40] : 100%|██████████| 223/223 [06:34<00:00,  1.77s/it, accuracy=51.7, loss=2.76] 


Saving weights-->
Epoch 25 : Loss = 1.6692508320102777 , Training Accuracy=68.03035847274712


Epoch [26/40] : 100%|██████████| 223/223 [06:35<00:00,  1.78s/it, accuracy=65.8, loss=1.78] 


Saving weights-->
Epoch 26 : Loss = 1.570689267374475 , Training Accuracy=69.61465753461214


Epoch [27/40] : 100%|██████████| 223/223 [06:37<00:00,  1.78s/it, accuracy=66.1, loss=1.83] 


Saving weights-->
Epoch 27 : Loss = 1.4570283114642841 , Training Accuracy=71.71426406997202


Epoch [28/40] : 100%|██████████| 223/223 [06:33<00:00,  1.77s/it, accuracy=63.9, loss=1.93] 


Saving weights-->
Epoch 28 : Loss = 1.3711293045715367 , Training Accuracy=73.25767761709444


Epoch [29/40] : 100%|██████████| 223/223 [06:33<00:00,  1.77s/it, accuracy=64.4, loss=1.69] 


Saving weights-->
Epoch 29 : Loss = 1.2909817217176804 , Training Accuracy=74.79665391862125


Epoch [30/40] : 100%|██████████| 223/223 [06:32<00:00,  1.76s/it, accuracy=76.2, loss=1.23] 


Saving weights-->
Epoch 30 : Loss = 1.1990727996077772 , Training Accuracy=76.47885898196644


Epoch [31/40] : 100%|██████████| 223/223 [06:34<00:00,  1.77s/it, accuracy=80.5, loss=0.91] 


Saving weights-->
Epoch 31 : Loss = 1.0856732222264123 , Training Accuracy=78.61492155271796


Epoch [32/40] : 100%|██████████| 223/223 [06:36<00:00,  1.78s/it, accuracy=82.5, loss=0.864]


Saving weights-->
Epoch 32 : Loss = 0.9867342506021662 , Training Accuracy=80.71287018301241


Epoch [33/40] : 100%|██████████| 223/223 [06:33<00:00,  1.76s/it, accuracy=77.2, loss=1.24] 


Saving weights-->
Epoch 33 : Loss = 0.8960566534055188 , Training Accuracy=82.60748629719687


Epoch [34/40] : 100%|██████████| 223/223 [06:34<00:00,  1.77s/it, accuracy=71.9, loss=1.43] 


Saving weights-->
Epoch 34 : Loss = 0.791395855190508 , Training Accuracy=84.72569268059837


Epoch [35/40] : 100%|██████████| 223/223 [06:30<00:00,  1.75s/it, accuracy=87.2, loss=0.638]


Saving weights-->
Epoch 35 : Loss = 0.7154198688509218 , Training Accuracy=86.27419099679442


Epoch [36/40] : 100%|██████████| 223/223 [06:35<00:00,  1.78s/it, accuracy=83.8, loss=0.808]


Saving weights-->
Epoch 36 : Loss = 0.6269414965480967 , Training Accuracy=88.19202601000868


Epoch [37/40] : 100%|██████████| 223/223 [06:37<00:00,  1.78s/it, accuracy=92.2, loss=0.553]


Saving weights-->
Epoch 37 : Loss = 0.5549693262296407 , Training Accuracy=89.72148234961813


Epoch [38/40] : 100%|██████████| 223/223 [06:35<00:00,  1.77s/it, accuracy=91.2, loss=0.499]


Saving weights-->
Epoch 38 : Loss = 0.47322837781211186 , Training Accuracy=91.44645198257514


Epoch [39/40] : 100%|██████████| 223/223 [06:35<00:00,  1.77s/it, accuracy=92.9, loss=0.414] 


Saving weights-->
Epoch 39 : Loss = 0.40501274188537767 , Training Accuracy=92.88154673897097


In [54]:
def test_loop(model,dataloader,loss_fun,device):
    model.eval()
    model.to(device)
    losses = []
    samples,correct = 0,0
    loop = tqdm(enumerate(dataloader), total=len(dataloader), leave=True)
    with torch.no_grad():
        for batch,(x,y) in loop:
            # put on cuda
            x = x.to(device)
            y = y.to(device)

            # forward pass
            y_pred = model(x,y)
            
            # caclulate test loss
            loss = loss_fun(y_pred,y.reshape(-1))
            losses.append(loss.item())

            # accuracy over entire dataset
            _,predpos=y_pred.max(1)
            samples+=len(y.reshape(-1))
            correct+=(predpos==y.reshape(-1)).sum().item()
            
            # Update TQDM progress bar
            loop.set_postfix(loss=loss.item())

    print("Final Test Accuracy = ",100 * (correct/samples))

In [55]:
test_loop(model,test_loader,loss_fun,device)

100%|██████████| 56/56 [00:22<00:00,  2.52it/s, loss=7.08]

Final Test Accuracy =  54.5849563703958



