In [1]:
!pip3 install torch torchvision torchaudio



In [2]:
# Imports
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch.nn.utils.rnn import pad_sequence
from torchtext.vocab import GloVe
from spacy.tokenizer import Tokenizer
from sklearn.model_selection import train_test_split
import spacy
import pandas as pd
import numpy as np
import os
import re
from nltk.corpus import stopwords 
import random
from tqdm import tqdm



In [3]:
# Tokenizer using spacy
# Load the small English pipeline and build a bare Tokenizer that is given
# only the pipeline's vocab.
# NOTE(review): no prefix/suffix/infix rules are passed here, so this
# presumably splits on whitespace only — confirm against the spaCy docs.
nlp = spacy.load("en_core_web_sm")
tokenizer = Tokenizer(nlp.vocab)

In [4]:
# Add data from files into dataframe for easier access
def create_dataframe(source_text_path,target_text_path):
    """Load paired article/summary ``.txt`` files into a DataFrame.

    Files are paired by name: every ``.txt`` file in ``source_text_path`` must
    have a file of the same name in ``target_text_path`` and vice versa.
    Directory listings are sorted first, because ``os.listdir`` returns
    entries in arbitrary order and the two folders could otherwise be paired
    incorrectly.

    Args:
        source_text_path: Directory holding the article files.
        target_text_path: Directory holding the summary files.

    Returns:
        A DataFrame with columns ``['headlines', 'text']`` (article text and
        summary text respectively), one row per file, ordered by file name.
        An empty DataFrame with those columns if no ``.txt`` files exist.

    Raises:
        ValueError: If the two directories do not contain the same set of
            ``.txt`` file names.
    """
    source_names = sorted(name for name in os.listdir(source_text_path) if name.endswith('.txt'))
    target_names = sorted(name for name in os.listdir(target_text_path) if name.endswith('.txt'))
    if source_names != target_names:
        unmatched = sorted(set(source_names) ^ set(target_names))
        raise ValueError(
            'Source and target directories do not contain the same .txt files; '
            'unmatched: {0}'.format(unmatched)
        )

    # Collect rows in a list and build the frame once; growing a DataFrame
    # row by row re-allocates on every insert.
    records = []
    for name in source_names:
        # latin-1 is kept from the original loader: it decodes any byte.
        with open(os.path.join(source_text_path, name), 'r', encoding='latin-1') as file:
            source_text = file.read()
        with open(os.path.join(target_text_path, name), 'r', encoding='latin-1') as file:
            target_text = file.read()
        records.append((source_text, target_text))
    return pd.DataFrame(records, columns=['headlines', 'text'])

In [5]:
# Check accuracy function
def check_accuracy(output,labels):
    """Return the percentage of rows whose highest-scoring class equals the label.

    Args:
        output: 2-D score tensor; the prediction is taken along dimension 1.
        labels: 1-D tensor of target class indices, one per row of ``output``.

    Returns:
        A 0-dim tensor holding ``correct / len(labels) * 100``.
    """
    predicted = output.max(dim=1).indices
    hits = predicted.eq(labels).sum()
    return hits / len(labels) * 100

# Save checkpoint
def save_checkpoint(state,filename='weights.pth.tar'):
    """Announce the save on stdout, then serialise ``state`` to ``filename`` with ``torch.save``."""
    print('Saving weights-->')
    torch.save(obj=state, f=filename)

# Load checkpoint
def load_checkpoint(checkpoint,model,optim):
    """Restore model and optimizer state from a checkpoint mapping.

    Args:
        checkpoint: Mapping with a ``'state_dict'`` entry (for the model) and
            an ``'optimizer'`` entry (for the optimizer).
        model: Object whose ``load_state_dict`` receives ``checkpoint['state_dict']``.
        optim: Object whose ``load_state_dict`` receives ``checkpoint['optimizer']``.
    """
    print('Loading weights-->')
    # The model is restored first, then the optimizer.
    for receiver, key in ((model, 'state_dict'), (optim, 'optimizer')):
        receiver.load_state_dict(checkpoint[key])

In [6]:
# Load one article/summary frame per BBC category. Each category has a folder
# of the same name under both "News Articles" and "Summaries".
_BBC_ROOT = "/kaggle/input/bbc-news-summary/BBC News Summary"
df1, df2, df3, df4, df5 = [
    create_dataframe(f"{_BBC_ROOT}/News Articles/{category}", f"{_BBC_ROOT}/Summaries/{category}")
    for category in ("business", "entertainment", "politics", "sport", "tech")
]

In [7]:
# Stack the five per-category frames into a single frame; ignore_index=True
# discards the per-frame row labels and renumbers the rows.
df = pd.concat([df1, df2, df3, df4, df5], ignore_index=True)

In [8]:
# Split into train and test sets
# Rename the loader's columns to say what they hold, then hold out 20% of the
# rows as a test split (fixed seed so the split is repeatable).
df = df.rename(columns={"headlines": "source_text", "text": "summary_text"})
X = df["source_text"]
Y = df["summary_text"]
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=42, test_size=0.2)
# Re-assemble each split into a two-column frame; the rows keep the index
# labels they had in `df`.
train_df = pd.DataFrame(dict(source_text=X_train, summary_text=Y_train))
test_df = pd.DataFrame(dict(source_text=X_test, summary_text=Y_test))

In [9]:
# Lookup table mapping English contractions to their expanded form; used by
# text_cleaner. text_cleaner lower-cases the text before looking words up, so
# the capitalised keys below (e.g. "I'd") can never match there.
contraction_mapping = {"ain't": "is not", "aren't": "are not","can't": "cannot", "'cause": "because", "could've": "could have", "couldn't": "could not",

                           "didn't": "did not", "doesn't": "does not", "don't": "do not", "hadn't": "had not", "hasn't": "has not", "haven't": "have not",

                           "he'd": "he would","he'll": "he will", "he's": "he is", "how'd": "how did", "how'd'y": "how do you", "how'll": "how will", "how's": "how is",

                           "I'd": "I would", "I'd've": "I would have", "I'll": "I will", "I'll've": "I will have","I'm": "I am", "I've": "I have", "i'd": "i would",

                           "i'd've": "i would have", "i'll": "i will",  "i'll've": "i will have","i'm": "i am", "i've": "i have", "isn't": "is not", "it'd": "it would",

                           "it'd've": "it would have", "it'll": "it will", "it'll've": "it will have","it's": "it is", "let's": "let us", "ma'am": "madam",

                           "mayn't": "may not", "might've": "might have","mightn't": "might not","mightn't've": "might not have", "must've": "must have",

                           "mustn't": "must not", "mustn't've": "must not have", "needn't": "need not", "needn't've": "need not have","o'clock": "of the clock",

                           "oughtn't": "ought not", "oughtn't've": "ought not have", "shan't": "shall not", "sha'n't": "shall not", "shan't've": "shall not have",

                           "she'd": "she would", "she'd've": "she would have", "she'll": "she will", "she'll've": "she will have", "she's": "she is",

                           "should've": "should have", "shouldn't": "should not", "shouldn't've": "should not have", "so've": "so have","so's": "so as",

                           "this's": "this is","that'd": "that would", "that'd've": "that would have", "that's": "that is", "there'd": "there would",

                           "there'd've": "there would have", "there's": "there is", "here's": "here is","they'd": "they would", "they'd've": "they would have",

                           "they'll": "they will", "they'll've": "they will have", "they're": "they are", "they've": "they have", "to've": "to have",

                           "wasn't": "was not", "we'd": "we would", "we'd've": "we would have", "we'll": "we will", "we'll've": "we will have", "we're": "we are",

                           "we've": "we have", "weren't": "were not", "what'll": "what will", "what'll've": "what will have", "what're": "what are",

                           "what's": "what is", "what've": "what have", "when's": "when is", "when've": "when have", "where'd": "where did", "where's": "where is",

                           "where've": "where have", "who'll": "who will", "who'll've": "who will have", "who's": "who is", "who've": "who have",

                           "why's": "why is", "why've": "why have", "will've": "will have", "won't": "will not", "won't've": "will not have",

                           "would've": "would have", "wouldn't": "would not", "wouldn't've": "would not have", "y'all": "you all",

                           "y'all'd": "you all would","y'all'd've": "you all would have","y'all're": "you all are","y'all've": "you all have",

                           "you'd": "you would", "you'd've": "you would have", "you'll": "you will", "you'll've": "you will have",

                           "you're": "you are", "you've": "you have"}


# English stop words as a set, so the membership test in text_cleaner is a
# hash lookup.
# NOTE(review): presumably requires the NLTK 'stopwords' corpus to already be
# present on the machine — confirm, or download it before this cell.
stop_words = set(stopwords.words('english'))

In [10]:
def text_cleaner(text):
    """Normalise raw text into a space-separated string of lowercase words.

    Steps, in order:
      1. lower-case the text;
      2. drop parenthesised spans and double-quote characters;
      3. expand contractions found in the module-level ``contraction_mapping``;
      4. strip possessive ``'s``;
      5. replace every non-letter character with a space;
      6. drop words found in the module-level ``stop_words``.

    Words are split on any whitespace before the contraction lookup. Splitting
    on a single space only (as was done before) left words fused across
    newlines and tabs (``"can't\\nhe"``), so contractions at line boundaries
    were never expanded.

    Args:
        text: The raw document as a string.

    Returns:
        The cleaned words joined by single spaces (``""`` if nothing remains).
    """
    newString = text.lower()
    newString = re.sub(r'\([^)]*\)', '', newString)
    newString = newString.replace('"', '')
    # Any-whitespace split; collapsing whitespace here is harmless because the
    # final step re-splits on whitespace anyway.
    expanded = [contraction_mapping.get(word, word) for word in newString.split()]
    newString = ' '.join(expanded)
    newString = re.sub(r"'s\b", "", newString)
    newString = re.sub(r"[^a-zA-Z]", " ", newString)
    return " ".join(word for word in newString.split() if word not in stop_words)

In [11]:
# Tokenize and lowercase text using spacy
def _to_tokens(document):
    """Clean one raw document and return its tokens as lowercase strings."""
    return [token.text.lower() for token in tokenizer(text_cleaner(document))]

# Replace every text column, in both splits, by its token list.
# Not re-runnable: afterwards the columns hold lists, and text_cleaner calls
# .lower() on its argument.
for _frame in (train_df, test_df):
    for _column in ('source_text', 'summary_text'):
        _frame[_column] = _frame[_column].apply(_to_tokens)

In [12]:
# Add START AND END tokens to summary
# Wrap every token list — source AND summary, in both splits — between the
# '_START_' and '_END_' markers.
# Not re-runnable: each execution adds another pair of markers.
for _frame in (train_df, test_df):
    for _column in ('source_text', 'summary_text'):
        _frame[_column] = _frame[_column].apply(lambda tokens: ['_START_', *tokens, '_END_'])

In [13]:
train_df.head()

Unnamed: 0,source_text,summary_text
1490,"[_START_, ferguson, fears, milan, cutting, edg...","[_START_, loss, could, worse, quality, bring, ..."
2001,"[_START_, ask, jeeves, joins, web, log, market...","[_START_, jim, lanzone, vice, president, searc..."
1572,"[_START_, safin, cool, wimbledon, newly, crown...","[_START_, expect, sampras, favourite, pressure..."
1840,"[_START_, mobiles, rack, years, use, mobile, p...","[_START_, cellnet, vodafone, mobile, phone, op..."
610,"[_START_, eminem, secret, gig, venue, revealed...","[_START_, fourth, album, rap, star, sale, two,..."


In [14]:
# Build vocabularies - each word has an index, note : words sorted in ascending order
# Every token list of the training split, sources followed by summaries.
all_tokens = train_df['source_text'].tolist() + train_df['summary_text'].tolist()
# Distinct training tokens in ascending string order; a word's position in
# this ordering is its vocabulary index.
vocab_words = sorted({token for tokens in all_tokens for token in tokens})
source_vocab = {word: position for position, word in enumerate(vocab_words)}
# The target side uses the same word -> index mapping (held as a separate dict).
target_vocab = dict(source_vocab)

In [15]:
print(all_tokens[10])

['_START_', 'watchdog', 'probes', 'e', 'mail', 'deletions', 'information', 'commissioner', 'says', 'urgently', 'asking', 'details', 'cabinet', 'office', 'orders', 'telling', 'staff', 'delete', 'e', 'mails', 'three', 'months', 'old', 'richard', 'thomas', 'totally', 'condemned', 'deletion', 'e', 'mails', 'prevent', 'disclosure', 'freedom', 'information', 'laws', 'coming', 'force', 'january', 'government', 'guidance', 'said', 'e', 'mails', 'deleted', 'served', 'current', 'purpose', 'mr', 'thomas', 'said', 'tories', 'lib', 'dems', 'questioned', 'timing', 'new', 'rules', 'tory', 'leader', 'michael', 'howard', 'written', 'tony', 'blair', 'demanding', 'explanation', 'new', 'rules', 'e', 'mail', 'retention', 'monday', 'lib', 'dem', 'constitutional', 'affairs', 'committee', 'chairman', 'alan', 'beith', 'warned', 'deletion', 'millions', 'government', 'e', 'mails', 'could', 'harm', 'ability', 'key', 'probes', 'like', 'hutton', 'inquiry', 'timing', 'new', 'rules', 'freedom', 'information', 'act', 

In [16]:
len(source_vocab)

25308

In [17]:
source_vocab == target_vocab

True

In [18]:
temp = list(sorted(source_vocab.items()))
for word, idx in temp[-5:]:
    print(word,idx)

zuluaga 25303
zurich 25304
zutons 25305
zvonareva 25306
zvyagintsev 25307


In [19]:
# Load pretrained GloVe embeddings
# 300-dimensional GloVe '6B' vectors. The recorded output of this cell shows an
# 862MB archive being fetched into .vector_cache/, so the first run is slow.
global_vectors = GloVe(name='6B', dim=300)

.vector_cache/glove.6B.zip: 862MB [02:38, 5.43MB/s]                              
100%|█████████▉| 399999/400000 [01:07<00:00, 5967.08it/s]


In [20]:
# Vocabulary words ordered by their index, so that row i of the stacked tensor
# is the GloVe vector of the word whose vocabulary index is i.
words_by_index = sorted(source_vocab, key=source_vocab.get)
source_vectors = torch.stack([global_vectors.get_vecs_by_tokens(word) for word in words_by_index])
print(type(source_vectors), source_vectors.shape)

<class 'torch.Tensor'> torch.Size([25308, 300])


In [21]:
'''
source_vectors is the word-to-vector lookup table we built from the pretrained GloVe embeddings.
We use it to initialise the Embedding layer, so the embeddings are not trained from scratch.
Say a sentence is passed as input as a list of word indices, e.g. [658930, 9289283, 2624242, 89798, 53424].
The Embedding layer looks up every word of the sentence in source_vectors,
so this input of size [1,5] is converted to [1,5,300] (300 is the dimension of the GloVe vectors loaded above).
'''

'\nThe source_vectors is the predefined word to vector mapping we have created from pretrained Glove Embeddings.\nWe use this as input to the Embedding Layer, which will not be trained from scratch.\nLets say a sentence is passed as input Eg : [658930, 9289283, 2624242, 89798, 53424]\nThe Embedding layer performs a lookup operation for every word in sentence using the source_vectors. \nand this input of size [1,5] gets converted to [1,5,100] \n'

In [22]:
# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [23]:
# Define a custom dataset class
class CustomDataset(Dataset):
    """Map-style dataset of (source, summary) token lists encoded as index tensors.

    Args:
        source_texts: Sequence of token lists (one per document).
        target_summaries: Sequence of token lists, aligned with ``source_texts``.
        source_vocab: Mapping word -> integer index for source tokens.
        target_vocab: Mapping word -> integer index for summary tokens.
        unk_token: Optional name of an "unknown word" entry. A word missing
            from a vocab is encoded as that vocab's ``unk_token`` index when
            the vocab has such an entry; otherwise the word is skipped.
            Previously any out-of-vocabulary word raised ``KeyError``, which
            made held-out data unusable with a vocab built from training
            data only.
    """

    def __init__(self, source_texts, target_summaries, source_vocab, target_vocab, unk_token=None):
        self.source_texts = source_texts
        self.target_summaries = target_summaries
        self.source_vocab = source_vocab
        self.target_vocab = target_vocab
        self.unk_token = unk_token

    def __len__(self):
        """Number of (source, summary) pairs."""
        return len(self.source_texts)

    def _encode(self, tokens, vocab):
        """Turn a token list into a 1-D int64 tensor of vocabulary indices."""
        unk_index = vocab.get(self.unk_token) if self.unk_token is not None else None
        indices = []
        for word in tokens:
            index = vocab.get(word, unk_index)
            if index is not None:  # None: unknown word and no UNK entry -> skip
                indices.append(index)
        # Explicit dtype so an empty sequence is still an integer tensor.
        return torch.tensor(indices, dtype=torch.long)

    def __getitem__(self, idx):
        """Return ``(source_indices, summary_indices)`` for pair ``idx``."""
        source_text = self._encode(self.source_texts[idx], self.source_vocab)
        target_summary = self._encode(self.target_summaries[idx], self.target_vocab)
        return source_text, target_summary

In [24]:
# Create custom datasets
# Both splits are encoded with the same vocabularies, which were built from
# the training split's tokens only.
train_dataset = CustomDataset(train_df['source_text'].tolist(), train_df['summary_text'].tolist(), source_vocab, target_vocab)
test_dataset = CustomDataset(test_df['source_text'].tolist(), test_df['summary_text'].tolist(), source_vocab, target_vocab)

In [25]:
'''
Note : 
In PyTorch, the `collate_fn` parameter in the `DataLoader` can be either a function or an object of a class. Both approaches are valid, and the choice depends on your preference and the complexity of your collation logic.

1. Function as `collate_fn`:
def my_collate_fn(batch):
    # Your custom collation logic here
    return processed_batch
# Use the function with DataLoader
train_loader = DataLoader(dataset, batch_size=64, collate_fn=my_collate_fn)

2. Class as `collate_fn`:
class MyCollateClass:
    def __call__(self, batch):
        # Your custom collation logic here
        return processed_batch
# Instantiate the class and use it with DataLoader
my_collate_instance = MyCollateClass()
train_loader = DataLoader(dataset, batch_size=64, collate_fn=my_collate_instance)

Using a class allows you to maintain state between batches if needed, as the class instance retains its state between calls. This can be beneficial if your collation logic requires some persistent information.

The key point is that the `collate_fn` parameter should be a callable (a function or an object with a `__call__` method) that takes a list of batch data and returns the processed batch. The processing typically involves padding sequences, converting data types, or any other necessary steps to prepare the batch for the model.
'''

'\nNote : \nIn PyTorch, the `collate_fn` parameter in the `DataLoader` can be either a function or an object of a class. Both approaches are valid, and the choice depends on your preference and the complexity of your collation logic.\n\n1. Function as `collate_fn`:\ndef my_collate_fn(batch):\n    # Your custom collation logic here\n    return processed_batch\n# Use the function with DataLoader\ntrain_loader = DataLoader(dataset, batch_size=64, collate_fn=my_collate_fn)\n\n2. Class as `collate_fn`:\nclass MyCollateClass:\n    def __call__(self, batch):\n        # Your custom collation logic here\n        return processed_batch\n# Instantiate the class and use it with DataLoader\nmy_collate_instance = MyCollateClass()\ntrain_loader = DataLoader(dataset, batch_size=64, collate_fn=my_collate_instance)\n\nUsing a class allows you to maintain state between batches if needed, as the class instance retains its state between calls. This can be beneficial if your collation logic requires some pe

In [26]:
# Define collate function for DataLoader
def collate_fn(batch):
    """Collate ``(source, target)`` tensor pairs into two right-padded batches.

    Args:
        batch: List of ``(source_tensor, target_tensor)`` pairs of varying length.

    Returns:
        ``(padded_sources, padded_targets)``, each batch-first and padded to
        the longest sequence on its own side with ``pad_sequence``'s default
        fill value.
    """
    source_seqs, target_seqs = map(list, zip(*batch))
    return (
        pad_sequence(source_seqs, batch_first=True),
        pad_sequence(target_seqs, batch_first=True),
    )

In [27]:
# Define the Encoder Architecture using LSTM
class Encoder(nn.Module):
    """Bidirectional LSTM encoder over pretrained, fine-tunable embeddings.

    Args:
        source_vectors: 2-D tensor used to initialise the embedding table
            (``freeze=False``, so it is updated during training).
        embedding_dim: Input size of the LSTM; must equal the width of
            ``source_vectors``.
        hidden_dim: Hidden size of the LSTM, per direction.
        n_layers: Number of stacked LSTM layers.
        dropout: Dropout value handed to ``nn.LSTM``.
    """

    def __init__(self, source_vectors, embedding_dim, hidden_dim, n_layers, dropout):
        super().__init__()
        self.embedding = nn.Embedding.from_pretrained(source_vectors, freeze=False)
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            num_layers=n_layers,
            dropout=dropout,
            bidirectional=True,
            batch_first=True,
        )

    def forward(self, X):
        """Encode a batch of index sequences and return the final LSTM states.

        Args:
            X: Integer tensor of token indices, ``[batch, seq_len]``.

        Returns:
            ``(hidden_state, cell_state)``; because the LSTM is bidirectional
            each has shape ``[2 * n_layers, batch, hidden_dim]``. The
            per-step outputs of the LSTM are discarded.
        """
        embedded = self.embedding(X)  # [batch, seq_len, embedding_dim]
        # Reject empty batches / empty sequences before they reach the LSTM.
        assert embedded.shape[0]>0 and embedded.shape[1]>0
        _, final_states = self.lstm(embedded)
        hidden_state, cell_state = final_states
        return hidden_state, cell_state

# Define the Decoder Architecture using LSTM
class Decoder(nn.Module):
    """Step-by-step LSTM decoder with scheduled teacher forcing.

    Args:
        source_vectors: 2-D tensor used to initialise the embedding table
            (``freeze=False``, so it is updated during training).
        target_vocab_size: Number of classes produced per step.
        embedding_dim: Input size of the LSTM; must equal the width of
            ``source_vectors``.
        hidden_dim: Hidden size of the LSTM, per direction.
        n_layers: Number of stacked LSTM layers.
        dropout: Dropout value handed to ``nn.LSTM``.
    """

    def __init__(self, source_vectors, target_vocab_size, embedding_dim, hidden_dim, n_layers, dropout):
        super(Decoder, self).__init__()
        self.hidden_dim = hidden_dim
        self.target_vocab_size = target_vocab_size
        self.embedding = nn.Embedding.from_pretrained(source_vectors, freeze=False)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers, bidirectional=True, dropout=dropout, batch_first=True)
        # The LSTM is bidirectional, so each step emits 2 * hidden_dim features.
        self.fc = nn.Linear(hidden_dim*2,target_vocab_size)

    def forward(self, hidden_state, cell_state, Y, force_correction=0.5):
        """Decode ``Y.shape[1]`` steps and return the per-step class scores.

        Args:
            hidden_state: Initial LSTM hidden state,
                ``[2 * n_layers, batch, hidden_dim]``.
            cell_state: Initial LSTM cell state, same shape as ``hidden_state``.
            Y: Ground-truth index sequences, ``[batch, seq_len]``.
            force_correction: Probability, per step, that the next input is
                the ground-truth token ``Y[:, i]`` (teacher forcing) rather
                than the model's own prediction. ``1.0`` always corrects,
                ``0.0`` never does.

        Returns:
            Float tensor ``[batch * seq_len, target_vocab_size]``; row
            ``b * seq_len + i`` holds the scores for step ``i`` of sample
            ``b``, matching ``Y.reshape(-1)``.
        """
        seq_len = Y.shape[1]
        if seq_len == 0:
            return self.fc.weight.new_zeros((0, self.target_vocab_size))

        # Per-step scores are collected in a list and stacked at the end.
        # Writing in place into a pre-allocated requires_grad leaf fails on
        # CPU and tied the module to a global device.
        step_scores = []

        # Decoding starts from the first token of every target sequence (the
        # start marker); starting from Y[:, 1] fed a real target word and
        # failed for length-1 targets.
        X = Y[:, 0]
        for i in range(seq_len):
            decoder_input = self.embedding(X.unsqueeze(1))  # [batch, 1, embedding_dim]
            decoder_output, (hidden_state, cell_state) = self.lstm(decoder_input, (hidden_state, cell_state))
            scores = self.fc(decoder_output).squeeze(1)  # [batch, target_vocab_size]
            step_scores.append(scores)
            indexes = scores.argmax(dim=1)  # [batch]
            # One random draw per step: keep the model's prediction with
            # probability (1 - force_correction), else use the ground truth.
            X = indexes if random.random() < (1 - force_correction) else Y[:, i]

        outputs = torch.stack(step_scores, dim=1)  # [batch, seq_len, target_vocab_size]
        return outputs.reshape(-1, self.target_vocab_size)

In [28]:
class EncDecLSTM(nn.Module):
    """Sequence-to-sequence wrapper that chains an encoder and a decoder.

    Args:
        enc: Module called as ``enc(X)`` returning ``(hidden_state, cell_state)``.
        dec: Module called as ``dec(hidden_state, cell_state, Y)``.
    """

    def __init__(self,enc,dec):
        super().__init__()
        self.enc = enc
        self.dec = dec

    def forward(self,X,Y):
        """Encode ``X``, then decode against ``Y`` from the encoder's final states."""
        encoder_states = self.enc(X)
        return self.dec(*encoder_states, Y)

In [29]:
# Instantiate the model
# Hyper-parameters for the run.
# NOTE(review): input_dim is not referenced by any other cell visible here.
input_dim = len(source_vocab)
output_dim = len(target_vocab)
learning_rate = 0.001
embedding_dim = 300  # must equal the width of source_vectors (300-d GloVe)
hidden_dim = 512
n_layers = 2
dropout = 0.2
num_epochs = 25
num_workers = 2

# Encoder and decoder are both initialised from the same pretrained matrix;
# each builds its own embedding layer from it.
encoder = Encoder(source_vectors, embedding_dim, hidden_dim, n_layers, dropout)
decoder = Decoder(source_vectors, output_dim, embedding_dim, hidden_dim, n_layers, dropout)
model = EncDecLSTM(encoder,decoder)
print(model)

EncDecLSTM(
  (enc): Encoder(
    (embedding): Embedding(25308, 300)
    (lstm): LSTM(300, 512, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True)
  )
  (dec): Decoder(
    (embedding): Embedding(25308, 300)
    (lstm): LSTM(300, 512, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True)
    (fc): Linear(in_features=1024, out_features=25308, bias=True)
  )
)


In [30]:
trainable_params = sum(p.numel() for p in encoder.parameters() if p.requires_grad)
print(trainable_params)

17226192


In [31]:
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(trainable_params)

60393084


In [32]:
# Specify optimizer and loss function
# Adam over every parameter of the combined model (encoder and decoder).
optimizer = optim.Adam(model.parameters(),lr=learning_rate)
# Cross-entropy with default settings.
# NOTE(review): no ignore_index is set, so zero-padded target positions
# presumably count towards the loss — confirm this is intended.
loss_fun = nn.CrossEntropyLoss()

In [33]:
# Create dataloaders
# Batches of 8 padded by collate_fn; only the training loader shuffles.
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=collate_fn, num_workers=num_workers)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False, collate_fn=collate_fn, num_workers=num_workers)

In [34]:
source_dummy,target_dummy = next(iter(train_loader))

In [35]:
print(source_dummy.shape,target_dummy.shape)

torch.Size([8, 324]) torch.Size([8, 143])


In [36]:
y_pred = model(source_dummy,target_dummy)
print(y_pred.shape,target_dummy.shape)

torch.Size([1144, 25308]) torch.Size([8, 143])


In [39]:
def train_loop(encoder,decoder,dataloader,loss_fun,optimizer,device,epochs=None):
    """Train the encoder/decoder pair and checkpoint whenever the epoch loss improves.

    The forward pass and the checkpoint use the ``encoder`` and ``decoder``
    that were passed in (wrapped in an ``EncDecLSTM``), not a module-level
    model, so the function trains what the caller hands it. The checkpoint's
    ``'state_dict'`` keys are those of an ``EncDecLSTM`` (``enc.*`` / ``dec.*``).

    Args:
        encoder: Encoder module; moved to ``device`` and put in train mode.
        decoder: Decoder module; moved to ``device`` and put in train mode.
        dataloader: Sized iterable yielding ``(x, y)`` batches of index tensors.
        loss_fun: Callable ``loss_fun(scores, flat_targets)`` returning a scalar tensor.
        optimizer: Optimizer over the encoder's and decoder's parameters.
        device: Device the batches and modules are moved to.
        epochs: Number of epochs to run; defaults to the module-level
            ``num_epochs`` when ``None``.

    Returns:
        None. Progress is printed, and the best state (lowest mean epoch
        loss so far) is written through ``save_checkpoint``.
    """
    total_epochs = num_epochs if epochs is None else epochs

    # The wrapper shares its parameters with the modules passed in.
    seq2seq = EncDecLSTM(encoder, decoder)
    seq2seq.to(device)
    seq2seq.train()

    min_loss = None
    for epoch in range(total_epochs):
        losses = []
        accuracies = []
        loop = tqdm(enumerate(dataloader), total=len(dataloader), leave=True)
        for batch,(x,y) in loop:
            # put on the training device
            x = x.to(device)
            y = y.to(device)
            flat_targets = y.reshape(-1)

            # forward pass
            y_pred = seq2seq(x,y)

            # calculate loss & accuracy
            loss = loss_fun(y_pred,flat_targets)
            accuracy = check_accuracy(y_pred,flat_targets)
            loss_value = loss.detach().item()
            accuracy_value = accuracy.detach().item()
            losses.append(loss_value)
            accuracies.append(accuracy_value)

            # zero out prior gradients, backprop, update weights
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Update TQDM progress bar
            loop.set_description(f"Epoch [{epoch}/{total_epochs}] ")
            loop.set_postfix(loss=loss_value, accuracy=accuracy_value)

        if not losses:
            # An empty dataloader would otherwise divide by zero below.
            print('Epoch {0} : dataloader produced no batches, stopping'.format(epoch))
            break

        moving_loss = sum(losses) / len(losses)
        moving_accuracy = sum(accuracies) / len(accuracies)
        # Save check point on the first epoch and on every improvement
        if min_loss is None or moving_loss < min_loss:
            min_loss = moving_loss
            checkpoint = {'state_dict': seq2seq.state_dict(), 'optimizer': optimizer.state_dict()}
            save_checkpoint(checkpoint)
        print('Epoch {0} : Loss = {1} , Accuracy={2}'.format(epoch, moving_loss, moving_accuracy))

In [40]:
train_loop(encoder,decoder,train_loader,loss_fun,optimizer,device)

Epoch [0/25] : 100%|██████████| 223/223 [05:57<00:00,  1.60s/it, accuracy=36.4, loss=5.32]


Saving weights-->
Epoch 0 : Loss = 4.8169199768203255 , Accuracy=43.79491660627014


Epoch [1/25] : 100%|██████████| 223/223 [05:53<00:00,  1.59s/it, accuracy=51.7, loss=3.85]


Saving weights-->
Epoch 1 : Loss = 4.610716999378974 , Accuracy=44.13662564914857


Epoch [2/25] : 100%|██████████| 223/223 [06:00<00:00,  1.62s/it, accuracy=36.5, loss=5.07]


Saving weights-->
Epoch 2 : Loss = 4.443664215070784 , Accuracy=44.40977132480775


Epoch [3/25] : 100%|██████████| 223/223 [05:59<00:00,  1.61s/it, accuracy=41.8, loss=4.56]


Saving weights-->
Epoch 3 : Loss = 4.279679609521088 , Accuracy=44.82060778835964


Epoch [4/25] : 100%|██████████| 223/223 [06:00<00:00,  1.62s/it, accuracy=28.5, loss=5.35]


Saving weights-->
Epoch 4 : Loss = 4.144228878577194 , Accuracy=45.10568827470856


Epoch [5/25] : 100%|██████████| 223/223 [05:57<00:00,  1.60s/it, accuracy=52.8, loss=3.55]


Saving weights-->
Epoch 5 : Loss = 4.0475257624425165 , Accuracy=44.93337908347091


Epoch [6/25] : 100%|██████████| 223/223 [05:56<00:00,  1.60s/it, accuracy=38.7, loss=4.17]


Saving weights-->
Epoch 6 : Loss = 3.9077368825005845 , Accuracy=45.363040864200336


Epoch [7/25] : 100%|██████████| 223/223 [05:57<00:00,  1.60s/it, accuracy=49.7, loss=3.56]


Saving weights-->
Epoch 7 : Loss = 3.7733583637417163 , Accuracy=45.759215034176954


Epoch [8/25] : 100%|██████████| 223/223 [05:59<00:00,  1.61s/it, accuracy=48.1, loss=3.4] 


Saving weights-->
Epoch 8 : Loss = 3.5881300282585245 , Accuracy=46.86658919980173


Epoch [9/25] : 100%|██████████| 223/223 [05:59<00:00,  1.61s/it, accuracy=34.8, loss=4.39]


Saving weights-->
Epoch 9 : Loss = 3.4644860563791386 , Accuracy=47.372889984883535


Epoch [10/25] : 100%|██████████| 223/223 [05:52<00:00,  1.58s/it, accuracy=25, loss=4.74]  


Saving weights-->
Epoch 10 : Loss = 3.3623503072379415 , Accuracy=47.52821568202545


Epoch [11/25] : 100%|██████████| 223/223 [06:01<00:00,  1.62s/it, accuracy=42.4, loss=3.66]


Saving weights-->
Epoch 11 : Loss = 3.1475155075569323 , Accuracy=49.79815477106069


Epoch [12/25] : 100%|██████████| 223/223 [05:55<00:00,  1.60s/it, accuracy=49.3, loss=3.15]


Saving weights-->
Epoch 12 : Loss = 3.0223503973451966 , Accuracy=50.75443864830941


Epoch [13/25] : 100%|██████████| 223/223 [06:00<00:00,  1.61s/it, accuracy=33.7, loss=4.03]


Saving weights-->
Epoch 13 : Loss = 2.811319390754529 , Accuracy=53.09127147315329


Epoch [14/25] : 100%|██████████| 223/223 [05:55<00:00,  1.59s/it, accuracy=50, loss=2.98]  


Saving weights-->
Epoch 14 : Loss = 2.6896044785666358 , Accuracy=54.140135666714656


Epoch [15/25] : 100%|██████████| 223/223 [06:01<00:00,  1.62s/it, accuracy=58.9, loss=2.46]


Saving weights-->
Epoch 15 : Loss = 2.528768129947474 , Accuracy=56.080495945541315


Epoch [16/25] : 100%|██████████| 223/223 [06:00<00:00,  1.62s/it, accuracy=50.3, loss=2.87]


Saving weights-->
Epoch 16 : Loss = 2.382731790499837 , Accuracy=57.724022116896286


Epoch [17/25] : 100%|██████████| 223/223 [06:02<00:00,  1.63s/it, accuracy=57.2, loss=2.28]


Saving weights-->
Epoch 17 : Loss = 2.216430801447197 , Accuracy=59.89502105370765


Epoch [18/25] : 100%|██████████| 223/223 [05:54<00:00,  1.59s/it, accuracy=45.3, loss=3.15]


Saving weights-->
Epoch 18 : Loss = 2.148166098936791 , Accuracy=60.54665638192352


Epoch [19/25] : 100%|██████████| 223/223 [06:01<00:00,  1.62s/it, accuracy=49.4, loss=2.79]


Saving weights-->
Epoch 19 : Loss = 1.973728955059308 , Accuracy=63.227892238462985


Epoch [20/25] : 100%|██████████| 223/223 [06:02<00:00,  1.63s/it, accuracy=52.7, loss=2.49]


Saving weights-->
Epoch 20 : Loss = 1.863277077140295 , Accuracy=64.87480899143647


Epoch [21/25] : 100%|██████████| 223/223 [06:01<00:00,  1.62s/it, accuracy=76, loss=1.18]   


Saving weights-->
Epoch 21 : Loss = 1.742361314360871 , Accuracy=66.6836456161978


Epoch [22/25] : 100%|██████████| 223/223 [05:56<00:00,  1.60s/it, accuracy=74, loss=1.25]   


Saving weights-->
Epoch 22 : Loss = 1.6295097064009696 , Accuracy=68.6317094195584


Epoch [23/25] : 100%|██████████| 223/223 [05:59<00:00,  1.61s/it, accuracy=64.4, loss=1.77] 


Saving weights-->
Epoch 23 : Loss = 1.5311317812701513 , Accuracy=70.27389476759016


Epoch [24/25] : 100%|██████████| 223/223 [06:03<00:00,  1.63s/it, accuracy=71.7, loss=1.45] 


Saving weights-->
Epoch 24 : Loss = 1.3879474823784934 , Accuracy=72.81760350043463
