In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch.nn.utils.rnn import pad_sequence
import random
import numpy as np
import tqdm
import math
from torch.autograd import Variable

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def build_dictionary(dictionary_file_location):
        text_file = open(dictionary_file_location,"r")
        full_dictionary = text_file.read().splitlines()
        text_file.close()
        return full_dictionary
    
full_dictionary_location = "words_250000_train.txt"
full_dictionary = build_dictionary(full_dictionary_location)

In [3]:
class Dict_Dataset(Dataset):
    def __init__(self, dictionary):
        self.words = dictionary
        # Create a mapping from each letter to its corresponding index (0-25)
        self.alphabets = 'abcdefghijklmnopqrstuvwxyz'
        self.CHAR_TO_INDEX = {char: idx for idx, char in enumerate(self.alphabets)}

        self.letter_weight = {}
        for i in dictionary:
            for l in self.alphabets:
                if l in i:
                    if self.letter_weight.get(l):
                        self.letter_weight[l] += 1
                    else:
                        self.letter_weight[l] = 1

    def __len__(self):
        return len(self.words)
    
    def cnt_to_guesses(self,char_set,cnt):
        lst = list(char_set)
        return list(np.random.choice(lst, cnt, p=[self.letter_weight[i] for i in lst]/np.sum([self.letter_weight[i] for i in lst]), replace=False))
    
    def one_hot_encode(self,char):
        """Convert a character to a one-hot vector."""
        vec = torch.zeros(28)
        vec[self.CHAR_TO_INDEX[char]] = 1.0
        return vec

    def word_to_matrix(self,word, correct_guesses,wrong_guesses):
        word = word.lower()  # Ensure the word is lowercase
        matrix = torch.zeros(38, 28)  # Initialize a (38, 28) matrix with zeros 27,28 digits for depicting blanks and wrong vector respect.
        # matrix[:len(word),26]=1  ## depicting word length in our sequence
        # print(matrix)
        for i, char in enumerate(word):
            if char in correct_guesses:
                # print(char)
                matrix[i] = self.one_hot_encode(char)
        # print('rssa')
        for i, char in enumerate(wrong_guesses):
            # print(char)
            matrix[32+i] = self.one_hot_encode(char)  
            matrix[32+i,27]=1
        matrix[:len(word),26]=1  ## depicting word length in our sequence
        return matrix

    def multi_encode(self,set_char):
        string = ''.join(set_char)
        vec = torch.zeros(26)
        for char in string:
            vec[self.CHAR_TO_INDEX[char]] = 1.0
        return vec
    
    def __getitem__(self, idx):
        wrd = self.words[idx]
        set_alpha = set(wrd)

        if len(set_alpha)==1:
            return self.word_to_matrix( wrd, '', '' ), self.multi_encode(set_alpha)
        
        if 'e' in set_alpha:
            cnt_correct_guess = np.random.randint(len(set_alpha)-1) ## -1 for e, -1 for atleast one unguessed
            cnt_incorrect_guess = np.random.randint(6) ## 0 to 5
            correct_guesses = self.cnt_to_guesses(set_alpha-set('e'),cnt_correct_guess)
            wrong_guesses = self.cnt_to_guesses(set(self.alphabets)-set_alpha,cnt_incorrect_guess)
            return self.word_to_matrix( wrd, ''.join(correct_guesses)+'e', ''.join(wrong_guesses) ), self.multi_encode(set_alpha - set(''.join(correct_guesses)+'e'))
        
        elif 'a' in set_alpha:
            cnt_correct_guess = np.random.randint(len(set_alpha)-1) ## -1 for a, -1 for atleast one unguessed
            cnt_incorrect_guess = np.random.randint(5) ## 0 to 4 , one for 'e'
            correct_guesses = self.cnt_to_guesses(set_alpha-set('a'),cnt_correct_guess)
            wrong_guesses = self.cnt_to_guesses(set(self.alphabets)-set_alpha-set('e'),cnt_incorrect_guess)
            return self.word_to_matrix( wrd, ''.join(correct_guesses)+'a', ''.join(wrong_guesses)+'e' ), self.multi_encode(set_alpha - set(''.join(correct_guesses)+'a'))
        
        elif 'i' in set_alpha:
            cnt_correct_guess = np.random.randint(len(set_alpha)-1) ## -1 for i, -1 for atleast one unguessed
            cnt_incorrect_guess = np.random.randint(4) ## 0 to 3 , two for 'e','a'
            correct_guesses = self.cnt_to_guesses(set_alpha-set('i'),cnt_correct_guess)
            wrong_guesses = self.cnt_to_guesses(set(self.alphabets)-set_alpha-set('ea'),cnt_incorrect_guess)
            return self.word_to_matrix( wrd, ''.join(correct_guesses)+'i', ''.join(wrong_guesses)+'ea' ), self.multi_encode(set_alpha - set(''.join(correct_guesses)+'i'))
        
        elif 'o' in set_alpha:
            cnt_correct_guess = np.random.randint(len(set_alpha)-1) ## -1 for o, -1 for atleast one unguessed
            cnt_incorrect_guess = np.random.randint(3) ## 0 to 2 , three for 'e','a','i'
            correct_guesses = self.cnt_to_guesses(set_alpha-set('o'),cnt_correct_guess)
            wrong_guesses = self.cnt_to_guesses(set(self.alphabets)-set_alpha-set('eai'),cnt_incorrect_guess)
            return self.word_to_matrix( wrd, ''.join(correct_guesses)+'o', ''.join(wrong_guesses)+'eai' ), self.multi_encode(set_alpha - set(''.join(correct_guesses)+'o'))
        
        else:
            return self.word_to_matrix( wrd, '', 'eaio' ), self.multi_encode(set_alpha)


In [4]:
dataset = Dict_Dataset(full_dictionary)

In [5]:
class LSTMClassifier(nn.Module):
	def __init__(self, batch_size, output_size, hidden_size, vocab_size, embedding_length,device):
		super(LSTMClassifier, self).__init__()
		self.batch_size = batch_size
		self.output_size = output_size
		self.hidden_size = hidden_size
		self.vocab_size = vocab_size
		self.embedding_length = embedding_length
		self.device = device
		
		self.word_embeddings = nn.Linear(vocab_size, embedding_length)
		# self.word_embeddings.weight = nn.Parameter(weights, requires_grad=False) 
		self.lstm = nn.LSTM(embedding_length, hidden_size) # Our main hero for this tutorial
		self.label = nn.Linear(hidden_size, output_size)
		
	def forward(self, input_sentence, batch_size=None):
		input = self.word_embeddings(input_sentence) 
		input = input.permute(1, 0, 2) 
		if batch_size is None:
			h_0 = Variable(torch.zeros(1, self.batch_size, self.hidden_size).to(self.device)) 
			c_0 = Variable(torch.zeros(1, self.batch_size, self.hidden_size).to(self.device)) 
		else:
			h_0 = Variable(torch.zeros(1, batch_size, self.hidden_size).to(self.device))
			c_0 = Variable(torch.zeros(1, batch_size, self.hidden_size).to(self.device))
		output, (final_hidden_state, final_cell_state) = self.lstm(input, (h_0, c_0))
		final_output = self.label(final_hidden_state[-1]) 
		
		return final_output

In [6]:
def clip_gradient(model, clip_value):
    params = list(filter(lambda p: p.grad is not None, model.parameters()))
    for p in params:
        p.grad.data.clamp_(-clip_value, clip_value)

In [8]:
# Hyperparameters
input_dim = 28  # Input size of each sequence element
seq_len = 38    # Sequence length
num_classes = 26  # Number of classes
embed_size = 48  # Embedding size
num_heads = 4    # Number of heads in multi-head attention
hidden_dim = 196  # Hidden dimension size in the feedforward layer
num_layers = 4  # Number of Transformer Encoder layers
dropout = 0.1    # Dropout rate
batch_size = 32

# Create a dataset and data loader
dataset = Dict_Dataset(full_dictionary)
train_set, val_set = torch.utils.data.random_split(dataset, [0.8, 0.2])

train_dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True,num_workers=6,drop_last=True)
val_dataloader = DataLoader(val_set, batch_size=batch_size, shuffle=True,num_workers=4,drop_last=True)
device = 'cuda:2'
# Initialize the model, loss function, and optimizer
model = LSTMClassifier(batch_size, num_classes, hidden_dim, input_dim, embed_size,device)
# model.load_state_dict(torch.load('models/lstm_best'))

model.to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
max_epoch_loss = 99
num_epochs = 150
flag=0
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in tqdm.tqdm(train_dataloader):
        optimizer.zero_grad()
        outputs = model(inputs.to(device))
        # print(outputs)
        # stop
        loss = criterion(outputs.cpu(), labels)
        loss.backward()
        clip_gradient(model, 1e-1)
        optimizer.step()
        
        running_loss += loss.item() * inputs.size(0)

    epoch_loss = running_loss / len(dataset)
    if epoch_loss < max_epoch_loss:
        flag = 1
        max_epoch_loss = epoch_loss
        torch.save(model.state_dict(), f'models/lstm_2_{epoch}')
        torch.save(model.state_dict(), f'models/lstm_2_best')
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')

    # Evaluation
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in tqdm.tqdm(val_dataloader):
            outputs = model(inputs.to(device)).cpu()
            # print(torch.argmax(outputs.data, 1))
            # print(labels)
            predicted = torch.argmax(outputs.data, 1)
            total += labels.size(0)
            # print(torch.sum(torch.Tensor([labels[ind,i] for ind,i in enumerate(predicted)])))
            correct += torch.sum(torch.Tensor([labels[ind,i] for ind,i in enumerate(predicted)])).item()
    
    accuracy = 100 * correct / total
    if flag==1:
        flag=0
        print(f'Accuracy: {accuracy:.2f}%')


print("Training complete.")


100%|██████████| 7103/7103 [01:15<00:00, 94.53it/s] 


Epoch [1/150], Loss: 0.3268


100%|██████████| 1420/1420 [00:09<00:00, 150.46it/s]


Accuracy: 54.63%


100%|██████████| 7103/7103 [01:16<00:00, 92.92it/s] 


Epoch [2/150], Loss: 0.2747


100%|██████████| 1420/1420 [00:10<00:00, 134.53it/s]


Accuracy: 60.24%


100%|██████████| 7103/7103 [01:15<00:00, 93.53it/s] 


Epoch [3/150], Loss: 0.2625


100%|██████████| 1420/1420 [00:10<00:00, 134.22it/s]


Accuracy: 62.96%


100%|██████████| 7103/7103 [01:17<00:00, 92.08it/s] 


Epoch [4/150], Loss: 0.2564


100%|██████████| 1420/1420 [00:11<00:00, 118.88it/s]


Accuracy: 64.39%


100%|██████████| 7103/7103 [01:14<00:00, 95.70it/s] 


Epoch [5/150], Loss: 0.2539


100%|██████████| 1420/1420 [00:10<00:00, 141.36it/s]


Accuracy: 65.36%


100%|██████████| 7103/7103 [01:14<00:00, 95.42it/s] 


Epoch [6/150], Loss: 0.2516


100%|██████████| 1420/1420 [00:10<00:00, 138.67it/s]


Accuracy: 65.78%


100%|██████████| 7103/7103 [01:15<00:00, 93.95it/s] 


Epoch [7/150], Loss: 0.2496


100%|██████████| 1420/1420 [00:10<00:00, 133.58it/s]


Accuracy: 65.77%


100%|██████████| 7103/7103 [01:11<00:00, 98.90it/s] 


Epoch [8/150], Loss: 0.2487


100%|██████████| 1420/1420 [00:10<00:00, 141.71it/s]


Accuracy: 66.59%


100%|██████████| 7103/7103 [01:13<00:00, 96.65it/s] 


Epoch [9/150], Loss: 0.2471


100%|██████████| 1420/1420 [00:09<00:00, 145.48it/s]


Accuracy: 67.29%


100%|██████████| 7103/7103 [01:16<00:00, 92.73it/s] 


Epoch [10/150], Loss: 0.2464


100%|██████████| 1420/1420 [00:10<00:00, 139.10it/s]


Accuracy: 67.71%


100%|██████████| 7103/7103 [01:22<00:00, 85.62it/s] 


Epoch [11/150], Loss: 0.2457


100%|██████████| 1420/1420 [00:10<00:00, 133.31it/s]


Accuracy: 67.22%


100%|██████████| 7103/7103 [01:13<00:00, 96.51it/s] 


Epoch [12/150], Loss: 0.2446


100%|██████████| 1420/1420 [00:09<00:00, 144.16it/s]


Accuracy: 67.39%


100%|██████████| 7103/7103 [01:17<00:00, 91.48it/s] 


Epoch [13/150], Loss: 0.2440


100%|██████████| 1420/1420 [00:09<00:00, 148.62it/s]


Accuracy: 68.07%


100%|██████████| 7103/7103 [01:18<00:00, 89.97it/s] 


Epoch [14/150], Loss: 0.2434


100%|██████████| 1420/1420 [00:11<00:00, 128.34it/s]


Accuracy: 68.07%


100%|██████████| 7103/7103 [01:22<00:00, 85.94it/s] 


Epoch [15/150], Loss: 0.2431


100%|██████████| 1420/1420 [00:11<00:00, 123.05it/s]


Accuracy: 68.30%


100%|██████████| 7103/7103 [01:24<00:00, 83.70it/s] 


Epoch [16/150], Loss: 0.2430


100%|██████████| 1420/1420 [00:11<00:00, 119.02it/s]


Accuracy: 68.30%


100%|██████████| 7103/7103 [02:21<00:00, 50.29it/s] 


Epoch [17/150], Loss: 0.2422


100%|██████████| 1420/1420 [00:22<00:00, 63.99it/s]


Accuracy: 68.48%


100%|██████████| 7103/7103 [02:26<00:00, 48.64it/s]


Epoch [18/150], Loss: 0.2417


100%|██████████| 1420/1420 [00:19<00:00, 74.36it/s] 


Accuracy: 68.96%


100%|██████████| 7103/7103 [01:32<00:00, 76.83it/s] 


Epoch [19/150], Loss: 0.2416


100%|██████████| 1420/1420 [00:10<00:00, 135.69it/s]


Accuracy: 68.91%


100%|██████████| 7103/7103 [01:28<00:00, 80.45it/s] 


Epoch [20/150], Loss: 0.2411


100%|██████████| 1420/1420 [00:11<00:00, 124.71it/s]


Accuracy: 68.84%


100%|██████████| 7103/7103 [01:27<00:00, 80.94it/s] 


Epoch [21/150], Loss: 0.2410


100%|██████████| 1420/1420 [00:10<00:00, 133.65it/s]


Accuracy: 68.63%


100%|██████████| 7103/7103 [01:25<00:00, 82.79it/s] 
100%|██████████| 1420/1420 [00:10<00:00, 141.28it/s]
100%|██████████| 7103/7103 [01:14<00:00, 95.11it/s] 


Epoch [23/150], Loss: 0.2406


100%|██████████| 1420/1420 [00:08<00:00, 167.34it/s]


Accuracy: 68.85%


100%|██████████| 7103/7103 [01:06<00:00, 106.67it/s]


Epoch [24/150], Loss: 0.2397


100%|██████████| 1420/1420 [00:08<00:00, 171.45it/s]


Accuracy: 69.27%


100%|██████████| 7103/7103 [01:04<00:00, 109.37it/s]
100%|██████████| 1420/1420 [00:08<00:00, 171.18it/s]
100%|██████████| 7103/7103 [01:08<00:00, 103.55it/s]
100%|██████████| 1420/1420 [00:08<00:00, 173.03it/s]
100%|██████████| 7103/7103 [01:08<00:00, 103.85it/s]


Epoch [27/150], Loss: 0.2395


100%|██████████| 1420/1420 [00:08<00:00, 158.87it/s]


Accuracy: 69.50%


100%|██████████| 7103/7103 [01:11<00:00, 98.94it/s] 
100%|██████████| 1420/1420 [00:08<00:00, 161.66it/s]
100%|██████████| 7103/7103 [01:10<00:00, 100.60it/s]
100%|██████████| 1420/1420 [00:08<00:00, 170.88it/s]
100%|██████████| 7103/7103 [01:07<00:00, 104.80it/s]


Epoch [30/150], Loss: 0.2390


100%|██████████| 1420/1420 [00:08<00:00, 169.61it/s]


Accuracy: 69.84%


100%|██████████| 7103/7103 [01:11<00:00, 100.01it/s]


Epoch [31/150], Loss: 0.2389


100%|██████████| 1420/1420 [00:11<00:00, 124.41it/s]


Accuracy: 69.61%


100%|██████████| 7103/7103 [01:28<00:00, 80.49it/s] 
100%|██████████| 1420/1420 [00:10<00:00, 137.23it/s]
100%|██████████| 7103/7103 [01:31<00:00, 77.81it/s] 


Epoch [33/150], Loss: 0.2386


100%|██████████| 1420/1420 [00:11<00:00, 125.00it/s]


Accuracy: 69.63%


100%|██████████| 7103/7103 [01:31<00:00, 77.91it/s] 


Epoch [34/150], Loss: 0.2386


100%|██████████| 1420/1420 [00:11<00:00, 121.94it/s]


Accuracy: 69.72%


100%|██████████| 7103/7103 [01:29<00:00, 79.02it/s] 
100%|██████████| 1420/1420 [00:11<00:00, 128.96it/s]
100%|██████████| 7103/7103 [01:34<00:00, 74.93it/s] 


Epoch [36/150], Loss: 0.2380


100%|██████████| 1420/1420 [00:12<00:00, 109.32it/s]


Accuracy: 69.71%


100%|██████████| 7103/7103 [01:29<00:00, 79.63it/s] 
100%|██████████| 1420/1420 [00:11<00:00, 125.46it/s]
100%|██████████| 7103/7103 [01:33<00:00, 76.31it/s] 


Epoch [38/150], Loss: 0.2377


100%|██████████| 1420/1420 [00:12<00:00, 115.36it/s]


Accuracy: 69.59%


100%|██████████| 7103/7103 [01:35<00:00, 74.23it/s] 
100%|██████████| 1420/1420 [00:12<00:00, 111.49it/s]
100%|██████████| 7103/7103 [01:34<00:00, 75.00it/s] 


Epoch [40/150], Loss: 0.2375


100%|██████████| 1420/1420 [00:12<00:00, 116.45it/s]


Accuracy: 69.93%


100%|██████████| 7103/7103 [01:28<00:00, 80.14it/s] 
100%|██████████| 1420/1420 [00:10<00:00, 130.00it/s]
100%|██████████| 7103/7103 [01:31<00:00, 77.22it/s] 


Epoch [42/150], Loss: 0.2374


100%|██████████| 1420/1420 [00:11<00:00, 124.04it/s]


Accuracy: 69.65%


100%|██████████| 7103/7103 [01:31<00:00, 77.34it/s] 


Epoch [43/150], Loss: 0.2373


100%|██████████| 1420/1420 [00:11<00:00, 124.82it/s]


Accuracy: 70.11%


100%|██████████| 7103/7103 [00:56<00:00, 124.82it/s]


Epoch [44/150], Loss: 0.2372


100%|██████████| 1420/1420 [00:06<00:00, 218.99it/s]


Accuracy: 70.03%


100%|██████████| 7103/7103 [00:48<00:00, 146.43it/s]


Epoch [45/150], Loss: 0.2371


100%|██████████| 1420/1420 [00:06<00:00, 232.72it/s]


Accuracy: 69.89%


100%|██████████| 7103/7103 [00:47<00:00, 149.74it/s]
100%|██████████| 1420/1420 [00:09<00:00, 145.64it/s]
100%|██████████| 7103/7103 [00:47<00:00, 149.61it/s]
100%|██████████| 1420/1420 [00:06<00:00, 216.03it/s]
100%|██████████| 7103/7103 [00:46<00:00, 153.46it/s]


Epoch [48/150], Loss: 0.2368


100%|██████████| 1420/1420 [00:05<00:00, 245.79it/s]


Accuracy: 69.88%


100%|██████████| 7103/7103 [00:49<00:00, 143.83it/s]


Epoch [49/150], Loss: 0.2366


100%|██████████| 1420/1420 [00:09<00:00, 145.58it/s]


Accuracy: 70.62%


100%|██████████| 7103/7103 [01:19<00:00, 89.71it/s] 
100%|██████████| 1420/1420 [00:11<00:00, 122.45it/s]
100%|██████████| 7103/7103 [01:23<00:00, 85.36it/s] 


Epoch [51/150], Loss: 0.2362


100%|██████████| 1420/1420 [00:10<00:00, 140.30it/s]


Accuracy: 70.22%


100%|██████████| 7103/7103 [01:23<00:00, 84.77it/s] 
100%|██████████| 1420/1420 [00:11<00:00, 126.75it/s]
100%|██████████| 7103/7103 [01:23<00:00, 84.83it/s] 
100%|██████████| 1420/1420 [00:11<00:00, 124.63it/s]
100%|██████████| 7103/7103 [01:25<00:00, 82.93it/s] 


Epoch [54/150], Loss: 0.2361


100%|██████████| 1420/1420 [00:11<00:00, 124.10it/s]


Accuracy: 70.57%


100%|██████████| 7103/7103 [01:24<00:00, 84.37it/s] 
100%|██████████| 1420/1420 [00:10<00:00, 134.76it/s]
100%|██████████| 7103/7103 [01:24<00:00, 84.05it/s] 


Epoch [56/150], Loss: 0.2360


100%|██████████| 1420/1420 [00:11<00:00, 124.86it/s]


Accuracy: 70.30%


100%|██████████| 7103/7103 [01:22<00:00, 86.05it/s] 
100%|██████████| 1420/1420 [00:11<00:00, 124.11it/s]
100%|██████████| 7103/7103 [01:23<00:00, 85.10it/s] 


Epoch [58/150], Loss: 0.2358


100%|██████████| 1420/1420 [00:11<00:00, 124.60it/s]


Accuracy: 70.44%


100%|██████████| 7103/7103 [01:26<00:00, 82.10it/s] 
100%|██████████| 1420/1420 [00:10<00:00, 134.23it/s]
100%|██████████| 7103/7103 [01:22<00:00, 86.38it/s] 


Epoch [60/150], Loss: 0.2356


100%|██████████| 1420/1420 [00:10<00:00, 134.36it/s]


Accuracy: 70.40%


100%|██████████| 7103/7103 [01:26<00:00, 82.51it/s] 
100%|██████████| 1420/1420 [00:12<00:00, 112.70it/s]
100%|██████████| 7103/7103 [01:33<00:00, 75.79it/s] 


Epoch [62/150], Loss: 0.2355


100%|██████████| 1420/1420 [00:11<00:00, 128.86it/s]


Accuracy: 70.40%


100%|██████████| 7103/7103 [01:38<00:00, 72.07it/s] 
100%|██████████| 1420/1420 [00:12<00:00, 115.25it/s]
100%|██████████| 7103/7103 [01:39<00:00, 71.56it/s] 
100%|██████████| 1420/1420 [00:12<00:00, 113.57it/s]
100%|██████████| 7103/7103 [01:32<00:00, 76.47it/s] 
100%|██████████| 1420/1420 [00:12<00:00, 118.01it/s]
100%|██████████| 7103/7103 [01:42<00:00, 69.27it/s] 


Epoch [66/150], Loss: 0.2354


100%|██████████| 1420/1420 [00:13<00:00, 107.77it/s]


Accuracy: 70.66%


100%|██████████| 7103/7103 [01:35<00:00, 74.52it/s] 
100%|██████████| 1420/1420 [00:12<00:00, 114.78it/s]
100%|██████████| 7103/7103 [01:40<00:00, 70.49it/s] 


Epoch [68/150], Loss: 0.2352


100%|██████████| 1420/1420 [00:14<00:00, 99.99it/s] 


Accuracy: 70.51%


100%|██████████| 7103/7103 [01:42<00:00, 69.37it/s] 
100%|██████████| 1420/1420 [00:13<00:00, 105.21it/s]
100%|██████████| 7103/7103 [01:33<00:00, 75.73it/s] 
100%|██████████| 1420/1420 [00:07<00:00, 185.13it/s]
100%|██████████| 7103/7103 [01:09<00:00, 101.77it/s]


Epoch [71/150], Loss: 0.2352


100%|██████████| 1420/1420 [00:07<00:00, 184.65it/s]


Accuracy: 70.88%


100%|██████████| 7103/7103 [01:06<00:00, 106.27it/s]


Epoch [72/150], Loss: 0.2352


100%|██████████| 1420/1420 [00:07<00:00, 186.22it/s]


Accuracy: 70.65%


100%|██████████| 7103/7103 [01:04<00:00, 110.96it/s]


Epoch [73/150], Loss: 0.2351


100%|██████████| 1420/1420 [00:07<00:00, 188.56it/s]


Accuracy: 70.84%


100%|██████████| 7103/7103 [01:04<00:00, 109.34it/s]
100%|██████████| 1420/1420 [00:08<00:00, 166.55it/s]
100%|██████████| 7103/7103 [01:04<00:00, 110.44it/s]
100%|██████████| 1420/1420 [00:08<00:00, 158.45it/s]
100%|██████████| 7103/7103 [01:08<00:00, 103.93it/s]


Epoch [76/150], Loss: 0.2348


100%|██████████| 1420/1420 [00:09<00:00, 144.65it/s]


Accuracy: 70.72%


100%|██████████| 7103/7103 [01:06<00:00, 106.40it/s]


Epoch [77/150], Loss: 0.2347


100%|██████████| 1420/1420 [00:09<00:00, 152.84it/s]


Accuracy: 70.81%


100%|██████████| 7103/7103 [01:17<00:00, 91.96it/s] 
100%|██████████| 1420/1420 [00:10<00:00, 135.69it/s]
100%|██████████| 7103/7103 [01:21<00:00, 87.21it/s] 
100%|██████████| 1420/1420 [00:10<00:00, 141.96it/s]
100%|██████████| 7103/7103 [01:21<00:00, 86.89it/s] 
100%|██████████| 1420/1420 [00:10<00:00, 138.89it/s]
100%|██████████| 7103/7103 [01:20<00:00, 88.06it/s] 
100%|██████████| 1420/1420 [00:09<00:00, 145.48it/s]
100%|██████████| 7103/7103 [01:20<00:00, 88.47it/s] 
100%|██████████| 1420/1420 [00:09<00:00, 142.87it/s]
100%|██████████| 7103/7103 [01:21<00:00, 86.68it/s] 


Epoch [83/150], Loss: 0.2346


100%|██████████| 1420/1420 [00:12<00:00, 111.11it/s]


Accuracy: 70.59%


100%|██████████| 7103/7103 [01:33<00:00, 76.13it/s] 


Epoch [84/150], Loss: 0.2345


100%|██████████| 1420/1420 [00:13<00:00, 105.82it/s]


Accuracy: 70.90%


100%|██████████| 7103/7103 [01:39<00:00, 71.62it/s] 


Epoch [85/150], Loss: 0.2342


100%|██████████| 1420/1420 [00:11<00:00, 123.53it/s]


Accuracy: 70.83%


100%|██████████| 7103/7103 [01:31<00:00, 77.96it/s] 
100%|██████████| 1420/1420 [00:09<00:00, 149.55it/s]
100%|██████████| 7103/7103 [01:21<00:00, 87.63it/s] 
100%|██████████| 1420/1420 [00:10<00:00, 136.64it/s]
100%|██████████| 7103/7103 [01:39<00:00, 71.24it/s] 
100%|██████████| 1420/1420 [00:12<00:00, 111.47it/s]
100%|██████████| 7103/7103 [01:46<00:00, 66.54it/s] 
100%|██████████| 1420/1420 [00:13<00:00, 104.64it/s]
100%|██████████| 7103/7103 [01:42<00:00, 69.54it/s] 
100%|██████████| 1420/1420 [00:13<00:00, 103.33it/s]
100%|██████████| 7103/7103 [01:48<00:00, 65.28it/s] 


Epoch [91/150], Loss: 0.2340


100%|██████████| 1420/1420 [00:13<00:00, 107.75it/s]


Accuracy: 70.69%


100%|██████████| 7103/7103 [01:48<00:00, 65.58it/s] 
100%|██████████| 1420/1420 [00:14<00:00, 97.08it/s] 
100%|██████████| 7103/7103 [01:43<00:00, 68.63it/s] 
100%|██████████| 1420/1420 [00:14<00:00, 97.47it/s] 
100%|██████████| 7103/7103 [01:54<00:00, 61.84it/s] 
100%|██████████| 1420/1420 [00:12<00:00, 110.49it/s]
100%|██████████| 7103/7103 [01:56<00:00, 61.14it/s]
100%|██████████| 1420/1420 [00:17<00:00, 83.32it/s] 
100%|██████████| 7103/7103 [01:50<00:00, 64.36it/s] 


Epoch [96/150], Loss: 0.2339


100%|██████████| 1420/1420 [00:12<00:00, 109.32it/s]


Accuracy: 71.00%


100%|██████████| 7103/7103 [01:54<00:00, 61.91it/s]


Epoch [97/150], Loss: 0.2338


100%|██████████| 1420/1420 [00:16<00:00, 84.39it/s] 


Accuracy: 71.06%


100%|██████████| 7103/7103 [02:07<00:00, 55.50it/s]
100%|██████████| 1420/1420 [00:19<00:00, 72.70it/s]
100%|██████████| 7103/7103 [01:10<00:00, 101.38it/s]
100%|██████████| 1420/1420 [00:07<00:00, 189.72it/s]
100%|██████████| 7103/7103 [01:05<00:00, 108.81it/s]


Epoch [100/150], Loss: 0.2337


100%|██████████| 1420/1420 [00:07<00:00, 186.79it/s]


Accuracy: 71.12%


100%|██████████| 7103/7103 [01:03<00:00, 111.59it/s]
100%|██████████| 1420/1420 [00:08<00:00, 173.34it/s]
100%|██████████| 7103/7103 [01:05<00:00, 108.39it/s]
100%|██████████| 1420/1420 [00:07<00:00, 179.77it/s]
100%|██████████| 7103/7103 [01:02<00:00, 114.49it/s]
100%|██████████| 1420/1420 [00:07<00:00, 177.83it/s]
100%|██████████| 7103/7103 [01:29<00:00, 79.03it/s] 
100%|██████████| 1420/1420 [00:15<00:00, 89.12it/s] 
100%|██████████| 7103/7103 [01:48<00:00, 65.55it/s] 
100%|██████████| 1420/1420 [00:16<00:00, 88.56it/s] 
100%|██████████| 7103/7103 [01:49<00:00, 64.85it/s] 


Epoch [106/150], Loss: 0.2334


100%|██████████| 1420/1420 [00:14<00:00, 97.17it/s] 


Accuracy: 71.03%


100%|██████████| 7103/7103 [01:58<00:00, 60.19it/s] 
100%|██████████| 1420/1420 [00:13<00:00, 105.78it/s]
100%|██████████| 7103/7103 [01:48<00:00, 65.63it/s] 
100%|██████████| 1420/1420 [00:17<00:00, 82.58it/s]
100%|██████████| 7103/7103 [01:50<00:00, 64.36it/s] 
100%|██████████| 1420/1420 [00:14<00:00, 97.77it/s] 
100%|██████████| 7103/7103 [01:47<00:00, 66.07it/s] 
100%|██████████| 1420/1420 [00:12<00:00, 113.65it/s]
100%|██████████| 7103/7103 [01:44<00:00, 68.00it/s] 
100%|██████████| 1420/1420 [00:14<00:00, 96.23it/s] 
100%|██████████| 7103/7103 [01:44<00:00, 68.05it/s]


Epoch [112/150], Loss: 0.2331


100%|██████████| 1420/1420 [00:14<00:00, 101.10it/s]


Accuracy: 71.02%


100%|██████████| 7103/7103 [01:48<00:00, 65.23it/s]
100%|██████████| 1420/1420 [00:17<00:00, 83.27it/s] 
100%|██████████| 7103/7103 [01:49<00:00, 64.98it/s]
100%|██████████| 1420/1420 [00:15<00:00, 89.16it/s] 
100%|██████████| 7103/7103 [01:47<00:00, 65.91it/s] 
100%|██████████| 1420/1420 [00:13<00:00, 102.79it/s]
100%|██████████| 7103/7103 [01:51<00:00, 63.90it/s]
100%|██████████| 1420/1420 [00:15<00:00, 91.74it/s] 
100%|██████████| 7103/7103 [01:48<00:00, 65.40it/s]
100%|██████████| 1420/1420 [00:16<00:00, 84.14it/s] 
100%|██████████| 7103/7103 [01:47<00:00, 66.05it/s]


Epoch [118/150], Loss: 0.2330


100%|██████████| 1420/1420 [00:15<00:00, 89.37it/s] 


Accuracy: 71.03%


100%|██████████| 7103/7103 [01:45<00:00, 67.44it/s] 
100%|██████████| 1420/1420 [00:12<00:00, 109.77it/s]
100%|██████████| 7103/7103 [01:46<00:00, 66.65it/s] 
100%|██████████| 1420/1420 [00:14<00:00, 101.34it/s]
100%|██████████| 7103/7103 [01:56<00:00, 60.85it/s]


Epoch [121/150], Loss: 0.2329


100%|██████████| 1420/1420 [00:14<00:00, 99.07it/s] 


Accuracy: 70.98%


100%|██████████| 7103/7103 [01:59<00:00, 59.59it/s]
100%|██████████| 1420/1420 [00:16<00:00, 86.64it/s] 
100%|██████████| 7103/7103 [01:22<00:00, 86.13it/s] 
100%|██████████| 1420/1420 [00:16<00:00, 86.41it/s] 
100%|██████████| 7103/7103 [01:03<00:00, 111.13it/s]


Epoch [124/150], Loss: 0.2329


100%|██████████| 1420/1420 [00:06<00:00, 210.85it/s]


Accuracy: 71.02%


100%|██████████| 7103/7103 [00:58<00:00, 120.90it/s]
100%|██████████| 1420/1420 [00:07<00:00, 201.91it/s]
100%|██████████| 7103/7103 [01:03<00:00, 111.72it/s]
100%|██████████| 1420/1420 [00:07<00:00, 179.46it/s]
100%|██████████| 7103/7103 [01:03<00:00, 112.10it/s]


Epoch [127/150], Loss: 0.2326


100%|██████████| 1420/1420 [00:07<00:00, 178.59it/s]


Accuracy: 71.02%


100%|██████████| 7103/7103 [01:12<00:00, 97.80it/s] 
100%|██████████| 1420/1420 [00:13<00:00, 108.98it/s]
100%|██████████| 7103/7103 [01:47<00:00, 65.78it/s]
100%|██████████| 1420/1420 [00:13<00:00, 106.38it/s]
100%|██████████| 7103/7103 [01:41<00:00, 70.15it/s] 
100%|██████████| 1420/1420 [00:15<00:00, 91.55it/s] 
100%|██████████| 7103/7103 [01:42<00:00, 69.54it/s] 
100%|██████████| 1420/1420 [00:12<00:00, 110.81it/s]
100%|██████████| 7103/7103 [01:44<00:00, 68.17it/s]
100%|██████████| 1420/1420 [00:15<00:00, 91.57it/s] 
100%|██████████| 7103/7103 [01:38<00:00, 71.89it/s] 
100%|██████████| 1420/1420 [00:12<00:00, 115.80it/s]
100%|██████████| 7103/7103 [01:42<00:00, 69.60it/s] 
100%|██████████| 1420/1420 [00:14<00:00, 97.38it/s] 
100%|██████████| 7103/7103 [01:37<00:00, 72.56it/s] 
100%|██████████| 1420/1420 [00:13<00:00, 107.04it/s]
100%|██████████| 7103/7103 [01:41<00:00, 69.85it/s] 
100%|██████████| 1420/1420 [00:16<00:00, 85.59it/s] 
100%|██████████| 7103/7103 [01:47<00:00, 66.23it

Epoch [149/150], Loss: 0.2325


100%|██████████| 1420/1420 [00:07<00:00, 183.84it/s]


Accuracy: 71.40%


100%|██████████| 7103/7103 [01:11<00:00, 99.30it/s] 
100%|██████████| 1420/1420 [00:08<00:00, 172.94it/s]

Training complete.



