In [10]:
from model import Model
import torch
torch.backends.cudnn.benchmark=True
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.models as models
import torchvision.transforms as transforms
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
import argparse
import time
import numpy as np
import subprocess
from numpy import random
import copy
device="cpu"
from tqdm import tqdm

# import matplotlib.pyplot as plt
from data_loader import genomics_data

In [11]:
def kaiming_normal_init(m):
	"""Re-initialise the weight of a single layer with Kaiming-normal values.

	Written to be passed to ``nn.Module.apply``, which calls it once for every
	sub-module. Convolution layers (1-D, 2-D and 3-D) use the ReLU gain,
	``nn.Linear`` layers use the sigmoid gain; every other module type is left
	untouched. Biases are never modified.

	Args:
		m: the module to (possibly) initialise in place.
	"""
	# The network in this file is built from nn.Conv1d layers, so matching
	# only nn.Conv2d would silently skip every convolution.
	if isinstance(m, (nn.Conv1d, nn.Conv2d, nn.Conv3d)):
		nn.init.kaiming_normal_(m.weight, nonlinearity='relu')
	elif isinstance(m, nn.Linear):
		# NOTE(review): the Linear layer in the feature extractor is followed
		# by a ReLU, so 'relu' may be the intended gain here - confirm before
		# changing, as it alters the initial weight scale.
		nn.init.kaiming_normal_(m.weight, nonlinearity='sigmoid')


class Model(nn.Module):
	"""1-D convolutional classifier bundled with its own SGD training loop.

	The network is two ``Conv1d``+ReLU stages, a flatten, a 1024-unit linear
	layer with ReLU (together ``feature_extractor``) and a bias-free linear
	classification head (``fc``). Training hyper-parameters are stored as
	plain attributes so they can be adjusted before calling ``update``.
	"""

	def __init__(self, classes):
		"""Build the network.

		Args:
			classes: number of output logits produced by the head ``fc``.
		"""
		# Initialise nn.Module first so every later attribute write happens
		# on a fully set-up module.
		super(Model, self).__init__()

		# Hyper Parameters
		self.init_lr = 0.1
		self.num_epochs = 40
		self.batch_size = 48

		self.pretrained = False
		self.momentum = 0.9
		self.weight_decay = 0.0001
		# Constant to provide numerical stability while normalizing
		self.epsilon = 1e-16

		# Network architecture.
		# The Linear layer expects 12258 = 3 channels * 4086 positions. Each
		# kernel_size=6 convolution (stride 1, no padding) shortens the
		# sequence by 5, so this matches inputs of shape (batch, 1, 4096).
		self.feature_extractor = nn.Sequential(
			nn.Conv1d(1, 6, kernel_size=6),
			nn.ReLU(),
			nn.Conv1d(6, 3, kernel_size=6),
			nn.ReLU(),
			nn.Flatten(),
			nn.Linear(12258, 1024),
			nn.ReLU()
		)

		self.feature_extractor.apply(kaiming_normal_init)
		self.fc = nn.Linear(1024, classes, bias=False)

		# NOTE(review): not read anywhere in this class; presumably a counter
		# of classes seen so far used by external code - confirm.
		self.n_classes = 0

	def forward(self, x):
		"""Return the raw class logits for a batch.

		Args:
			x: input batch accepted by ``feature_extractor``.
		Returns:
			Tensor of shape (batch_size, classes) with unnormalised scores.
		"""
		x = self.feature_extractor(x)
		x = x.view(x.size(0), -1)
		x = self.fc(x)
		return x

	def classify(self, images):
		"""Predict the most likely class index for every sample.

		Args:
			images: input batch, same format as ``forward`` expects.
		Returns:
			preds: integer Tensor of size (batch_size,)
		"""
		# Inference only: skip building the autograd graph.
		with torch.no_grad():
			logits = self.forward(images)
		# Softmax is monotonic, so the arg-max of the logits is the predicted
		# class; skipping it also avoids rounding near-equal logits into a tie.
		return torch.argmax(logits, dim=1)

	def update(self, dataset, class_map, log_every=1, num_workers=8):
		"""Train the network on ``dataset`` with SGD and cross-entropy loss.

		Runs ``self.num_epochs`` passes over a shuffled loader built from
		``dataset`` with ``self.batch_size``. Progress is reported through a
		per-epoch tqdm bar plus ``tqdm.write`` log lines.

		Args:
			dataset: dataset whose items are ``(index, image, label)`` triples.
			class_map: indexable mapping from a raw label to the target index
				fed to the loss (array, list or dict).
			log_every: write a loss line every this many iterations; the
				default of 1 logs every iteration. 0 or None disables the
				per-iteration lines.
			num_workers: worker processes used by the DataLoader.
		"""
		self.compute_means = True

		loader = torch.utils.data.DataLoader(dataset, batch_size=self.batch_size,
											   shuffle=True, num_workers=num_workers)

		# Despite the label, the value printed is the number of samples.
		print("Batch Size (for n_classes classes) : ", len(dataset))
		optimizer = optim.SGD(self.parameters(), lr=self.init_lr, momentum = self.momentum, weight_decay=self.weight_decay)
		criterion = nn.CrossEntropyLoss()

		# Train on whatever device the model currently lives on instead of
		# relying on a module-level ``device`` variable.
		target_device = next(self.parameters()).device
		iters_per_epoch = len(loader)

		with tqdm(total=self.num_epochs) as pbar:
			for epoch in range(self.num_epochs):

				for i, (indices, images, labels) in enumerate(loader):
					images = torch.as_tensor(images, dtype=torch.float32).to(target_device)
					# Translate raw labels into the indices the head is trained on.
					mapped = [class_map[label] for label in torch.as_tensor(labels).tolist()]
					labels = torch.tensor(mapped, dtype=torch.long).to(target_device)

					optimizer.zero_grad()
					logits = self.forward(images)

					loss = criterion(logits, labels)

					loss.backward()
					optimizer.step()

					if log_every and (i+1) % log_every == 0:
						tqdm.write('Epoch [%d/%d], Iter [%d/%d] Loss: %.4f' 
							   %(epoch+1, self.num_epochs, i+1, iters_per_epoch, loss.item()))

				pbar.update(1)

In [12]:
# Build the classifier and move it to the configured device.
n_classes = 66
model = Model(n_classes)
model.to(device)
all_classes = np.arange(n_classes)

# Options shared by both loaders; only the shuffle flag differs between them.
loader_opts = dict(batch_size=48, num_workers=8)

train_set = genomics_data(train=True, classes=all_classes)
train_loader = torch.utils.data.DataLoader(train_set, shuffle=True, **loader_opts)

test_set = genomics_data(train=False, classes=all_classes)
test_loader = torch.utils.data.DataLoader(test_set, shuffle=False, **loader_opts)

# update() builds its own shuffled loader from train_set. all_classes is
# np.arange, so as class_map it maps every label to itself.
model.update(train_set, all_classes)

Batch Size (for n_classes classes) :  6947


  0%|          | 0/40 [00:00<?, ?it/s]

Epoch [1/40], Iter [1/145] Loss: 4.1855
Epoch [1/40], Iter [2/145] Loss: 4.1856
Epoch [1/40], Iter [3/145] Loss: 4.1348


  0%|          | 0/40 [00:00<?, ?it/s]

Epoch [1/40], Iter [4/145] Loss: 4.0267
Epoch [1/40], Iter [5/145] Loss: 3.5059
Epoch [1/40], Iter [6/145] Loss: 5.8337


  0%|          | 0/40 [00:00<?, ?it/s]

Epoch [1/40], Iter [7/145] Loss: 4.1936
Epoch [1/40], Iter [8/145] Loss: 4.2061
Epoch [1/40], Iter [9/145] Loss: 4.2076


  0%|          | 0/40 [00:01<?, ?it/s]

Epoch [1/40], Iter [10/145] Loss: 4.2027
Epoch [1/40], Iter [11/145] Loss: 4.1929
Epoch [1/40], Iter [12/145] Loss: 4.1902


  0%|          | 0/40 [00:01<?, ?it/s]

Epoch [1/40], Iter [13/145] Loss: 4.1891
Epoch [1/40], Iter [14/145] Loss: 4.1879
Epoch [1/40], Iter [15/145] Loss: 4.1866


  0%|          | 0/40 [00:01<?, ?it/s]

Epoch [1/40], Iter [16/145] Loss: 4.1856
Epoch [1/40], Iter [17/145] Loss: 4.1843
Epoch [1/40], Iter [18/145] Loss: 4.1853


  0%|          | 0/40 [00:01<?, ?it/s]

Epoch [1/40], Iter [19/145] Loss: 4.1833
Epoch [1/40], Iter [20/145] Loss: 4.1802
Epoch [1/40], Iter [21/145] Loss: 4.1821


  0%|          | 0/40 [00:02<?, ?it/s]

Epoch [1/40], Iter [22/145] Loss: 4.1800
Epoch [1/40], Iter [23/145] Loss: 4.1775
Epoch [1/40], Iter [24/145] Loss: 4.1755


  0%|          | 0/40 [00:02<?, ?it/s]

Epoch [1/40], Iter [25/145] Loss: 4.1733
Epoch [1/40], Iter [26/145] Loss: 4.1639
Epoch [1/40], Iter [27/145] Loss: 4.1765


  0%|          | 0/40 [00:02<?, ?it/s]

Epoch [1/40], Iter [28/145] Loss: 4.1580
Epoch [1/40], Iter [29/145] Loss: 4.1480
Epoch [1/40], Iter [30/145] Loss: 4.1648


  0%|          | 0/40 [00:02<?, ?it/s]

Epoch [1/40], Iter [31/145] Loss: 4.1590
Epoch [1/40], Iter [32/145] Loss: 4.1196
Epoch [1/40], Iter [33/145] Loss: 4.1079


  0%|          | 0/40 [00:03<?, ?it/s]

Epoch [1/40], Iter [34/145] Loss: 4.1098
Epoch [1/40], Iter [35/145] Loss: 4.1130
Epoch [1/40], Iter [36/145] Loss: 4.0424


  0%|          | 0/40 [00:03<?, ?it/s]

Epoch [1/40], Iter [37/145] Loss: 4.0588
Epoch [1/40], Iter [38/145] Loss: 4.1558
Epoch [1/40], Iter [39/145] Loss: 3.9946


  0%|          | 0/40 [00:03<?, ?it/s]

Epoch [1/40], Iter [40/145] Loss: 3.8901
Epoch [1/40], Iter [41/145] Loss: 3.8942
Epoch [1/40], Iter [42/145] Loss: 3.9037


  0%|          | 0/40 [00:03<?, ?it/s]

Epoch [1/40], Iter [43/145] Loss: 3.7668
Epoch [1/40], Iter [44/145] Loss: 3.8698
Epoch [1/40], Iter [45/145] Loss: 4.0652


  0%|          | 0/40 [00:03<?, ?it/s]

Epoch [1/40], Iter [46/145] Loss: 3.8436
Epoch [1/40], Iter [47/145] Loss: 3.6338
Epoch [1/40], Iter [48/145] Loss: 3.7878


  0%|          | 0/40 [00:04<?, ?it/s]

Epoch [1/40], Iter [49/145] Loss: 3.4080
Epoch [1/40], Iter [50/145] Loss: 3.9748
Epoch [1/40], Iter [51/145] Loss: 3.8403


  0%|          | 0/40 [00:04<?, ?it/s]

Epoch [1/40], Iter [52/145] Loss: 3.9497
Epoch [1/40], Iter [53/145] Loss: 3.7153
Epoch [1/40], Iter [54/145] Loss: 3.6764


  0%|          | 0/40 [00:04<?, ?it/s]

Epoch [1/40], Iter [55/145] Loss: 4.0156
Epoch [1/40], Iter [56/145] Loss: 3.7831
Epoch [1/40], Iter [57/145] Loss: 3.8696


  0%|          | 0/40 [00:04<?, ?it/s]

Epoch [1/40], Iter [58/145] Loss: 3.8162
Epoch [1/40], Iter [59/145] Loss: 3.8501
Epoch [1/40], Iter [60/145] Loss: 3.7347


  0%|          | 0/40 [00:05<?, ?it/s]

Epoch [1/40], Iter [61/145] Loss: 3.8865
Epoch [1/40], Iter [62/145] Loss: 3.7907
Epoch [1/40], Iter [63/145] Loss: 3.8167


  0%|          | 0/40 [00:05<?, ?it/s]

Epoch [1/40], Iter [64/145] Loss: 3.8867
Epoch [1/40], Iter [65/145] Loss: 3.9235
Epoch [1/40], Iter [66/145] Loss: 3.7314


  0%|          | 0/40 [00:05<?, ?it/s]

Epoch [1/40], Iter [67/145] Loss: 4.0821
Epoch [1/40], Iter [68/145] Loss: 4.0871
Epoch [1/40], Iter [69/145] Loss: 3.9174


  0%|          | 0/40 [00:05<?, ?it/s]

Epoch [1/40], Iter [70/145] Loss: 3.8639
Epoch [1/40], Iter [71/145] Loss: 3.7919
Epoch [1/40], Iter [72/145] Loss: 3.8784


  0%|          | 0/40 [00:06<?, ?it/s]

Epoch [1/40], Iter [73/145] Loss: 3.6255
Epoch [1/40], Iter [74/145] Loss: 3.7647
Epoch [1/40], Iter [75/145] Loss: 4.0776


  0%|          | 0/40 [00:06<?, ?it/s]

Epoch [1/40], Iter [76/145] Loss: 3.8024
Epoch [1/40], Iter [77/145] Loss: 3.7575
Epoch [1/40], Iter [78/145] Loss: 3.8362


  0%|          | 0/40 [00:06<?, ?it/s]

Epoch [1/40], Iter [79/145] Loss: 4.0102
Epoch [1/40], Iter [80/145] Loss: 3.7290
Epoch [1/40], Iter [81/145] Loss: 3.8658


  0%|          | 0/40 [00:06<?, ?it/s]

Epoch [1/40], Iter [82/145] Loss: 3.7344
Epoch [1/40], Iter [83/145] Loss: 3.8457
Epoch [1/40], Iter [84/145] Loss: 3.8116


  0%|          | 0/40 [00:06<?, ?it/s]

Epoch [1/40], Iter [85/145] Loss: 4.0803
Epoch [1/40], Iter [86/145] Loss: 3.6224
Epoch [1/40], Iter [87/145] Loss: 3.9002


  0%|          | 0/40 [00:07<?, ?it/s]

Epoch [1/40], Iter [88/145] Loss: 3.7152
Epoch [1/40], Iter [89/145] Loss: 4.0220
Epoch [1/40], Iter [90/145] Loss: 3.7657


  0%|          | 0/40 [00:07<?, ?it/s]

Epoch [1/40], Iter [91/145] Loss: 4.1032
Epoch [1/40], Iter [92/145] Loss: 3.6124
Epoch [1/40], Iter [93/145] Loss: 4.0094


  0%|          | 0/40 [00:07<?, ?it/s]

Epoch [1/40], Iter [94/145] Loss: 4.2247
Epoch [1/40], Iter [95/145] Loss: 3.6863
Epoch [1/40], Iter [96/145] Loss: 4.0027


  0%|          | 0/40 [00:07<?, ?it/s]

Epoch [1/40], Iter [97/145] Loss: 3.8522
Epoch [1/40], Iter [98/145] Loss: 3.9981
Epoch [1/40], Iter [99/145] Loss: 3.6927


  0%|          | 0/40 [00:08<?, ?it/s]

Epoch [1/40], Iter [100/145] Loss: 4.1275
Epoch [1/40], Iter [101/145] Loss: 3.7692
Epoch [1/40], Iter [102/145] Loss: 3.6892


  0%|          | 0/40 [00:08<?, ?it/s]

Epoch [1/40], Iter [103/145] Loss: 3.9325
Epoch [1/40], Iter [104/145] Loss: 3.6827
Epoch [1/40], Iter [105/145] Loss: 3.9475


  0%|          | 0/40 [00:08<?, ?it/s]

Epoch [1/40], Iter [106/145] Loss: 3.8427
Epoch [1/40], Iter [107/145] Loss: 3.7797
Epoch [1/40], Iter [108/145] Loss: 3.7318


  0%|          | 0/40 [00:08<?, ?it/s]

Epoch [1/40], Iter [109/145] Loss: 3.8245
Epoch [1/40], Iter [110/145] Loss: 3.9144
Epoch [1/40], Iter [111/145] Loss: 3.8573


  0%|          | 0/40 [00:08<?, ?it/s]

Epoch [1/40], Iter [112/145] Loss: 4.1258
Epoch [1/40], Iter [113/145] Loss: 3.6097
Epoch [1/40], Iter [114/145] Loss: 3.5886


  0%|          | 0/40 [00:09<?, ?it/s]

Epoch [1/40], Iter [115/145] Loss: 3.7417
Epoch [1/40], Iter [116/145] Loss: 3.7035
Epoch [1/40], Iter [117/145] Loss: 3.8207


  0%|          | 0/40 [00:09<?, ?it/s]

Epoch [1/40], Iter [118/145] Loss: 3.9027
Epoch [1/40], Iter [119/145] Loss: 4.0708
Epoch [1/40], Iter [120/145] Loss: 3.5674


  0%|          | 0/40 [00:09<?, ?it/s]

Epoch [1/40], Iter [121/145] Loss: 3.8311
Epoch [1/40], Iter [122/145] Loss: 4.0619
Epoch [1/40], Iter [123/145] Loss: 3.9488


  0%|          | 0/40 [00:09<?, ?it/s]

Epoch [1/40], Iter [124/145] Loss: 3.9334
Epoch [1/40], Iter [125/145] Loss: 3.7275
Epoch [1/40], Iter [126/145] Loss: 3.7226


  0%|          | 0/40 [00:10<?, ?it/s]

Epoch [1/40], Iter [127/145] Loss: 3.9858
Epoch [1/40], Iter [128/145] Loss: 4.0657
Epoch [1/40], Iter [129/145] Loss: 3.7957


  0%|          | 0/40 [00:10<?, ?it/s]

Epoch [1/40], Iter [130/145] Loss: 3.9959
Epoch [1/40], Iter [131/145] Loss: 3.6528
Epoch [1/40], Iter [132/145] Loss: 3.8452


  0%|          | 0/40 [00:10<?, ?it/s]

Epoch [1/40], Iter [133/145] Loss: 3.8434
Epoch [1/40], Iter [134/145] Loss: 3.8287
Epoch [1/40], Iter [135/145] Loss: 3.8726


  0%|          | 0/40 [00:10<?, ?it/s]

Epoch [1/40], Iter [136/145] Loss: 3.7402
Epoch [1/40], Iter [137/145] Loss: 3.7529
Epoch [1/40], Iter [138/145] Loss: 4.0367


  0%|          | 0/40 [00:10<?, ?it/s]

Epoch [1/40], Iter [139/145] Loss: 3.9973
Epoch [1/40], Iter [140/145] Loss: 3.9785
Epoch [1/40], Iter [141/145] Loss: 4.0571


  0%|          | 0/40 [00:11<?, ?it/s]

Epoch [1/40], Iter [142/145] Loss: 4.0488
Epoch [1/40], Iter [143/145] Loss: 3.7557
Epoch [1/40], Iter [144/145] Loss: 3.9995


  2%|▎         | 1/40 [00:11<07:21, 11.32s/it]

Epoch [1/40], Iter [145/145] Loss: 3.9144


  2%|▎         | 1/40 [00:11<07:21, 11.32s/it]

Epoch [2/40], Iter [1/145] Loss: 4.0477
Epoch [2/40], Iter [2/145] Loss: 3.8434
Epoch [2/40], Iter [3/145] Loss: 3.6896


  2%|▎         | 1/40 [00:12<07:21, 11.32s/it]

Epoch [2/40], Iter [4/145] Loss: 4.0224
Epoch [2/40], Iter [5/145] Loss: 3.7432
Epoch [2/40], Iter [6/145] Loss: 3.9442


  2%|▎         | 1/40 [00:12<07:21, 11.32s/it]

Epoch [2/40], Iter [7/145] Loss: 3.6899
Epoch [2/40], Iter [8/145] Loss: 3.6665
Epoch [2/40], Iter [9/145] Loss: 3.7852


  2%|▎         | 1/40 [00:12<07:21, 11.32s/it]

Epoch [2/40], Iter [10/145] Loss: 3.5165
Epoch [2/40], Iter [11/145] Loss: 3.9390
Epoch [2/40], Iter [12/145] Loss: 3.7774


  2%|▎         | 1/40 [00:12<07:21, 11.32s/it]

Epoch [2/40], Iter [13/145] Loss: 3.8606
Epoch [2/40], Iter [14/145] Loss: 3.9828
Epoch [2/40], Iter [15/145] Loss: 3.8575


  2%|▎         | 1/40 [00:12<07:21, 11.32s/it]

Epoch [2/40], Iter [16/145] Loss: 4.0919
Epoch [2/40], Iter [17/145] Loss: 3.9759
Epoch [2/40], Iter [18/145] Loss: 4.0154


  2%|▎         | 1/40 [00:13<07:21, 11.32s/it]

Epoch [2/40], Iter [19/145] Loss: 4.1535
Epoch [2/40], Iter [20/145] Loss: 3.6803
Epoch [2/40], Iter [21/145] Loss: 3.9531


  2%|▎         | 1/40 [00:13<07:21, 11.32s/it]

Epoch [2/40], Iter [22/145] Loss: 3.8981
Epoch [2/40], Iter [23/145] Loss: 3.7520
Epoch [2/40], Iter [24/145] Loss: 3.8811


  2%|▎         | 1/40 [00:13<07:21, 11.32s/it]

Epoch [2/40], Iter [25/145] Loss: 3.8090
Epoch [2/40], Iter [26/145] Loss: 3.7725
Epoch [2/40], Iter [27/145] Loss: 3.8622


  2%|▎         | 1/40 [00:13<07:21, 11.32s/it]

Epoch [2/40], Iter [28/145] Loss: 3.9282
Epoch [2/40], Iter [29/145] Loss: 3.6880
Epoch [2/40], Iter [30/145] Loss: 3.8784


  2%|▎         | 1/40 [00:14<07:21, 11.32s/it]

Epoch [2/40], Iter [31/145] Loss: 4.0527
Epoch [2/40], Iter [32/145] Loss: 3.8692
Epoch [2/40], Iter [33/145] Loss: 3.7885


  2%|▎         | 1/40 [00:14<07:21, 11.32s/it]

Epoch [2/40], Iter [34/145] Loss: 3.6853
Epoch [2/40], Iter [35/145] Loss: 3.9739
Epoch [2/40], Iter [36/145] Loss: 3.7601


  2%|▎         | 1/40 [00:14<07:21, 11.32s/it]

Epoch [2/40], Iter [37/145] Loss: 3.8443
Epoch [2/40], Iter [38/145] Loss: 3.8069
Epoch [2/40], Iter [39/145] Loss: 4.2784


  2%|▎         | 1/40 [00:14<07:21, 11.32s/it]

Epoch [2/40], Iter [40/145] Loss: 4.0468
Epoch [2/40], Iter [41/145] Loss: 3.8891
Epoch [2/40], Iter [42/145] Loss: 4.1525


  2%|▎         | 1/40 [00:14<07:21, 11.32s/it]

Epoch [2/40], Iter [43/145] Loss: 3.6510
Epoch [2/40], Iter [44/145] Loss: 3.6224
Epoch [2/40], Iter [45/145] Loss: 4.0101


  2%|▎         | 1/40 [00:15<07:21, 11.32s/it]

Epoch [2/40], Iter [46/145] Loss: 3.8691
Epoch [2/40], Iter [47/145] Loss: 3.9606
Epoch [2/40], Iter [48/145] Loss: 3.8275


  2%|▎         | 1/40 [00:15<07:21, 11.32s/it]

Epoch [2/40], Iter [49/145] Loss: 3.9996
Epoch [2/40], Iter [50/145] Loss: 3.9745
Epoch [2/40], Iter [51/145] Loss: 3.7067


  2%|▎         | 1/40 [00:15<07:21, 11.32s/it]

Epoch [2/40], Iter [52/145] Loss: 4.0152
Epoch [2/40], Iter [53/145] Loss: 4.0516
Epoch [2/40], Iter [54/145] Loss: 4.0711


  2%|▎         | 1/40 [00:15<07:21, 11.32s/it]

Epoch [2/40], Iter [55/145] Loss: 3.8121
Epoch [2/40], Iter [56/145] Loss: 3.6867
Epoch [2/40], Iter [57/145] Loss: 3.8768


  2%|▎         | 1/40 [00:16<07:21, 11.32s/it]

Epoch [2/40], Iter [58/145] Loss: 3.9105
Epoch [2/40], Iter [59/145] Loss: 3.6187
Epoch [2/40], Iter [60/145] Loss: 3.6936


  2%|▎         | 1/40 [00:16<07:21, 11.32s/it]

Epoch [2/40], Iter [61/145] Loss: 4.2120
Epoch [2/40], Iter [62/145] Loss: 3.8605
Epoch [2/40], Iter [63/145] Loss: 3.6165


  2%|▎         | 1/40 [00:16<07:21, 11.32s/it]

Epoch [2/40], Iter [64/145] Loss: 3.7640
Epoch [2/40], Iter [65/145] Loss: 3.9069
Epoch [2/40], Iter [66/145] Loss: 3.9347


  2%|▎         | 1/40 [00:16<07:21, 11.32s/it]

Epoch [2/40], Iter [67/145] Loss: 4.0242
Epoch [2/40], Iter [68/145] Loss: 3.7684
Epoch [2/40], Iter [69/145] Loss: 3.9511


  2%|▎         | 1/40 [00:16<07:21, 11.32s/it]

Epoch [2/40], Iter [70/145] Loss: 3.7803
Epoch [2/40], Iter [71/145] Loss: 3.7362
Epoch [2/40], Iter [72/145] Loss: 4.0285


  2%|▎         | 1/40 [00:17<07:21, 11.32s/it]

Epoch [2/40], Iter [73/145] Loss: 3.6269
Epoch [2/40], Iter [74/145] Loss: 3.8645
Epoch [2/40], Iter [75/145] Loss: 3.7011


  2%|▎         | 1/40 [00:17<07:21, 11.32s/it]

Epoch [2/40], Iter [76/145] Loss: 3.9666
Epoch [2/40], Iter [77/145] Loss: 3.8353
Epoch [2/40], Iter [78/145] Loss: 3.5723


  2%|▎         | 1/40 [00:17<07:21, 11.32s/it]

Epoch [2/40], Iter [79/145] Loss: 3.7567
Epoch [2/40], Iter [80/145] Loss: 3.8128
Epoch [2/40], Iter [81/145] Loss: 3.8663


  2%|▎         | 1/40 [00:17<07:21, 11.32s/it]

Epoch [2/40], Iter [82/145] Loss: 3.5640
Epoch [2/40], Iter [83/145] Loss: 3.8710
Epoch [2/40], Iter [84/145] Loss: 3.6670


  2%|▎         | 1/40 [00:18<07:21, 11.32s/it]

Epoch [2/40], Iter [85/145] Loss: 3.6496
Epoch [2/40], Iter [86/145] Loss: 3.7292
Epoch [2/40], Iter [87/145] Loss: 3.9407


  2%|▎         | 1/40 [00:18<07:21, 11.32s/it]

Epoch [2/40], Iter [88/145] Loss: 4.0481
Epoch [2/40], Iter [89/145] Loss: 3.7143
Epoch [2/40], Iter [90/145] Loss: 3.7631


  2%|▎         | 1/40 [00:18<07:21, 11.32s/it]

Epoch [2/40], Iter [91/145] Loss: 3.9153
Epoch [2/40], Iter [92/145] Loss: 3.9853
Epoch [2/40], Iter [93/145] Loss: 3.7530


  2%|▎         | 1/40 [00:18<07:21, 11.32s/it]

Epoch [2/40], Iter [94/145] Loss: 3.7840
Epoch [2/40], Iter [95/145] Loss: 3.8668
Epoch [2/40], Iter [96/145] Loss: 3.8846


  2%|▎         | 1/40 [00:19<07:21, 11.32s/it]

Epoch [2/40], Iter [97/145] Loss: 3.9453
Epoch [2/40], Iter [98/145] Loss: 3.7334
Epoch [2/40], Iter [99/145] Loss: 3.9086


  2%|▎         | 1/40 [00:19<07:21, 11.32s/it]

Epoch [2/40], Iter [100/145] Loss: 3.7278
Epoch [2/40], Iter [101/145] Loss: 3.8685
Epoch [2/40], Iter [102/145] Loss: 3.7877


  2%|▎         | 1/40 [00:19<07:21, 11.32s/it]

Epoch [2/40], Iter [103/145] Loss: 3.7322
Epoch [2/40], Iter [104/145] Loss: 3.5281
Epoch [2/40], Iter [105/145] Loss: 3.7052


  2%|▎         | 1/40 [00:19<07:21, 11.32s/it]

Epoch [2/40], Iter [106/145] Loss: 4.0305
Epoch [2/40], Iter [107/145] Loss: 3.5661
Epoch [2/40], Iter [108/145] Loss: 4.1150


  2%|▎         | 1/40 [00:19<07:21, 11.32s/it]

Epoch [2/40], Iter [109/145] Loss: 4.0045
Epoch [2/40], Iter [110/145] Loss: 3.8168
Epoch [2/40], Iter [111/145] Loss: 3.7077


  2%|▎         | 1/40 [00:20<07:21, 11.32s/it]

Epoch [2/40], Iter [112/145] Loss: 3.6181
Epoch [2/40], Iter [113/145] Loss: 3.9369
Epoch [2/40], Iter [114/145] Loss: 4.1095


  2%|▎         | 1/40 [00:20<07:21, 11.32s/it]

Epoch [2/40], Iter [115/145] Loss: 4.1938
Epoch [2/40], Iter [116/145] Loss: 3.7953
Epoch [2/40], Iter [117/145] Loss: 3.7188


  2%|▎         | 1/40 [00:20<07:21, 11.32s/it]

Epoch [2/40], Iter [118/145] Loss: 3.9816
Epoch [2/40], Iter [119/145] Loss: 3.8375
Epoch [2/40], Iter [120/145] Loss: 3.9762


  2%|▎         | 1/40 [00:20<07:21, 11.32s/it]

Epoch [2/40], Iter [121/145] Loss: 3.8373
Epoch [2/40], Iter [122/145] Loss: 3.6825
Epoch [2/40], Iter [123/145] Loss: 3.8713


  2%|▎         | 1/40 [00:21<07:21, 11.32s/it]

Epoch [2/40], Iter [124/145] Loss: 3.7307
Epoch [2/40], Iter [125/145] Loss: 3.7772
Epoch [2/40], Iter [126/145] Loss: 3.8523


  2%|▎         | 1/40 [00:21<07:21, 11.32s/it]

Epoch [2/40], Iter [127/145] Loss: 3.5769
Epoch [2/40], Iter [128/145] Loss: 3.8757
Epoch [2/40], Iter [129/145] Loss: 3.6372


  2%|▎         | 1/40 [00:21<07:21, 11.32s/it]

Epoch [2/40], Iter [130/145] Loss: 4.0288
Epoch [2/40], Iter [131/145] Loss: 3.8370
Epoch [2/40], Iter [132/145] Loss: 3.5832


  2%|▎         | 1/40 [00:21<07:21, 11.32s/it]

Epoch [2/40], Iter [133/145] Loss: 3.6487
Epoch [2/40], Iter [134/145] Loss: 3.4330
Epoch [2/40], Iter [135/145] Loss: 3.8875


  2%|▎         | 1/40 [00:22<07:21, 11.32s/it]

Epoch [2/40], Iter [136/145] Loss: 3.5830
Epoch [2/40], Iter [137/145] Loss: 4.0213
Epoch [2/40], Iter [138/145] Loss: 4.0345


  2%|▎         | 1/40 [00:22<07:21, 11.32s/it]

Epoch [2/40], Iter [139/145] Loss: 3.8319
Epoch [2/40], Iter [140/145] Loss: 3.8020
Epoch [2/40], Iter [141/145] Loss: 3.7149


  2%|▎         | 1/40 [00:22<07:21, 11.32s/it]

Epoch [2/40], Iter [142/145] Loss: 4.0745
Epoch [2/40], Iter [143/145] Loss: 4.0131
Epoch [2/40], Iter [144/145] Loss: 3.7622


  5%|▌         | 2/40 [00:22<07:10, 11.32s/it]

Epoch [2/40], Iter [145/145] Loss: 4.1081


  5%|▌         | 2/40 [00:23<07:10, 11.32s/it]

Epoch [3/40], Iter [1/145] Loss: 3.7170
Epoch [3/40], Iter [2/145] Loss: 3.7065
Epoch [3/40], Iter [3/145] Loss: 3.8565


  5%|▌         | 2/40 [00:23<07:10, 11.32s/it]

Epoch [3/40], Iter [4/145] Loss: 3.8214
Epoch [3/40], Iter [5/145] Loss: 3.9427
Epoch [3/40], Iter [6/145] Loss: 4.0579


  5%|▌         | 2/40 [00:23<07:10, 11.32s/it]

Epoch [3/40], Iter [7/145] Loss: 4.0038
Epoch [3/40], Iter [8/145] Loss: 3.8847
Epoch [3/40], Iter [9/145] Loss: 3.8049


  5%|▌         | 2/40 [00:23<07:10, 11.32s/it]

Epoch [3/40], Iter [10/145] Loss: 3.9203
Epoch [3/40], Iter [11/145] Loss: 3.5071
Epoch [3/40], Iter [12/145] Loss: 3.9330


  5%|▌         | 2/40 [00:24<07:10, 11.32s/it]

Epoch [3/40], Iter [13/145] Loss: 4.1096
Epoch [3/40], Iter [14/145] Loss: 3.7900
Epoch [3/40], Iter [15/145] Loss: 3.9300


  5%|▌         | 2/40 [00:24<07:10, 11.32s/it]

Epoch [3/40], Iter [16/145] Loss: 3.9684
Epoch [3/40], Iter [17/145] Loss: 3.9101
Epoch [3/40], Iter [18/145] Loss: 3.6832


  5%|▌         | 2/40 [00:24<07:10, 11.32s/it]

Epoch [3/40], Iter [19/145] Loss: 3.7599
Epoch [3/40], Iter [20/145] Loss: 3.8422
Epoch [3/40], Iter [21/145] Loss: 3.8181


  5%|▌         | 2/40 [00:24<07:10, 11.32s/it]

Epoch [3/40], Iter [22/145] Loss: 3.8034
Epoch [3/40], Iter [23/145] Loss: 3.9352
Epoch [3/40], Iter [24/145] Loss: 3.9030


  5%|▌         | 2/40 [00:24<07:10, 11.32s/it]

Epoch [3/40], Iter [25/145] Loss: 3.7083
Epoch [3/40], Iter [26/145] Loss: 3.7033
Epoch [3/40], Iter [27/145] Loss: 3.8718


  5%|▌         | 2/40 [00:25<07:10, 11.32s/it]

Epoch [3/40], Iter [28/145] Loss: 3.8598
Epoch [3/40], Iter [29/145] Loss: 3.9296
Epoch [3/40], Iter [30/145] Loss: 3.9317


  5%|▌         | 2/40 [00:25<07:10, 11.32s/it]

Epoch [3/40], Iter [31/145] Loss: 3.8666
Epoch [3/40], Iter [32/145] Loss: 3.8229
Epoch [3/40], Iter [33/145] Loss: 3.8206


  5%|▌         | 2/40 [00:25<07:10, 11.32s/it]

Epoch [3/40], Iter [34/145] Loss: 3.6940
Epoch [3/40], Iter [35/145] Loss: 3.9824
Epoch [3/40], Iter [36/145] Loss: 3.8437


  5%|▌         | 2/40 [00:25<07:10, 11.32s/it]

Epoch [3/40], Iter [37/145] Loss: 3.8680
Epoch [3/40], Iter [38/145] Loss: 3.9246
Epoch [3/40], Iter [39/145] Loss: 3.8539


  5%|▌         | 2/40 [00:26<07:10, 11.32s/it]

Epoch [3/40], Iter [40/145] Loss: 3.8713
Epoch [3/40], Iter [41/145] Loss: 3.8380
Epoch [3/40], Iter [42/145] Loss: 3.8322


  5%|▌         | 2/40 [00:26<07:10, 11.32s/it]

Epoch [3/40], Iter [43/145] Loss: 3.7711
Epoch [3/40], Iter [44/145] Loss: 3.9229
Epoch [3/40], Iter [45/145] Loss: 3.9653


  5%|▌         | 2/40 [00:26<07:10, 11.32s/it]

Epoch [3/40], Iter [46/145] Loss: 3.8460
Epoch [3/40], Iter [47/145] Loss: 3.5756
Epoch [3/40], Iter [48/145] Loss: 3.7754


  5%|▌         | 2/40 [00:26<07:10, 11.32s/it]

Epoch [3/40], Iter [49/145] Loss: 3.5505
Epoch [3/40], Iter [50/145] Loss: 3.9063
Epoch [3/40], Iter [51/145] Loss: 4.1281


  5%|▌         | 2/40 [00:26<07:10, 11.32s/it]

Epoch [3/40], Iter [52/145] Loss: 3.9412
Epoch [3/40], Iter [53/145] Loss: 4.0284
Epoch [3/40], Iter [54/145] Loss: 3.9178


  5%|▌         | 2/40 [00:27<07:10, 11.32s/it]

Epoch [3/40], Iter [55/145] Loss: 3.7873
Epoch [3/40], Iter [56/145] Loss: 3.9257
Epoch [3/40], Iter [57/145] Loss: 4.1037


  5%|▌         | 2/40 [00:27<07:10, 11.32s/it]

Epoch [3/40], Iter [58/145] Loss: 3.7826
Epoch [3/40], Iter [59/145] Loss: 3.8824
Epoch [3/40], Iter [60/145] Loss: 3.9218


  5%|▌         | 2/40 [00:27<07:10, 11.32s/it]

Epoch [3/40], Iter [61/145] Loss: 3.7072
Epoch [3/40], Iter [62/145] Loss: 3.8001
Epoch [3/40], Iter [63/145] Loss: 3.9101


  5%|▌         | 2/40 [00:27<07:10, 11.32s/it]

Epoch [3/40], Iter [64/145] Loss: 3.8780
Epoch [3/40], Iter [65/145] Loss: 3.9615
Epoch [3/40], Iter [66/145] Loss: 3.6992


  5%|▌         | 2/40 [00:28<07:10, 11.32s/it]

Epoch [3/40], Iter [67/145] Loss: 3.8903
Epoch [3/40], Iter [68/145] Loss: 3.5783
Epoch [3/40], Iter [69/145] Loss: 4.0019


  5%|▌         | 2/40 [00:28<07:10, 11.32s/it]

Epoch [3/40], Iter [70/145] Loss: 3.7900
Epoch [3/40], Iter [71/145] Loss: 3.7874
Epoch [3/40], Iter [72/145] Loss: 3.7447


  5%|▌         | 2/40 [00:28<07:10, 11.32s/it]

Epoch [3/40], Iter [73/145] Loss: 3.9136
Epoch [3/40], Iter [74/145] Loss: 3.7534
Epoch [3/40], Iter [75/145] Loss: 3.7266


  5%|▌         | 2/40 [00:28<07:10, 11.32s/it]

Epoch [3/40], Iter [76/145] Loss: 3.8328
Epoch [3/40], Iter [77/145] Loss: 3.9284
Epoch [3/40], Iter [78/145] Loss: 3.9926


  5%|▌         | 2/40 [00:28<07:10, 11.32s/it]

Epoch [3/40], Iter [79/145] Loss: 3.7614
Epoch [3/40], Iter [80/145] Loss: 3.8425
Epoch [3/40], Iter [81/145] Loss: 3.5519


  5%|▌         | 2/40 [00:29<07:10, 11.32s/it]

Epoch [3/40], Iter [82/145] Loss: 3.7111
Epoch [3/40], Iter [83/145] Loss: 4.0207
Epoch [3/40], Iter [84/145] Loss: 3.7886


  5%|▌         | 2/40 [00:29<07:10, 11.32s/it]

Epoch [3/40], Iter [85/145] Loss: 3.9514
Epoch [3/40], Iter [86/145] Loss: 3.8202
Epoch [3/40], Iter [87/145] Loss: 4.0977


  5%|▌         | 2/40 [00:29<07:10, 11.32s/it]

Epoch [3/40], Iter [88/145] Loss: 3.8754
Epoch [3/40], Iter [89/145] Loss: 3.9090
Epoch [3/40], Iter [90/145] Loss: 3.9809


  5%|▌         | 2/40 [00:29<07:10, 11.32s/it]

Epoch [3/40], Iter [91/145] Loss: 3.8770
Epoch [3/40], Iter [92/145] Loss: 3.8452
Epoch [3/40], Iter [93/145] Loss: 3.8104


  5%|▌         | 2/40 [00:30<07:10, 11.32s/it]

Epoch [3/40], Iter [94/145] Loss: 3.9404
Epoch [3/40], Iter [95/145] Loss: 3.8230
Epoch [3/40], Iter [96/145] Loss: 3.9306


  5%|▌         | 2/40 [00:30<07:10, 11.32s/it]

Epoch [3/40], Iter [97/145] Loss: 3.7607
Epoch [3/40], Iter [98/145] Loss: 3.7501
Epoch [3/40], Iter [99/145] Loss: 3.7727


  5%|▌         | 2/40 [00:30<07:10, 11.32s/it]

Epoch [3/40], Iter [100/145] Loss: 3.6103
Epoch [3/40], Iter [101/145] Loss: 3.9256
Epoch [3/40], Iter [102/145] Loss: 3.7910


  5%|▌         | 2/40 [00:30<07:10, 11.32s/it]

Epoch [3/40], Iter [103/145] Loss: 3.6635
Epoch [3/40], Iter [104/145] Loss: 4.0103
Epoch [3/40], Iter [105/145] Loss: 3.7085


  5%|▌         | 2/40 [00:30<07:10, 11.32s/it]

Epoch [3/40], Iter [106/145] Loss: 3.5208
Epoch [3/40], Iter [107/145] Loss: 3.9195
Epoch [3/40], Iter [108/145] Loss: 3.8469


  5%|▌         | 2/40 [00:31<07:10, 11.32s/it]

Epoch [3/40], Iter [109/145] Loss: 3.7570
Epoch [3/40], Iter [110/145] Loss: 3.8251
Epoch [3/40], Iter [111/145] Loss: 3.7729


  5%|▌         | 2/40 [00:31<07:10, 11.32s/it]

Epoch [3/40], Iter [112/145] Loss: 3.7987
Epoch [3/40], Iter [113/145] Loss: 3.7983
Epoch [3/40], Iter [114/145] Loss: 3.7458


  5%|▌         | 2/40 [00:31<07:10, 11.32s/it]

Epoch [3/40], Iter [115/145] Loss: 3.8880
Epoch [3/40], Iter [116/145] Loss: 3.5868
Epoch [3/40], Iter [117/145] Loss: 4.0040


  5%|▌         | 2/40 [00:31<07:10, 11.32s/it]

Epoch [3/40], Iter [118/145] Loss: 3.8182
Epoch [3/40], Iter [119/145] Loss: 3.8369
Epoch [3/40], Iter [120/145] Loss: 3.8021


  5%|▌         | 2/40 [00:32<07:10, 11.32s/it]

Epoch [3/40], Iter [121/145] Loss: 3.7208
Epoch [3/40], Iter [122/145] Loss: 3.9552
Epoch [3/40], Iter [123/145] Loss: 3.9561


  5%|▌         | 2/40 [00:32<07:10, 11.32s/it]

Epoch [3/40], Iter [124/145] Loss: 3.6644
Epoch [3/40], Iter [125/145] Loss: 3.7158
Epoch [3/40], Iter [126/145] Loss: 3.9865


  5%|▌         | 2/40 [00:32<07:10, 11.32s/it]

Epoch [3/40], Iter [127/145] Loss: 3.9290
Epoch [3/40], Iter [128/145] Loss: 4.0351
Epoch [3/40], Iter [129/145] Loss: 3.7790


  5%|▌         | 2/40 [00:32<07:10, 11.32s/it]

Epoch [3/40], Iter [130/145] Loss: 3.8376
Epoch [3/40], Iter [131/145] Loss: 3.9746
Epoch [3/40], Iter [132/145] Loss: 3.8456


  5%|▌         | 2/40 [00:32<07:10, 11.32s/it]

Epoch [3/40], Iter [133/145] Loss: 3.7880
Epoch [3/40], Iter [134/145] Loss: 3.6085
Epoch [3/40], Iter [135/145] Loss: 3.7961


  5%|▌         | 2/40 [00:33<07:10, 11.32s/it]

Epoch [3/40], Iter [136/145] Loss: 4.0078
Epoch [3/40], Iter [137/145] Loss: 3.8140
Epoch [3/40], Iter [138/145] Loss: 3.7543


  5%|▌         | 2/40 [00:33<07:10, 11.32s/it]

Epoch [3/40], Iter [139/145] Loss: 3.6935
Epoch [3/40], Iter [140/145] Loss: 3.6979
Epoch [3/40], Iter [141/145] Loss: 3.7179


  5%|▌         | 2/40 [00:33<07:10, 11.32s/it]

Epoch [3/40], Iter [142/145] Loss: 3.5675
Epoch [3/40], Iter [143/145] Loss: 3.8593
Epoch [3/40], Iter [144/145] Loss: 3.7337


  8%|▊         | 3/40 [00:33<06:56, 11.25s/it]

Epoch [3/40], Iter [145/145] Loss: 4.0261


Exception ignored in: <function _releaseLock at 0x7f91febe29d0>
Traceback (most recent call last):
  File "/home/anubhav/anaconda3/envs/torch/lib/python3.9/logging/__init__.py", line 227, in _releaseLock
    def _releaseLock():
KeyboardInterrupt: 
  8%|▊         | 3/40 [00:34<06:56, 11.25s/it]

Epoch [4/40], Iter [1/145] Loss: 4.0097
Epoch [4/40], Iter [2/145] Loss: 3.5634
Epoch [4/40], Iter [3/145] Loss: 4.0465


  8%|▊         | 3/40 [00:34<06:56, 11.25s/it]

Epoch [4/40], Iter [4/145] Loss: 4.0674
Epoch [4/40], Iter [5/145] Loss: 3.5561
Epoch [4/40], Iter [6/145] Loss: 3.7254


  8%|▊         | 3/40 [00:34<06:56, 11.25s/it]

Epoch [4/40], Iter [7/145] Loss: 3.7628
Epoch [4/40], Iter [8/145] Loss: 3.7308
Epoch [4/40], Iter [9/145] Loss: 3.6739


  8%|▊         | 3/40 [00:34<06:56, 11.25s/it]

Epoch [4/40], Iter [10/145] Loss: 3.7432
Epoch [4/40], Iter [11/145] Loss: 3.7725
Epoch [4/40], Iter [12/145] Loss: 3.8529


  8%|▊         | 3/40 [00:35<06:56, 11.25s/it]

Epoch [4/40], Iter [13/145] Loss: 3.9385
Epoch [4/40], Iter [14/145] Loss: 4.1153
Epoch [4/40], Iter [15/145] Loss: 3.5894


  8%|▊         | 3/40 [00:35<06:56, 11.25s/it]

Epoch [4/40], Iter [16/145] Loss: 3.9518
Epoch [4/40], Iter [17/145] Loss: 3.6946
Epoch [4/40], Iter [18/145] Loss: 3.7750


  8%|▊         | 3/40 [00:35<06:56, 11.25s/it]

Epoch [4/40], Iter [19/145] Loss: 3.8294
Epoch [4/40], Iter [20/145] Loss: 4.0127
Epoch [4/40], Iter [21/145] Loss: 3.9833


  8%|▊         | 3/40 [00:35<06:56, 11.25s/it]

Epoch [4/40], Iter [22/145] Loss: 3.9968
Epoch [4/40], Iter [23/145] Loss: 3.8528
Epoch [4/40], Iter [24/145] Loss: 3.9569


  8%|▊         | 3/40 [00:36<06:56, 11.25s/it]

Epoch [4/40], Iter [25/145] Loss: 3.8984
Epoch [4/40], Iter [26/145] Loss: 4.0229
Epoch [4/40], Iter [27/145] Loss: 3.9333


  8%|▊         | 3/40 [00:36<06:56, 11.25s/it]

Epoch [4/40], Iter [28/145] Loss: 4.0018
Epoch [4/40], Iter [29/145] Loss: 3.5781
Epoch [4/40], Iter [30/145] Loss: 3.6637


  8%|▊         | 3/40 [00:36<06:56, 11.25s/it]

Epoch [4/40], Iter [31/145] Loss: 3.9236
Epoch [4/40], Iter [32/145] Loss: 3.9246
Epoch [4/40], Iter [33/145] Loss: 3.9048


  8%|▊         | 3/40 [00:36<06:56, 11.25s/it]

Epoch [4/40], Iter [34/145] Loss: 3.7018
Epoch [4/40], Iter [35/145] Loss: 3.8235
Epoch [4/40], Iter [36/145] Loss: 3.7798


  8%|▊         | 3/40 [00:37<06:56, 11.25s/it]

Epoch [4/40], Iter [37/145] Loss: 3.8235
Epoch [4/40], Iter [38/145] Loss: 4.0970
Epoch [4/40], Iter [39/145] Loss: 3.8545


  8%|▊         | 3/40 [00:37<06:56, 11.25s/it]

Epoch [4/40], Iter [40/145] Loss: 4.0307
Epoch [4/40], Iter [41/145] Loss: 3.9008
Epoch [4/40], Iter [42/145] Loss: 3.9474


  8%|▊         | 3/40 [00:37<06:56, 11.25s/it]

Epoch [4/40], Iter [43/145] Loss: 3.7455
Epoch [4/40], Iter [44/145] Loss: 3.5867
Epoch [4/40], Iter [45/145] Loss: 3.6303


  8%|▊         | 3/40 [00:37<06:56, 11.25s/it]

Epoch [4/40], Iter [46/145] Loss: 4.0340
Epoch [4/40], Iter [47/145] Loss: 3.9053
Epoch [4/40], Iter [48/145] Loss: 4.0950


  8%|▊         | 3/40 [00:38<06:56, 11.25s/it]

Epoch [4/40], Iter [49/145] Loss: 3.8609
Epoch [4/40], Iter [50/145] Loss: 3.8425
Epoch [4/40], Iter [51/145] Loss: 3.6067


  8%|▊         | 3/40 [00:38<06:56, 11.25s/it]

Epoch [4/40], Iter [52/145] Loss: 3.8335
Epoch [4/40], Iter [53/145] Loss: 3.7398
Epoch [4/40], Iter [54/145] Loss: 3.6529


  8%|▊         | 3/40 [00:38<06:56, 11.25s/it]

Epoch [4/40], Iter [55/145] Loss: 3.5941
Epoch [4/40], Iter [56/145] Loss: 3.8052
Epoch [4/40], Iter [57/145] Loss: 3.9585


  8%|▊         | 3/40 [00:38<06:56, 11.25s/it]

Epoch [4/40], Iter [58/145] Loss: 3.8952
Epoch [4/40], Iter [59/145] Loss: 3.7712
Epoch [4/40], Iter [60/145] Loss: 3.6493


  8%|▊         | 3/40 [00:39<06:56, 11.25s/it]

Epoch [4/40], Iter [61/145] Loss: 3.6332
Epoch [4/40], Iter [62/145] Loss: 4.0156
Epoch [4/40], Iter [63/145] Loss: 3.9570


  8%|▊         | 3/40 [00:39<06:56, 11.25s/it]

Epoch [4/40], Iter [64/145] Loss: 4.0182
Epoch [4/40], Iter [65/145] Loss: 3.6986
Epoch [4/40], Iter [66/145] Loss: 3.6744


  8%|▊         | 3/40 [00:39<06:56, 11.25s/it]

Epoch [4/40], Iter [67/145] Loss: 3.8890
Epoch [4/40], Iter [68/145] Loss: 3.7515
Epoch [4/40], Iter [69/145] Loss: 3.6661


  8%|▊         | 3/40 [00:39<06:56, 11.25s/it]

Epoch [4/40], Iter [70/145] Loss: 3.7672
Epoch [4/40], Iter [71/145] Loss: 3.8529
Epoch [4/40], Iter [72/145] Loss: 3.7520


  8%|▊         | 3/40 [00:40<06:56, 11.25s/it]

Epoch [4/40], Iter [73/145] Loss: 3.7784
Epoch [4/40], Iter [74/145] Loss: 3.8185
Epoch [4/40], Iter [75/145] Loss: 3.8721


  8%|▊         | 3/40 [00:40<06:56, 11.25s/it]

Epoch [4/40], Iter [76/145] Loss: 3.7795
Epoch [4/40], Iter [77/145] Loss: 3.8667
Epoch [4/40], Iter [78/145] Loss: 3.6676


  8%|▊         | 3/40 [00:40<06:56, 11.25s/it]

Epoch [4/40], Iter [79/145] Loss: 4.1227
Epoch [4/40], Iter [80/145] Loss: 3.7954
Epoch [4/40], Iter [81/145] Loss: 3.6836


  8%|▊         | 3/40 [00:40<06:56, 11.25s/it]

Epoch [4/40], Iter [82/145] Loss: 3.7510
Epoch [4/40], Iter [83/145] Loss: 3.8437
Epoch [4/40], Iter [84/145] Loss: 3.6625


  8%|▊         | 3/40 [00:41<06:56, 11.25s/it]

Epoch [4/40], Iter [85/145] Loss: 3.5549
Epoch [4/40], Iter [86/145] Loss: 4.0240
Epoch [4/40], Iter [87/145] Loss: 3.5628


  8%|▊         | 3/40 [00:41<06:56, 11.25s/it]

Epoch [4/40], Iter [88/145] Loss: 3.8680
Epoch [4/40], Iter [89/145] Loss: 3.8409
Epoch [4/40], Iter [90/145] Loss: 4.0047


  8%|▊         | 3/40 [00:41<06:56, 11.25s/it]

Epoch [4/40], Iter [91/145] Loss: 3.8424
Epoch [4/40], Iter [92/145] Loss: 3.6068
Epoch [4/40], Iter [93/145] Loss: 3.7788


  8%|▊         | 3/40 [00:41<06:56, 11.25s/it]

Epoch [4/40], Iter [94/145] Loss: 4.1633
Epoch [4/40], Iter [95/145] Loss: 3.9686
Epoch [4/40], Iter [96/145] Loss: 3.9152


  8%|▊         | 3/40 [00:42<06:56, 11.25s/it]

Epoch [4/40], Iter [97/145] Loss: 3.8405
Epoch [4/40], Iter [98/145] Loss: 3.9045
Epoch [4/40], Iter [99/145] Loss: 4.2008


  8%|▊         | 3/40 [00:42<06:56, 11.25s/it]

Epoch [4/40], Iter [100/145] Loss: 3.8179
Epoch [4/40], Iter [101/145] Loss: 3.9133
Epoch [4/40], Iter [102/145] Loss: 3.6686


  8%|▊         | 3/40 [00:42<06:56, 11.25s/it]

Epoch [4/40], Iter [103/145] Loss: 3.6887
Epoch [4/40], Iter [104/145] Loss: 4.1610
Epoch [4/40], Iter [105/145] Loss: 3.8093


  8%|▊         | 3/40 [00:42<06:56, 11.25s/it]

Epoch [4/40], Iter [106/145] Loss: 4.0077
Epoch [4/40], Iter [107/145] Loss: 3.7307
Epoch [4/40], Iter [108/145] Loss: 3.6331


  8%|▊         | 3/40 [00:42<06:56, 11.25s/it]

Epoch [4/40], Iter [109/145] Loss: 3.7673
Epoch [4/40], Iter [110/145] Loss: 3.8413
Epoch [4/40], Iter [111/145] Loss: 3.8220


  8%|▊         | 3/40 [00:43<06:56, 11.25s/it]

Epoch [4/40], Iter [112/145] Loss: 3.8124
Epoch [4/40], Iter [113/145] Loss: 4.0057
Epoch [4/40], Iter [114/145] Loss: 3.9798


  8%|▊         | 3/40 [00:43<06:56, 11.25s/it]

Epoch [4/40], Iter [115/145] Loss: 3.8728
Epoch [4/40], Iter [116/145] Loss: 3.6164
Epoch [4/40], Iter [117/145] Loss: 3.9595


  8%|▊         | 3/40 [00:43<06:56, 11.25s/it]

Epoch [4/40], Iter [118/145] Loss: 4.0537
Epoch [4/40], Iter [119/145] Loss: 4.0000
Epoch [4/40], Iter [120/145] Loss: 3.7830


  8%|▊         | 3/40 [00:43<06:56, 11.25s/it]

Epoch [4/40], Iter [121/145] Loss: 3.9217
Epoch [4/40], Iter [122/145] Loss: 4.0975
Epoch [4/40], Iter [123/145] Loss: 4.0012


  8%|▊         | 3/40 [00:44<06:56, 11.25s/it]

Epoch [4/40], Iter [124/145] Loss: 4.0101
Epoch [4/40], Iter [125/145] Loss: 3.9226
Epoch [4/40], Iter [126/145] Loss: 3.9344


  8%|▊         | 3/40 [00:44<06:56, 11.25s/it]

Epoch [4/40], Iter [127/145] Loss: 3.8148
Epoch [4/40], Iter [128/145] Loss: 3.9343
Epoch [4/40], Iter [129/145] Loss: 3.8706


  8%|▊         | 3/40 [00:44<06:56, 11.25s/it]

Epoch [4/40], Iter [130/145] Loss: 3.7623
Epoch [4/40], Iter [131/145] Loss: 3.6922
Epoch [4/40], Iter [132/145] Loss: 3.6494


  8%|▊         | 3/40 [00:44<06:56, 11.25s/it]

Epoch [4/40], Iter [133/145] Loss: 3.9558
Epoch [4/40], Iter [134/145] Loss: 3.7424
Epoch [4/40], Iter [135/145] Loss: 3.4986


  8%|▊         | 3/40 [00:45<06:56, 11.25s/it]

Epoch [4/40], Iter [136/145] Loss: 4.0030
Epoch [4/40], Iter [137/145] Loss: 3.7399
Epoch [4/40], Iter [138/145] Loss: 3.4127


  8%|▊         | 3/40 [00:45<06:56, 11.25s/it]

Epoch [4/40], Iter [139/145] Loss: 3.7818
Epoch [4/40], Iter [140/145] Loss: 3.9451
Epoch [4/40], Iter [141/145] Loss: 3.6385


  8%|▊         | 3/40 [00:45<06:56, 11.25s/it]

Epoch [4/40], Iter [142/145] Loss: 3.7821
Epoch [4/40], Iter [143/145] Loss: 4.0426
Epoch [4/40], Iter [144/145] Loss: 3.7346


 10%|█         | 4/40 [00:45<06:54, 11.52s/it]

Epoch [4/40], Iter [145/145] Loss: 3.7662


 10%|█         | 4/40 [00:46<06:54, 11.52s/it]

Epoch [5/40], Iter [1/145] Loss: 4.0589
Epoch [5/40], Iter [2/145] Loss: 3.6996
Epoch [5/40], Iter [3/145] Loss: 4.0349


 10%|█         | 4/40 [00:46<06:54, 11.52s/it]

Epoch [5/40], Iter [4/145] Loss: 3.8773
Epoch [5/40], Iter [5/145] Loss: 3.9111
Epoch [5/40], Iter [6/145] Loss: 3.8756


 10%|█         | 4/40 [00:46<06:54, 11.52s/it]

Epoch [5/40], Iter [7/145] Loss: 3.7131
Epoch [5/40], Iter [8/145] Loss: 3.6983
Epoch [5/40], Iter [9/145] Loss: 3.6903


 10%|█         | 4/40 [00:46<06:54, 11.52s/it]

Epoch [5/40], Iter [10/145] Loss: 3.6196
Epoch [5/40], Iter [11/145] Loss: 3.6045
Epoch [5/40], Iter [12/145] Loss: 3.8214


 10%|█         | 4/40 [00:47<06:54, 11.52s/it]

Epoch [5/40], Iter [13/145] Loss: 3.8095
Epoch [5/40], Iter [14/145] Loss: 3.9185
Epoch [5/40], Iter [15/145] Loss: 3.9234


 10%|█         | 4/40 [00:47<06:54, 11.52s/it]

Epoch [5/40], Iter [16/145] Loss: 4.0226
Epoch [5/40], Iter [17/145] Loss: 3.9620
Epoch [5/40], Iter [18/145] Loss: 3.9608


 10%|█         | 4/40 [00:47<06:54, 11.52s/it]

Epoch [5/40], Iter [19/145] Loss: 3.6889
Epoch [5/40], Iter [20/145] Loss: 3.9672
Epoch [5/40], Iter [21/145] Loss: 3.8006


 10%|█         | 4/40 [00:47<06:54, 11.52s/it]

Epoch [5/40], Iter [22/145] Loss: 4.1092
Epoch [5/40], Iter [23/145] Loss: 3.9583
Epoch [5/40], Iter [24/145] Loss: 3.6923


 10%|█         | 4/40 [00:48<06:54, 11.52s/it]

Epoch [5/40], Iter [25/145] Loss: 3.8896
Epoch [5/40], Iter [26/145] Loss: 3.9435
Epoch [5/40], Iter [27/145] Loss: 3.7954


 10%|█         | 4/40 [00:48<06:54, 11.52s/it]

Epoch [5/40], Iter [28/145] Loss: 3.8436
Epoch [5/40], Iter [29/145] Loss: 3.6829
Epoch [5/40], Iter [30/145] Loss: 3.8445


 10%|█         | 4/40 [00:48<06:54, 11.52s/it]

Epoch [5/40], Iter [31/145] Loss: 3.8311
Epoch [5/40], Iter [32/145] Loss: 3.9957
Epoch [5/40], Iter [33/145] Loss: 3.8716


 10%|█         | 4/40 [00:48<06:54, 11.52s/it]

Epoch [5/40], Iter [34/145] Loss: 3.8568
Epoch [5/40], Iter [35/145] Loss: 4.2020
Epoch [5/40], Iter [36/145] Loss: 3.7930


 10%|█         | 4/40 [00:49<06:54, 11.52s/it]

Epoch [5/40], Iter [37/145] Loss: 3.7594
Epoch [5/40], Iter [38/145] Loss: 3.7128
Epoch [5/40], Iter [39/145] Loss: 3.8910


 10%|█         | 4/40 [00:49<06:54, 11.52s/it]

Epoch [5/40], Iter [40/145] Loss: 3.8189
Epoch [5/40], Iter [41/145] Loss: 3.4435
Epoch [5/40], Iter [42/145] Loss: 4.2510


 10%|█         | 4/40 [00:49<06:54, 11.52s/it]

Epoch [5/40], Iter [43/145] Loss: 3.5929
Epoch [5/40], Iter [44/145] Loss: 3.8180
Epoch [5/40], Iter [45/145] Loss: 3.6954


 10%|█         | 4/40 [00:49<06:54, 11.52s/it]

Epoch [5/40], Iter [46/145] Loss: 3.8275
Epoch [5/40], Iter [47/145] Loss: 3.8991
Epoch [5/40], Iter [48/145] Loss: 3.8240


 10%|█         | 4/40 [00:50<06:54, 11.52s/it]

Epoch [5/40], Iter [49/145] Loss: 3.7891
Epoch [5/40], Iter [50/145] Loss: 3.6125
Epoch [5/40], Iter [51/145] Loss: 3.8325


 10%|█         | 4/40 [00:50<06:54, 11.52s/it]

Epoch [5/40], Iter [52/145] Loss: 4.1198
Epoch [5/40], Iter [53/145] Loss: 3.8447
Epoch [5/40], Iter [54/145] Loss: 3.8472


 10%|█         | 4/40 [00:50<06:54, 11.52s/it]

Epoch [5/40], Iter [55/145] Loss: 3.8145
Epoch [5/40], Iter [56/145] Loss: 4.1437
Epoch [5/40], Iter [57/145] Loss: 3.7243


 10%|█         | 4/40 [00:50<06:54, 11.52s/it]

Epoch [5/40], Iter [58/145] Loss: 3.8507
Epoch [5/40], Iter [59/145] Loss: 3.6471
Epoch [5/40], Iter [60/145] Loss: 4.0803


 10%|█         | 4/40 [00:51<06:54, 11.52s/it]