In [2]:
# NOTE(review): this imported `Model` is shadowed by the `class Model` defined
# later in this notebook, so the version from model.py is never the one used below.
from model import Model
import torch
# cuDNN autotuning flag; NOTE(review): `device` below is "cpu", so this likely has no effect here.
torch.backends.cudnn.benchmark=True
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.models as models
import torchvision.transforms as transforms
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
import argparse
import time
import numpy as np
import subprocess
from numpy import random
import copy
# Global device string read by the training loop and by `model.to(device)` in later cells.
device="cpu"
from tqdm import tqdm

# import matplotlib.pyplot as plt
# Project-local dataset class used to build the train/test sets.
from data_loader import genomics_data

In [3]:
def kaiming_normal_init(m):
	"""Kaiming-normal initialise the weight of a convolution or linear layer.

	Meant to be passed to ``nn.Module.apply``. Modules of any other type are
	left untouched, and biases are never modified.

	Args:
		m: module to (possibly) re-initialise in place.
	"""
	# Match every convolution dimensionality. The previous Conv2d-only check
	# never fired for Conv1d layers, which therefore kept PyTorch's default init.
	if isinstance(m, (nn.Conv1d, nn.Conv2d, nn.Conv3d)):
		nn.init.kaiming_normal_(m.weight, nonlinearity='relu')
	elif isinstance(m, nn.Linear):
		# NOTE(review): 'sigmoid' selects gain 1.0; if the linear layer feeds a
		# ReLU, 'relu' may be the intended gain -- confirm before changing.
		nn.init.kaiming_normal_(m.weight, nonlinearity='sigmoid')


class Model(nn.Module):
	"""1-D convolutional classifier with a built-in SGD training loop.

	The feature extractor is two ``Conv1d`` + ReLU stages followed by a
	flatten and a 12258 -> 1024 linear layer; ``fc`` maps the 1024 features
	to class logits. With kernel size 6, stride 1 and no padding each conv
	shortens the sequence by 5, so the hard-coded 12258 (= 3 channels * 4086)
	implies inputs of shape ``(batch, 1, 4096)``.
	"""

	def __init__(self, classes):
		"""Build the network and store the training hyper-parameters.

		Args:
			classes: number of output units of the final linear layer.
		"""
		# Initialise nn.Module before assigning any attribute; assigning first
		# only works by accident for plain Python values.
		super(Model, self).__init__()

		# Hyper Parameters
		# NOTE(review): the training log saved in this notebook plateaus around
		# 3.8 after an early spike; 0.1 may be too aggressive -- consider tuning.
		self.init_lr = 0.1
		self.num_epochs = 40
		self.batch_size = 48

		self.pretrained = False
		self.momentum = 0.9
		self.weight_decay = 0.0001
		# Constant to provide numerical stability while normalizing
		self.epsilon = 1e-16

		# Network architecture
		self.feature_extractor = nn.Sequential(
			nn.Conv1d(1, 6, kernel_size=6),
			nn.ReLU(),
			nn.Conv1d(6, 3, kernel_size=6),
			nn.ReLU(),
			nn.Flatten(),
			nn.Linear(12258, 1024),
			nn.ReLU(),
		)
		self.feature_extractor.apply(kaiming_normal_init)

		# Classification head; it is created after apply(), so it keeps
		# PyTorch's default initialisation.
		self.fc = nn.Linear(1024, classes, bias=False)

		# Not read anywhere in this class; kept for external code that may use it.
		self.n_classes = 0

	def forward(self, x):
		"""Return class logits for a batch.

		Args:
			x: input batch accepted by ``feature_extractor``.
		Returns:
			Tensor of shape (batch_size, classes) with unnormalised scores.
		"""
		x = self.feature_extractor(x)
		# No-op for the 2-D output of the current extractor; guards against an
		# extractor that returns extra trailing dimensions.
		x = x.view(x.size(0), -1)
		x = self.fc(x)
		return x

	def classify(self, images):
		"""Predict the most likely class index for each sample.

		Args:
			images: input batch, same format as for ``forward``.
		Returns:
			preds: LongTensor of size (batch_size,) with the arg-max class.
		"""
		# Inference only: do not record an autograd graph.
		with torch.no_grad():
			logits = self(images)
		# Softmax is monotonic, so the arg-max of the logits is the arg-max of
		# the softmax probabilities; skipping it avoids a needless pass.
		_, preds = torch.max(logits, dim=1, keepdim=False)

		return preds

	def update(self, dataset, class_map, log_every=1):
		"""Train the network on ``dataset`` with SGD and cross-entropy loss.

		Args:
			dataset: map-style dataset whose items are
				``(index, input, label)`` triples.
			class_map: indexable (dict, list or array) translating each raw
				label to the target index in ``[0, classes)``.
			log_every: print the loss every ``log_every`` iterations
				(default 1, i.e. every iteration); 0 or None disables it.
		"""
		# Not read anywhere in this class; kept for external code that may use it.
		self.compute_means = True

		loader = torch.utils.data.DataLoader(dataset, batch_size=self.batch_size,
											   shuffle=True, num_workers=8)

		# The value printed is the number of samples in `dataset`.
		print("Batch Size (for n_classes classes) : ", len(dataset))
		optimizer = optim.SGD(self.parameters(), lr=self.init_lr,
							  momentum=self.momentum, weight_decay=self.weight_decay)
		criterion = nn.CrossEntropyLoss()

		# Follow the model's own device rather than a notebook-level global, so
		# batches always land where the parameters live.
		target_device = next(self.parameters()).device
		iters_per_epoch = int(np.ceil(len(dataset) / self.batch_size))

		self.train()
		with tqdm(total=self.num_epochs) as pbar:
			for epoch in range(self.num_epochs):

				for i, (_indices, images, labels) in enumerate(loader):
					images = torch.as_tensor(images, dtype=torch.float32, device=target_device)
					labels = torch.tensor([class_map[label] for label in labels.numpy()],
										  dtype=torch.long, device=target_device)

					optimizer.zero_grad()
					logits = self(images)
					loss = criterion(logits, labels)
					loss.backward()
					optimizer.step()

					if log_every and (i + 1) % log_every == 0:
						tqdm.write('Epoch [%d/%d], Iter [%d/%d] Loss: %.4f'
							   % (epoch + 1, self.num_epochs, i + 1, iters_per_epoch, loss.item()))

				pbar.update(1)

In [4]:
# Single source of truth for the class count: it sizes the classifier head and
# builds the label array handed to genomics_data and model.update.
NUM_CLASSES = 66

model = Model(NUM_CLASSES)
model.to(device)
# np.arange doubles as the class map in model.update: indexing it with a label
# returns the same label, i.e. an identity mapping.
all_classes = np.arange(NUM_CLASSES)

train_set = genomics_data(train=True,classes=all_classes)
# Reuse the model's batch size instead of repeating the literal 48.
# model.update builds its own DataLoader from train_set, so train_loader is not
# consumed by the call below.
train_loader = torch.utils.data.DataLoader(train_set, batch_size=model.batch_size,
													shuffle=True, num_workers=8)

test_set = genomics_data(train=False,classes=all_classes)

test_loader = torch.utils.data.DataLoader(test_set, batch_size=model.batch_size,
                                            shuffle=False, num_workers=8)


model.update(train_set, all_classes)

Batch Size (for n_classes classes) :  6947


  0%|          | 0/40 [00:00<?, ?it/s]

Epoch [1/40], Iter [1/145] Loss: 4.2125
Epoch [1/40], Iter [2/145] Loss: 4.0463
Epoch [1/40], Iter [3/145] Loss: 3.4918


  0%|          | 0/40 [00:00<?, ?it/s]

Epoch [1/40], Iter [4/145] Loss: 8.5089
Epoch [1/40], Iter [5/145] Loss: 4.1945
Epoch [1/40], Iter [6/145] Loss: 4.2024


  0%|          | 0/40 [00:00<?, ?it/s]

Epoch [1/40], Iter [7/145] Loss: 4.1974
Epoch [1/40], Iter [8/145] Loss: 4.1923
Epoch [1/40], Iter [9/145] Loss: 4.1906


  0%|          | 0/40 [00:01<?, ?it/s]

Epoch [1/40], Iter [10/145] Loss: 4.1889
Epoch [1/40], Iter [11/145] Loss: 4.1872
Epoch [1/40], Iter [12/145] Loss: 4.1881


  0%|          | 0/40 [00:01<?, ?it/s]

Epoch [1/40], Iter [13/145] Loss: 4.1831
Epoch [1/40], Iter [14/145] Loss: 4.1862
Epoch [1/40], Iter [15/145] Loss: 4.1833


  0%|          | 0/40 [00:01<?, ?it/s]

Epoch [1/40], Iter [16/145] Loss: 4.1833
Epoch [1/40], Iter [17/145] Loss: 4.1796
Epoch [1/40], Iter [18/145] Loss: 4.1803


  0%|          | 0/40 [00:01<?, ?it/s]

Epoch [1/40], Iter [19/145] Loss: 4.1739
Epoch [1/40], Iter [20/145] Loss: 4.1822
Epoch [1/40], Iter [21/145] Loss: 4.1733


  0%|          | 0/40 [00:01<?, ?it/s]

Epoch [1/40], Iter [22/145] Loss: 4.1776
Epoch [1/40], Iter [23/145] Loss: 4.1503
Epoch [1/40], Iter [24/145] Loss: 4.1789


  0%|          | 0/40 [00:02<?, ?it/s]

Epoch [1/40], Iter [25/145] Loss: 4.1456
Epoch [1/40], Iter [26/145] Loss: 4.1657
Epoch [1/40], Iter [27/145] Loss: 4.1490


  0%|          | 0/40 [00:02<?, ?it/s]

Epoch [1/40], Iter [28/145] Loss: 4.1443
Epoch [1/40], Iter [29/145] Loss: 4.1241
Epoch [1/40], Iter [30/145] Loss: 4.0941


  0%|          | 0/40 [00:02<?, ?it/s]

Epoch [1/40], Iter [31/145] Loss: 4.0659
Epoch [1/40], Iter [32/145] Loss: 4.1345
Epoch [1/40], Iter [33/145] Loss: 4.0920


  0%|          | 0/40 [00:02<?, ?it/s]

Epoch [1/40], Iter [34/145] Loss: 4.0813
Epoch [1/40], Iter [35/145] Loss: 4.0013
Epoch [1/40], Iter [36/145] Loss: 3.9548


  0%|          | 0/40 [00:03<?, ?it/s]

Epoch [1/40], Iter [37/145] Loss: 3.9267
Epoch [1/40], Iter [38/145] Loss: 3.9794
Epoch [1/40], Iter [39/145] Loss: 3.9256


  0%|          | 0/40 [00:03<?, ?it/s]

Epoch [1/40], Iter [40/145] Loss: 3.7475
Epoch [1/40], Iter [41/145] Loss: 3.9238
Epoch [1/40], Iter [42/145] Loss: 3.8157


  0%|          | 0/40 [00:03<?, ?it/s]

Epoch [1/40], Iter [43/145] Loss: 3.9770
Epoch [1/40], Iter [44/145] Loss: 4.0178
Epoch [1/40], Iter [45/145] Loss: 4.0288


  0%|          | 0/40 [00:03<?, ?it/s]

Epoch [1/40], Iter [46/145] Loss: 3.7720
Epoch [1/40], Iter [47/145] Loss: 3.9248
Epoch [1/40], Iter [48/145] Loss: 3.7649


  0%|          | 0/40 [00:04<?, ?it/s]

Epoch [1/40], Iter [49/145] Loss: 4.1118
Epoch [1/40], Iter [50/145] Loss: 3.9860
Epoch [1/40], Iter [51/145] Loss: 3.9436


  0%|          | 0/40 [00:04<?, ?it/s]

Epoch [1/40], Iter [52/145] Loss: 3.8041
Epoch [1/40], Iter [53/145] Loss: 3.8189
Epoch [1/40], Iter [54/145] Loss: 3.8037


  0%|          | 0/40 [00:04<?, ?it/s]

Epoch [1/40], Iter [55/145] Loss: 3.7042
Epoch [1/40], Iter [56/145] Loss: 3.6185
Epoch [1/40], Iter [57/145] Loss: 3.8268


  0%|          | 0/40 [00:04<?, ?it/s]

Epoch [1/40], Iter [58/145] Loss: 3.6658
Epoch [1/40], Iter [59/145] Loss: 3.8022
Epoch [1/40], Iter [60/145] Loss: 3.8664


  0%|          | 0/40 [00:05<?, ?it/s]

Epoch [1/40], Iter [61/145] Loss: 3.7623
Epoch [1/40], Iter [62/145] Loss: 4.2333
Epoch [1/40], Iter [63/145] Loss: 4.0211


  0%|          | 0/40 [00:05<?, ?it/s]

Epoch [1/40], Iter [64/145] Loss: 3.9838
Epoch [1/40], Iter [65/145] Loss: 3.7170
Epoch [1/40], Iter [66/145] Loss: 3.6162


  0%|          | 0/40 [00:05<?, ?it/s]

Epoch [1/40], Iter [67/145] Loss: 3.6225
Epoch [1/40], Iter [68/145] Loss: 3.6898
Epoch [1/40], Iter [69/145] Loss: 3.9669


  0%|          | 0/40 [00:05<?, ?it/s]

Epoch [1/40], Iter [70/145] Loss: 3.9336
Epoch [1/40], Iter [71/145] Loss: 4.1694
Epoch [1/40], Iter [72/145] Loss: 3.6277


  0%|          | 0/40 [00:06<?, ?it/s]

Epoch [1/40], Iter [73/145] Loss: 4.0223
Epoch [1/40], Iter [74/145] Loss: 3.8795
Epoch [1/40], Iter [75/145] Loss: 3.8521


  0%|          | 0/40 [00:06<?, ?it/s]

Epoch [1/40], Iter [76/145] Loss: 3.7358
Epoch [1/40], Iter [77/145] Loss: 4.0366
Epoch [1/40], Iter [78/145] Loss: 3.9832


  0%|          | 0/40 [00:06<?, ?it/s]

Epoch [1/40], Iter [79/145] Loss: 3.7721
Epoch [1/40], Iter [80/145] Loss: 3.7714
Epoch [1/40], Iter [81/145] Loss: 3.8300


  0%|          | 0/40 [00:06<?, ?it/s]

Epoch [1/40], Iter [82/145] Loss: 3.9466
Epoch [1/40], Iter [83/145] Loss: 3.8553
Epoch [1/40], Iter [84/145] Loss: 3.9269


  0%|          | 0/40 [00:06<?, ?it/s]

Epoch [1/40], Iter [85/145] Loss: 3.7481
Epoch [1/40], Iter [86/145] Loss: 3.9072
Epoch [1/40], Iter [87/145] Loss: 3.8906


  0%|          | 0/40 [00:07<?, ?it/s]

Epoch [1/40], Iter [88/145] Loss: 3.8591
Epoch [1/40], Iter [89/145] Loss: 4.0057
Epoch [1/40], Iter [90/145] Loss: 3.8686


  0%|          | 0/40 [00:07<?, ?it/s]

Epoch [1/40], Iter [91/145] Loss: 3.9763
Epoch [1/40], Iter [92/145] Loss: 3.8127
Epoch [1/40], Iter [93/145] Loss: 3.9526


  0%|          | 0/40 [00:07<?, ?it/s]

Epoch [1/40], Iter [94/145] Loss: 4.0031
Epoch [1/40], Iter [95/145] Loss: 4.0179
Epoch [1/40], Iter [96/145] Loss: 3.8328


  0%|          | 0/40 [00:07<?, ?it/s]

Epoch [1/40], Iter [97/145] Loss: 3.8907
Epoch [1/40], Iter [98/145] Loss: 4.0342
Epoch [1/40], Iter [99/145] Loss: 4.0953


  0%|          | 0/40 [00:08<?, ?it/s]

Epoch [1/40], Iter [100/145] Loss: 3.8852
Epoch [1/40], Iter [101/145] Loss: 3.9822
Epoch [1/40], Iter [102/145] Loss: 3.9966


  0%|          | 0/40 [00:08<?, ?it/s]

Epoch [1/40], Iter [103/145] Loss: 3.9414
Epoch [1/40], Iter [104/145] Loss: 3.8726
Epoch [1/40], Iter [105/145] Loss: 4.0268


  0%|          | 0/40 [00:08<?, ?it/s]

Epoch [1/40], Iter [106/145] Loss: 3.7026
Epoch [1/40], Iter [107/145] Loss: 3.8489
Epoch [1/40], Iter [108/145] Loss: 3.9552


  0%|          | 0/40 [00:08<?, ?it/s]

Epoch [1/40], Iter [109/145] Loss: 3.5893
Epoch [1/40], Iter [110/145] Loss: 3.7323
Epoch [1/40], Iter [111/145] Loss: 3.7654


  0%|          | 0/40 [00:09<?, ?it/s]

Epoch [1/40], Iter [112/145] Loss: 3.8780
Epoch [1/40], Iter [113/145] Loss: 3.7978
Epoch [1/40], Iter [114/145] Loss: 3.8177


  0%|          | 0/40 [00:09<?, ?it/s]

Epoch [1/40], Iter [115/145] Loss: 3.9401
Epoch [1/40], Iter [116/145] Loss: 3.6073
Epoch [1/40], Iter [117/145] Loss: 4.0039


  0%|          | 0/40 [00:09<?, ?it/s]

Epoch [1/40], Iter [118/145] Loss: 3.8758
Epoch [1/40], Iter [119/145] Loss: 3.6840
Epoch [1/40], Iter [120/145] Loss: 3.5938


  0%|          | 0/40 [00:09<?, ?it/s]

Epoch [1/40], Iter [121/145] Loss: 3.7513
Epoch [1/40], Iter [122/145] Loss: 3.9865
Epoch [1/40], Iter [123/145] Loss: 4.0137


  0%|          | 0/40 [00:09<?, ?it/s]

Epoch [1/40], Iter [124/145] Loss: 3.7298
Epoch [1/40], Iter [125/145] Loss: 3.6907
Epoch [1/40], Iter [126/145] Loss: 3.8547


  0%|          | 0/40 [00:10<?, ?it/s]

Epoch [1/40], Iter [127/145] Loss: 3.6993
Epoch [1/40], Iter [128/145] Loss: 3.9330
Epoch [1/40], Iter [129/145] Loss: 3.8308


  0%|          | 0/40 [00:10<?, ?it/s]

Epoch [1/40], Iter [130/145] Loss: 3.7499
Epoch [1/40], Iter [131/145] Loss: 3.8117
Epoch [1/40], Iter [132/145] Loss: 3.9200


  0%|          | 0/40 [00:10<?, ?it/s]

Epoch [1/40], Iter [133/145] Loss: 3.8477
Epoch [1/40], Iter [134/145] Loss: 3.8261
Epoch [1/40], Iter [135/145] Loss: 3.8020


  0%|          | 0/40 [00:10<?, ?it/s]

Epoch [1/40], Iter [136/145] Loss: 3.5403
Epoch [1/40], Iter [137/145] Loss: 3.6937
Epoch [1/40], Iter [138/145] Loss: 3.9621


  0%|          | 0/40 [00:11<?, ?it/s]

Epoch [1/40], Iter [139/145] Loss: 4.0094
Epoch [1/40], Iter [140/145] Loss: 3.8026
Epoch [1/40], Iter [141/145] Loss: 3.8889


  0%|          | 0/40 [00:11<?, ?it/s]

Epoch [1/40], Iter [142/145] Loss: 3.8403
Epoch [1/40], Iter [143/145] Loss: 3.9670
Epoch [1/40], Iter [144/145] Loss: 3.6564


  2%|▎         | 1/40 [00:11<07:25, 11.43s/it]

Epoch [1/40], Iter [145/145] Loss: 3.7967


  2%|▎         | 1/40 [00:11<07:25, 11.43s/it]

Epoch [2/40], Iter [1/145] Loss: 3.8850
Epoch [2/40], Iter [2/145] Loss: 3.9697
Epoch [2/40], Iter [3/145] Loss: 3.7083


  2%|▎         | 1/40 [00:12<07:25, 11.43s/it]

Epoch [2/40], Iter [4/145] Loss: 3.9207
Epoch [2/40], Iter [5/145] Loss: 3.9355
Epoch [2/40], Iter [6/145] Loss: 3.7584


  2%|▎         | 1/40 [00:12<07:25, 11.43s/it]

Epoch [2/40], Iter [7/145] Loss: 3.7526
Epoch [2/40], Iter [8/145] Loss: 3.7802
Epoch [2/40], Iter [9/145] Loss: 3.8686


  2%|▎         | 1/40 [00:12<07:25, 11.43s/it]

Epoch [2/40], Iter [10/145] Loss: 3.8108
Epoch [2/40], Iter [11/145] Loss: 3.8023
Epoch [2/40], Iter [12/145] Loss: 3.9192


  2%|▎         | 1/40 [00:12<07:25, 11.43s/it]

Epoch [2/40], Iter [13/145] Loss: 3.8211
Epoch [2/40], Iter [14/145] Loss: 3.9950
Epoch [2/40], Iter [15/145] Loss: 3.9202


  2%|▎         | 1/40 [00:12<07:25, 11.43s/it]

Epoch [2/40], Iter [16/145] Loss: 3.8305
Epoch [2/40], Iter [17/145] Loss: 4.0219
Epoch [2/40], Iter [18/145] Loss: 3.8042


  2%|▎         | 1/40 [00:13<07:25, 11.43s/it]

Epoch [2/40], Iter [19/145] Loss: 3.8010
Epoch [2/40], Iter [20/145] Loss: 3.9686
Epoch [2/40], Iter [21/145] Loss: 3.8853


  2%|▎         | 1/40 [00:13<07:25, 11.43s/it]

Epoch [2/40], Iter [22/145] Loss: 4.0723
Epoch [2/40], Iter [23/145] Loss: 3.8979
Epoch [2/40], Iter [24/145] Loss: 3.9342


  2%|▎         | 1/40 [00:13<07:25, 11.43s/it]

Epoch [2/40], Iter [25/145] Loss: 3.8138
Epoch [2/40], Iter [26/145] Loss: 3.7841
Epoch [2/40], Iter [27/145] Loss: 3.9608


  2%|▎         | 1/40 [00:13<07:25, 11.43s/it]

Epoch [2/40], Iter [28/145] Loss: 3.7344
Epoch [2/40], Iter [29/145] Loss: 3.9509
Epoch [2/40], Iter [30/145] Loss: 3.9745


  2%|▎         | 1/40 [00:14<07:25, 11.43s/it]

Epoch [2/40], Iter [31/145] Loss: 3.9326
Epoch [2/40], Iter [32/145] Loss: 3.9591
Epoch [2/40], Iter [33/145] Loss: 3.7692


  2%|▎         | 1/40 [00:14<07:25, 11.43s/it]

Epoch [2/40], Iter [34/145] Loss: 4.0419
Epoch [2/40], Iter [35/145] Loss: 4.0306
Epoch [2/40], Iter [36/145] Loss: 3.8452


  2%|▎         | 1/40 [00:14<07:25, 11.43s/it]

Epoch [2/40], Iter [37/145] Loss: 3.8196
Epoch [2/40], Iter [38/145] Loss: 3.9649
Epoch [2/40], Iter [39/145] Loss: 3.8928


  2%|▎         | 1/40 [00:14<07:25, 11.43s/it]

Epoch [2/40], Iter [40/145] Loss: 4.0109
Epoch [2/40], Iter [41/145] Loss: 3.8391
Epoch [2/40], Iter [42/145] Loss: 4.0737


  2%|▎         | 1/40 [00:15<07:25, 11.43s/it]

Epoch [2/40], Iter [43/145] Loss: 3.9366
Epoch [2/40], Iter [44/145] Loss: 3.6534
Epoch [2/40], Iter [45/145] Loss: 3.9400


  2%|▎         | 1/40 [00:15<07:25, 11.43s/it]

Epoch [2/40], Iter [46/145] Loss: 3.6695
Epoch [2/40], Iter [47/145] Loss: 3.8416
Epoch [2/40], Iter [48/145] Loss: 3.7164


  2%|▎         | 1/40 [00:15<07:25, 11.43s/it]

Epoch [2/40], Iter [49/145] Loss: 3.7416
Epoch [2/40], Iter [50/145] Loss: 3.7800
Epoch [2/40], Iter [51/145] Loss: 4.0542


  2%|▎         | 1/40 [00:15<07:25, 11.43s/it]

Epoch [2/40], Iter [52/145] Loss: 3.9329
Epoch [2/40], Iter [53/145] Loss: 3.7337
Epoch [2/40], Iter [54/145] Loss: 4.0370


  2%|▎         | 1/40 [00:15<07:25, 11.43s/it]

Epoch [2/40], Iter [55/145] Loss: 4.1373
Epoch [2/40], Iter [56/145] Loss: 3.7606
Epoch [2/40], Iter [57/145] Loss: 3.9192


  2%|▎         | 1/40 [00:16<07:25, 11.43s/it]

Epoch [2/40], Iter [58/145] Loss: 3.8591
Epoch [2/40], Iter [59/145] Loss: 3.7264
Epoch [2/40], Iter [60/145] Loss: 3.8654


  2%|▎         | 1/40 [00:16<07:25, 11.43s/it]

Epoch [2/40], Iter [61/145] Loss: 3.6356
Epoch [2/40], Iter [62/145] Loss: 3.9946
Epoch [2/40], Iter [63/145] Loss: 3.6577


  2%|▎         | 1/40 [00:16<07:25, 11.43s/it]

Epoch [2/40], Iter [64/145] Loss: 3.9562
Epoch [2/40], Iter [65/145] Loss: 3.8558
Epoch [2/40], Iter [66/145] Loss: 3.7117


  2%|▎         | 1/40 [00:16<07:25, 11.43s/it]

Epoch [2/40], Iter [67/145] Loss: 3.7295
Epoch [2/40], Iter [68/145] Loss: 3.7959
Epoch [2/40], Iter [69/145] Loss: 3.7042


  2%|▎         | 1/40 [00:17<07:25, 11.43s/it]

Epoch [2/40], Iter [70/145] Loss: 3.8497
Epoch [2/40], Iter [71/145] Loss: 3.6546
Epoch [2/40], Iter [72/145] Loss: 4.2204


  2%|▎         | 1/40 [00:17<07:25, 11.43s/it]

Epoch [2/40], Iter [73/145] Loss: 3.6776
Epoch [2/40], Iter [74/145] Loss: 4.0044
Epoch [2/40], Iter [75/145] Loss: 3.8715


  2%|▎         | 1/40 [00:17<07:25, 11.43s/it]

Epoch [2/40], Iter [76/145] Loss: 3.5533
Epoch [2/40], Iter [77/145] Loss: 3.7612
Epoch [2/40], Iter [78/145] Loss: 3.4795


  2%|▎         | 1/40 [00:17<07:25, 11.43s/it]

Epoch [2/40], Iter [79/145] Loss: 3.5646
Epoch [2/40], Iter [80/145] Loss: 3.7460
Epoch [2/40], Iter [81/145] Loss: 4.1052


  2%|▎         | 1/40 [00:17<07:25, 11.43s/it]

Epoch [2/40], Iter [82/145] Loss: 3.9390
Epoch [2/40], Iter [83/145] Loss: 3.4790
Epoch [2/40], Iter [84/145] Loss: 3.8138


  2%|▎         | 1/40 [00:18<07:25, 11.43s/it]

Epoch [2/40], Iter [85/145] Loss: 3.8467
Epoch [2/40], Iter [86/145] Loss: 3.7250
Epoch [2/40], Iter [87/145] Loss: 3.8988


  2%|▎         | 1/40 [00:18<07:25, 11.43s/it]

Epoch [2/40], Iter [88/145] Loss: 3.7399
Epoch [2/40], Iter [89/145] Loss: 3.9875
Epoch [2/40], Iter [90/145] Loss: 3.7150


  2%|▎         | 1/40 [00:18<07:25, 11.43s/it]

Epoch [2/40], Iter [91/145] Loss: 3.5814
Epoch [2/40], Iter [92/145] Loss: 3.8924
Epoch [2/40], Iter [93/145] Loss: 3.9017


  2%|▎         | 1/40 [00:18<07:25, 11.43s/it]

Epoch [2/40], Iter [94/145] Loss: 3.8870
Epoch [2/40], Iter [95/145] Loss: 3.5800
Epoch [2/40], Iter [96/145] Loss: 3.7237


  2%|▎         | 1/40 [00:19<07:25, 11.43s/it]

Epoch [2/40], Iter [97/145] Loss: 3.9087
Epoch [2/40], Iter [98/145] Loss: 3.5419
Epoch [2/40], Iter [99/145] Loss: 3.6626


  2%|▎         | 1/40 [00:19<07:25, 11.43s/it]

Epoch [2/40], Iter [100/145] Loss: 3.9092
Epoch [2/40], Iter [101/145] Loss: 3.9486
Epoch [2/40], Iter [102/145] Loss: 3.6194


  2%|▎         | 1/40 [00:19<07:25, 11.43s/it]

Epoch [2/40], Iter [103/145] Loss: 3.7992
Epoch [2/40], Iter [104/145] Loss: 3.9489
Epoch [2/40], Iter [105/145] Loss: 3.8334


  2%|▎         | 1/40 [00:19<07:25, 11.43s/it]

Epoch [2/40], Iter [106/145] Loss: 3.9432
Epoch [2/40], Iter [107/145] Loss: 3.9745
Epoch [2/40], Iter [108/145] Loss: 3.9606


  2%|▎         | 1/40 [00:20<07:25, 11.43s/it]

Epoch [2/40], Iter [109/145] Loss: 3.9173
Epoch [2/40], Iter [110/145] Loss: 3.9241
Epoch [2/40], Iter [111/145] Loss: 3.5890


  2%|▎         | 1/40 [00:20<07:25, 11.43s/it]

Epoch [2/40], Iter [112/145] Loss: 4.0071
Epoch [2/40], Iter [113/145] Loss: 3.9876
Epoch [2/40], Iter [114/145] Loss: 3.6199


  2%|▎         | 1/40 [00:20<07:25, 11.43s/it]

Epoch [2/40], Iter [115/145] Loss: 3.8929
Epoch [2/40], Iter [116/145] Loss: 3.9711
Epoch [2/40], Iter [117/145] Loss: 3.9186


  2%|▎         | 1/40 [00:20<07:25, 11.43s/it]

Epoch [2/40], Iter [118/145] Loss: 3.8014
Epoch [2/40], Iter [119/145] Loss: 3.8242
Epoch [2/40], Iter [120/145] Loss: 3.9087


  2%|▎         | 1/40 [00:20<07:25, 11.43s/it]

Epoch [2/40], Iter [121/145] Loss: 3.8907
Epoch [2/40], Iter [122/145] Loss: 3.9367
Epoch [2/40], Iter [123/145] Loss: 3.8104


  2%|▎         | 1/40 [00:21<07:25, 11.43s/it]

Epoch [2/40], Iter [124/145] Loss: 3.8714
Epoch [2/40], Iter [125/145] Loss: 3.9019
Epoch [2/40], Iter [126/145] Loss: 3.9542


  2%|▎         | 1/40 [00:21<07:25, 11.43s/it]

Epoch [2/40], Iter [127/145] Loss: 3.7401
Epoch [2/40], Iter [128/145] Loss: 3.8408
Epoch [2/40], Iter [129/145] Loss: 3.7739


  2%|▎         | 1/40 [00:21<07:25, 11.43s/it]

Epoch [2/40], Iter [130/145] Loss: 3.7991
Epoch [2/40], Iter [131/145] Loss: 3.8351
Epoch [2/40], Iter [132/145] Loss: 3.8993


  2%|▎         | 1/40 [00:21<07:25, 11.43s/it]

Epoch [2/40], Iter [133/145] Loss: 3.6386
Epoch [2/40], Iter [134/145] Loss: 3.6381
Epoch [2/40], Iter [135/145] Loss: 3.6524


  2%|▎         | 1/40 [00:22<07:25, 11.43s/it]

Epoch [2/40], Iter [136/145] Loss: 3.5950
Epoch [2/40], Iter [137/145] Loss: 3.7080
Epoch [2/40], Iter [138/145] Loss: 3.9075


  2%|▎         | 1/40 [00:22<07:25, 11.43s/it]

Epoch [2/40], Iter [139/145] Loss: 3.8258
Epoch [2/40], Iter [140/145] Loss: 3.7076
Epoch [2/40], Iter [141/145] Loss: 3.8689


  2%|▎         | 1/40 [00:22<07:25, 11.43s/it]

Epoch [2/40], Iter [142/145] Loss: 3.6258
Epoch [2/40], Iter [143/145] Loss: 3.8715
Epoch [2/40], Iter [144/145] Loss: 3.9680


  5%|▌         | 2/40 [00:22<07:10, 11.33s/it]

Epoch [2/40], Iter [145/145] Loss: 3.6473


  5%|▌         | 2/40 [00:23<07:10, 11.33s/it]

Epoch [3/40], Iter [1/145] Loss: 3.8926
Epoch [3/40], Iter [2/145] Loss: 3.7772
Epoch [3/40], Iter [3/145] Loss: 3.7917


  5%|▌         | 2/40 [00:23<07:10, 11.33s/it]

Epoch [3/40], Iter [4/145] Loss: 3.8622
Epoch [3/40], Iter [5/145] Loss: 3.8308
Epoch [3/40], Iter [6/145] Loss: 3.6968


  5%|▌         | 2/40 [00:23<07:10, 11.33s/it]

Epoch [3/40], Iter [7/145] Loss: 3.7560
Epoch [3/40], Iter [8/145] Loss: 3.8769
Epoch [3/40], Iter [9/145] Loss: 3.6181


  5%|▌         | 2/40 [00:23<07:10, 11.33s/it]

Epoch [3/40], Iter [10/145] Loss: 3.8264
Epoch [3/40], Iter [11/145] Loss: 3.7286
Epoch [3/40], Iter [12/145] Loss: 3.5814


  5%|▌         | 2/40 [00:23<07:10, 11.33s/it]

Epoch [3/40], Iter [13/145] Loss: 3.8402
Epoch [3/40], Iter [14/145] Loss: 3.9678
Epoch [3/40], Iter [15/145] Loss: 3.8883


  5%|▌         | 2/40 [00:24<07:10, 11.33s/it]

Epoch [3/40], Iter [16/145] Loss: 3.8183
Epoch [3/40], Iter [17/145] Loss: 4.1298
Epoch [3/40], Iter [18/145] Loss: 3.5738


  5%|▌         | 2/40 [00:24<07:10, 11.33s/it]

Epoch [3/40], Iter [19/145] Loss: 3.7928
Epoch [3/40], Iter [20/145] Loss: 3.7149
Epoch [3/40], Iter [21/145] Loss: 3.7620


  5%|▌         | 2/40 [00:24<07:10, 11.33s/it]

Epoch [3/40], Iter [22/145] Loss: 3.6614
Epoch [3/40], Iter [23/145] Loss: 4.0108
Epoch [3/40], Iter [24/145] Loss: 3.8647


  5%|▌         | 2/40 [00:24<07:10, 11.33s/it]

Epoch [3/40], Iter [25/145] Loss: 3.7183
Epoch [3/40], Iter [26/145] Loss: 3.7432
Epoch [3/40], Iter [27/145] Loss: 3.8874


  5%|▌         | 2/40 [00:25<07:10, 11.33s/it]

Epoch [3/40], Iter [28/145] Loss: 3.8212
Epoch [3/40], Iter [29/145] Loss: 4.0318
Epoch [3/40], Iter [30/145] Loss: 3.8309


  5%|▌         | 2/40 [00:25<07:10, 11.33s/it]

Epoch [3/40], Iter [31/145] Loss: 3.7823
Epoch [3/40], Iter [32/145] Loss: 3.7116
Epoch [3/40], Iter [33/145] Loss: 4.1093


  5%|▌         | 2/40 [00:25<07:10, 11.33s/it]

Epoch [3/40], Iter [34/145] Loss: 3.6074
Epoch [3/40], Iter [35/145] Loss: 3.9143
Epoch [3/40], Iter [36/145] Loss: 3.9526


  5%|▌         | 2/40 [00:25<07:10, 11.33s/it]

Epoch [3/40], Iter [37/145] Loss: 3.7726
Epoch [3/40], Iter [38/145] Loss: 3.6780
Epoch [3/40], Iter [39/145] Loss: 3.7293


  5%|▌         | 2/40 [00:25<07:10, 11.33s/it]

Epoch [3/40], Iter [40/145] Loss: 3.9002
Epoch [3/40], Iter [41/145] Loss: 3.6255
Epoch [3/40], Iter [42/145] Loss: 3.8863


  5%|▌         | 2/40 [00:26<07:10, 11.33s/it]

Epoch [3/40], Iter [43/145] Loss: 3.8271
Epoch [3/40], Iter [44/145] Loss: 3.8376
Epoch [3/40], Iter [45/145] Loss: 4.1087


  5%|▌         | 2/40 [00:26<07:10, 11.33s/it]

Epoch [3/40], Iter [46/145] Loss: 3.9620
Epoch [3/40], Iter [47/145] Loss: 3.8019
Epoch [3/40], Iter [48/145] Loss: 3.8277


  5%|▌         | 2/40 [00:26<07:10, 11.33s/it]

Epoch [3/40], Iter [49/145] Loss: 3.7742
Epoch [3/40], Iter [50/145] Loss: 4.0274
Epoch [3/40], Iter [51/145] Loss: 3.9546


  5%|▌         | 2/40 [00:26<07:10, 11.33s/it]

Epoch [3/40], Iter [52/145] Loss: 3.7577
Epoch [3/40], Iter [53/145] Loss: 3.6459
Epoch [3/40], Iter [54/145] Loss: 3.6915


  5%|▌         | 2/40 [00:27<07:10, 11.33s/it]

Epoch [3/40], Iter [55/145] Loss: 3.8572
Epoch [3/40], Iter [56/145] Loss: 3.8992
Epoch [3/40], Iter [57/145] Loss: 3.7570


  5%|▌         | 2/40 [00:27<07:10, 11.33s/it]

Epoch [3/40], Iter [58/145] Loss: 3.7869
Epoch [3/40], Iter [59/145] Loss: 4.0393
Epoch [3/40], Iter [60/145] Loss: 3.6224


  5%|▌         | 2/40 [00:27<07:10, 11.33s/it]

Epoch [3/40], Iter [61/145] Loss: 3.7802
Epoch [3/40], Iter [62/145] Loss: 3.6846
Epoch [3/40], Iter [63/145] Loss: 3.7143


  5%|▌         | 2/40 [00:27<07:10, 11.33s/it]

Epoch [3/40], Iter [64/145] Loss: 3.8007
Epoch [3/40], Iter [65/145] Loss: 4.0333
Epoch [3/40], Iter [66/145] Loss: 4.0895


  5%|▌         | 2/40 [00:28<07:10, 11.33s/it]

Epoch [3/40], Iter [67/145] Loss: 3.9350
Epoch [3/40], Iter [68/145] Loss: 3.9924
Epoch [3/40], Iter [69/145] Loss: 3.7993


  5%|▌         | 2/40 [00:28<07:10, 11.33s/it]

Epoch [3/40], Iter [70/145] Loss: 3.8865
Epoch [3/40], Iter [71/145] Loss: 3.6073
Epoch [3/40], Iter [72/145] Loss: 3.8006


  5%|▌         | 2/40 [00:28<07:10, 11.33s/it]

Epoch [3/40], Iter [73/145] Loss: 3.8100
Epoch [3/40], Iter [74/145] Loss: 3.8588
Epoch [3/40], Iter [75/145] Loss: 3.9282


  5%|▌         | 2/40 [00:28<07:10, 11.33s/it]

Epoch [3/40], Iter [76/145] Loss: 3.5184
Epoch [3/40], Iter [77/145] Loss: 3.9729
Epoch [3/40], Iter [78/145] Loss: 3.8681


  5%|▌         | 2/40 [00:28<07:10, 11.33s/it]

Epoch [3/40], Iter [79/145] Loss: 3.5439
Epoch [3/40], Iter [80/145] Loss: 3.4970
Epoch [3/40], Iter [81/145] Loss: 3.7723


  5%|▌         | 2/40 [00:29<07:10, 11.33s/it]

Epoch [3/40], Iter [82/145] Loss: 3.9177
Epoch [3/40], Iter [83/145] Loss: 3.6915
Epoch [3/40], Iter [84/145] Loss: 3.1470


  5%|▌         | 2/40 [00:29<07:10, 11.33s/it]

Epoch [3/40], Iter [85/145] Loss: 3.8932
Epoch [3/40], Iter [86/145] Loss: 4.1850
Epoch [3/40], Iter [87/145] Loss: 4.0387


  5%|▌         | 2/40 [00:29<07:10, 11.33s/it]

Epoch [3/40], Iter [88/145] Loss: 4.0001
Epoch [3/40], Iter [89/145] Loss: 3.8459
Epoch [3/40], Iter [90/145] Loss: 4.0340


  5%|▌         | 2/40 [00:29<07:10, 11.33s/it]

Epoch [3/40], Iter [91/145] Loss: 3.9795
Epoch [3/40], Iter [92/145] Loss: 3.6785
Epoch [3/40], Iter [93/145] Loss: 3.8781


  5%|▌         | 2/40 [00:30<07:10, 11.33s/it]

Epoch [3/40], Iter [94/145] Loss: 3.8133
Epoch [3/40], Iter [95/145] Loss: 3.9230
Epoch [3/40], Iter [96/145] Loss: 3.9624


  5%|▌         | 2/40 [00:30<07:10, 11.33s/it]

Epoch [3/40], Iter [97/145] Loss: 3.9074
Epoch [3/40], Iter [98/145] Loss: 3.8996
Epoch [3/40], Iter [99/145] Loss: 3.8874


  5%|▌         | 2/40 [00:30<07:10, 11.33s/it]

Epoch [3/40], Iter [100/145] Loss: 3.8394
Epoch [3/40], Iter [101/145] Loss: 3.8481
Epoch [3/40], Iter [102/145] Loss: 3.8762


  5%|▌         | 2/40 [00:30<07:10, 11.33s/it]

Epoch [3/40], Iter [103/145] Loss: 3.5981
Epoch [3/40], Iter [104/145] Loss: 3.8112
Epoch [3/40], Iter [105/145] Loss: 4.1160


  5%|▌         | 2/40 [00:30<07:10, 11.33s/it]

Epoch [3/40], Iter [106/145] Loss: 3.9844
Epoch [3/40], Iter [107/145] Loss: 3.9261
Epoch [3/40], Iter [108/145] Loss: 3.7599


  5%|▌         | 2/40 [00:31<07:10, 11.33s/it]

Epoch [3/40], Iter [109/145] Loss: 3.9674
Epoch [3/40], Iter [110/145] Loss: 3.8322
Epoch [3/40], Iter [111/145] Loss: 3.7911


  5%|▌         | 2/40 [00:31<07:10, 11.33s/it]

Epoch [3/40], Iter [112/145] Loss: 3.9482
Epoch [3/40], Iter [113/145] Loss: 3.7844
Epoch [3/40], Iter [114/145] Loss: 3.7409


  5%|▌         | 2/40 [00:31<07:10, 11.33s/it]

Epoch [3/40], Iter [115/145] Loss: 3.7414
Epoch [3/40], Iter [116/145] Loss: 3.8383
Epoch [3/40], Iter [117/145] Loss: 3.9401


  5%|▌         | 2/40 [00:31<07:10, 11.33s/it]

Epoch [3/40], Iter [118/145] Loss: 3.9527
Epoch [3/40], Iter [119/145] Loss: 3.8487
Epoch [3/40], Iter [120/145] Loss: 3.9297


  5%|▌         | 2/40 [00:32<07:10, 11.33s/it]

Epoch [3/40], Iter [121/145] Loss: 3.7506
Epoch [3/40], Iter [122/145] Loss: 3.8874
Epoch [3/40], Iter [123/145] Loss: 3.8157


  5%|▌         | 2/40 [00:32<07:10, 11.33s/it]

Epoch [3/40], Iter [124/145] Loss: 3.6053
Epoch [3/40], Iter [125/145] Loss: 4.0847
Epoch [3/40], Iter [126/145] Loss: 3.8647


  5%|▌         | 2/40 [00:32<07:10, 11.33s/it]

Epoch [3/40], Iter [127/145] Loss: 3.8290
Epoch [3/40], Iter [128/145] Loss: 3.7509
Epoch [3/40], Iter [129/145] Loss: 3.9150


  5%|▌         | 2/40 [00:32<07:10, 11.33s/it]

Epoch [3/40], Iter [130/145] Loss: 3.6027
Epoch [3/40], Iter [131/145] Loss: 4.0370
Epoch [3/40], Iter [132/145] Loss: 3.7777


  5%|▌         | 2/40 [00:33<07:10, 11.33s/it]

Epoch [3/40], Iter [133/145] Loss: 3.8063
Epoch [3/40], Iter [134/145] Loss: 3.6566
Epoch [3/40], Iter [135/145] Loss: 3.9577


  5%|▌         | 2/40 [00:33<07:10, 11.33s/it]

Epoch [3/40], Iter [136/145] Loss: 4.2439
Epoch [3/40], Iter [137/145] Loss: 4.0038
Epoch [3/40], Iter [138/145] Loss: 3.8338


  5%|▌         | 2/40 [00:33<07:10, 11.33s/it]

Epoch [3/40], Iter [139/145] Loss: 3.7727
Epoch [3/40], Iter [140/145] Loss: 3.7875
Epoch [3/40], Iter [141/145] Loss: 3.9129


  5%|▌         | 2/40 [00:33<07:10, 11.33s/it]

Epoch [3/40], Iter [142/145] Loss: 4.0269
Epoch [3/40], Iter [143/145] Loss: 3.9515
Epoch [3/40], Iter [144/145] Loss: 4.0147


  8%|▊         | 3/40 [00:33<06:55, 11.22s/it]

Epoch [3/40], Iter [145/145] Loss: 3.9111


  8%|▊         | 3/40 [00:34<06:55, 11.22s/it]

Epoch [4/40], Iter [1/145] Loss: 3.7936
Epoch [4/40], Iter [2/145] Loss: 3.9213
Epoch [4/40], Iter [3/145] Loss: 4.0468


  8%|▊         | 3/40 [00:34<06:55, 11.22s/it]

Epoch [4/40], Iter [4/145] Loss: 3.7137
Epoch [4/40], Iter [5/145] Loss: 3.6330
Epoch [4/40], Iter [6/145] Loss: 3.7385


  8%|▊         | 3/40 [00:34<06:55, 11.22s/it]

Epoch [4/40], Iter [7/145] Loss: 3.6872
Epoch [4/40], Iter [8/145] Loss: 3.9249
Epoch [4/40], Iter [9/145] Loss: 3.7905


  8%|▊         | 3/40 [00:34<06:55, 11.22s/it]

Epoch [4/40], Iter [10/145] Loss: 3.7019
Epoch [4/40], Iter [11/145] Loss: 3.8630
Epoch [4/40], Iter [12/145] Loss: 3.6291


  8%|▊         | 3/40 [00:35<06:55, 11.22s/it]

Epoch [4/40], Iter [13/145] Loss: 3.8905
Epoch [4/40], Iter [14/145] Loss: 4.0362
Epoch [4/40], Iter [15/145] Loss: 3.5208


  8%|▊         | 3/40 [00:35<06:55, 11.22s/it]

Epoch [4/40], Iter [16/145] Loss: 3.6464
Epoch [4/40], Iter [17/145] Loss: 4.1188
Epoch [4/40], Iter [18/145] Loss: 3.7082


  8%|▊         | 3/40 [00:35<06:55, 11.22s/it]

Epoch [4/40], Iter [19/145] Loss: 3.6319
Epoch [4/40], Iter [20/145] Loss: 3.9060
Epoch [4/40], Iter [21/145] Loss: 3.6144


  8%|▊         | 3/40 [00:35<06:55, 11.22s/it]

Epoch [4/40], Iter [22/145] Loss: 3.9164
Epoch [4/40], Iter [23/145] Loss: 3.7662
Epoch [4/40], Iter [24/145] Loss: 3.7948


  8%|▊         | 3/40 [00:36<06:55, 11.22s/it]

Epoch [4/40], Iter [25/145] Loss: 3.8099
Epoch [4/40], Iter [26/145] Loss: 3.8944
Epoch [4/40], Iter [27/145] Loss: 3.8939


  8%|▊         | 3/40 [00:36<06:55, 11.22s/it]

Epoch [4/40], Iter [28/145] Loss: 3.6451
Epoch [4/40], Iter [29/145] Loss: 3.7777
Epoch [4/40], Iter [30/145] Loss: 3.9832


  8%|▊         | 3/40 [00:36<06:55, 11.22s/it]

Epoch [4/40], Iter [31/145] Loss: 3.9680
Epoch [4/40], Iter [32/145] Loss: 3.8982
Epoch [4/40], Iter [33/145] Loss: 3.9049


  8%|▊         | 3/40 [00:36<06:55, 11.22s/it]

Epoch [4/40], Iter [34/145] Loss: 3.8060
Epoch [4/40], Iter [35/145] Loss: 3.9153
Epoch [4/40], Iter [36/145] Loss: 3.7755


  8%|▊         | 3/40 [00:36<06:55, 11.22s/it]

Epoch [4/40], Iter [37/145] Loss: 3.4614
Epoch [4/40], Iter [38/145] Loss: 4.0293
Epoch [4/40], Iter [39/145] Loss: 3.7380


  8%|▊         | 3/40 [00:37<06:55, 11.22s/it]

Epoch [4/40], Iter [40/145] Loss: 3.7994
Epoch [4/40], Iter [41/145] Loss: 3.7867
Epoch [4/40], Iter [42/145] Loss: 3.8622


  8%|▊         | 3/40 [00:37<06:55, 11.22s/it]

Epoch [4/40], Iter [43/145] Loss: 3.8890
Epoch [4/40], Iter [44/145] Loss: 3.6666
Epoch [4/40], Iter [45/145] Loss: 3.3836


  8%|▊         | 3/40 [00:37<06:55, 11.22s/it]

Epoch [4/40], Iter [46/145] Loss: 3.7480
Epoch [4/40], Iter [47/145] Loss: 3.8451
Epoch [4/40], Iter [48/145] Loss: 4.1497


  8%|▊         | 3/40 [00:37<06:55, 11.22s/it]

Epoch [4/40], Iter [49/145] Loss: 3.8137
Epoch [4/40], Iter [50/145] Loss: 4.0200
Epoch [4/40], Iter [51/145] Loss: 4.0619


  8%|▊         | 3/40 [00:38<06:55, 11.22s/it]

Epoch [4/40], Iter [52/145] Loss: 3.8712
Epoch [4/40], Iter [53/145] Loss: 3.8361
Epoch [4/40], Iter [54/145] Loss: 3.7066


  8%|▊         | 3/40 [00:38<06:55, 11.22s/it]

Epoch [4/40], Iter [55/145] Loss: 3.9665
Epoch [4/40], Iter [56/145] Loss: 3.7191
Epoch [4/40], Iter [57/145] Loss: 3.8829


  8%|▊         | 3/40 [00:38<06:55, 11.22s/it]

Epoch [4/40], Iter [58/145] Loss: 3.8710
Epoch [4/40], Iter [59/145] Loss: 3.6666
Epoch [4/40], Iter [60/145] Loss: 3.9992


  8%|▊         | 3/40 [00:38<06:55, 11.22s/it]

Epoch [4/40], Iter [61/145] Loss: 3.7612
Epoch [4/40], Iter [62/145] Loss: 3.6798
Epoch [4/40], Iter [63/145] Loss: 3.8691


  8%|▊         | 3/40 [00:38<06:55, 11.22s/it]

Epoch [4/40], Iter [64/145] Loss: 3.8181
Epoch [4/40], Iter [65/145] Loss: 3.7272
Epoch [4/40], Iter [66/145] Loss: 3.8419


  8%|▊         | 3/40 [00:39<06:55, 11.22s/it]

Epoch [4/40], Iter [67/145] Loss: 3.7972
Epoch [4/40], Iter [68/145] Loss: 3.9540
Epoch [4/40], Iter [69/145] Loss: 3.9775


  8%|▊         | 3/40 [00:39<06:55, 11.22s/it]

Epoch [4/40], Iter [70/145] Loss: 4.0097
Epoch [4/40], Iter [71/145] Loss: 3.9437
Epoch [4/40], Iter [72/145] Loss: 3.7958


  8%|▊         | 3/40 [00:39<06:55, 11.22s/it]

Epoch [4/40], Iter [73/145] Loss: 3.9176
Epoch [4/40], Iter [74/145] Loss: 3.9239
Epoch [4/40], Iter [75/145] Loss: 3.8746


  8%|▊         | 3/40 [00:39<06:55, 11.22s/it]

Epoch [4/40], Iter [76/145] Loss: 3.6528
Epoch [4/40], Iter [77/145] Loss: 3.8374
Epoch [4/40], Iter [78/145] Loss: 3.8931


  8%|▊         | 3/40 [00:40<06:55, 11.22s/it]

Epoch [4/40], Iter [79/145] Loss: 3.6961
Epoch [4/40], Iter [80/145] Loss: 3.9797
Epoch [4/40], Iter [81/145] Loss: 3.8400


  8%|▊         | 3/40 [00:40<06:55, 11.22s/it]

Epoch [4/40], Iter [82/145] Loss: 3.6985
Epoch [4/40], Iter [83/145] Loss: 3.9472
Epoch [4/40], Iter [84/145] Loss: 3.6795


  8%|▊         | 3/40 [00:40<06:55, 11.22s/it]

Epoch [4/40], Iter [85/145] Loss: 3.7323
Epoch [4/40], Iter [86/145] Loss: 4.0148
Epoch [4/40], Iter [87/145] Loss: 4.2217


  8%|▊         | 3/40 [00:40<06:55, 11.22s/it]

Epoch [4/40], Iter [88/145] Loss: 3.7937
Epoch [4/40], Iter [89/145] Loss: 4.1948
Epoch [4/40], Iter [90/145] Loss: 3.9450


  8%|▊         | 3/40 [00:40<06:55, 11.22s/it]

Epoch [4/40], Iter [91/145] Loss: 3.9716
Epoch [4/40], Iter [92/145] Loss: 3.6925
Epoch [4/40], Iter [93/145] Loss: 3.7673


  8%|▊         | 3/40 [00:41<06:55, 11.22s/it]

Epoch [4/40], Iter [94/145] Loss: 4.1139
Epoch [4/40], Iter [95/145] Loss: 3.8475
Epoch [4/40], Iter [96/145] Loss: 4.0022


  8%|▊         | 3/40 [00:41<06:55, 11.22s/it]

Epoch [4/40], Iter [97/145] Loss: 3.7746
Epoch [4/40], Iter [98/145] Loss: 3.7929
Epoch [4/40], Iter [99/145] Loss: 3.8008


  8%|▊         | 3/40 [00:41<06:55, 11.22s/it]

Epoch [4/40], Iter [100/145] Loss: 3.8448
Epoch [4/40], Iter [101/145] Loss: 3.8533
Epoch [4/40], Iter [102/145] Loss: 3.9632


  8%|▊         | 3/40 [00:41<06:55, 11.22s/it]

Epoch [4/40], Iter [103/145] Loss: 3.7905
Epoch [4/40], Iter [104/145] Loss: 4.0313
Epoch [4/40], Iter [105/145] Loss: 3.8989


  8%|▊         | 3/40 [00:42<06:55, 11.22s/it]

Epoch [4/40], Iter [106/145] Loss: 3.9323
Epoch [4/40], Iter [107/145] Loss: 4.0778
Epoch [4/40], Iter [108/145] Loss: 3.8519


  8%|▊         | 3/40 [00:42<06:55, 11.22s/it]

Epoch [4/40], Iter [109/145] Loss: 3.9398
Epoch [4/40], Iter [110/145] Loss: 3.7266
Epoch [4/40], Iter [111/145] Loss: 3.8346


  8%|▊         | 3/40 [00:42<06:55, 11.22s/it]

Epoch [4/40], Iter [112/145] Loss: 3.9760
Epoch [4/40], Iter [113/145] Loss: 3.8276
Epoch [4/40], Iter [114/145] Loss: 3.8802


  8%|▊         | 3/40 [00:42<06:55, 11.22s/it]

Epoch [4/40], Iter [115/145] Loss: 3.8300
Epoch [4/40], Iter [116/145] Loss: 3.8403
Epoch [4/40], Iter [117/145] Loss: 3.8260


  8%|▊         | 3/40 [00:43<06:55, 11.22s/it]

Epoch [4/40], Iter [118/145] Loss: 3.8030
Epoch [4/40], Iter [119/145] Loss: 3.8112
Epoch [4/40], Iter [120/145] Loss: 3.9653


  8%|▊         | 3/40 [00:43<06:55, 11.22s/it]

Epoch [4/40], Iter [121/145] Loss: 3.9853
Epoch [4/40], Iter [122/145] Loss: 3.9038
Epoch [4/40], Iter [123/145] Loss: 3.8812


  8%|▊         | 3/40 [00:43<06:55, 11.22s/it]

Epoch [4/40], Iter [124/145] Loss: 3.8409
Epoch [4/40], Iter [125/145] Loss: 4.0429
Epoch [4/40], Iter [126/145] Loss: 3.7054


  8%|▊         | 3/40 [00:43<06:55, 11.22s/it]

Epoch [4/40], Iter [127/145] Loss: 3.8322
Epoch [4/40], Iter [128/145] Loss: 3.8419
Epoch [4/40], Iter [129/145] Loss: 3.8288


  8%|▊         | 3/40 [00:43<06:55, 11.22s/it]

Epoch [4/40], Iter [130/145] Loss: 3.9934
Epoch [4/40], Iter [131/145] Loss: 3.9770
Epoch [4/40], Iter [132/145] Loss: 3.7045


  8%|▊         | 3/40 [00:44<06:55, 11.22s/it]

Epoch [4/40], Iter [133/145] Loss: 3.7110
Epoch [4/40], Iter [134/145] Loss: 3.9934
Epoch [4/40], Iter [135/145] Loss: 3.9366


  8%|▊         | 3/40 [00:44<06:55, 11.22s/it]

Epoch [4/40], Iter [136/145] Loss: 3.6311
Epoch [4/40], Iter [137/145] Loss: 3.8770
Epoch [4/40], Iter [138/145] Loss: 3.6999


  8%|▊         | 3/40 [00:44<06:55, 11.22s/it]

Epoch [4/40], Iter [139/145] Loss: 3.8017
Epoch [4/40], Iter [140/145] Loss: 3.7580
Epoch [4/40], Iter [141/145] Loss: 3.6277


  8%|▊         | 3/40 [00:44<06:55, 11.22s/it]

Epoch [4/40], Iter [142/145] Loss: 3.5708
Epoch [4/40], Iter [143/145] Loss: 3.6594
Epoch [4/40], Iter [144/145] Loss: 3.5386


 10%|█         | 4/40 [00:44<06:42, 11.19s/it]

Epoch [4/40], Iter [145/145] Loss: 3.6384


 10%|█         | 4/40 [00:45<06:42, 11.19s/it]

Epoch [5/40], Iter [1/145] Loss: 3.5706
Epoch [5/40], Iter [2/145] Loss: 3.8714
Epoch [5/40], Iter [3/145] Loss: 4.1020


 10%|█         | 4/40 [00:45<06:42, 11.19s/it]

Epoch [5/40], Iter [4/145] Loss: 3.8398
Epoch [5/40], Iter [5/145] Loss: 3.8499
Epoch [5/40], Iter [6/145] Loss: 4.0474


 10%|█         | 4/40 [00:45<06:42, 11.19s/it]

Epoch [5/40], Iter [7/145] Loss: 3.6655
Epoch [5/40], Iter [8/145] Loss: 3.8418
Epoch [5/40], Iter [9/145] Loss: 3.7549


 10%|█         | 4/40 [00:46<06:42, 11.19s/it]

Epoch [5/40], Iter [10/145] Loss: 4.0598
Epoch [5/40], Iter [11/145] Loss: 3.8631
Epoch [5/40], Iter [12/145] Loss: 3.7827


 10%|█         | 4/40 [00:46<06:42, 11.19s/it]

Epoch [5/40], Iter [13/145] Loss: 3.6277
Epoch [5/40], Iter [14/145] Loss: 3.8320
Epoch [5/40], Iter [15/145] Loss: 3.9691


 10%|█         | 4/40 [00:46<06:42, 11.19s/it]

Epoch [5/40], Iter [16/145] Loss: 3.9394
Epoch [5/40], Iter [17/145] Loss: 3.8794
Epoch [5/40], Iter [18/145] Loss: 4.1149


 10%|█         | 4/40 [00:46<06:42, 11.19s/it]

Epoch [5/40], Iter [19/145] Loss: 3.8475
Epoch [5/40], Iter [20/145] Loss: 3.6170
Epoch [5/40], Iter [21/145] Loss: 3.9594


 10%|█         | 4/40 [00:46<06:42, 11.19s/it]

Epoch [5/40], Iter [22/145] Loss: 3.6315
Epoch [5/40], Iter [23/145] Loss: 3.5721
Epoch [5/40], Iter [24/145] Loss: 3.8521


 10%|█         | 4/40 [00:47<06:42, 11.19s/it]

Epoch [5/40], Iter [25/145] Loss: 3.8036
Epoch [5/40], Iter [26/145] Loss: 3.8575
Epoch [5/40], Iter [27/145] Loss: 3.6697


 10%|█         | 4/40 [00:47<06:42, 11.19s/it]

Epoch [5/40], Iter [28/145] Loss: 3.9150
Epoch [5/40], Iter [29/145] Loss: 4.0361
Epoch [5/40], Iter [30/145] Loss: 4.1701


 10%|█         | 4/40 [00:47<06:42, 11.19s/it]

Epoch [5/40], Iter [31/145] Loss: 3.9778
Epoch [5/40], Iter [32/145] Loss: 3.8784
Epoch [5/40], Iter [33/145] Loss: 3.8635


 10%|█         | 4/40 [00:47<06:42, 11.19s/it]

Epoch [5/40], Iter [34/145] Loss: 3.8044
Epoch [5/40], Iter [35/145] Loss: 3.8096
Epoch [5/40], Iter [36/145] Loss: 4.0406


 10%|█         | 4/40 [00:48<06:42, 11.19s/it]

Epoch [5/40], Iter [37/145] Loss: 3.9186
Epoch [5/40], Iter [38/145] Loss: 4.0494
Epoch [5/40], Iter [39/145] Loss: 3.8703


 10%|█         | 4/40 [00:48<06:42, 11.19s/it]

Epoch [5/40], Iter [40/145] Loss: 3.7892
Epoch [5/40], Iter [41/145] Loss: 4.0471
Epoch [5/40], Iter [42/145] Loss: 3.7393


 10%|█         | 4/40 [00:48<06:42, 11.19s/it]

Epoch [5/40], Iter [43/145] Loss: 3.8143
Epoch [5/40], Iter [44/145] Loss: 3.8467
Epoch [5/40], Iter [45/145] Loss: 4.0026


 10%|█         | 4/40 [00:48<06:42, 11.19s/it]

Epoch [5/40], Iter [46/145] Loss: 3.7146
Epoch [5/40], Iter [47/145] Loss: 3.8515
Epoch [5/40], Iter [48/145] Loss: 3.8510


 10%|█         | 4/40 [00:49<06:42, 11.19s/it]

Epoch [5/40], Iter [49/145] Loss: 3.6215
Epoch [5/40], Iter [50/145] Loss: 3.8224
Epoch [5/40], Iter [51/145] Loss: 3.7252


 10%|█         | 4/40 [00:49<06:42, 11.19s/it]

Epoch [5/40], Iter [52/145] Loss: 3.8563
Epoch [5/40], Iter [53/145] Loss: 3.6740
Epoch [5/40], Iter [54/145] Loss: 3.7284


 10%|█         | 4/40 [00:49<06:42, 11.19s/it]

Epoch [5/40], Iter [55/145] Loss: 3.7552
Epoch [5/40], Iter [56/145] Loss: 3.7842
Epoch [5/40], Iter [57/145] Loss: 3.7235


 10%|█         | 4/40 [00:49<06:42, 11.19s/it]

Epoch [5/40], Iter [58/145] Loss: 3.9489
Epoch [5/40], Iter [59/145] Loss: 4.1140
Epoch [5/40], Iter [60/145] Loss: 3.6132


 10%|█         | 4/40 [00:49<06:42, 11.19s/it]

Epoch [5/40], Iter [61/145] Loss: 3.8056
Epoch [5/40], Iter [62/145] Loss: 3.9610
Epoch [5/40], Iter [63/145] Loss: 3.8034


 10%|█         | 4/40 [00:50<06:42, 11.19s/it]

Epoch [5/40], Iter [64/145] Loss: 3.5676
Epoch [5/40], Iter [65/145] Loss: 4.1210
Epoch [5/40], Iter [66/145] Loss: 3.7153


 10%|█         | 4/40 [00:50<06:42, 11.19s/it]

Epoch [5/40], Iter [67/145] Loss: 3.8955
Epoch [5/40], Iter [68/145] Loss: 4.1043
Epoch [5/40], Iter [69/145] Loss: 4.0466


 10%|█         | 4/40 [00:50<06:42, 11.19s/it]

Epoch [5/40], Iter [70/145] Loss: 3.7727
Epoch [5/40], Iter [71/145] Loss: 3.8483
Epoch [5/40], Iter [72/145] Loss: 3.8129


 10%|█         | 4/40 [00:50<06:42, 11.19s/it]

Epoch [5/40], Iter [73/145] Loss: 3.8177
Epoch [5/40], Iter [74/145] Loss: 3.7360
Epoch [5/40], Iter [75/145] Loss: 3.9587


 10%|█         | 4/40 [00:51<06:42, 11.19s/it]

Epoch [5/40], Iter [76/145] Loss: 3.9211
Epoch [5/40], Iter [77/145] Loss: 3.8004
Epoch [5/40], Iter [78/145] Loss: 3.9918


 10%|█         | 4/40 [00:51<06:42, 11.19s/it]

Epoch [5/40], Iter [79/145] Loss: 3.8689
Epoch [5/40], Iter [80/145] Loss: 3.5870
Epoch [5/40], Iter [81/145] Loss: 3.9900


 10%|█         | 4/40 [00:51<06:42, 11.19s/it]

Epoch [5/40], Iter [82/145] Loss: 3.5255
Epoch [5/40], Iter [83/145] Loss: 3.6464
Epoch [5/40], Iter [84/145] Loss: 3.7308


 10%|█         | 4/40 [00:51<06:42, 11.19s/it]

Epoch [5/40], Iter [85/145] Loss: 3.6006
Epoch [5/40], Iter [86/145] Loss: 3.7240
Epoch [5/40], Iter [87/145] Loss: 4.0245


 10%|█         | 4/40 [00:51<06:42, 11.19s/it]

Epoch [5/40], Iter [88/145] Loss: 3.7531
Epoch [5/40], Iter [89/145] Loss: 3.6305
Epoch [5/40], Iter [90/145] Loss: 3.6602


 10%|█         | 4/40 [00:52<06:42, 11.19s/it]

Epoch [5/40], Iter [91/145] Loss: 4.0382
Epoch [5/40], Iter [92/145] Loss: 3.4985
Epoch [5/40], Iter [93/145] Loss: 3.9192


 10%|█         | 4/40 [00:52<06:42, 11.19s/it]

Epoch [5/40], Iter [94/145] Loss: 3.7401
Epoch [5/40], Iter [95/145] Loss: 3.9421
Epoch [5/40], Iter [96/145] Loss: 3.8110


 10%|█         | 4/40 [00:52<06:42, 11.19s/it]

Epoch [5/40], Iter [97/145] Loss: 4.1358
Epoch [5/40], Iter [98/145] Loss: 3.9604
Epoch [5/40], Iter [99/145] Loss: 4.1040


 10%|█         | 4/40 [00:52<06:42, 11.19s/it]

Epoch [5/40], Iter [100/145] Loss: 3.8920
Epoch [5/40], Iter [101/145] Loss: 4.0241
Epoch [5/40], Iter [102/145] Loss: 3.8394


 10%|█         | 4/40 [00:53<06:42, 11.19s/it]

Epoch [5/40], Iter [103/145] Loss: 3.9114
Epoch [5/40], Iter [104/145] Loss: 3.8541
Epoch [5/40], Iter [105/145] Loss: 3.7908


 10%|█         | 4/40 [00:53<06:42, 11.19s/it]

Epoch [5/40], Iter [106/145] Loss: 3.8763
Epoch [5/40], Iter [107/145] Loss: 3.9488
Epoch [5/40], Iter [108/145] Loss: 3.8978


 10%|█         | 4/40 [00:53<06:42, 11.19s/it]

Epoch [5/40], Iter [109/145] Loss: 4.0470
Epoch [5/40], Iter [110/145] Loss: 3.6268
Epoch [5/40], Iter [111/145] Loss: 3.6001


 10%|█         | 4/40 [00:53<06:42, 11.19s/it]

Epoch [5/40], Iter [112/145] Loss: 3.9503
Epoch [5/40], Iter [113/145] Loss: 3.6341
Epoch [5/40], Iter [114/145] Loss: 3.9840


 10%|█         | 4/40 [00:53<06:42, 11.19s/it]

Epoch [5/40], Iter [115/145] Loss: 3.6783
Epoch [5/40], Iter [116/145] Loss: 4.0765
Epoch [5/40], Iter [117/145] Loss: 3.9679


 10%|█         | 4/40 [00:54<06:42, 11.19s/it]

Epoch [5/40], Iter [118/145] Loss: 3.9012
Epoch [5/40], Iter [119/145] Loss: 3.8123
Epoch [5/40], Iter [120/145] Loss: 3.9088


 10%|█         | 4/40 [00:54<06:42, 11.19s/it]

Epoch [5/40], Iter [121/145] Loss: 3.8327
Epoch [5/40], Iter [122/145] Loss: 3.8330
Epoch [5/40], Iter [123/145] Loss: 3.9236


 10%|█         | 4/40 [00:54<06:42, 11.19s/it]

Epoch [5/40], Iter [124/145] Loss: 3.8947
Epoch [5/40], Iter [125/145] Loss: 3.8582
Epoch [5/40], Iter [126/145] Loss: 3.8288


 10%|█         | 4/40 [00:54<06:42, 11.19s/it]

Epoch [5/40], Iter [127/145] Loss: 3.6529
Epoch [5/40], Iter [128/145] Loss: 3.8834
Epoch [5/40], Iter [129/145] Loss: 3.6324


 10%|█         | 4/40 [00:55<06:42, 11.19s/it]

Epoch [5/40], Iter [130/145] Loss: 3.6268
Epoch [5/40], Iter [131/145] Loss: 3.8471
Epoch [5/40], Iter [132/145] Loss: 3.7671


 10%|█         | 4/40 [00:55<06:42, 11.19s/it]

Epoch [5/40], Iter [133/145] Loss: 3.7910
Epoch [5/40], Iter [134/145] Loss: 3.6292
Epoch [5/40], Iter [135/145] Loss: 3.8503


 10%|█         | 4/40 [00:55<06:42, 11.19s/it]

Epoch [5/40], Iter [136/145] Loss: 3.7219
Epoch [5/40], Iter [137/145] Loss: 3.9437
Epoch [5/40], Iter [138/145] Loss: 3.9685


 10%|█         | 4/40 [00:55<06:42, 11.19s/it]

Epoch [5/40], Iter [139/145] Loss: 3.5289
Epoch [5/40], Iter [140/145] Loss: 3.7892
Epoch [5/40], Iter [141/145] Loss: 3.8606


 10%|█         | 4/40 [00:55<06:42, 11.19s/it]

Epoch [5/40], Iter [142/145] Loss: 3.6139
Epoch [5/40], Iter [143/145] Loss: 3.8367
Epoch [5/40], Iter [144/145] Loss: 3.8496


 12%|█▎        | 5/40 [00:56<06:31, 11.18s/it]

Epoch [5/40], Iter [145/145] Loss: 4.0790


 12%|█▎        | 5/40 [00:56<06:31, 11.18s/it]

Epoch [6/40], Iter [1/145] Loss: 3.6271
Epoch [6/40], Iter [2/145] Loss: 3.7781
Epoch [6/40], Iter [3/145] Loss: 3.8627


 12%|█▎        | 5/40 [00:56<06:31, 11.18s/it]

Epoch [6/40], Iter [4/145] Loss: 3.4732
Epoch [6/40], Iter [5/145] Loss: 3.8289
Epoch [6/40], Iter [6/145] Loss: 3.8955


 12%|█▎        | 5/40 [00:56<06:31, 11.18s/it]

Epoch [6/40], Iter [7/145] Loss: 3.6749
Epoch [6/40], Iter [8/145] Loss: 4.0529
Epoch [6/40], Iter [9/145] Loss: 3.9334


 12%|█▎        | 5/40 [00:57<06:31, 11.18s/it]

Epoch [6/40], Iter [10/145] Loss: 3.9496
Epoch [6/40], Iter [11/145] Loss: 4.1346
Epoch [6/40], Iter [12/145] Loss: 4.0114


 12%|█▎        | 5/40 [00:57<06:31, 11.18s/it]

Epoch [6/40], Iter [13/145] Loss: 3.5702
Epoch [6/40], Iter [14/145] Loss: 3.7310
Epoch [6/40], Iter [15/145] Loss: 3.7932


 12%|█▎        | 5/40 [00:57<06:31, 11.18s/it]

Epoch [6/40], Iter [16/145] Loss: 3.9305
Epoch [6/40], Iter [17/145] Loss: 3.8645
Epoch [6/40], Iter [18/145] Loss: 3.8450


 12%|█▎        | 5/40 [00:57<06:31, 11.18s/it]

Epoch [6/40], Iter [19/145] Loss: 3.9167
Epoch [6/40], Iter [20/145] Loss: 3.7566
Epoch [6/40], Iter [21/145] Loss: 3.8311


 12%|█▎        | 5/40 [00:58<06:31, 11.18s/it]

Epoch [6/40], Iter [22/145] Loss: 3.7593
Epoch [6/40], Iter [23/145] Loss: 3.6346
Epoch [6/40], Iter [24/145] Loss: 3.7786


 12%|█▎        | 5/40 [00:58<06:31, 11.18s/it]

Epoch [6/40], Iter [25/145] Loss: 3.9079
Epoch [6/40], Iter [26/145] Loss: 4.0487
Epoch [6/40], Iter [27/145] Loss: 3.9025


 12%|█▎        | 5/40 [00:58<06:31, 11.18s/it]

Epoch [6/40], Iter [28/145] Loss: 3.9487
Epoch [6/40], Iter [29/145] Loss: 3.7079
Epoch [6/40], Iter [30/145] Loss: 3.7810


 12%|█▎        | 5/40 [00:58<06:31, 11.18s/it]

Epoch [6/40], Iter [31/145] Loss: 3.9033
Epoch [6/40], Iter [32/145] Loss: 3.9534
Epoch [6/40], Iter [33/145] Loss: 3.9204


 12%|█▎        | 5/40 [00:59<06:31, 11.18s/it]

Epoch [6/40], Iter [34/145] Loss: 3.8296
Epoch [6/40], Iter [35/145] Loss: 3.6888
Epoch [6/40], Iter [36/145] Loss: 3.8232


 12%|█▎        | 5/40 [00:59<06:31, 11.18s/it]

Epoch [6/40], Iter [37/145] Loss: 3.8440
Epoch [6/40], Iter [38/145] Loss: 3.7224
Epoch [6/40], Iter [39/145] Loss: 3.7973


 12%|█▎        | 5/40 [00:59<06:31, 11.18s/it]

Epoch [6/40], Iter [40/145] Loss: 3.8845
Epoch [6/40], Iter [41/145] Loss: 3.8181
Epoch [6/40], Iter [42/145] Loss: 3.5657


 12%|█▎        | 5/40 [00:59<06:31, 11.18s/it]

Epoch [6/40], Iter [43/145] Loss: 3.8657
Epoch [6/40], Iter [44/145] Loss: 3.5809
Epoch [6/40], Iter [45/145] Loss: 3.9036


 12%|█▎        | 5/40 [00:59<06:31, 11.18s/it]

Epoch [6/40], Iter [46/145] Loss: 3.9601
Epoch [6/40], Iter [47/145] Loss: 3.9880
Epoch [6/40], Iter [48/145] Loss: 3.8710


 12%|█▎        | 5/40 [01:00<06:31, 11.18s/it]

Epoch [6/40], Iter [49/145] Loss: 3.8700
Epoch [6/40], Iter [50/145] Loss: 3.7892
Epoch [6/40], Iter [51/145] Loss: 3.5797


 12%|█▎        | 5/40 [01:00<06:31, 11.18s/it]

Epoch [6/40], Iter [52/145] Loss: 3.8627
Epoch [6/40], Iter [53/145] Loss: 3.9703
Epoch [6/40], Iter [54/145] Loss: 3.8397


 12%|█▎        | 5/40 [01:00<06:31, 11.18s/it]

Epoch [6/40], Iter [55/145] Loss: 3.9087
Epoch [6/40], Iter [56/145] Loss: 3.7427
Epoch [6/40], Iter [57/145] Loss: 3.4852
Epoch [6/40], Iter [58/145] Loss: 3.9552


 12%|█▎        | 5/40 [01:00<06:31, 11.18s/it]

Epoch [6/40], Iter [59/145] Loss: 3.8778
Epoch [6/40], Iter [60/145] Loss: 3.7604
Epoch [6/40], Iter [61/145] Loss: 3.8791


 12%|█▎        | 5/40 [01:01<06:31, 11.18s/it]

Epoch [6/40], Iter [62/145] Loss: 3.8344
Epoch [6/40], Iter [63/145] Loss: 4.1252
Epoch [6/40], Iter [64/145] Loss: 3.8347


 12%|█▎        | 5/40 [01:01<06:31, 11.18s/it]

Epoch [6/40], Iter [65/145] Loss: 3.6615
Epoch [6/40], Iter [66/145] Loss: 3.8342
Epoch [6/40], Iter [67/145] Loss: 3.7547


 12%|█▎        | 5/40 [01:01<06:31, 11.18s/it]

Epoch [6/40], Iter [68/145] Loss: 3.9625
Epoch [6/40], Iter [69/145] Loss: 3.8574
Epoch [6/40], Iter [70/145] Loss: 3.9397


 12%|█▎        | 5/40 [01:01<06:31, 11.18s/it]

Epoch [6/40], Iter [71/145] Loss: 3.9657
Epoch [6/40], Iter [72/145] Loss: 4.0719
Epoch [6/40], Iter [73/145] Loss: 3.7179


 12%|█▎        | 5/40 [01:01<06:31, 11.18s/it]

Epoch [6/40], Iter [74/145] Loss: 3.8850
Epoch [6/40], Iter [75/145] Loss: 3.9200
Epoch [6/40], Iter [76/145] Loss: 3.8831


 12%|█▎        | 5/40 [01:02<06:31, 11.18s/it]

Epoch [6/40], Iter [77/145] Loss: 3.8388
Epoch [6/40], Iter [78/145] Loss: 3.9499
Epoch [6/40], Iter [79/145] Loss: 4.0016


 12%|█▎        | 5/40 [01:02<06:31, 11.18s/it]

Epoch [6/40], Iter [80/145] Loss: 3.8005
Epoch [6/40], Iter [81/145] Loss: 3.8905
Epoch [6/40], Iter [82/145] Loss: 4.0184


 12%|█▎        | 5/40 [01:02<06:31, 11.18s/it]

Epoch [6/40], Iter [83/145] Loss: 4.0298
Epoch [6/40], Iter [84/145] Loss: 3.9837
Epoch [6/40], Iter [85/145] Loss: 3.5427


 12%|█▎        | 5/40 [01:02<06:31, 11.18s/it]

Epoch [6/40], Iter [86/145] Loss: 3.9333
Epoch [6/40], Iter [87/145] Loss: 3.8775
Epoch [6/40], Iter [88/145] Loss: 3.7457


 12%|█▎        | 5/40 [01:02<06:31, 11.18s/it]

Epoch [6/40], Iter [89/145] Loss: 3.6175
Epoch [6/40], Iter [90/145] Loss: 3.9272
Epoch [6/40], Iter [91/145] Loss: 3.6615


 12%|█▎        | 5/40 [01:03<06:31, 11.18s/it]

Epoch [6/40], Iter [92/145] Loss: 4.0700
Epoch [6/40], Iter [93/145] Loss: 3.7747
Epoch [6/40], Iter [94/145] Loss: 3.9569


 12%|█▎        | 5/40 [01:03<06:31, 11.18s/it]

Epoch [6/40], Iter [95/145] Loss: 3.9208
Epoch [6/40], Iter [96/145] Loss: 3.8116
Epoch [6/40], Iter [97/145] Loss: 3.7569


 12%|█▎        | 5/40 [01:03<06:31, 11.18s/it]

Epoch [6/40], Iter [98/145] Loss: 3.6045
Epoch [6/40], Iter [99/145] Loss: 3.8031
Epoch [6/40], Iter [100/145] Loss: 3.8314


 12%|█▎        | 5/40 [01:03<06:31, 11.18s/it]

Epoch [6/40], Iter [101/145] Loss: 3.7031
Epoch [6/40], Iter [102/145] Loss: 3.6795
Epoch [6/40], Iter [103/145] Loss: 3.7394


 12%|█▎        | 5/40 [01:04<06:31, 11.18s/it]

Epoch [6/40], Iter [104/145] Loss: 3.5051
Epoch [6/40], Iter [105/145] Loss: 3.7901
Epoch [6/40], Iter [106/145] Loss: 3.8353


 12%|█▎        | 5/40 [01:04<06:31, 11.18s/it]

Epoch [6/40], Iter [107/145] Loss: 3.7282
Epoch [6/40], Iter [108/145] Loss: 3.7816
Epoch [6/40], Iter [109/145] Loss: 4.0745


 12%|█▎        | 5/40 [01:04<06:31, 11.18s/it]

Epoch [6/40], Iter [110/145] Loss: 3.9353
Epoch [6/40], Iter [111/145] Loss: 3.9004
Epoch [6/40], Iter [112/145] Loss: 3.7912


 12%|█▎        | 5/40 [01:04<06:31, 11.18s/it]

Epoch [6/40], Iter [113/145] Loss: 3.7711
Epoch [6/40], Iter [114/145] Loss: 3.7892
Epoch [6/40], Iter [115/145] Loss: 3.7683


 12%|█▎        | 5/40 [01:04<06:31, 11.18s/it]

Epoch [6/40], Iter [116/145] Loss: 3.7634
Epoch [6/40], Iter [117/145] Loss: 4.0464
Epoch [6/40], Iter [118/145] Loss: 3.8465


 12%|█▎        | 5/40 [01:05<06:31, 11.18s/it]

Epoch [6/40], Iter [119/145] Loss: 3.9525
Epoch [6/40], Iter [120/145] Loss: 4.1081
Epoch [6/40], Iter [121/145] Loss: 3.8293


 12%|█▎        | 5/40 [01:05<06:31, 11.18s/it]

Epoch [6/40], Iter [122/145] Loss: 3.7257
Epoch [6/40], Iter [123/145] Loss: 3.7433
Epoch [6/40], Iter [124/145] Loss: 3.8976


 12%|█▎        | 5/40 [01:05<06:31, 11.18s/it]

Epoch [6/40], Iter [125/145] Loss: 3.9838
Epoch [6/40], Iter [126/145] Loss: 3.7889
Epoch [6/40], Iter [127/145] Loss: 3.6454


 12%|█▎        | 5/40 [01:05<06:31, 11.18s/it]

Epoch [6/40], Iter [128/145] Loss: 3.7914
Epoch [6/40], Iter [129/145] Loss: 3.9135
Epoch [6/40], Iter [130/145] Loss: 4.0696


 12%|█▎        | 5/40 [01:06<06:31, 11.18s/it]

Epoch [6/40], Iter [131/145] Loss: 3.9228
Epoch [6/40], Iter [132/145] Loss: 3.8043
Epoch [6/40], Iter [133/145] Loss: 4.0086


 12%|█▎        | 5/40 [01:06<06:31, 11.18s/it]

Epoch [6/40], Iter [134/145] Loss: 3.7322
Epoch [6/40], Iter [135/145] Loss: 3.9164
Epoch [6/40], Iter [136/145] Loss: 3.7322


 12%|█▎        | 5/40 [01:06<06:31, 11.18s/it]

Epoch [6/40], Iter [137/145] Loss: 3.6790
Epoch [6/40], Iter [138/145] Loss: 3.9110
Epoch [6/40], Iter [139/145] Loss: 3.6834


 12%|█▎        | 5/40 [01:06<06:31, 11.18s/it]

Epoch [6/40], Iter [140/145] Loss: 3.9581
Epoch [6/40], Iter [141/145] Loss: 3.9012
Epoch [6/40], Iter [142/145] Loss: 3.8518


 15%|█▌        | 6/40 [01:06<06:16, 11.06s/it]

Epoch [6/40], Iter [143/145] Loss: 3.7490
Epoch [6/40], Iter [144/145] Loss: 3.6583
Epoch [6/40], Iter [145/145] Loss: 3.9886


 15%|█▌        | 6/40 [01:07<06:16, 11.06s/it]

Epoch [7/40], Iter [1/145] Loss: 3.9442
Epoch [7/40], Iter [2/145] Loss: 3.7968
Epoch [7/40], Iter [3/145] Loss: 3.4528


 15%|█▌        | 6/40 [01:07<06:16, 11.06s/it]

Epoch [7/40], Iter [4/145] Loss: 3.7282
Epoch [7/40], Iter [5/145] Loss: 4.0123
Epoch [7/40], Iter [6/145] Loss: 3.9133


 15%|█▌        | 6/40 [01:07<06:16, 11.06s/it]

Epoch [7/40], Iter [7/145] Loss: 3.9506
Epoch [7/40], Iter [8/145] Loss: 3.8966
Epoch [7/40], Iter [9/145] Loss: 3.6148


 15%|█▌        | 6/40 [01:08<06:16, 11.06s/it]

Epoch [7/40], Iter [10/145] Loss: 3.8171
Epoch [7/40], Iter [11/145] Loss: 4.0728
Epoch [7/40], Iter [12/145] Loss: 3.5469


 15%|█▌        | 6/40 [01:08<06:16, 11.06s/it]

Epoch [7/40], Iter [13/145] Loss: 3.9013
Epoch [7/40], Iter [14/145] Loss: 3.7876
Epoch [7/40], Iter [15/145] Loss: 4.0270


 15%|█▌        | 6/40 [01:08<06:16, 11.06s/it]

Epoch [7/40], Iter [16/145] Loss: 4.1419
Epoch [7/40], Iter [17/145] Loss: 3.8170
Epoch [7/40], Iter [18/145] Loss: 3.9446


 15%|█▌        | 6/40 [01:08<06:16, 11.06s/it]

Epoch [7/40], Iter [19/145] Loss: 3.9848
Epoch [7/40], Iter [20/145] Loss: 3.8875
Epoch [7/40], Iter [21/145] Loss: 3.7482


 15%|█▌        | 6/40 [01:08<06:16, 11.06s/it]

Epoch [7/40], Iter [22/145] Loss: 3.9307
Epoch [7/40], Iter [23/145] Loss: 3.6882
Epoch [7/40], Iter [24/145] Loss: 3.8575


 15%|█▌        | 6/40 [01:09<06:16, 11.06s/it]

Epoch [7/40], Iter [25/145] Loss: 3.9820
Epoch [7/40], Iter [26/145] Loss: 3.7940
Epoch [7/40], Iter [27/145] Loss: 3.8702


 15%|█▌        | 6/40 [01:09<06:16, 11.06s/it]

Epoch [7/40], Iter [28/145] Loss: 3.7475
Epoch [7/40], Iter [29/145] Loss: 3.8574
Epoch [7/40], Iter [30/145] Loss: 3.8082


 15%|█▌        | 6/40 [01:09<06:16, 11.06s/it]

Epoch [7/40], Iter [31/145] Loss: 3.9483
Epoch [7/40], Iter [32/145] Loss: 3.5462
Epoch [7/40], Iter [33/145] Loss: 3.5274


 15%|█▌        | 6/40 [01:09<06:16, 11.06s/it]

Epoch [7/40], Iter [34/145] Loss: 3.5423
Epoch [7/40], Iter [35/145] Loss: 3.6517
Epoch [7/40], Iter [36/145] Loss: 3.4849


 15%|█▌        | 6/40 [01:10<06:16, 11.06s/it]

Epoch [7/40], Iter [37/145] Loss: 3.5388
Epoch [7/40], Iter [38/145] Loss: 4.1438
Epoch [7/40], Iter [39/145] Loss: 3.6513


 15%|█▌        | 6/40 [01:10<06:16, 11.06s/it]

Epoch [7/40], Iter [40/145] Loss: 3.9730
Epoch [7/40], Iter [41/145] Loss: 4.1119
Epoch [7/40], Iter [42/145] Loss: 3.6793


 15%|█▌        | 6/40 [01:10<06:16, 11.06s/it]

Epoch [7/40], Iter [43/145] Loss: 3.6663
Epoch [7/40], Iter [44/145] Loss: 3.8384
Epoch [7/40], Iter [45/145] Loss: 3.7196


 15%|█▌        | 6/40 [01:10<06:16, 11.06s/it]

Epoch [7/40], Iter [46/145] Loss: 3.5404
Epoch [7/40], Iter [47/145] Loss: 4.0887
Epoch [7/40], Iter [48/145] Loss: 3.8538


 15%|█▌        | 6/40 [01:10<06:16, 11.06s/it]

Epoch [7/40], Iter [49/145] Loss: 4.1617
Epoch [7/40], Iter [50/145] Loss: 3.8413
Epoch [7/40], Iter [51/145] Loss: 3.9179


 15%|█▌        | 6/40 [01:11<06:16, 11.06s/it]

Epoch [7/40], Iter [52/145] Loss: 3.7999
Epoch [7/40], Iter [53/145] Loss: 3.9631
Epoch [7/40], Iter [54/145] Loss: 3.6742


 15%|█▌        | 6/40 [01:11<06:16, 11.06s/it]

Epoch [7/40], Iter [55/145] Loss: 3.9900
Epoch [7/40], Iter [56/145] Loss: 3.8382
Epoch [7/40], Iter [57/145] Loss: 3.9308


 15%|█▌        | 6/40 [01:11<06:16, 11.06s/it]

Epoch [7/40], Iter [58/145] Loss: 3.8771
Epoch [7/40], Iter [59/145] Loss: 3.9650
Epoch [7/40], Iter [60/145] Loss: 3.8092


 15%|█▌        | 6/40 [01:11<06:16, 11.06s/it]

Epoch [7/40], Iter [61/145] Loss: 4.0470
Epoch [7/40], Iter [62/145] Loss: 3.9761
Epoch [7/40], Iter [63/145] Loss: 3.9051


 15%|█▌        | 6/40 [01:12<06:16, 11.06s/it]

Epoch [7/40], Iter [64/145] Loss: 3.6998
Epoch [7/40], Iter [65/145] Loss: 3.8636
Epoch [7/40], Iter [66/145] Loss: 3.7109


 15%|█▌        | 6/40 [01:12<06:16, 11.06s/it]

Epoch [7/40], Iter [67/145] Loss: 4.0502
Epoch [7/40], Iter [68/145] Loss: 3.8765
Epoch [7/40], Iter [69/145] Loss: 3.7660


 15%|█▌        | 6/40 [01:12<06:16, 11.06s/it]

Epoch [7/40], Iter [70/145] Loss: 3.9006
Epoch [7/40], Iter [71/145] Loss: 3.8238
Epoch [7/40], Iter [72/145] Loss: 3.9366


 15%|█▌        | 6/40 [01:12<06:16, 11.06s/it]

Epoch [7/40], Iter [73/145] Loss: 3.7873
Epoch [7/40], Iter [74/145] Loss: 3.7787
Epoch [7/40], Iter [75/145] Loss: 3.7529


 15%|█▌        | 6/40 [01:13<06:16, 11.06s/it]

Epoch [7/40], Iter [76/145] Loss: 4.0811
Epoch [7/40], Iter [77/145] Loss: 3.5893
Epoch [7/40], Iter [78/145] Loss: 3.9726


 15%|█▌        | 6/40 [01:13<06:16, 11.06s/it]

Epoch [7/40], Iter [79/145] Loss: 3.5680
Epoch [7/40], Iter [80/145] Loss: 3.9825
Epoch [7/40], Iter [81/145] Loss: 3.6903


 15%|█▌        | 6/40 [01:13<06:16, 11.06s/it]

Epoch [7/40], Iter [82/145] Loss: 4.0903
Epoch [7/40], Iter [83/145] Loss: 3.6752
Epoch [7/40], Iter [84/145] Loss: 4.0513


 15%|█▌        | 6/40 [01:13<06:16, 11.06s/it]

Epoch [7/40], Iter [85/145] Loss: 3.8673
Epoch [7/40], Iter [86/145] Loss: 3.9969
Epoch [7/40], Iter [87/145] Loss: 3.8789


 15%|█▌        | 6/40 [01:13<06:16, 11.06s/it]

Epoch [7/40], Iter [88/145] Loss: 3.9841
Epoch [7/40], Iter [89/145] Loss: 3.7893
Epoch [7/40], Iter [90/145] Loss: 3.8634


 15%|█▌        | 6/40 [01:14<06:16, 11.06s/it]

Epoch [7/40], Iter [91/145] Loss: 3.8085
Epoch [7/40], Iter [92/145] Loss: 3.9612
Epoch [7/40], Iter [93/145] Loss: 3.8436


 15%|█▌        | 6/40 [01:14<06:16, 11.06s/it]

Epoch [7/40], Iter [94/145] Loss: 3.7831
Epoch [7/40], Iter [95/145] Loss: 3.7865
Epoch [7/40], Iter [96/145] Loss: 3.6955


 15%|█▌        | 6/40 [01:14<06:16, 11.06s/it]

Epoch [7/40], Iter [97/145] Loss: 3.6699
Epoch [7/40], Iter [98/145] Loss: 3.7961
Epoch [7/40], Iter [99/145] Loss: 3.9369


 15%|█▌        | 6/40 [01:14<06:16, 11.06s/it]

Epoch [7/40], Iter [100/145] Loss: 4.0828
Epoch [7/40], Iter [101/145] Loss: 4.0869
Epoch [7/40], Iter [102/145] Loss: 3.5402


 15%|█▌        | 6/40 [01:15<06:16, 11.06s/it]

Epoch [7/40], Iter [103/145] Loss: 3.8699
Epoch [7/40], Iter [104/145] Loss: 3.6565
Epoch [7/40], Iter [105/145] Loss: 3.8572


 15%|█▌        | 6/40 [01:15<06:16, 11.06s/it]

Epoch [7/40], Iter [106/145] Loss: 3.7350
Epoch [7/40], Iter [107/145] Loss: 3.5980
Epoch [7/40], Iter [108/145] Loss: 3.6430


 15%|█▌        | 6/40 [01:15<06:16, 11.06s/it]

Epoch [7/40], Iter [109/145] Loss: 3.8071
Epoch [7/40], Iter [110/145] Loss: 3.7476
Epoch [7/40], Iter [111/145] Loss: 3.8461


 15%|█▌        | 6/40 [01:15<06:16, 11.06s/it]

Epoch [7/40], Iter [112/145] Loss: 3.8815
Epoch [7/40], Iter [113/145] Loss: 4.0221
Epoch [7/40], Iter [114/145] Loss: 3.6902


 15%|█▌        | 6/40 [01:16<06:16, 11.06s/it]

Epoch [7/40], Iter [115/145] Loss: 3.5688
Epoch [7/40], Iter [116/145] Loss: 3.6229
Epoch [7/40], Iter [117/145] Loss: 3.8274


 15%|█▌        | 6/40 [01:16<06:16, 11.06s/it]

Epoch [7/40], Iter [118/145] Loss: 3.6656
Epoch [7/40], Iter [119/145] Loss: 3.9730
Epoch [7/40], Iter [120/145] Loss: 3.5510


 15%|█▌        | 6/40 [01:16<06:16, 11.06s/it]

Epoch [7/40], Iter [121/145] Loss: 3.7879
Epoch [7/40], Iter [122/145] Loss: 3.9220
Epoch [7/40], Iter [123/145] Loss: 3.8390


 15%|█▌        | 6/40 [01:16<06:16, 11.06s/it]

Epoch [7/40], Iter [124/145] Loss: 3.7219
Epoch [7/40], Iter [125/145] Loss: 4.0576
Epoch [7/40], Iter [126/145] Loss: 4.1713


 15%|█▌        | 6/40 [01:16<06:16, 11.06s/it]

Epoch [7/40], Iter [127/145] Loss: 3.8072
Epoch [7/40], Iter [128/145] Loss: 3.8335
Epoch [7/40], Iter [129/145] Loss: 3.8831


 15%|█▌        | 6/40 [01:17<06:16, 11.06s/it]

Epoch [7/40], Iter [130/145] Loss: 3.9417
Epoch [7/40], Iter [131/145] Loss: 4.0369
Epoch [7/40], Iter [132/145] Loss: 3.9001


 15%|█▌        | 6/40 [01:17<06:16, 11.06s/it]

Epoch [7/40], Iter [133/145] Loss: 3.8175
Epoch [7/40], Iter [134/145] Loss: 4.0251
Epoch [7/40], Iter [135/145] Loss: 3.7351


 15%|█▌        | 6/40 [01:17<06:16, 11.06s/it]

Epoch [7/40], Iter [136/145] Loss: 3.8821
Epoch [7/40], Iter [137/145] Loss: 3.7537
Epoch [7/40], Iter [138/145] Loss: 3.6579


 15%|█▌        | 6/40 [01:17<06:16, 11.06s/it]

Epoch [7/40], Iter [139/145] Loss: 3.8878
Epoch [7/40], Iter [140/145] Loss: 3.8146
Epoch [7/40], Iter [141/145] Loss: 3.7871


 15%|█▌        | 6/40 [01:18<06:16, 11.06s/it]

Epoch [7/40], Iter [142/145] Loss: 3.6241
Epoch [7/40], Iter [143/145] Loss: 3.9074
Epoch [7/40], Iter [144/145] Loss: 3.9244


 18%|█▊        | 7/40 [01:18<06:06, 11.11s/it]

Epoch [7/40], Iter [145/145] Loss: 4.0181


 18%|█▊        | 7/40 [01:18<06:06, 11.11s/it]

Epoch [8/40], Iter [1/145] Loss: 3.6786
Epoch [8/40], Iter [2/145] Loss: 3.9157
Epoch [8/40], Iter [3/145] Loss: 3.7057


 18%|█▊        | 7/40 [01:18<06:06, 11.11s/it]

Epoch [8/40], Iter [4/145] Loss: 4.0031
Epoch [8/40], Iter [5/145] Loss: 3.8015
Epoch [8/40], Iter [6/145] Loss: 3.4316


 18%|█▊        | 7/40 [01:19<06:06, 11.11s/it]

Epoch [8/40], Iter [7/145] Loss: 4.0279
Epoch [8/40], Iter [8/145] Loss: 4.0171
Epoch [8/40], Iter [9/145] Loss: 3.9163


 18%|█▊        | 7/40 [01:19<06:06, 11.11s/it]

Epoch [8/40], Iter [10/145] Loss: 3.9662
Epoch [8/40], Iter [11/145] Loss: 3.7209
Epoch [8/40], Iter [12/145] Loss: 3.7993


 18%|█▊        | 7/40 [01:19<06:06, 11.11s/it]

Epoch [8/40], Iter [13/145] Loss: 3.6925
Epoch [8/40], Iter [14/145] Loss: 3.7519
Epoch [8/40], Iter [15/145] Loss: 3.5664


 18%|█▊        | 7/40 [01:19<06:06, 11.11s/it]

Epoch [8/40], Iter [16/145] Loss: 3.6744
Epoch [8/40], Iter [17/145] Loss: 3.6753
Epoch [8/40], Iter [18/145] Loss: 3.5459


 18%|█▊        | 7/40 [01:19<06:06, 11.11s/it]

Epoch [8/40], Iter [19/145] Loss: 3.9997
Epoch [8/40], Iter [20/145] Loss: 3.9869
Epoch [8/40], Iter [21/145] Loss: 4.0632


 18%|█▊        | 7/40 [01:20<06:06, 11.11s/it]

Epoch [8/40], Iter [22/145] Loss: 3.8985
Epoch [8/40], Iter [23/145] Loss: 3.8874
Epoch [8/40], Iter [24/145] Loss: 3.6870


 18%|█▊        | 7/40 [01:20<06:06, 11.11s/it]

Epoch [8/40], Iter [25/145] Loss: 4.1190
Epoch [8/40], Iter [26/145] Loss: 3.6733
Epoch [8/40], Iter [27/145] Loss: 3.6425


 18%|█▊        | 7/40 [01:20<06:06, 11.11s/it]

Epoch [8/40], Iter [28/145] Loss: 3.8985
Epoch [8/40], Iter [29/145] Loss: 3.7777
Epoch [8/40], Iter [30/145] Loss: 3.6869


 18%|█▊        | 7/40 [01:20<06:06, 11.11s/it]

Epoch [8/40], Iter [31/145] Loss: 3.8838
Epoch [8/40], Iter [32/145] Loss: 3.7185
Epoch [8/40], Iter [33/145] Loss: 3.8005


 18%|█▊        | 7/40 [01:21<06:06, 11.11s/it]

Epoch [8/40], Iter [34/145] Loss: 3.7744
Epoch [8/40], Iter [35/145] Loss: 3.9010
Epoch [8/40], Iter [36/145] Loss: 3.8932


 18%|█▊        | 7/40 [01:21<06:06, 11.11s/it]

Epoch [8/40], Iter [37/145] Loss: 4.0027
Epoch [8/40], Iter [38/145] Loss: 3.7289
Epoch [8/40], Iter [39/145] Loss: 4.0710


 18%|█▊        | 7/40 [01:21<06:06, 11.11s/it]

Epoch [8/40], Iter [40/145] Loss: 3.9074
Epoch [8/40], Iter [41/145] Loss: 3.7913
Epoch [8/40], Iter [42/145] Loss: 3.8318


 18%|█▊        | 7/40 [01:21<06:06, 11.11s/it]

Epoch [8/40], Iter [43/145] Loss: 3.7082
Epoch [8/40], Iter [44/145] Loss: 3.9051
Epoch [8/40], Iter [45/145] Loss: 3.7118


 18%|█▊        | 7/40 [01:21<06:06, 11.11s/it]

Epoch [8/40], Iter [46/145] Loss: 3.7223
Epoch [8/40], Iter [47/145] Loss: 3.8812
Epoch [8/40], Iter [48/145] Loss: 3.8559


 18%|█▊        | 7/40 [01:22<06:06, 11.11s/it]

Epoch [8/40], Iter [49/145] Loss: 3.9494
Epoch [8/40], Iter [50/145] Loss: 3.8913
Epoch [8/40], Iter [51/145] Loss: 3.7029


 18%|█▊        | 7/40 [01:22<06:06, 11.11s/it]

Epoch [8/40], Iter [52/145] Loss: 3.8870
Epoch [8/40], Iter [53/145] Loss: 3.8466
Epoch [8/40], Iter [54/145] Loss: 3.8537


 18%|█▊        | 7/40 [01:22<06:06, 11.11s/it]

Epoch [8/40], Iter [55/145] Loss: 3.7520
Epoch [8/40], Iter [56/145] Loss: 4.2116
Epoch [8/40], Iter [57/145] Loss: 3.6242


 18%|█▊        | 7/40 [01:22<06:06, 11.11s/it]

Epoch [8/40], Iter [58/145] Loss: 3.9680
Epoch [8/40], Iter [59/145] Loss: 3.9760
Epoch [8/40], Iter [60/145] Loss: 4.0471


 18%|█▊        | 7/40 [01:23<06:06, 11.11s/it]

Epoch [8/40], Iter [61/145] Loss: 4.0048
Epoch [8/40], Iter [62/145] Loss: 3.9096
Epoch [8/40], Iter [63/145] Loss: 4.0074


 18%|█▊        | 7/40 [01:23<06:06, 11.11s/it]

Epoch [8/40], Iter [64/145] Loss: 3.8960
Epoch [8/40], Iter [65/145] Loss: 3.7148
Epoch [8/40], Iter [66/145] Loss: 3.7369


 18%|█▊        | 7/40 [01:23<06:06, 11.11s/it]

Epoch [8/40], Iter [67/145] Loss: 3.8994
Epoch [8/40], Iter [68/145] Loss: 3.7092
Epoch [8/40], Iter [69/145] Loss: 3.6271


 18%|█▊        | 7/40 [01:23<06:06, 11.11s/it]

Epoch [8/40], Iter [70/145] Loss: 3.6442
Epoch [8/40], Iter [71/145] Loss: 3.6851
Epoch [8/40], Iter [72/145] Loss: 4.2206


 18%|█▊        | 7/40 [01:24<06:06, 11.11s/it]

Epoch [8/40], Iter [73/145] Loss: 4.0638
Epoch [8/40], Iter [74/145] Loss: 4.1416
Epoch [8/40], Iter [75/145] Loss: 3.6248


 18%|█▊        | 7/40 [01:24<06:06, 11.11s/it]

Epoch [8/40], Iter [76/145] Loss: 3.8754
Epoch [8/40], Iter [77/145] Loss: 4.0432
Epoch [8/40], Iter [78/145] Loss: 3.9513


 18%|█▊        | 7/40 [01:24<06:06, 11.11s/it]

Epoch [8/40], Iter [79/145] Loss: 3.7703
Epoch [8/40], Iter [80/145] Loss: 3.7646
Epoch [8/40], Iter [81/145] Loss: 3.9071


 18%|█▊        | 7/40 [01:24<06:06, 11.11s/it]

Epoch [8/40], Iter [82/145] Loss: 4.0348
Epoch [8/40], Iter [83/145] Loss: 3.7644
Epoch [8/40], Iter [84/145] Loss: 3.5048


 18%|█▊        | 7/40 [01:25<06:06, 11.11s/it]

Epoch [8/40], Iter [85/145] Loss: 3.7681
Epoch [8/40], Iter [86/145] Loss: 3.9332
Epoch [8/40], Iter [87/145] Loss: 3.8442


 18%|█▊        | 7/40 [01:25<06:06, 11.11s/it]

Epoch [8/40], Iter [88/145] Loss: 3.9361
Epoch [8/40], Iter [89/145] Loss: 3.7911
Epoch [8/40], Iter [90/145] Loss: 3.7260


 18%|█▊        | 7/40 [01:25<06:06, 11.11s/it]

Epoch [8/40], Iter [91/145] Loss: 3.5260
Epoch [8/40], Iter [92/145] Loss: 3.9834
Epoch [8/40], Iter [93/145] Loss: 3.8852


 18%|█▊        | 7/40 [01:25<06:06, 11.11s/it]

Epoch [8/40], Iter [94/145] Loss: 3.9225
Epoch [8/40], Iter [95/145] Loss: 3.6923
Epoch [8/40], Iter [96/145] Loss: 3.9202


 18%|█▊        | 7/40 [01:26<06:06, 11.11s/it]

Epoch [8/40], Iter [97/145] Loss: 3.7039
Epoch [8/40], Iter [98/145] Loss: 3.9728
Epoch [8/40], Iter [99/145] Loss: 3.7808


 18%|█▊        | 7/40 [01:26<06:06, 11.11s/it]

Epoch [8/40], Iter [100/145] Loss: 3.7951
Epoch [8/40], Iter [101/145] Loss: 3.8957
Epoch [8/40], Iter [102/145] Loss: 3.8465


 18%|█▊        | 7/40 [01:26<06:06, 11.11s/it]

Epoch [8/40], Iter [103/145] Loss: 3.8021
Epoch [8/40], Iter [104/145] Loss: 3.8133
Epoch [8/40], Iter [105/145] Loss: 3.8474


 18%|█▊        | 7/40 [01:26<06:06, 11.11s/it]

Epoch [8/40], Iter [106/145] Loss: 3.9645
Epoch [8/40], Iter [107/145] Loss: 3.6775
Epoch [8/40], Iter [108/145] Loss: 3.6999


 18%|█▊        | 7/40 [01:27<06:06, 11.11s/it]

Epoch [8/40], Iter [109/145] Loss: 4.0241
Epoch [8/40], Iter [110/145] Loss: 3.9820
Epoch [8/40], Iter [111/145] Loss: 3.8147


 18%|█▊        | 7/40 [01:27<06:06, 11.11s/it]

Epoch [8/40], Iter [112/145] Loss: 3.7847
Epoch [8/40], Iter [113/145] Loss: 4.0573
Epoch [8/40], Iter [114/145] Loss: 3.8870


 18%|█▊        | 7/40 [01:27<06:06, 11.11s/it]

Epoch [8/40], Iter [115/145] Loss: 3.7495
Epoch [8/40], Iter [116/145] Loss: 3.8920
Epoch [8/40], Iter [117/145] Loss: 3.6559


 18%|█▊        | 7/40 [01:27<06:06, 11.11s/it]

Epoch [8/40], Iter [118/145] Loss: 3.9199
Epoch [8/40], Iter [119/145] Loss: 3.9166
Epoch [8/40], Iter [120/145] Loss: 3.6434


 18%|█▊        | 7/40 [01:28<06:06, 11.11s/it]

Epoch [8/40], Iter [121/145] Loss: 3.7086
Epoch [8/40], Iter [122/145] Loss: 3.9205
Epoch [8/40], Iter [123/145] Loss: 3.7998


 18%|█▊        | 7/40 [01:28<06:06, 11.11s/it]

Epoch [8/40], Iter [124/145] Loss: 4.0021
Epoch [8/40], Iter [125/145] Loss: 3.7125
Epoch [8/40], Iter [126/145] Loss: 3.8756


 18%|█▊        | 7/40 [01:28<06:06, 11.11s/it]

Epoch [8/40], Iter [127/145] Loss: 3.8313
Epoch [8/40], Iter [128/145] Loss: 3.9109
Epoch [8/40], Iter [129/145] Loss: 3.7463


 18%|█▊        | 7/40 [01:28<06:06, 11.11s/it]

Epoch [8/40], Iter [130/145] Loss: 3.8518
Epoch [8/40], Iter [131/145] Loss: 3.9662
Epoch [8/40], Iter [132/145] Loss: 3.8127


 18%|█▊        | 7/40 [01:28<06:06, 11.11s/it]

Epoch [8/40], Iter [133/145] Loss: 3.5271
Epoch [8/40], Iter [134/145] Loss: 3.6862
Epoch [8/40], Iter [135/145] Loss: 3.8516


 18%|█▊        | 7/40 [01:29<06:06, 11.11s/it]

Epoch [8/40], Iter [136/145] Loss: 4.0360
Epoch [8/40], Iter [137/145] Loss: 3.9031
Epoch [8/40], Iter [138/145] Loss: 3.9353


 18%|█▊        | 7/40 [01:29<06:06, 11.11s/it]

Epoch [8/40], Iter [139/145] Loss: 3.8070
Epoch [8/40], Iter [140/145] Loss: 4.1250
Epoch [8/40], Iter [141/145] Loss: 3.9766


 18%|█▊        | 7/40 [01:29<06:06, 11.11s/it]

Epoch [8/40], Iter [142/145] Loss: 3.7967
Epoch [8/40], Iter [143/145] Loss: 3.7940
Epoch [8/40], Iter [144/145] Loss: 3.8288


 20%|██        | 8/40 [01:29<06:01, 11.30s/it]

Epoch [8/40], Iter [145/145] Loss: 3.7554


 20%|██        | 8/40 [01:30<06:01, 11.30s/it]

Epoch [9/40], Iter [1/145] Loss: 3.8622
Epoch [9/40], Iter [2/145] Loss: 3.5922
Epoch [9/40], Iter [3/145] Loss: 3.8337


 20%|██        | 8/40 [01:30<06:01, 11.30s/it]

Epoch [9/40], Iter [4/145] Loss: 3.8508
Epoch [9/40], Iter [5/145] Loss: 3.8983
Epoch [9/40], Iter [6/145] Loss: 3.8833


 20%|██        | 8/40 [01:30<06:01, 11.30s/it]

Epoch [9/40], Iter [7/145] Loss: 3.9762
Epoch [9/40], Iter [8/145] Loss: 3.5592
Epoch [9/40], Iter [9/145] Loss: 4.0338


 20%|██        | 8/40 [01:31<06:01, 11.30s/it]

Epoch [9/40], Iter [10/145] Loss: 3.8278
Epoch [9/40], Iter [11/145] Loss: 3.7693
Epoch [9/40], Iter [12/145] Loss: 3.8787


 20%|██        | 8/40 [01:31<06:01, 11.30s/it]

Epoch [9/40], Iter [13/145] Loss: 3.6143
Epoch [9/40], Iter [14/145] Loss: 4.0598
Epoch [9/40], Iter [15/145] Loss: 3.8025


 20%|██        | 8/40 [01:31<06:01, 11.30s/it]

Epoch [9/40], Iter [16/145] Loss: 3.7868
Epoch [9/40], Iter [17/145] Loss: 4.0211
Epoch [9/40], Iter [18/145] Loss: 3.7728


 20%|██        | 8/40 [01:31<06:01, 11.30s/it]

Epoch [9/40], Iter [19/145] Loss: 3.8109
Epoch [9/40], Iter [20/145] Loss: 3.7581
Epoch [9/40], Iter [21/145] Loss: 4.1815


 20%|██        | 8/40 [01:32<06:01, 11.30s/it]

Epoch [9/40], Iter [22/145] Loss: 3.8118
Epoch [9/40], Iter [23/145] Loss: 3.8386
Epoch [9/40], Iter [24/145] Loss: 3.9113


 20%|██        | 8/40 [01:32<06:01, 11.30s/it]

Epoch [9/40], Iter [25/145] Loss: 3.8485
Epoch [9/40], Iter [26/145] Loss: 3.7764
Epoch [9/40], Iter [27/145] Loss: 3.8763


 20%|██        | 8/40 [01:32<06:09, 11.56s/it]


Epoch [9/40], Iter [28/145] Loss: 3.8278
Epoch [9/40], Iter [29/145] Loss: 3.7948


KeyboardInterrupt: 