In [1]:
import torch
import torch.nn as nn

In [2]:
class StemBlock(nn.Module):
    
    def __init__(self, in_ch=3, out_ch=16):
        # in_ch: default is 3.
        # out_ch: default is 16.
        super(StemBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(out_ch, out_ch//2, kernel_size=1, stride=1)
        self.conv3 = nn.Conv2d(out_ch//2, out_ch, kernel_size=3, stride=2, padding=1)
        self.conv4 = nn.Conv2d(out_ch * 2, out_ch, kernel_size=3, stride=1, padding=1)
        
        self.maxPooling = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        
        self.bn1 = nn.BatchNorm2d(num_features=out_ch)
        self.bn2 = nn.BatchNorm2d(num_features=out_ch//2)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.relu(self.bn1(x))
        
        # x1
        x1 = self.conv2(x)
        x1 = self.relu(self.bn2(x1))
        x1 = self.conv3(x1)
        # [H/4, W/4, C]
        x1 = self.relu(self.bn1(x1))
        
        # x2
        # [H/4, W/4, C]
        x2 = self.maxPooling(x)
        
        x = torch.cat((x1, x2), 1)
        x = self.conv4(x)
        x = self.relu(self.bn1(x))
        return x

In [3]:
class ContextEmbeddingBlock(nn.Module):
    
    def __init__(self, in_ch=128, out_ch=128, stride=1):
        
        super(ContextEmbeddingBlock, self).__init__()
        self.gapPooling = nn.AdaptiveAvgPool2d((1,1))
        self.bn = nn.BatchNorm2d(num_features=in_ch)
        self.conv1 = nn.Conv2d(in_ch, out_ch, kernel_size=1, stride=1)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=1, padding=1)
        
    def forward(self, x):
        x1 = self.gapPooling(x)
        # [None, C, 1, 1]
        if not x1.size()[0] == 1:
            x1 = self.bn(x1)
        x1 = self.conv1(x1)
        if not x1.size()[0] == 1:
            x1 = self.relu(self.bn(x1))
        else:
            x1 = self.relu(x1)
        
        x = torch.add(x1, x)
        x = self.conv2(x)
        return x

In [4]:
class DWConv(nn.Module):
    
    def __init__(self, in_ch, out_ch, stride=1):
        super(DWConv, self).__init__()
        self.depth_conv = nn.Conv2d(in_ch, in_ch, 
                                    kernel_size=3, 
                                    stride=stride,
                                    padding=1,
                                    groups=in_ch)
        
        self.point_conv = nn.Conv2d(in_ch, out_ch, 
                                    kernel_size=1, 
                                    stride=1, 
                                    groups=1)
        
    def forward(self, x):
        x = self.depth_conv(x)
        x = self.point_conv(x)
        return x


class GatherAndExpansionLayer(nn.Module):
    
    def __init__(self, in_ch, out_ch, stride=1):
        super(GatherAndExpansionLayer, self).__init__()
        error = "GatherAndExpansionLayer's stride only support 1 or 2."
        assert stride in [1, 2], error
        self.stride = stride
        if self.stride == 2:
            self.conv2_1 = nn.Conv2d(in_ch, in_ch, kernel_size=3, stride=1, padding=1)
            self.conv2_2 = DWConv(in_ch, 6 * in_ch, stride=2)
            self.conv2_3 = DWConv(6 * in_ch, 6 * in_ch, stride=1)
            self.conv2_4 = nn.Conv2d(6 * in_ch, out_ch, kernel_size=1)
            self.conv2_5 = DWConv(in_ch, out_ch, stride=2)
            self.conv2_6 = nn.Conv2d(out_ch, out_ch, kernel_size=1, stride=1)
        else:
            self.conv1_1 = nn.Conv2d(in_ch, in_ch, kernel_size=3, stride=self.stride, padding=1)
            self.conv1_2 = DWConv(in_ch, 6 * in_ch, stride=self.stride)
            self.conv1_3 = nn.Conv2d(6 * in_ch, out_ch, kernel_size=1, stride=self.stride)
        self.bn1 = nn.BatchNorm2d(num_features=in_ch)
        self.bn2 = nn.BatchNorm2d(num_features=in_ch * 6)
        self.bn3 = nn.BatchNorm2d(num_features=out_ch)
        self.relu = nn.ReLU()
            
    def forward(self, x):
        if self.stride == 2:
            x1 = self.conv2_1(x)
            x1 = self.relu(self.bn1(x1))
            x1 = self.conv2_2(x1)
            x1 = self.bn2(x1)
            x1 = self.conv2_3(x1)
            x1 = self.bn2(x1)
            x1 = self.conv2_4(x1)
            x1 = self.bn3(x1)
            
            x2 = self.conv2_5(x)
            x2 = self.bn3(x2)
            x2 = self.conv2_6(x2)
            x2 = self.bn3(x2)
        else:
            x1 = self.conv1_1(x)
            x1 = self.relu(self.bn1(x1))
            x1 = self.conv1_2(x1)
            x1 = self.bn2(x1)
            x1 = self.conv1_3(x1)
            x1 = self.bn3(x1)
            
            x2 = x
            
        x = torch.add(x1, x2)
        x = self.relu(x)
        return x

In [5]:
class DetailBranch(nn.Module):
    
    def __init__(self):
        super(DetailBranch, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1)
        self.conv4 = nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1)
        self.conv5 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
        
        self.bn1 = nn.BatchNorm2d(num_features=64)
        self.bn2 = nn.BatchNorm2d(num_features=128)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        # S1
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.relu(self.bn1(self.conv2(x)))
        # S2
        x = self.relu(self.bn1(self.conv3(x)))
        x = self.relu(self.bn1(self.conv2(x)))
        x = self.relu(self.bn1(self.conv2(x)))
        # S3
        x = self.relu(self.bn2(self.conv4(x)))
        x = self.relu(self.bn2(self.conv5(x)))
        x = self.relu(self.bn2(self.conv5(x)))
        
        return x

In [6]:
class SemanticBranch(nn.Module):
    
    def __init__(self, is_train=False):
        super(SemanticBranch, self).__init__()
        self.is_train = is_train
        # S1 and S2
        self.stem = StemBlock(in_ch=3, out_ch=16)
        # S3
        self.ge1 = GatherAndExpansionLayer(in_ch=16, out_ch=32, stride=2)
        self.ge2 = GatherAndExpansionLayer(in_ch=32, out_ch=32, stride=1)
        # S4
        self.ge3 = GatherAndExpansionLayer(in_ch=32, out_ch=64, stride=2)
        self.ge4 = GatherAndExpansionLayer(in_ch=64, out_ch=64, stride=1)
        # S5
        self.ge5 = GatherAndExpansionLayer(in_ch=64, out_ch=128, stride=2)
        self.ge6 = GatherAndExpansionLayer(in_ch=128, out_ch=128, stride=1)
        self.ce = ContextEmbeddingBlock(in_ch=128, out_ch=128, stride=1)
        
    def forward(self, x):
        # S1 and S2
        out1 = self.stem(x)
        # out1: [None, 16, 128, 256]
        
        # S3
        x = self.ge1(out1)
        out2 = self.ge2(x)
        # out2: [None, 32, 64, 128]
        
        # S4
        x = self.ge3(out2)
        out3 = self.ge4(x)
        # out3: [None, 64, 32, 64]

        # S5
        x = self.ge5(out3)
        x = self.ge6(x)
        x = self.ge6(x)
        out4 = self.ge6(x)
        # out4: [None, 128, 16, 32]

        # x: [None, 128, 16, 32]
        x = self.ce(out4)
        if self.is_train:
            return x, out1, out2, out3, out4
        else:
            return x

In [7]:
class SegHeadBooster(nn.Module):
    
    def __init__(self, in_ch, t_ch, out_ch, size):
        super(SegHeadBooster, self).__init__()
        self.conv1 = nn.Conv2d(in_ch, t_ch, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(t_ch, out_ch, kernel_size=1, stride=1, bias=False)
        self.bn = nn.BatchNorm2d(num_features=t_ch)
        self.relu = nn.ReLU()
        self.size = size
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.relu(self.bn(x))
        x = self.conv2(x)
        x = nn.functional.interpolate(x, size=self.size, mode='bilinear')
        return x

In [8]:
class BiseNet_V2(nn.Module):
    
    def __init__(self, num_classes=19, is_train=False):
        super(BiseNet_V2, self).__init__()
        self.is_train = is_train
        # Common
        self.dw = DWConv(in_ch=128, out_ch=128, stride=1)
        self.bn = nn.BatchNorm2d(num_features=128)
        self.conv1_1 = nn.Conv2d(128, 128, kernel_size=1, stride=1)
        self.sigmoid = nn.Sigmoid()
        
        # Detail Branch
        self.detailBranch = DetailBranch()
        self.conv3_3_2 = nn.Conv2d(128, 128, kernel_size=3, stride=2, padding=1)
        self.avgPooling = nn.AvgPool2d(kernel_size=3, stride=2, padding=1)
        
        # Semantic Branch
        self.semanticBranch = SemanticBranch(is_train=self.is_train)
        self.conv3_3_1 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
        
        # Seg Head
        size = (512, 1024)
        self.segHead0 = SegHeadBooster(in_ch=128, t_ch=64, out_ch=num_classes, size=size)
        self.segHead1 = SegHeadBooster(in_ch=16, t_ch=64, out_ch=num_classes, size=size)
        self.segHead2 = SegHeadBooster(in_ch=32, t_ch=64, out_ch=num_classes, size=size)
        self.segHead3 = SegHeadBooster(in_ch=64, t_ch=64, out_ch=num_classes, size=size)
        self.segHead4 = SegHeadBooster(in_ch=128, t_ch=64, out_ch=num_classes, size=size)
        
        # init weight
        self.init_weight()
        
    def forward(self, x):
        # Detail Branch
        dx = self.detailBranch(x)
        dx1 = self.bn(self.dw(dx))
        dx1 = self.conv1_1(dx1)
        
        dx2 = self.bn(self.conv3_3_2(dx))
        dx2 = self.avgPooling(dx2)
        
        # Semantic Branch
        if self.is_train:
            sx, out1, out2, out3, out4 = self.semanticBranch(x)
            out1 = self.segHead1(out1)
            out2 = self.segHead2(out2)
            out3 = self.segHead3(out3)
            out4 = self.segHead4(out4)
        else:
            sx = self.semanticBranch(x)
        sx1 = self.bn(self.dw(sx))
        sx1 = self.sigmoid(self.conv1_1(sx1))
        
        sx2 = self.bn(self.conv3_3_1(sx))
        sx2 = nn.functional.interpolate(sx2, scale_factor=4, mode='bilinear')
        sx2 = self.sigmoid(sx2)
        
        # Aggregation Layer: element-wise product
        d_out = torch.mul(dx1, sx2)
        s_out = torch.mul(dx2, sx1)
        
        s_out_ = nn.functional.interpolate(s_out, scale_factor=4, mode='bilinear')
        
        out = d_out.add(s_out_)
        # out: [None, 128, 64, 128]
        out0 = self.bn(self.conv3_3_1(out))
        out0 = self.segHead0(out0)
        if self.is_train:
            return out0, out1, out2, out3, out4
        else:
            return out0
    
    def init_weight(self):
        # Kaiming Normal
        for ly in self.children():
            if isinstance(ly, nn.Conv2d):
                nn.init.kaiming_normal_(ly.weight, a=1)
                if not ly.bias is None: nn.init.constant_(ly.bias, 0)

In [9]:
from cityscapes import CityScapes
from torch.utils.data import DataLoader
from tensorboardX import SummaryWriter
from loss import OhemCELoss
from tqdm import tqdm
import os

In [10]:
def poly_lr_scheduler(optimizer, init_lr, iter, lr_decay_iter=1,
                      max_iter=300, power=0.9):
    
    '''Copy from ooooverflow/BiSeNet
    Args:
        init_lr: is base learning rate
        iter: is a current iteration
        lr_decay_iter: how frequently decay occurs, default is 1
        max_iter: is number of maximum iterations
        power: is a polymomial power
    '''
    lr = init_lr * (1 - iter/max_iter) ** power
    optimizer.param_groups[0]['lr'] = lr
    return lr

In [11]:
data_dir = '/media/commaai-03/Data/dataset/Cityscapes'
save_dir = '/home/commaai-03/Mikoy/learn/DeepLearning/pytorch/papers/BiseNet_V2/models'
learning_rate = 5e-2
num_epochs = 10000
batch_size = 16
thresh = 0.7
n_mid = (1024 * 512) // 2

In [12]:
ds_train = CityScapes(data_dir, cropsize=(2048, 1024), mode='train')
dataloader_train = DataLoader(
    ds_train,
    batch_size=4,
    shuffle=False,
    num_workers=4,
    drop_last=True)
ds_val = CityScapes(data_dir, cropsize=(2048, 1024), mode='val')
dataloader_val = DataLoader(
    ds_val,
    batch_size=1,
    shuffle=False,
    num_workers=8)

In [None]:
diter = iter(dataloader_train)
im, lb = next(diter)

In [None]:
im.shape

In [13]:
net = BiseNet_V2(is_train=True)
net = torch.nn.DataParallel(net).cuda()

optimizer = torch.optim.SGD(params=net.parameters(),
                            lr=learning_rate,
                            momentum=0.9,
                            weight_decay=5e-4)


writer = SummaryWriter(log_dir='./logs/', comment='Bv2_Batch_16')
loss_func = OhemCELoss(thresh=thresh,
                       n_min=n_mid,
                       ignore_lb=255)

max_miou = 0
step = 0
for epoch in range(num_epochs):
    lr = poly_lr_scheduler(optimizer, learning_rate, iter=epoch, max_iter=num_epochs)
    net.train()
    tq = tqdm(total=len(dataloader_train) * batch_size)
    tq.set_description('[epoch]: %d, [lr]: %f' %(epoch, lr))
    loss_record = []
    for i, (data, label) in enumerate(dataloader_train):
        data = data.cuda()
        label = label.cuda()
        label = torch.squeeze(label, 1)
        optimizer.zero_grad()
        out0, out1, out2, out3, out4 = net(data)
        loss0 = loss_func(out0, label)
        loss1 = loss_func(out1, label)
        loss2 = loss_func(out2, label)
        loss3 = loss_func(out3, label)
        loss4 = loss_func(out4, label)
        loss = loss0 + loss1 + loss2 + loss3 + loss4
        tq.update(batch_size)
        tq.set_postfix(loss='%.5f' % loss)
        
        loss.backward()
        optimizer.step()
        step += 1
        writer.add_scalar('loss_step', loss, step)
        loss_record.append(loss.item())
    
    tq.close()
    loss_avg = np.mean(loss_record)
    writer.add_scalar('epoch/loss_epoch_train', float(loss_avg), epoch)
    print('[Info]: Loss for train: %.5f' % loss_avg)
    if epoch % 50 == 0 and epoch != 0:
        if not os.path.isdir(save_dir):
            os.mkdir(save_dir)
        torch.save(net.module.state_dict(),
                   os.path.join(save_dir, 'latest_dice_loss.pth'))
    
    # Validation after

  "See the documentation of nn.Upsample for details.".format(mode))
[epoch]: 0, [lr]: 0.050000: 100%|██████████| 11888/11888 [04:31<00:00, 43.76it/s, loss=6.18206] 
[epoch]: 1, [lr]: 0.049995:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 8.05211


[epoch]: 1, [lr]: 0.049995: 100%|██████████| 11888/11888 [04:26<00:00, 44.68it/s, loss=6.88038] 
[epoch]: 2, [lr]: 0.049991:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 7.39447


[epoch]: 2, [lr]: 0.049991: 100%|██████████| 11888/11888 [04:25<00:00, 44.83it/s, loss=7.37313] 
[epoch]: 3, [lr]: 0.049986:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 7.28419


[epoch]: 3, [lr]: 0.049986: 100%|██████████| 11888/11888 [04:37<00:00, 42.87it/s, loss=7.99342] 
[epoch]: 4, [lr]: 0.049982:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 7.19630


[epoch]: 4, [lr]: 0.049982: 100%|██████████| 11888/11888 [04:31<00:00, 43.74it/s, loss=7.63812] 
[epoch]: 5, [lr]: 0.049977:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 7.16199


[epoch]: 5, [lr]: 0.049977: 100%|██████████| 11888/11888 [04:42<00:00, 42.06it/s, loss=7.93318] 
[epoch]: 6, [lr]: 0.049973:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 7.07838


[epoch]: 6, [lr]: 0.049973: 100%|██████████| 11888/11888 [04:42<00:00, 42.14it/s, loss=6.75403] 
[epoch]: 7, [lr]: 0.049968:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 7.00063


[epoch]: 7, [lr]: 0.049968: 100%|██████████| 11888/11888 [04:27<00:00, 44.52it/s, loss=6.00324] 
[epoch]: 8, [lr]: 0.049964:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.97568


[epoch]: 8, [lr]: 0.049964: 100%|██████████| 11888/11888 [04:21<00:00, 45.41it/s, loss=6.40938] 
[epoch]: 9, [lr]: 0.049959:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.96574


[epoch]: 9, [lr]: 0.049959: 100%|██████████| 11888/11888 [04:20<00:00, 45.57it/s, loss=6.12463] 
[epoch]: 10, [lr]: 0.049955:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.96668


[epoch]: 10, [lr]: 0.049955: 100%|██████████| 11888/11888 [04:22<00:00, 45.27it/s, loss=6.76232] 
[epoch]: 11, [lr]: 0.049950:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.90049


[epoch]: 11, [lr]: 0.049950: 100%|██████████| 11888/11888 [04:24<00:00, 44.86it/s, loss=6.11315] 
[epoch]: 12, [lr]: 0.049946:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.89602


[epoch]: 12, [lr]: 0.049946: 100%|██████████| 11888/11888 [04:30<00:00, 43.96it/s, loss=6.96922] 
[epoch]: 13, [lr]: 0.049941:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.89792


[epoch]: 13, [lr]: 0.049941: 100%|██████████| 11888/11888 [04:30<00:00, 43.87it/s, loss=6.92871] 
[epoch]: 14, [lr]: 0.049937:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.86713


[epoch]: 14, [lr]: 0.049937: 100%|██████████| 11888/11888 [04:26<00:00, 44.54it/s, loss=6.70642] 
[epoch]: 15, [lr]: 0.049932:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.85620


[epoch]: 15, [lr]: 0.049932: 100%|██████████| 11888/11888 [04:25<00:00, 44.73it/s, loss=5.73553] 
[epoch]: 16, [lr]: 0.049928:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.82959


[epoch]: 16, [lr]: 0.049928: 100%|██████████| 11888/11888 [04:25<00:00, 44.74it/s, loss=6.61011] 
[epoch]: 17, [lr]: 0.049923:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.79086


[epoch]: 17, [lr]: 0.049923: 100%|██████████| 11888/11888 [04:22<00:00, 45.25it/s, loss=6.37003] 
[epoch]: 18, [lr]: 0.049919:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.78478


[epoch]: 18, [lr]: 0.049919: 100%|██████████| 11888/11888 [04:22<00:00, 45.21it/s, loss=6.40636] 
[epoch]: 19, [lr]: 0.049914:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.78287


[epoch]: 19, [lr]: 0.049914: 100%|██████████| 11888/11888 [04:22<00:00, 45.24it/s, loss=6.49606] 
[epoch]: 20, [lr]: 0.049910:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.77629


[epoch]: 20, [lr]: 0.049910: 100%|██████████| 11888/11888 [04:22<00:00, 45.22it/s, loss=6.10880] 
[epoch]: 21, [lr]: 0.049905:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.77845


[epoch]: 21, [lr]: 0.049905: 100%|██████████| 11888/11888 [04:22<00:00, 45.24it/s, loss=7.67036] 
[epoch]: 22, [lr]: 0.049901:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.74931


[epoch]: 22, [lr]: 0.049901: 100%|██████████| 11888/11888 [04:22<00:00, 45.22it/s, loss=6.45875] 
[epoch]: 23, [lr]: 0.049896:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.78605


[epoch]: 23, [lr]: 0.049896: 100%|██████████| 11888/11888 [04:22<00:00, 45.25it/s, loss=6.61819] 
[epoch]: 24, [lr]: 0.049892:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.71238


[epoch]: 24, [lr]: 0.049892: 100%|██████████| 11888/11888 [04:22<00:00, 45.23it/s, loss=6.14727] 
[epoch]: 25, [lr]: 0.049887:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.71884


[epoch]: 25, [lr]: 0.049887: 100%|██████████| 11888/11888 [04:22<00:00, 45.25it/s, loss=6.33621] 
[epoch]: 26, [lr]: 0.049883:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.73147


[epoch]: 26, [lr]: 0.049883: 100%|██████████| 11888/11888 [04:22<00:00, 45.23it/s, loss=6.57106] 
[epoch]: 27, [lr]: 0.049878:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.73985


[epoch]: 27, [lr]: 0.049878: 100%|██████████| 11888/11888 [04:22<00:00, 45.23it/s, loss=6.20305] 
[epoch]: 28, [lr]: 0.049874:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.71273


[epoch]: 28, [lr]: 0.049874: 100%|██████████| 11888/11888 [04:22<00:00, 45.23it/s, loss=6.88631] 
[epoch]: 29, [lr]: 0.049869:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.74471


[epoch]: 29, [lr]: 0.049869: 100%|██████████| 11888/11888 [04:22<00:00, 45.27it/s, loss=6.31909] 
[epoch]: 30, [lr]: 0.049865:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.68758


[epoch]: 30, [lr]: 0.049865: 100%|██████████| 11888/11888 [04:22<00:00, 45.24it/s, loss=6.91620] 
[epoch]: 31, [lr]: 0.049860:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.70621


[epoch]: 31, [lr]: 0.049860: 100%|██████████| 11888/11888 [04:22<00:00, 45.26it/s, loss=6.28882] 
[epoch]: 32, [lr]: 0.049856:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.68748


[epoch]: 32, [lr]: 0.049856: 100%|██████████| 11888/11888 [04:22<00:00, 45.24it/s, loss=6.23078] 
[epoch]: 33, [lr]: 0.049851:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.72705


[epoch]: 33, [lr]: 0.049851: 100%|██████████| 11888/11888 [04:22<00:00, 45.25it/s, loss=5.97286] 
[epoch]: 34, [lr]: 0.049847:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.68494


[epoch]: 34, [lr]: 0.049847: 100%|██████████| 11888/11888 [04:22<00:00, 45.25it/s, loss=7.03783] 
[epoch]: 35, [lr]: 0.049842:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.71231


[epoch]: 35, [lr]: 0.049842: 100%|██████████| 11888/11888 [04:22<00:00, 45.29it/s, loss=7.55538] 
[epoch]: 36, [lr]: 0.049838:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.66716


[epoch]: 36, [lr]: 0.049838: 100%|██████████| 11888/11888 [04:22<00:00, 45.23it/s, loss=7.18053] 
[epoch]: 37, [lr]: 0.049833:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.68833


[epoch]: 37, [lr]: 0.049833: 100%|██████████| 11888/11888 [04:22<00:00, 45.24it/s, loss=6.94825] 
[epoch]: 38, [lr]: 0.049829:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.66217


[epoch]: 38, [lr]: 0.049829: 100%|██████████| 11888/11888 [04:22<00:00, 45.25it/s, loss=6.19923] 
[epoch]: 39, [lr]: 0.049824:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.64600


[epoch]: 39, [lr]: 0.049824: 100%|██████████| 11888/11888 [04:22<00:00, 45.25it/s, loss=6.59910] 
[epoch]: 40, [lr]: 0.049820:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.63703


[epoch]: 40, [lr]: 0.049820: 100%|██████████| 11888/11888 [04:22<00:00, 45.24it/s, loss=6.24076] 
[epoch]: 41, [lr]: 0.049815:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.65293


[epoch]: 41, [lr]: 0.049815: 100%|██████████| 11888/11888 [04:22<00:00, 45.25it/s, loss=7.26626] 
[epoch]: 42, [lr]: 0.049811:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.69157


[epoch]: 42, [lr]: 0.049811: 100%|██████████| 11888/11888 [04:22<00:00, 45.25it/s, loss=6.07544] 
[epoch]: 43, [lr]: 0.049806:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.65196


[epoch]: 43, [lr]: 0.049806: 100%|██████████| 11888/11888 [04:22<00:00, 45.26it/s, loss=6.33453] 
[epoch]: 44, [lr]: 0.049802:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.65100


[epoch]: 44, [lr]: 0.049802: 100%|██████████| 11888/11888 [04:22<00:00, 45.25it/s, loss=6.77580] 
[epoch]: 45, [lr]: 0.049797:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.62956


[epoch]: 45, [lr]: 0.049797: 100%|██████████| 11888/11888 [04:22<00:00, 45.27it/s, loss=6.28678] 
[epoch]: 46, [lr]: 0.049793:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.68610


[epoch]: 46, [lr]: 0.049793: 100%|██████████| 11888/11888 [04:22<00:00, 45.25it/s, loss=5.89646] 
[epoch]: 47, [lr]: 0.049788:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.68243


[epoch]: 47, [lr]: 0.049788: 100%|██████████| 11888/11888 [04:22<00:00, 45.30it/s, loss=6.00979] 
[epoch]: 48, [lr]: 0.049784:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.59160


[epoch]: 48, [lr]: 0.049784: 100%|██████████| 11888/11888 [04:22<00:00, 45.22it/s, loss=6.78359] 
[epoch]: 49, [lr]: 0.049779:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.65457


[epoch]: 49, [lr]: 0.049779: 100%|██████████| 11888/11888 [04:22<00:00, 45.28it/s, loss=6.12327] 
[epoch]: 50, [lr]: 0.049775:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.62730


[epoch]: 50, [lr]: 0.049775: 100%|██████████| 11888/11888 [04:22<00:00, 45.28it/s, loss=6.29937] 
[epoch]: 51, [lr]: 0.049770:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.60710


[epoch]: 51, [lr]: 0.049770: 100%|██████████| 11888/11888 [04:22<00:00, 45.27it/s, loss=6.33683] 
[epoch]: 52, [lr]: 0.049766:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.61989


[epoch]: 52, [lr]: 0.049766: 100%|██████████| 11888/11888 [04:22<00:00, 45.26it/s, loss=6.64379] 
[epoch]: 53, [lr]: 0.049761:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.62627


[epoch]: 53, [lr]: 0.049761: 100%|██████████| 11888/11888 [04:22<00:00, 45.27it/s, loss=6.08387] 
[epoch]: 54, [lr]: 0.049757:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.61983


[epoch]: 54, [lr]: 0.049757: 100%|██████████| 11888/11888 [04:22<00:00, 45.27it/s, loss=6.68194] 
[epoch]: 55, [lr]: 0.049752:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.65010


[epoch]: 55, [lr]: 0.049752: 100%|██████████| 11888/11888 [04:22<00:00, 45.29it/s, loss=6.38764]
[epoch]: 56, [lr]: 0.049748:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.57869


[epoch]: 56, [lr]: 0.049748: 100%|██████████| 11888/11888 [04:22<00:00, 45.28it/s, loss=5.86229] 
[epoch]: 57, [lr]: 0.049743:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.62427


[epoch]: 57, [lr]: 0.049743: 100%|██████████| 11888/11888 [04:22<00:00, 45.27it/s, loss=6.11056]
[epoch]: 58, [lr]: 0.049739:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.62903


[epoch]: 58, [lr]: 0.049739: 100%|██████████| 11888/11888 [04:22<00:00, 45.28it/s, loss=6.23869] 
[epoch]: 59, [lr]: 0.049734:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.59140


[epoch]: 59, [lr]: 0.049734: 100%|██████████| 11888/11888 [04:22<00:00, 45.28it/s, loss=6.30883] 
[epoch]: 60, [lr]: 0.049730:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.59700


[epoch]: 60, [lr]: 0.049730: 100%|██████████| 11888/11888 [04:22<00:00, 45.29it/s, loss=5.95773] 
[epoch]: 61, [lr]: 0.049725:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.62314


[epoch]: 61, [lr]: 0.049725: 100%|██████████| 11888/11888 [04:22<00:00, 45.27it/s, loss=7.26837] 
[epoch]: 62, [lr]: 0.049721:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.62921


[epoch]: 62, [lr]: 0.049721: 100%|██████████| 11888/11888 [04:22<00:00, 45.29it/s, loss=6.77708] 
[epoch]: 63, [lr]: 0.049716:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.62180


[epoch]: 63, [lr]: 0.049716: 100%|██████████| 11888/11888 [04:22<00:00, 45.28it/s, loss=5.88893] 
[epoch]: 64, [lr]: 0.049712:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.62360


[epoch]: 64, [lr]: 0.049712: 100%|██████████| 11888/11888 [04:22<00:00, 45.28it/s, loss=5.83014]
[epoch]: 65, [lr]: 0.049707:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.60560


[epoch]: 65, [lr]: 0.049707: 100%|██████████| 11888/11888 [04:22<00:00, 45.27it/s, loss=6.02233] 
[epoch]: 66, [lr]: 0.049703:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.56641


[epoch]: 66, [lr]: 0.049703: 100%|██████████| 11888/11888 [04:22<00:00, 45.30it/s, loss=6.53984] 
[epoch]: 67, [lr]: 0.049698:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.56871


[epoch]: 67, [lr]: 0.049698: 100%|██████████| 11888/11888 [04:22<00:00, 45.29it/s, loss=6.01996]
[epoch]: 68, [lr]: 0.049694:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.59107


[epoch]: 68, [lr]: 0.049694: 100%|██████████| 11888/11888 [04:22<00:00, 45.31it/s, loss=6.51361] 
[epoch]: 69, [lr]: 0.049689:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.55246


[epoch]: 69, [lr]: 0.049689: 100%|██████████| 11888/11888 [04:22<00:00, 45.30it/s, loss=6.21216] 
[epoch]: 70, [lr]: 0.049685:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.55431


[epoch]: 70, [lr]: 0.049685: 100%|██████████| 11888/11888 [04:22<00:00, 45.28it/s, loss=5.93074] 
[epoch]: 71, [lr]: 0.049680:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.60039


[epoch]: 71, [lr]: 0.049680: 100%|██████████| 11888/11888 [04:22<00:00, 45.24it/s, loss=6.02076] 
[epoch]: 72, [lr]: 0.049676:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.58830


[epoch]: 72, [lr]: 0.049676: 100%|██████████| 11888/11888 [04:22<00:00, 45.27it/s, loss=6.35306] 
[epoch]: 73, [lr]: 0.049671:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.60231


[epoch]: 73, [lr]: 0.049671: 100%|██████████| 11888/11888 [04:22<00:00, 45.27it/s, loss=6.61613] 
[epoch]: 74, [lr]: 0.049667:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.57895


[epoch]: 74, [lr]: 0.049667: 100%|██████████| 11888/11888 [04:22<00:00, 45.26it/s, loss=6.29550] 
[epoch]: 75, [lr]: 0.049662:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.58835


[epoch]: 75, [lr]: 0.049662: 100%|██████████| 11888/11888 [04:22<00:00, 45.28it/s, loss=6.50589] 
[epoch]: 76, [lr]: 0.049658:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.58996


[epoch]: 76, [lr]: 0.049658: 100%|██████████| 11888/11888 [04:22<00:00, 45.27it/s, loss=5.99470] 
[epoch]: 77, [lr]: 0.049653:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.57964


[epoch]: 77, [lr]: 0.049653: 100%|██████████| 11888/11888 [04:22<00:00, 45.27it/s, loss=6.27892] 
[epoch]: 78, [lr]: 0.049649:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.60291


[epoch]: 78, [lr]: 0.049649: 100%|██████████| 11888/11888 [04:22<00:00, 45.28it/s, loss=6.90608] 
[epoch]: 79, [lr]: 0.049644:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.56514


[epoch]: 79, [lr]: 0.049644: 100%|██████████| 11888/11888 [04:22<00:00, 45.27it/s, loss=6.46355] 
[epoch]: 80, [lr]: 0.049640:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.54521


[epoch]: 80, [lr]: 0.049640: 100%|██████████| 11888/11888 [04:22<00:00, 45.26it/s, loss=7.37038] 
[epoch]: 81, [lr]: 0.049635:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.61836


[epoch]: 81, [lr]: 0.049635: 100%|██████████| 11888/11888 [04:22<00:00, 45.29it/s, loss=6.02978] 
[epoch]: 82, [lr]: 0.049631:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.53606


[epoch]: 82, [lr]: 0.049631: 100%|██████████| 11888/11888 [04:22<00:00, 45.28it/s, loss=7.08320] 
[epoch]: 83, [lr]: 0.049626:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.63475


[epoch]: 83, [lr]: 0.049626: 100%|██████████| 11888/11888 [04:22<00:00, 45.30it/s, loss=6.43050] 
[epoch]: 84, [lr]: 0.049622:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.56822


[epoch]: 84, [lr]: 0.049622: 100%|██████████| 11888/11888 [04:22<00:00, 45.27it/s, loss=6.47689] 
[epoch]: 85, [lr]: 0.049617:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.60288


[epoch]: 85, [lr]: 0.049617: 100%|██████████| 11888/11888 [04:22<00:00, 45.30it/s, loss=6.85845] 
[epoch]: 86, [lr]: 0.049613:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.55154


[epoch]: 86, [lr]: 0.049613: 100%|██████████| 11888/11888 [04:22<00:00, 45.30it/s, loss=5.85194] 
[epoch]: 87, [lr]: 0.049608:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.59762


[epoch]: 87, [lr]: 0.049608: 100%|██████████| 11888/11888 [04:22<00:00, 45.25it/s, loss=5.95886] 
[epoch]: 88, [lr]: 0.049604:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.60914


[epoch]: 88, [lr]: 0.049604: 100%|██████████| 11888/11888 [04:22<00:00, 45.29it/s, loss=6.10476] 
[epoch]: 89, [lr]: 0.049599:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.57742


[epoch]: 89, [lr]: 0.049599: 100%|██████████| 11888/11888 [04:22<00:00, 45.27it/s, loss=6.47902] 
[epoch]: 90, [lr]: 0.049595:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.60015


[epoch]: 90, [lr]: 0.049595: 100%|██████████| 11888/11888 [04:22<00:00, 45.29it/s, loss=5.93670] 
[epoch]: 91, [lr]: 0.049590:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.54461


[epoch]: 91, [lr]: 0.049590: 100%|██████████| 11888/11888 [04:22<00:00, 45.28it/s, loss=6.56984] 
[epoch]: 92, [lr]: 0.049586:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.56461


[epoch]: 92, [lr]: 0.049586: 100%|██████████| 11888/11888 [04:22<00:00, 45.31it/s, loss=6.88743] 
[epoch]: 93, [lr]: 0.049581:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.54242


[epoch]: 93, [lr]: 0.049581: 100%|██████████| 11888/11888 [04:22<00:00, 45.27it/s, loss=5.97647] 
[epoch]: 94, [lr]: 0.049577:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.57921


[epoch]: 94, [lr]: 0.049577: 100%|██████████| 11888/11888 [04:22<00:00, 45.26it/s, loss=6.29807] 
[epoch]: 95, [lr]: 0.049572:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.57615


[epoch]: 95, [lr]: 0.049572: 100%|██████████| 11888/11888 [04:22<00:00, 45.28it/s, loss=5.51373] 
[epoch]: 96, [lr]: 0.049568:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.56838


[epoch]: 96, [lr]: 0.049568: 100%|██████████| 11888/11888 [04:22<00:00, 45.29it/s, loss=6.20601] 
[epoch]: 97, [lr]: 0.049563:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.59201


[epoch]: 97, [lr]: 0.049563: 100%|██████████| 11888/11888 [04:22<00:00, 45.31it/s, loss=5.70359] 
[epoch]: 98, [lr]: 0.049559:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.56085


[epoch]: 98, [lr]: 0.049559: 100%|██████████| 11888/11888 [04:22<00:00, 45.27it/s, loss=5.74577] 
[epoch]: 99, [lr]: 0.049554:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.60128


[epoch]: 99, [lr]: 0.049554: 100%|██████████| 11888/11888 [04:22<00:00, 45.27it/s, loss=6.32745] 
[epoch]: 100, [lr]: 0.049550:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.58369


[epoch]: 100, [lr]: 0.049550: 100%|██████████| 11888/11888 [04:22<00:00, 45.27it/s, loss=6.86290] 
[epoch]: 101, [lr]: 0.049545:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.60227


[epoch]: 101, [lr]: 0.049545: 100%|██████████| 11888/11888 [04:22<00:00, 45.26it/s, loss=6.53597] 
[epoch]: 102, [lr]: 0.049541:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.61127


[epoch]: 102, [lr]: 0.049541: 100%|██████████| 11888/11888 [04:22<00:00, 45.32it/s, loss=6.07519] 
[epoch]: 103, [lr]: 0.049536:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.54813


[epoch]: 103, [lr]: 0.049536: 100%|██████████| 11888/11888 [04:22<00:00, 45.28it/s, loss=5.85499] 
[epoch]: 104, [lr]: 0.049532:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.55291


[epoch]: 104, [lr]: 0.049532: 100%|██████████| 11888/11888 [04:22<00:00, 45.31it/s, loss=6.20506] 
[epoch]: 105, [lr]: 0.049527:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.59601


[epoch]: 105, [lr]: 0.049527: 100%|██████████| 11888/11888 [04:22<00:00, 45.27it/s, loss=6.46210] 
[epoch]: 106, [lr]: 0.049523:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.59689


[epoch]: 106, [lr]: 0.049523: 100%|██████████| 11888/11888 [04:22<00:00, 45.27it/s, loss=6.13478]
[epoch]: 107, [lr]: 0.049518:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.53963


[epoch]: 107, [lr]: 0.049518: 100%|██████████| 11888/11888 [04:22<00:00, 45.28it/s, loss=5.52221] 
[epoch]: 108, [lr]: 0.049514:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.58708


[epoch]: 108, [lr]: 0.049514: 100%|██████████| 11888/11888 [04:22<00:00, 45.28it/s, loss=6.64545] 
[epoch]: 109, [lr]: 0.049509:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.58857


[epoch]: 109, [lr]: 0.049509: 100%|██████████| 11888/11888 [04:22<00:00, 45.26it/s, loss=6.50882] 
[epoch]: 110, [lr]: 0.049505:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.57699


[epoch]: 110, [lr]: 0.049505: 100%|██████████| 11888/11888 [04:22<00:00, 45.29it/s, loss=5.83920] 
[epoch]: 111, [lr]: 0.049500:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.55683


[epoch]: 111, [lr]: 0.049500: 100%|██████████| 11888/11888 [04:22<00:00, 45.28it/s, loss=5.91801] 
[epoch]: 112, [lr]: 0.049496:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.54333


[epoch]: 112, [lr]: 0.049496: 100%|██████████| 11888/11888 [04:22<00:00, 45.27it/s, loss=6.53269] 
[epoch]: 113, [lr]: 0.049491:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.60439


[epoch]: 113, [lr]: 0.049491: 100%|██████████| 11888/11888 [04:22<00:00, 45.26it/s, loss=6.75549] 
[epoch]: 114, [lr]: 0.049487:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.55222


[epoch]: 114, [lr]: 0.049487: 100%|██████████| 11888/11888 [04:22<00:00, 45.27it/s, loss=6.35106] 
[epoch]: 115, [lr]: 0.049482:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.58956


[epoch]: 115, [lr]: 0.049482: 100%|██████████| 11888/11888 [04:22<00:00, 45.27it/s, loss=6.15899] 
[epoch]: 116, [lr]: 0.049478:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.56657


[epoch]: 116, [lr]: 0.049478: 100%|██████████| 11888/11888 [04:22<00:00, 45.30it/s, loss=5.85951] 
[epoch]: 117, [lr]: 0.049473:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.56032


[epoch]: 117, [lr]: 0.049473: 100%|██████████| 11888/11888 [04:22<00:00, 45.25it/s, loss=5.92071] 
[epoch]: 118, [lr]: 0.049469:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.57414


[epoch]: 118, [lr]: 0.049469: 100%|██████████| 11888/11888 [04:22<00:00, 45.27it/s, loss=6.92325] 
[epoch]: 119, [lr]: 0.049464:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.57327


[epoch]: 119, [lr]: 0.049464: 100%|██████████| 11888/11888 [04:22<00:00, 45.30it/s, loss=6.18809]
[epoch]: 120, [lr]: 0.049460:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.55016


[epoch]: 120, [lr]: 0.049460: 100%|██████████| 11888/11888 [04:22<00:00, 45.29it/s, loss=6.38278] 
[epoch]: 121, [lr]: 0.049455:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.55378


[epoch]: 121, [lr]: 0.049455: 100%|██████████| 11888/11888 [04:22<00:00, 45.29it/s, loss=6.18168] 
[epoch]: 122, [lr]: 0.049451:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.57669


[epoch]: 122, [lr]: 0.049451: 100%|██████████| 11888/11888 [04:22<00:00, 45.29it/s, loss=5.97655] 
[epoch]: 123, [lr]: 0.049446:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.57990


[epoch]: 123, [lr]: 0.049446: 100%|██████████| 11888/11888 [04:22<00:00, 45.30it/s, loss=6.06906] 
[epoch]: 124, [lr]: 0.049442:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.55519


[epoch]: 124, [lr]: 0.049442: 100%|██████████| 11888/11888 [04:22<00:00, 45.31it/s, loss=6.28892] 
[epoch]: 125, [lr]: 0.049437:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.57199


[epoch]: 125, [lr]: 0.049437: 100%|██████████| 11888/11888 [04:22<00:00, 45.31it/s, loss=6.38904] 
[epoch]: 126, [lr]: 0.049433:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.54973


[epoch]: 126, [lr]: 0.049433: 100%|██████████| 11888/11888 [04:22<00:00, 45.27it/s, loss=5.79772]
[epoch]: 127, [lr]: 0.049428:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.51589


[epoch]: 127, [lr]: 0.049428: 100%|██████████| 11888/11888 [04:22<00:00, 45.28it/s, loss=6.94496] 
[epoch]: 128, [lr]: 0.049424:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.59871


[epoch]: 128, [lr]: 0.049424: 100%|██████████| 11888/11888 [04:22<00:00, 45.32it/s, loss=6.38510] 
[epoch]: 129, [lr]: 0.049419:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.54273


[epoch]: 129, [lr]: 0.049419: 100%|██████████| 11888/11888 [04:22<00:00, 45.30it/s, loss=6.10695] 
[epoch]: 130, [lr]: 0.049415:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.56846


[epoch]: 130, [lr]: 0.049415: 100%|██████████| 11888/11888 [04:22<00:00, 45.28it/s, loss=5.76866] 
[epoch]: 131, [lr]: 0.049410:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.57301


[epoch]: 131, [lr]: 0.049410: 100%|██████████| 11888/11888 [04:22<00:00, 45.33it/s, loss=6.44134] 
[epoch]: 132, [lr]: 0.049406:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.52549


[epoch]: 132, [lr]: 0.049406: 100%|██████████| 11888/11888 [04:22<00:00, 45.30it/s, loss=5.82385] 
[epoch]: 133, [lr]: 0.049401:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.56665


[epoch]: 133, [lr]: 0.049401: 100%|██████████| 11888/11888 [04:22<00:00, 45.31it/s, loss=6.38660] 
[epoch]: 134, [lr]: 0.049397:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.56416


[epoch]: 134, [lr]: 0.049397: 100%|██████████| 11888/11888 [04:22<00:00, 45.31it/s, loss=6.66995] 
[epoch]: 135, [lr]: 0.049392:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.55056


[epoch]: 135, [lr]: 0.049392: 100%|██████████| 11888/11888 [04:22<00:00, 45.30it/s, loss=6.60943] 
[epoch]: 136, [lr]: 0.049388:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.57451


[epoch]: 136, [lr]: 0.049388: 100%|██████████| 11888/11888 [04:22<00:00, 45.28it/s, loss=5.62045] 
[epoch]: 137, [lr]: 0.049383:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.57583


[epoch]: 137, [lr]: 0.049383: 100%|██████████| 11888/11888 [04:22<00:00, 45.32it/s, loss=6.17672] 
[epoch]: 138, [lr]: 0.049379:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.55965


[epoch]: 138, [lr]: 0.049379: 100%|██████████| 11888/11888 [04:22<00:00, 45.34it/s, loss=5.86171] 
[epoch]: 139, [lr]: 0.049374:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.55936


[epoch]: 139, [lr]: 0.049374: 100%|██████████| 11888/11888 [04:22<00:00, 45.33it/s, loss=5.91098] 
[epoch]: 140, [lr]: 0.049370:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.54614


[epoch]: 140, [lr]: 0.049370: 100%|██████████| 11888/11888 [04:22<00:00, 45.32it/s, loss=6.72077] 
[epoch]: 141, [lr]: 0.049365:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.58683


[epoch]: 141, [lr]: 0.049365: 100%|██████████| 11888/11888 [04:22<00:00, 45.30it/s, loss=7.02446]
[epoch]: 142, [lr]: 0.049361:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.57760


[epoch]: 142, [lr]: 0.049361: 100%|██████████| 11888/11888 [04:22<00:00, 45.32it/s, loss=6.46538] 
[epoch]: 143, [lr]: 0.049356:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.55262


[epoch]: 143, [lr]: 0.049356: 100%|██████████| 11888/11888 [04:22<00:00, 45.31it/s, loss=5.70193] 
[epoch]: 144, [lr]: 0.049352:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.57573


[epoch]: 144, [lr]: 0.049352: 100%|██████████| 11888/11888 [04:22<00:00, 45.30it/s, loss=6.32255] 
[epoch]: 145, [lr]: 0.049347:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.58147


[epoch]: 145, [lr]: 0.049347: 100%|██████████| 11888/11888 [04:22<00:00, 45.31it/s, loss=6.07694] 
[epoch]: 146, [lr]: 0.049343:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.57570


[epoch]: 146, [lr]: 0.049343: 100%|██████████| 11888/11888 [04:22<00:00, 45.32it/s, loss=6.17617] 
[epoch]: 147, [lr]: 0.049338:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.58949


[epoch]: 147, [lr]: 0.049338: 100%|██████████| 11888/11888 [04:22<00:00, 45.29it/s, loss=5.33523] 
[epoch]: 148, [lr]: 0.049334:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.56850


[epoch]: 148, [lr]: 0.049334: 100%|██████████| 11888/11888 [04:22<00:00, 45.35it/s, loss=5.68584] 
[epoch]: 149, [lr]: 0.049329:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.53808


[epoch]: 149, [lr]: 0.049329: 100%|██████████| 11888/11888 [04:22<00:00, 45.30it/s, loss=6.02473] 
[epoch]: 150, [lr]: 0.049324:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.55622


[epoch]: 150, [lr]: 0.049324: 100%|██████████| 11888/11888 [04:22<00:00, 45.32it/s, loss=5.95108] 
[epoch]: 151, [lr]: 0.049320:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.54524


[epoch]: 151, [lr]: 0.049320: 100%|██████████| 11888/11888 [04:22<00:00, 45.32it/s, loss=5.95672] 
[epoch]: 152, [lr]: 0.049315:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.55412


[epoch]: 152, [lr]: 0.049315: 100%|██████████| 11888/11888 [04:22<00:00, 45.32it/s, loss=5.73197] 
[epoch]: 153, [lr]: 0.049311:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.54806


[epoch]: 153, [lr]: 0.049311: 100%|██████████| 11888/11888 [04:22<00:00, 45.34it/s, loss=5.68546] 
[epoch]: 154, [lr]: 0.049306:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.55732


[epoch]: 154, [lr]: 0.049306: 100%|██████████| 11888/11888 [04:22<00:00, 45.34it/s, loss=5.72044] 
[epoch]: 155, [lr]: 0.049302:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.57806


[epoch]: 155, [lr]: 0.049302: 100%|██████████| 11888/11888 [04:22<00:00, 45.31it/s, loss=6.30135] 
[epoch]: 156, [lr]: 0.049297:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.53455


[epoch]: 156, [lr]: 0.049297: 100%|██████████| 11888/11888 [04:22<00:00, 45.33it/s, loss=5.74074] 
[epoch]: 157, [lr]: 0.049293:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.52061


[epoch]: 157, [lr]: 0.049293: 100%|██████████| 11888/11888 [04:22<00:00, 45.30it/s, loss=6.71761] 
[epoch]: 158, [lr]: 0.049288:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.55431


[epoch]: 158, [lr]: 0.049288: 100%|██████████| 11888/11888 [04:22<00:00, 45.32it/s, loss=5.60884]
[epoch]: 159, [lr]: 0.049284:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.53134


[epoch]: 159, [lr]: 0.049284: 100%|██████████| 11888/11888 [04:21<00:00, 45.37it/s, loss=5.86867] 
[epoch]: 160, [lr]: 0.049279:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.50416


[epoch]: 160, [lr]: 0.049279: 100%|██████████| 11888/11888 [04:22<00:00, 45.33it/s, loss=5.98843] 
[epoch]: 161, [lr]: 0.049275:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.59406


[epoch]: 161, [lr]: 0.049275: 100%|██████████| 11888/11888 [04:22<00:00, 45.33it/s, loss=5.93400] 
[epoch]: 162, [lr]: 0.049270:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.56142


[epoch]: 162, [lr]: 0.049270: 100%|██████████| 11888/11888 [04:22<00:00, 45.33it/s, loss=6.01094] 
[epoch]: 163, [lr]: 0.049266:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.54764


[epoch]: 163, [lr]: 0.049266: 100%|██████████| 11888/11888 [04:22<00:00, 45.35it/s, loss=6.73531] 
[epoch]: 164, [lr]: 0.049261:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.57290


[epoch]: 164, [lr]: 0.049261: 100%|██████████| 11888/11888 [04:22<00:00, 45.33it/s, loss=5.96161] 
[epoch]: 165, [lr]: 0.049257:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.54194


[epoch]: 165, [lr]: 0.049257: 100%|██████████| 11888/11888 [04:22<00:00, 45.27it/s, loss=6.59240] 
[epoch]: 166, [lr]: 0.049252:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.57108


[epoch]: 166, [lr]: 0.049252: 100%|██████████| 11888/11888 [04:22<00:00, 45.31it/s, loss=5.27607] 
[epoch]: 167, [lr]: 0.049248:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.53202


[epoch]: 167, [lr]: 0.049248: 100%|██████████| 11888/11888 [04:22<00:00, 45.34it/s, loss=6.27470] 
[epoch]: 168, [lr]: 0.049243:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.54273


[epoch]: 168, [lr]: 0.049243: 100%|██████████| 11888/11888 [04:22<00:00, 45.32it/s, loss=5.89312] 
[epoch]: 169, [lr]: 0.049239:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.54784


[epoch]: 169, [lr]: 0.049239: 100%|██████████| 11888/11888 [04:22<00:00, 45.31it/s, loss=6.11050] 
[epoch]: 170, [lr]: 0.049234:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.54002


[epoch]: 170, [lr]: 0.049234: 100%|██████████| 11888/11888 [04:22<00:00, 45.31it/s, loss=6.12994] 
[epoch]: 171, [lr]: 0.049230:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.61804


[epoch]: 171, [lr]: 0.049230: 100%|██████████| 11888/11888 [04:22<00:00, 45.34it/s, loss=6.15023] 
[epoch]: 172, [lr]: 0.049225:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.55359


[epoch]: 172, [lr]: 0.049225: 100%|██████████| 11888/11888 [04:22<00:00, 45.36it/s, loss=5.79835] 
[epoch]: 173, [lr]: 0.049221:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.57888


[epoch]: 173, [lr]: 0.049221: 100%|██████████| 11888/11888 [04:22<00:00, 45.31it/s, loss=6.41308] 
[epoch]: 174, [lr]: 0.049216:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.57177


[epoch]: 174, [lr]: 0.049216: 100%|██████████| 11888/11888 [04:22<00:00, 45.34it/s, loss=6.53299]
[epoch]: 175, [lr]: 0.049212:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.55731


[epoch]: 175, [lr]: 0.049212: 100%|██████████| 11888/11888 [04:22<00:00, 45.31it/s, loss=5.82640] 
[epoch]: 176, [lr]: 0.049207:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.56548


[epoch]: 176, [lr]: 0.049207: 100%|██████████| 11888/11888 [04:22<00:00, 45.34it/s, loss=6.75585] 
[epoch]: 177, [lr]: 0.049203:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.57268


[epoch]: 177, [lr]: 0.049203: 100%|██████████| 11888/11888 [04:22<00:00, 45.35it/s, loss=6.59033] 
[epoch]: 178, [lr]: 0.049198:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.51062


[epoch]: 178, [lr]: 0.049198: 100%|██████████| 11888/11888 [04:22<00:00, 45.31it/s, loss=6.60514] 
[epoch]: 179, [lr]: 0.049194:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.49886


[epoch]: 179, [lr]: 0.049194: 100%|██████████| 11888/11888 [04:22<00:00, 45.36it/s, loss=5.43416] 
[epoch]: 180, [lr]: 0.049189:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.52756


[epoch]: 180, [lr]: 0.049189: 100%|██████████| 11888/11888 [04:22<00:00, 45.27it/s, loss=6.26252] 
[epoch]: 181, [lr]: 0.049185:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.59482


[epoch]: 181, [lr]: 0.049185: 100%|██████████| 11888/11888 [04:22<00:00, 45.32it/s, loss=5.78364] 
[epoch]: 182, [lr]: 0.049180:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.57624


[epoch]: 182, [lr]: 0.049180: 100%|██████████| 11888/11888 [04:22<00:00, 45.33it/s, loss=6.29573] 
[epoch]: 183, [lr]: 0.049176:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.56162


[epoch]: 183, [lr]: 0.049176: 100%|██████████| 11888/11888 [04:22<00:00, 45.31it/s, loss=6.10991] 
[epoch]: 184, [lr]: 0.049171:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.61568


[epoch]: 184, [lr]: 0.049171: 100%|██████████| 11888/11888 [04:22<00:00, 45.33it/s, loss=6.02763] 
[epoch]: 185, [lr]: 0.049167:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.55625


[epoch]: 185, [lr]: 0.049167: 100%|██████████| 11888/11888 [04:22<00:00, 45.32it/s, loss=6.06220] 
[epoch]: 186, [lr]: 0.049162:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.52236


[epoch]: 186, [lr]: 0.049162: 100%|██████████| 11888/11888 [04:22<00:00, 45.30it/s, loss=6.34381]
[epoch]: 187, [lr]: 0.049158:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.54478


[epoch]: 187, [lr]: 0.049158: 100%|██████████| 11888/11888 [04:22<00:00, 45.29it/s, loss=5.85447] 
[epoch]: 188, [lr]: 0.049153:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.52504


[epoch]: 188, [lr]: 0.049153: 100%|██████████| 11888/11888 [04:22<00:00, 45.33it/s, loss=5.69340] 
[epoch]: 189, [lr]: 0.049149:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.55588


[epoch]: 189, [lr]: 0.049149: 100%|██████████| 11888/11888 [04:22<00:00, 45.31it/s, loss=5.94954] 
[epoch]: 190, [lr]: 0.049144:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.60309


[epoch]: 190, [lr]: 0.049144: 100%|██████████| 11888/11888 [04:22<00:00, 45.32it/s, loss=6.06613] 
[epoch]: 191, [lr]: 0.049140:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.53453


[epoch]: 191, [lr]: 0.049140: 100%|██████████| 11888/11888 [04:22<00:00, 45.32it/s, loss=5.65979]
[epoch]: 192, [lr]: 0.049135:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.53555


[epoch]: 192, [lr]: 0.049135: 100%|██████████| 11888/11888 [04:22<00:00, 45.33it/s, loss=5.98796] 
[epoch]: 193, [lr]: 0.049131:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.55863


[epoch]: 193, [lr]: 0.049131: 100%|██████████| 11888/11888 [04:22<00:00, 45.31it/s, loss=6.46637] 
[epoch]: 194, [lr]: 0.049126:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.55008


[epoch]: 194, [lr]: 0.049126: 100%|██████████| 11888/11888 [04:22<00:00, 45.30it/s, loss=6.66124] 
[epoch]: 195, [lr]: 0.049122:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.62787


[epoch]: 195, [lr]: 0.049122: 100%|██████████| 11888/11888 [04:22<00:00, 45.33it/s, loss=5.85298] 
[epoch]: 196, [lr]: 0.049117:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.54865


[epoch]: 196, [lr]: 0.049117: 100%|██████████| 11888/11888 [04:22<00:00, 45.30it/s, loss=5.83818] 
[epoch]: 197, [lr]: 0.049113:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.56904


[epoch]: 197, [lr]: 0.049113: 100%|██████████| 11888/11888 [04:22<00:00, 45.30it/s, loss=5.97364] 
[epoch]: 198, [lr]: 0.049108:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.56552


[epoch]: 198, [lr]: 0.049108: 100%|██████████| 11888/11888 [04:22<00:00, 45.28it/s, loss=6.39781] 
[epoch]: 199, [lr]: 0.049104:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.60201


[epoch]: 199, [lr]: 0.049104: 100%|██████████| 11888/11888 [04:22<00:00, 45.32it/s, loss=5.63535] 
[epoch]: 200, [lr]: 0.049099:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.54227


[epoch]: 200, [lr]: 0.049099: 100%|██████████| 11888/11888 [04:22<00:00, 45.31it/s, loss=6.28526] 
[epoch]: 201, [lr]: 0.049095:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.55661


[epoch]: 201, [lr]: 0.049095: 100%|██████████| 11888/11888 [04:22<00:00, 45.34it/s, loss=6.02672] 
[epoch]: 202, [lr]: 0.049090:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.55438


[epoch]: 202, [lr]: 0.049090: 100%|██████████| 11888/11888 [04:22<00:00, 45.29it/s, loss=6.01726] 
[epoch]: 203, [lr]: 0.049086:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.58287


[epoch]: 203, [lr]: 0.049086: 100%|██████████| 11888/11888 [04:22<00:00, 45.32it/s, loss=6.34441] 
[epoch]: 204, [lr]: 0.049081:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.51527


[epoch]: 204, [lr]: 0.049081: 100%|██████████| 11888/11888 [04:22<00:00, 45.31it/s, loss=6.21427] 
[epoch]: 205, [lr]: 0.049077:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.52132


[epoch]: 205, [lr]: 0.049077: 100%|██████████| 11888/11888 [04:22<00:00, 45.32it/s, loss=6.57981] 
[epoch]: 206, [lr]: 0.049072:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.58859


[epoch]: 206, [lr]: 0.049072: 100%|██████████| 11888/11888 [04:22<00:00, 45.35it/s, loss=6.03378] 
[epoch]: 207, [lr]: 0.049068:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.55046


[epoch]: 207, [lr]: 0.049068: 100%|██████████| 11888/11888 [04:22<00:00, 45.33it/s, loss=6.15142] 
[epoch]: 208, [lr]: 0.049063:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.57070


[epoch]: 208, [lr]: 0.049063: 100%|██████████| 11888/11888 [04:22<00:00, 45.34it/s, loss=6.51509] 
[epoch]: 209, [lr]: 0.049059:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.55385


[epoch]: 209, [lr]: 0.049059: 100%|██████████| 11888/11888 [04:22<00:00, 45.32it/s, loss=6.25464] 
[epoch]: 210, [lr]: 0.049054:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.54178


[epoch]: 210, [lr]: 0.049054: 100%|██████████| 11888/11888 [04:22<00:00, 45.35it/s, loss=6.54338] 
[epoch]: 211, [lr]: 0.049049:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.51601


[epoch]: 211, [lr]: 0.049049: 100%|██████████| 11888/11888 [04:22<00:00, 45.32it/s, loss=5.92918] 
[epoch]: 212, [lr]: 0.049045:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.56245


[epoch]: 212, [lr]: 0.049045: 100%|██████████| 11888/11888 [04:22<00:00, 45.36it/s, loss=6.07116]
[epoch]: 213, [lr]: 0.049040:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.52412


[epoch]: 213, [lr]: 0.049040: 100%|██████████| 11888/11888 [04:22<00:00, 45.35it/s, loss=6.15161] 
[epoch]: 214, [lr]: 0.049036:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.53486


[epoch]: 214, [lr]: 0.049036: 100%|██████████| 11888/11888 [04:22<00:00, 45.34it/s, loss=6.04214] 
[epoch]: 215, [lr]: 0.049031:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.56285


[epoch]: 215, [lr]: 0.049031: 100%|██████████| 11888/11888 [04:22<00:00, 45.35it/s, loss=6.36380] 
[epoch]: 216, [lr]: 0.049027:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.54969


[epoch]: 216, [lr]: 0.049027: 100%|██████████| 11888/11888 [04:22<00:00, 45.36it/s, loss=5.64940] 
[epoch]: 217, [lr]: 0.049022:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.55105


[epoch]: 217, [lr]: 0.049022: 100%|██████████| 11888/11888 [04:22<00:00, 45.33it/s, loss=6.43960] 
[epoch]: 218, [lr]: 0.049018:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.61193


[epoch]: 218, [lr]: 0.049018: 100%|██████████| 11888/11888 [04:22<00:00, 45.35it/s, loss=5.93199] 
[epoch]: 219, [lr]: 0.049013:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.55835


[epoch]: 219, [lr]: 0.049013: 100%|██████████| 11888/11888 [04:22<00:00, 45.36it/s, loss=6.71556] 
[epoch]: 220, [lr]: 0.049009:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.49478


[epoch]: 220, [lr]: 0.049009: 100%|██████████| 11888/11888 [04:22<00:00, 45.35it/s, loss=6.03030] 
[epoch]: 221, [lr]: 0.049004:   0%|          | 0/11888 [00:00<?, ?it/s]

[Info]: Loss for train: 6.58070


[epoch]: 221, [lr]: 0.049004:  92%|█████████▏| 10912/11888 [04:00<00:21, 45.04it/s, loss=6.36852]

KeyboardInterrupt: 

In [None]:
?SummaryWriter