In [1]:
import torch.nn as nn
import torch.nn.functional as F
import torch
import torchvision
import pandas as pd
from prunable_modules import MaskedLinearLayer, MaskedConvLayer, ResBlock

import time
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

# Run on the first CUDA device when one is visible to PyTorch, otherwise on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

print(device)

cuda:0


In [2]:
def load_imagenette(BS, data_root='/home/florian/data/imagenette2'):
    """Build the training and validation data loaders for an ImageFolder dataset.

    Args:
        BS: batch size used for both loaders.
        data_root: directory that contains the ``train`` and ``val``
            sub-folders. Defaults to the location the notebook was originally
            run from, so existing calls keep working; pass another path to run
            the notebook on a different machine.

    Returns:
        ``(train_loader, val_loader)``. The training loader is shuffled and
        applies random augmentation; the validation loader uses a fixed
        resize + centre crop and keeps the dataset order, so repeated
        evaluations see identical batches.
    """
    # Local import: ``os`` is not part of the notebook's import cell.
    import os

    # Same per-channel mean/std for both pipelines.
    normalize = torchvision.transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))

    train_transforms = torchvision.transforms.Compose([
        torchvision.transforms.ColorJitter(hue=.05, saturation=.05),
        torchvision.transforms.RandomHorizontalFlip(),
        torchvision.transforms.RandomRotation(20),
        torchvision.transforms.RandomResizedCrop(224),
        torchvision.transforms.ToTensor(),
        normalize,
    ])

    val_transforms = torchvision.transforms.Compose([
        torchvision.transforms.Resize(256),
        torchvision.transforms.CenterCrop(224),
        torchvision.transforms.ToTensor(),
        normalize,
    ])

    imagenette_train = torchvision.datasets.ImageFolder(
        root=os.path.join(data_root, 'train'),
        transform=train_transforms
    )
    imagenette_val = torchvision.datasets.ImageFolder(
        root=os.path.join(data_root, 'val'),
        transform=val_transforms
    )

    train_loader = torch.utils.data.DataLoader(imagenette_train,
                                               batch_size=BS,
                                               shuffle=True)
    # Shuffling validation data adds nothing and makes the per-batch averaged
    # validation loss depend on which samples land in the final partial batch.
    val_loader = torch.utils.data.DataLoader(imagenette_val,
                                             batch_size=BS,
                                             shuffle=False)
    return train_loader, val_loader

In [3]:
# Build the training and validation loaders with a batch size of 4.
train_loader, val_loader = load_imagenette(4)

In [4]:
class ImageNetResNet(nn.Module):
    """Residual CNN with maskable (prunable) layers and a 10-way classifier.

    The network is a 7x7 strided convolution, a max-pool, eight ``ResBlock``
    stages (64 -> 128 -> 256 -> 512 channels), an average pool and one masked
    linear layer. The layout resembles ResNet-18; the internals of the
    residual blocks live in ``prunable_modules``.

    Attributes:
        conv_weights / conv_masks: state-dict keys of the convolution weights
            and of their pruning masks, in matching order.
        fully_connected_weights / fully_connected_masks: the same for the
            dense layer(s).
        train_stats: ``pandas.DataFrame`` that collects training statistics.
    """

    def __init__(self):
        super().__init__()
        self.c1 = MaskedConvLayer((64, 3, 7, 7), padding=(3,3),bias=False, stride=2)
        self.p1 = nn.MaxPool2d((3,3), stride=(2,2), padding=(1))
        self.r1 = ResBlock(64,64,padding=1)
        self.r2 = ResBlock(64,64,padding=1)
        self.r3 = ResBlock(64,128,stride=2,padding=1)
        self.r4 = ResBlock(128,128,padding=1)
        self.r5 = ResBlock(128,256,stride=2,padding=1)
        self.r6 = ResBlock(256,256,padding=1)
        self.r7 = ResBlock(256,512,stride=2,padding=1)
        self.r8 = ResBlock(512,512,padding=1)
        self.p2 = nn.AvgPool2d(7)
        self.d1 = MaskedLinearLayer((512,10), activation=None)
        self.conv_weights, self.conv_masks, self.fully_connected_weights, self.fully_connected_masks = self.identify_layers()
        self.train_stats = pd.DataFrame(columns=('epoch', 'train_loss', 'train_accuracy', 'validation_loss', 'validation_accuracy', 'duration', 'criterion', 'optimizer', 'method', 'learning_rate', 'batchsize'))

    def forward(self, inputs):
        """Return the class scores (one row per sample) for a batch of images."""
        x = self.c1(inputs)
        x = self.p1(x)
        x = self.r1(x)
        x = self.r2(x)
        x = self.r3(x)
        x = self.r4(x)
        x = self.r5(x)
        x = self.r6(x)
        x = self.r7(x)
        x = self.r8(x)
        x = self.p2(x)
        # Drop the pooled spatial dimensions: (batch, channels, 1, 1) -> (batch, channels).
        x = x.view(x.shape[0], x.shape[1])
        x = self.d1(x)
        return (x)

    def identify_layers(self):
        """Collect the state-dict keys of all prunable weights and masks.

        A key belongs to a convolution when it starts with ``c`` (top-level
        conv) or when it starts with ``r`` and its fourth character is ``c``
        (a conv inside a residual block, e.g. ``r3.c2.mask``). A key belongs
        to a dense layer when it starts with ``d`` (this model's ``d1``) or
        with ``fc`` (the prefix originally looked for, kept for
        compatibility). Looking only for ``fc`` left the dense lists empty,
        so the classifier was silently never pruned.

        Returns:
            ``(conv_weights, conv_masks, fully_connected_weights,
            fully_connected_masks)`` as lists of key strings.
        """
        conv_weights, conv_masks, fully_connected_weights, fully_connected_masks = [], [], [], []
        for key in self.state_dict().keys():
            # key[3:4] instead of key[3] so short keys cannot raise IndexError.
            is_conv = key[:1] == 'c' or (key[:1] == 'r' and key[3:4] == 'c')
            is_dense = key[:2] == 'fc' or key[:1] == 'd'
            if is_conv and 'weights' in key:
                conv_weights.append(key)
            if is_conv and 'mask' in key:
                conv_masks.append(key)
            if is_dense and 'weights' in key:
                fully_connected_weights.append(key)
            if is_dense and 'mask' in key:
                fully_connected_masks.append(key)
        print(conv_masks)
        return conv_weights, conv_masks, fully_connected_weights, fully_connected_masks

    @staticmethod
    def _zero_smallest_(weights, mask, ratio, per_filter):
        """Zero, in place, the smallest-magnitude entries of ``weights`` and ``mask``.

        Args:
            weights: weight tensor; modified in place.
            mask: tensor with the same shape as ``weights``; the same positions
                are set to 0.
            ratio: fraction of entries to zero within each group.
            per_filter: when True every slice along dimension 0 is its own
                group (filter-wise pruning); otherwise the whole tensor is one
                group (layer-wise pruning).
        """
        if weights.numel() == 0:
            return
        groups = weights.shape[0] if per_filter else 1
        flat_weights = weights.view(groups, -1)
        flat_mask = mask.view(groups, -1)
        n_prune = int(round(ratio * flat_weights.shape[1]))
        if n_prune == 0:
            return
        # Column indices of the n_prune smallest |w| in every group.
        doomed = torch.argsort(torch.abs(flat_weights), dim=1)[:, :n_prune]
        flat_weights.scatter_(1, doomed, 0)
        flat_mask.scatter_(1, doomed, 0)

    def prune_magnitude_local_unstruct(self, ratio, device, scope='layer'):
        """Magnitude-prune every convolution and dense layer locally.

        Within each group the ``ratio`` fraction of weights with the smallest
        absolute value is set to zero, together with the matching mask
        entries. Dense layers are always pruned per layer.

        Args:
            ratio: fraction in ``[0, 1]`` of weights to remove per group.
            device: unused; kept so existing calls keep working.
            scope: ``'layer'`` ranks the weights of a whole conv layer
                together, ``'filter'`` ranks them inside every output filter
                separately.

        Returns:
            The updated state dict, which has also been loaded back into the
            model.

        Raises:
            ValueError: if ``scope`` is not ``'layer'``/``'filter'`` or if
                ``ratio`` lies outside ``[0, 1]``. Both are checked before
                anything is modified.
        """
        if scope != 'filter' and scope != 'layer':
            raise ValueError('scope should be one of "layer" and "filter"')
        if not 0 <= ratio <= 1:
            # A negative ratio would otherwise prune nearly everything via negative slicing.
            raise ValueError('ratio should be between 0 and 1')

        params = self.state_dict()
        with torch.no_grad():
            for weight_key, mask_key in zip(self.conv_weights, self.conv_masks):
                self._zero_smallest_(params[weight_key], params[mask_key], ratio,
                                     per_filter=(scope == 'filter'))
            for weight_key, mask_key in zip(self.fully_connected_weights, self.fully_connected_masks):
                self._zero_smallest_(params[weight_key], params[mask_key], ratio,
                                     per_filter=False)
        self.load_state_dict(params)
        return params

In [5]:
# Build the network and move it to the selected device; the constructor
# prints the collected convolution mask keys (see identify_layers).
model = ImageNetResNet().to(device)

['c1.mask', 'r1.c1.mask', 'r1.c2.mask', 'r2.c1.mask', 'r2.c2.mask', 'r3.c1.mask', 'r3.c2.mask', 'r3.c3.mask', 'r4.c1.mask', 'r4.c2.mask', 'r5.c1.mask', 'r5.c2.mask', 'r5.c3.mask', 'r6.c1.mask', 'r6.c2.mask', 'r7.c1.mask', 'r7.c2.mask', 'r7.c3.mask', 'r8.c1.mask', 'r8.c2.mask']


In [6]:
# Show the state-dict keys of the convolution masks collected at construction time.
model.conv_masks

['c1.mask',
 'r1.c1.mask',
 'r1.c2.mask',
 'r2.c1.mask',
 'r2.c2.mask',
 'r3.c1.mask',
 'r3.c2.mask',
 'r3.c3.mask',
 'r4.c1.mask',
 'r4.c2.mask',
 'r5.c1.mask',
 'r5.c2.mask',
 'r5.c3.mask',
 'r6.c1.mask',
 'r6.c2.mask',
 'r7.c1.mask',
 'r7.c2.mask',
 'r7.c3.mask',
 'r8.c1.mask',
 'r8.c2.mask']

In [7]:
def _evaluate_model(model, data_loader, device, criterion):
    correct = 0
    total = 0
    acc_loss = 0.0
    avg_loss = 0.0
    model.eval()
    with torch.no_grad():
        for i, data in enumerate(data_loader):
            #print(i)
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            if criterion != None:
                loss = criterion(outputs, labels)
                acc_loss += loss.item() 
                avg_loss = acc_loss / (i+1)
            #print(outputs)
            _, predicted = torch.max(outputs.data, 1)
            #print(predicted)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    model.train()
    return accuracy, avg_loss

def fit(model, train_loader, val_loader, epochs, device):
    """Train ``model`` with Adam and cross-entropy, validating after every epoch.

    One row per epoch is added to ``model.train_stats``. Previously the row
    was appended once after the loop, so only the last epoch was recorded.

    Args:
        model: network to train; must expose a ``train_stats`` DataFrame.
        train_loader: iterable of ``(inputs, labels)`` training batches.
        val_loader: iterable of ``(inputs, labels)`` validation batches.
        epochs: number of passes over ``train_loader``. With ``0`` nothing is
            trained and ``model.train_stats`` is returned unchanged.
        device: device the batches are moved to.

    Returns:
        ``model.train_stats`` including the newly added rows. ``duration`` is
        the cumulative training time in seconds up to that epoch (validation
        time excluded).
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters())

    total_time = 0
    epoch_records = []
    for epoch in range(epochs):  # loop over the dataset multiple times
        t0 = time.time()
        # Do not rely on a previous evaluation having left the model in train mode.
        model.train()
        acc_epoch_loss, avg_epoch_loss = 0.0, 0.0
        acc_epoch_accuracy, avg_epoch_accuracy = 0.0, 0.0
        first_batch_size = None

        for i, (inputs, labels) in enumerate(train_loader, 0):
            inputs, labels = inputs.to(device), labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            _, predicted = torch.max(outputs.detach(), 1)

            batchsize = labels.size(0)
            if first_batch_size is None:
                # Recorded in the stats; avoids loading an extra batch just to measure it.
                first_batch_size = batchsize
            correct = (predicted == labels).sum().item()
            accuracy = 100 * correct / batchsize
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # running means over the batches seen so far in this epoch
            acc_epoch_loss += loss.item()
            avg_epoch_loss = acc_epoch_loss / (i+1)
            acc_epoch_accuracy += accuracy
            avg_epoch_accuracy = acc_epoch_accuracy / (i+1)
            if i%10 == 0:
                print('[%d, %5d] loss: %.5f, train_accuracy: %.2f' %(epoch + 1, i + 1, loss.item(), accuracy))
        t1 = time.time()
        total_time += t1 - t0
        val_accuracy, val_loss = _evaluate_model(model, val_loader, device, criterion)
        print('duration: %d s - train loss: %.5f - train accuracy: %.2f - validation loss: %.2f - validation accuracy: %.2f ' %(t1-t0, avg_epoch_loss, avg_epoch_accuracy, val_loss, val_accuracy))
        epoch_records.append({
            'epoch': epoch+1,
            'train_loss': avg_epoch_loss,
            'train_accuracy': avg_epoch_accuracy,
            'validation_loss': val_loss,
            'validation_accuracy': val_accuracy,
            'duration': total_time,
            'criterion': criterion,
            'optimizer': optimizer,
            'method': 'standard',
            'learning_rate': optimizer.param_groups[0]['lr'],
            'batchsize': first_batch_size
        })

    if epoch_records:
        new_rows = pd.DataFrame(epoch_records)
        if model.train_stats.empty:
            # Keep the pre-declared column order without concatenating an empty frame.
            known = list(model.train_stats.columns)
            extra = [col for col in new_rows.columns if col not in known]
            model.train_stats = new_rows.reindex(columns=known + extra)
        else:
            # DataFrame.append was removed in pandas 2.0; concat is the replacement.
            model.train_stats = pd.concat([model.train_stats, new_rows], ignore_index=True)
    print('Finished Training')

    return model.train_stats

In [None]:
# Train for 100 epochs; fit() returns model.train_stats.
fit(model, train_loader, val_loader, 100, device)

[1,     1] loss: 4.12376, train_accuracy: 0.00
[1,    11] loss: 15.62767, train_accuracy: 25.00
[1,    21] loss: 39.67447, train_accuracy: 0.00
[1,    31] loss: 4.23959, train_accuracy: 50.00
[1,    41] loss: 17.00320, train_accuracy: 25.00
[1,    51] loss: 8.30997, train_accuracy: 25.00
[1,    61] loss: 5.45977, train_accuracy: 50.00
[1,    71] loss: 9.59836, train_accuracy: 0.00
[1,    81] loss: 7.96904, train_accuracy: 0.00
[1,    91] loss: 18.74900, train_accuracy: 0.00
[1,   101] loss: 3.24320, train_accuracy: 0.00
[1,   111] loss: 7.06145, train_accuracy: 50.00
[1,   121] loss: 15.36932, train_accuracy: 0.00
[1,   131] loss: 4.37726, train_accuracy: 0.00
[1,   141] loss: 5.24490, train_accuracy: 0.00
[1,   151] loss: 4.12448, train_accuracy: 25.00
[1,   161] loss: 3.55426, train_accuracy: 0.00
[1,   171] loss: 4.13177, train_accuracy: 0.00
[1,   181] loss: 2.73530, train_accuracy: 0.00
[1,   191] loss: 3.20715, train_accuracy: 25.00
[1,   201] loss: 2.31165, train_accuracy: 0.00


[1,  1731] loss: 2.54098, train_accuracy: 25.00
[1,  1741] loss: 2.44580, train_accuracy: 0.00
[1,  1751] loss: 1.66224, train_accuracy: 50.00
[1,  1761] loss: 2.57576, train_accuracy: 0.00
[1,  1771] loss: 2.43079, train_accuracy: 25.00
[1,  1781] loss: 2.01007, train_accuracy: 50.00
[1,  1791] loss: 2.90962, train_accuracy: 0.00
[1,  1801] loss: 2.44271, train_accuracy: 0.00
[1,  1811] loss: 1.98937, train_accuracy: 25.00
[1,  1821] loss: 2.52596, train_accuracy: 0.00
[1,  1831] loss: 2.39386, train_accuracy: 25.00
[1,  1841] loss: 2.80871, train_accuracy: 0.00
[1,  1851] loss: 1.81144, train_accuracy: 25.00
[1,  1861] loss: 1.76412, train_accuracy: 50.00
[1,  1871] loss: 2.04619, train_accuracy: 25.00
[1,  1881] loss: 2.03544, train_accuracy: 50.00
[1,  1891] loss: 2.19703, train_accuracy: 0.00
[1,  1901] loss: 1.19845, train_accuracy: 75.00
[1,  1911] loss: 1.88800, train_accuracy: 25.00
[1,  1921] loss: 2.84360, train_accuracy: 0.00
[1,  1931] loss: 2.37020, train_accuracy: 0.00
[

[2,  1061] loss: 1.91623, train_accuracy: 50.00
[2,  1071] loss: 2.70406, train_accuracy: 0.00
[2,  1081] loss: 2.24242, train_accuracy: 25.00
[2,  1091] loss: 1.91012, train_accuracy: 50.00
[2,  1101] loss: 2.16532, train_accuracy: 25.00
[2,  1111] loss: 2.11285, train_accuracy: 0.00
[2,  1121] loss: 1.92495, train_accuracy: 25.00
[2,  1131] loss: 2.13520, train_accuracy: 25.00
[2,  1141] loss: 2.04000, train_accuracy: 25.00
[2,  1151] loss: 2.35257, train_accuracy: 25.00
[2,  1161] loss: 2.38391, train_accuracy: 25.00
[2,  1171] loss: 2.17807, train_accuracy: 25.00
[2,  1181] loss: 2.18248, train_accuracy: 50.00
[2,  1191] loss: 1.87423, train_accuracy: 25.00
[2,  1201] loss: 1.56311, train_accuracy: 25.00
[2,  1211] loss: 1.87362, train_accuracy: 25.00
[2,  1221] loss: 1.93858, train_accuracy: 25.00
[2,  1231] loss: 1.82313, train_accuracy: 50.00
[2,  1241] loss: 1.49416, train_accuracy: 50.00
[2,  1251] loss: 2.52058, train_accuracy: 25.00
[2,  1261] loss: 1.67767, train_accuracy: 

[3,   391] loss: 2.17783, train_accuracy: 25.00
[3,   401] loss: 1.79315, train_accuracy: 50.00
[3,   411] loss: 2.16403, train_accuracy: 50.00
[3,   421] loss: 1.71988, train_accuracy: 0.00
[3,   431] loss: 1.66856, train_accuracy: 50.00
[3,   441] loss: 2.55641, train_accuracy: 0.00
[3,   451] loss: 2.65008, train_accuracy: 0.00
[3,   461] loss: 1.87365, train_accuracy: 25.00
[3,   471] loss: 2.11637, train_accuracy: 25.00
[3,   481] loss: 1.73038, train_accuracy: 50.00
[3,   491] loss: 1.96851, train_accuracy: 50.00
[3,   501] loss: 2.33217, train_accuracy: 0.00
[3,   511] loss: 2.01887, train_accuracy: 25.00
[3,   521] loss: 1.98082, train_accuracy: 25.00
[3,   531] loss: 2.37121, train_accuracy: 25.00
[3,   541] loss: 1.97533, train_accuracy: 50.00
[3,   551] loss: 2.37359, train_accuracy: 25.00
[3,   561] loss: 2.49579, train_accuracy: 0.00
[3,   571] loss: 1.62357, train_accuracy: 50.00
[3,   581] loss: 1.92344, train_accuracy: 50.00
[3,   591] loss: 2.12815, train_accuracy: 0.0

[3,  2111] loss: 1.50578, train_accuracy: 50.00
[3,  2121] loss: 1.26499, train_accuracy: 75.00
[3,  2131] loss: 1.89153, train_accuracy: 50.00
[3,  2141] loss: 2.45731, train_accuracy: 50.00
[3,  2151] loss: 1.66221, train_accuracy: 25.00
[3,  2161] loss: 1.76801, train_accuracy: 75.00
[3,  2171] loss: 2.34832, train_accuracy: 25.00
[3,  2181] loss: 2.31950, train_accuracy: 0.00
[3,  2191] loss: 1.32651, train_accuracy: 50.00
[3,  2201] loss: 1.83860, train_accuracy: 25.00
[3,  2211] loss: 1.88450, train_accuracy: 25.00
[3,  2221] loss: 2.99585, train_accuracy: 0.00
[3,  2231] loss: 1.74416, train_accuracy: 25.00
[3,  2241] loss: 1.70675, train_accuracy: 25.00
[3,  2251] loss: 2.17605, train_accuracy: 50.00
[3,  2261] loss: 1.94428, train_accuracy: 25.00
[3,  2271] loss: 1.48613, train_accuracy: 50.00
[3,  2281] loss: 1.55180, train_accuracy: 25.00
[3,  2291] loss: 2.00865, train_accuracy: 50.00
[3,  2301] loss: 1.40871, train_accuracy: 50.00
[3,  2311] loss: 1.96187, train_accuracy: 

[4,  1441] loss: 1.84077, train_accuracy: 50.00
[4,  1451] loss: 1.73508, train_accuracy: 75.00
[4,  1461] loss: 1.82680, train_accuracy: 50.00
[4,  1471] loss: 1.27516, train_accuracy: 50.00
[4,  1481] loss: 2.64224, train_accuracy: 25.00
[4,  1491] loss: 1.88392, train_accuracy: 25.00
[4,  1501] loss: 1.84301, train_accuracy: 25.00
[4,  1511] loss: 2.19664, train_accuracy: 0.00
[4,  1521] loss: 1.90953, train_accuracy: 50.00
[4,  1531] loss: 2.19487, train_accuracy: 25.00
[4,  1541] loss: 2.20263, train_accuracy: 25.00
[4,  1551] loss: 1.98188, train_accuracy: 25.00
[4,  1561] loss: 1.29748, train_accuracy: 75.00
[4,  1571] loss: 1.37238, train_accuracy: 75.00
[4,  1581] loss: 2.13812, train_accuracy: 25.00
[4,  1591] loss: 0.95780, train_accuracy: 75.00
[4,  1601] loss: 2.35255, train_accuracy: 25.00
[4,  1611] loss: 1.99707, train_accuracy: 50.00
[4,  1621] loss: 1.23085, train_accuracy: 75.00
[4,  1631] loss: 1.84934, train_accuracy: 50.00
[4,  1641] loss: 2.22637, train_accuracy:

[5,   761] loss: 1.02555, train_accuracy: 50.00
[5,   771] loss: 1.12982, train_accuracy: 75.00
[5,   781] loss: 1.72223, train_accuracy: 75.00
[5,   791] loss: 3.26432, train_accuracy: 25.00
[5,   801] loss: 2.15339, train_accuracy: 0.00
[5,   811] loss: 1.32239, train_accuracy: 25.00
[5,   821] loss: 2.16683, train_accuracy: 25.00
[5,   831] loss: 1.82568, train_accuracy: 50.00
[5,   841] loss: 1.59973, train_accuracy: 25.00
[5,   851] loss: 2.04471, train_accuracy: 25.00
[5,   861] loss: 2.31625, train_accuracy: 25.00
[5,   871] loss: 1.27672, train_accuracy: 75.00
[5,   881] loss: 2.61873, train_accuracy: 25.00
[5,   891] loss: 1.48174, train_accuracy: 50.00
[5,   901] loss: 1.78355, train_accuracy: 75.00
[5,   911] loss: 2.53572, train_accuracy: 25.00
[5,   921] loss: 1.57634, train_accuracy: 75.00
[5,   931] loss: 3.12279, train_accuracy: 0.00
[5,   941] loss: 2.33070, train_accuracy: 0.00
[5,   951] loss: 1.73964, train_accuracy: 50.00
[5,   961] loss: 1.57788, train_accuracy: 5

[6,    81] loss: 1.24914, train_accuracy: 75.00
[6,    91] loss: 1.84048, train_accuracy: 25.00
[6,   101] loss: 1.75762, train_accuracy: 50.00
[6,   111] loss: 1.04557, train_accuracy: 100.00
[6,   121] loss: 1.80461, train_accuracy: 25.00
[6,   131] loss: 1.73609, train_accuracy: 25.00
[6,   141] loss: 1.43350, train_accuracy: 75.00
[6,   151] loss: 2.22312, train_accuracy: 25.00
[6,   161] loss: 2.06788, train_accuracy: 0.00
[6,   171] loss: 1.79012, train_accuracy: 50.00
[6,   181] loss: 1.99684, train_accuracy: 25.00
[6,   191] loss: 1.35032, train_accuracy: 50.00
[6,   201] loss: 2.15294, train_accuracy: 0.00
[6,   211] loss: 1.50345, train_accuracy: 25.00
[6,   221] loss: 1.57666, train_accuracy: 50.00
[6,   231] loss: 1.14799, train_accuracy: 75.00
[6,   241] loss: 1.01306, train_accuracy: 75.00
[6,   251] loss: 2.65979, train_accuracy: 0.00
[6,   261] loss: 1.87746, train_accuracy: 50.00
[6,   271] loss: 1.93978, train_accuracy: 75.00
[6,   281] loss: 1.92062, train_accuracy: 

[6,  1801] loss: 2.89368, train_accuracy: 0.00
[6,  1811] loss: 1.72175, train_accuracy: 25.00
[6,  1821] loss: 2.23784, train_accuracy: 0.00
[6,  1831] loss: 1.39141, train_accuracy: 50.00
[6,  1841] loss: 2.66924, train_accuracy: 25.00
[6,  1851] loss: 1.44587, train_accuracy: 50.00
[6,  1861] loss: 1.38060, train_accuracy: 25.00
[6,  1871] loss: 1.79892, train_accuracy: 25.00
[6,  1881] loss: 2.54758, train_accuracy: 0.00
[6,  1891] loss: 1.78070, train_accuracy: 25.00
[6,  1901] loss: 1.29856, train_accuracy: 50.00
[6,  1911] loss: 1.79888, train_accuracy: 50.00
[6,  1921] loss: 2.72427, train_accuracy: 0.00
[6,  1931] loss: 1.59491, train_accuracy: 50.00
[6,  1941] loss: 2.79303, train_accuracy: 25.00
[6,  1951] loss: 1.62110, train_accuracy: 50.00
[6,  1961] loss: 2.06803, train_accuracy: 25.00
[6,  1971] loss: 2.02304, train_accuracy: 25.00
[6,  1981] loss: 1.68035, train_accuracy: 50.00
[6,  1991] loss: 2.25506, train_accuracy: 25.00
[6,  2001] loss: 2.53135, train_accuracy: 25

[7,  1121] loss: 1.87385, train_accuracy: 25.00
[7,  1131] loss: 1.66386, train_accuracy: 25.00
[7,  1141] loss: 1.96625, train_accuracy: 50.00
[7,  1151] loss: 2.50319, train_accuracy: 0.00
[7,  1161] loss: 2.50581, train_accuracy: 25.00
[7,  1171] loss: 0.72869, train_accuracy: 75.00
[7,  1181] loss: 0.76183, train_accuracy: 50.00
[7,  1191] loss: 1.31693, train_accuracy: 50.00
[7,  1201] loss: 3.08093, train_accuracy: 0.00
[7,  1211] loss: 1.77050, train_accuracy: 0.00
[7,  1221] loss: 1.85676, train_accuracy: 50.00
[7,  1231] loss: 1.51135, train_accuracy: 25.00
[7,  1241] loss: 0.71679, train_accuracy: 100.00
[7,  1251] loss: 1.98337, train_accuracy: 25.00
[7,  1261] loss: 2.08280, train_accuracy: 25.00
[7,  1271] loss: 1.92526, train_accuracy: 50.00
[7,  1281] loss: 1.76533, train_accuracy: 25.00
[7,  1291] loss: 1.25519, train_accuracy: 75.00
[7,  1301] loss: 1.87164, train_accuracy: 50.00
[7,  1311] loss: 2.13210, train_accuracy: 0.00
[7,  1321] loss: 2.20139, train_accuracy: 0

[8,   441] loss: 0.94756, train_accuracy: 75.00
[8,   451] loss: 2.17849, train_accuracy: 25.00
[8,   461] loss: 1.96713, train_accuracy: 25.00
[8,   471] loss: 1.10416, train_accuracy: 75.00
[8,   481] loss: 2.09378, train_accuracy: 25.00
[8,   491] loss: 0.89233, train_accuracy: 75.00
[8,   501] loss: 1.70383, train_accuracy: 50.00
[8,   511] loss: 2.12626, train_accuracy: 0.00
[8,   521] loss: 3.84009, train_accuracy: 25.00
[8,   531] loss: 2.15277, train_accuracy: 25.00
[8,   541] loss: 1.94210, train_accuracy: 0.00
[8,   551] loss: 1.68004, train_accuracy: 50.00
[8,   561] loss: 1.28785, train_accuracy: 50.00
[8,   571] loss: 1.76143, train_accuracy: 50.00
[8,   581] loss: 2.09387, train_accuracy: 0.00
[8,   591] loss: 1.38180, train_accuracy: 25.00
[8,   601] loss: 1.30327, train_accuracy: 25.00
[8,   611] loss: 2.11213, train_accuracy: 25.00
[8,   621] loss: 1.75533, train_accuracy: 50.00
[8,   631] loss: 0.99361, train_accuracy: 75.00
[8,   641] loss: 1.06413, train_accuracy: 5

[8,  2151] loss: 1.32529, train_accuracy: 50.00
[8,  2161] loss: 0.79457, train_accuracy: 75.00
[8,  2171] loss: 2.98314, train_accuracy: 0.00
[8,  2181] loss: 2.26039, train_accuracy: 50.00
[8,  2191] loss: 0.96904, train_accuracy: 75.00
[8,  2201] loss: 2.33334, train_accuracy: 25.00
[8,  2211] loss: 1.75051, train_accuracy: 25.00
[8,  2221] loss: 1.69661, train_accuracy: 25.00
[8,  2231] loss: 1.16816, train_accuracy: 75.00
[8,  2241] loss: 1.19072, train_accuracy: 75.00
[8,  2251] loss: 1.68422, train_accuracy: 25.00
[8,  2261] loss: 2.34722, train_accuracy: 0.00
[8,  2271] loss: 1.31420, train_accuracy: 25.00
[8,  2281] loss: 1.71587, train_accuracy: 25.00
[8,  2291] loss: 2.02096, train_accuracy: 0.00
[8,  2301] loss: 1.95655, train_accuracy: 25.00
[8,  2311] loss: 1.77535, train_accuracy: 25.00
[8,  2321] loss: 1.33006, train_accuracy: 25.00
[8,  2331] loss: 1.42260, train_accuracy: 75.00
[8,  2341] loss: 1.18720, train_accuracy: 75.00
[8,  2351] loss: 2.14019, train_accuracy: 5

[9,  1471] loss: 1.69407, train_accuracy: 50.00
[9,  1481] loss: 1.13046, train_accuracy: 75.00
[9,  1491] loss: 2.34067, train_accuracy: 50.00
[9,  1501] loss: 1.49177, train_accuracy: 50.00
[9,  1511] loss: 2.28854, train_accuracy: 25.00
[9,  1521] loss: 2.25186, train_accuracy: 25.00
[9,  1531] loss: 0.96839, train_accuracy: 50.00
[9,  1541] loss: 1.13830, train_accuracy: 25.00
[9,  1551] loss: 0.71834, train_accuracy: 75.00
[9,  1561] loss: 1.51877, train_accuracy: 25.00
[9,  1571] loss: 2.01129, train_accuracy: 25.00
[9,  1581] loss: 1.56298, train_accuracy: 50.00
[9,  1591] loss: 1.03366, train_accuracy: 75.00
[9,  1601] loss: 0.94639, train_accuracy: 75.00
[9,  1611] loss: 0.95599, train_accuracy: 75.00
[9,  1621] loss: 1.93985, train_accuracy: 50.00
[9,  1631] loss: 0.53323, train_accuracy: 100.00
[9,  1641] loss: 0.72733, train_accuracy: 100.00
[9,  1651] loss: 1.00460, train_accuracy: 75.00
[9,  1661] loss: 2.44189, train_accuracy: 25.00
[9,  1671] loss: 1.09449, train_accura

[10,   771] loss: 2.16513, train_accuracy: 50.00
[10,   781] loss: 1.04611, train_accuracy: 75.00
[10,   791] loss: 2.62345, train_accuracy: 25.00
[10,   801] loss: 0.85919, train_accuracy: 75.00
[10,   811] loss: 1.91466, train_accuracy: 0.00
[10,   821] loss: 1.32682, train_accuracy: 50.00
[10,   831] loss: 1.37557, train_accuracy: 50.00
[10,   841] loss: 1.81091, train_accuracy: 25.00
[10,   851] loss: 1.72372, train_accuracy: 75.00
[10,   861] loss: 2.73719, train_accuracy: 0.00
[10,   871] loss: 1.04247, train_accuracy: 75.00
[10,   881] loss: 1.61969, train_accuracy: 50.00
[10,   891] loss: 1.90711, train_accuracy: 50.00
[10,   901] loss: 1.84005, train_accuracy: 50.00
[10,   911] loss: 1.55599, train_accuracy: 25.00
[10,   921] loss: 0.44762, train_accuracy: 100.00
[10,   931] loss: 1.45576, train_accuracy: 75.00
[10,   941] loss: 0.87911, train_accuracy: 75.00
[10,   951] loss: 0.61739, train_accuracy: 75.00
[10,   961] loss: 1.23879, train_accuracy: 50.00
[10,   971] loss: 1.3

[11,    51] loss: 2.21557, train_accuracy: 25.00
[11,    61] loss: 2.09251, train_accuracy: 50.00
[11,    71] loss: 1.29664, train_accuracy: 75.00
[11,    81] loss: 0.84470, train_accuracy: 50.00
[11,    91] loss: 1.28565, train_accuracy: 50.00
[11,   101] loss: 1.26918, train_accuracy: 75.00
[11,   111] loss: 3.48972, train_accuracy: 0.00
[11,   121] loss: 1.34973, train_accuracy: 25.00
[11,   131] loss: 3.54580, train_accuracy: 0.00
[11,   141] loss: 1.04006, train_accuracy: 50.00
[11,   151] loss: 2.74453, train_accuracy: 50.00
[11,   161] loss: 1.52805, train_accuracy: 75.00
[11,   171] loss: 1.57242, train_accuracy: 50.00
[11,   181] loss: 3.12601, train_accuracy: 0.00
[11,   191] loss: 1.09990, train_accuracy: 50.00
[11,   201] loss: 0.98037, train_accuracy: 75.00
[11,   211] loss: 0.90358, train_accuracy: 50.00
[11,   221] loss: 1.05476, train_accuracy: 50.00
[11,   231] loss: 1.21021, train_accuracy: 50.00
[11,   241] loss: 1.79165, train_accuracy: 25.00
[11,   251] loss: 1.620

[11,  1731] loss: 0.92071, train_accuracy: 75.00
[11,  1741] loss: 1.90180, train_accuracy: 25.00
[11,  1751] loss: 0.36495, train_accuracy: 100.00
[11,  1761] loss: 1.02704, train_accuracy: 75.00
[11,  1771] loss: 1.46079, train_accuracy: 25.00
[11,  1781] loss: 1.86532, train_accuracy: 25.00
[11,  1791] loss: 1.30237, train_accuracy: 75.00
[11,  1801] loss: 2.40682, train_accuracy: 0.00
[11,  1811] loss: 0.73605, train_accuracy: 75.00
[11,  1821] loss: 1.31110, train_accuracy: 50.00
[11,  1831] loss: 1.14576, train_accuracy: 50.00
[11,  1841] loss: 1.30320, train_accuracy: 50.00
[11,  1851] loss: 0.36476, train_accuracy: 75.00
[11,  1861] loss: 2.08082, train_accuracy: 50.00
[11,  1871] loss: 1.59125, train_accuracy: 50.00
[11,  1881] loss: 0.49860, train_accuracy: 100.00
[11,  1891] loss: 1.07079, train_accuracy: 50.00
[11,  1901] loss: 1.82403, train_accuracy: 25.00
[11,  1911] loss: 1.80005, train_accuracy: 25.00
[11,  1921] loss: 0.85881, train_accuracy: 75.00
[11,  1931] loss: 0

[12,  1011] loss: 0.91870, train_accuracy: 100.00
[12,  1021] loss: 1.44814, train_accuracy: 50.00
[12,  1031] loss: 0.38134, train_accuracy: 100.00
[12,  1041] loss: 0.32563, train_accuracy: 100.00
[12,  1051] loss: 1.05101, train_accuracy: 75.00
[12,  1061] loss: 1.00943, train_accuracy: 50.00
[12,  1071] loss: 3.40158, train_accuracy: 25.00
[12,  1081] loss: 2.22170, train_accuracy: 25.00
[12,  1091] loss: 2.48579, train_accuracy: 25.00
[12,  1101] loss: 0.50821, train_accuracy: 75.00
[12,  1111] loss: 0.91242, train_accuracy: 50.00
[12,  1121] loss: 1.72290, train_accuracy: 25.00
[12,  1131] loss: 1.60002, train_accuracy: 50.00
[12,  1141] loss: 1.28906, train_accuracy: 25.00
[12,  1151] loss: 2.36405, train_accuracy: 25.00
[12,  1161] loss: 0.92871, train_accuracy: 100.00
[12,  1171] loss: 1.26702, train_accuracy: 25.00
[12,  1181] loss: 1.74991, train_accuracy: 50.00
[12,  1191] loss: 1.49926, train_accuracy: 25.00
[12,  1201] loss: 1.25729, train_accuracy: 50.00
[12,  1211] loss

[13,   291] loss: 0.73337, train_accuracy: 75.00
[13,   301] loss: 1.24255, train_accuracy: 50.00
[13,   311] loss: 2.26292, train_accuracy: 25.00
[13,   321] loss: 1.92774, train_accuracy: 25.00
[13,   331] loss: 1.61872, train_accuracy: 50.00
[13,   341] loss: 1.04596, train_accuracy: 75.00
[13,   351] loss: 1.10501, train_accuracy: 75.00
[13,   361] loss: 2.88003, train_accuracy: 25.00
[13,   371] loss: 2.02568, train_accuracy: 50.00
[13,   381] loss: 0.50178, train_accuracy: 100.00
[13,   391] loss: 1.37165, train_accuracy: 50.00
[13,   401] loss: 1.74700, train_accuracy: 50.00
[13,   411] loss: 0.77905, train_accuracy: 75.00
[13,   421] loss: 2.27970, train_accuracy: 75.00
[13,   431] loss: 1.68832, train_accuracy: 50.00
[13,   441] loss: 0.74823, train_accuracy: 75.00
[13,   451] loss: 1.44530, train_accuracy: 50.00
[13,   461] loss: 0.86793, train_accuracy: 75.00
[13,   471] loss: 0.72718, train_accuracy: 75.00
[13,   481] loss: 1.89621, train_accuracy: 50.00
[13,   491] loss: 2

[13,  1961] loss: 0.35804, train_accuracy: 100.00
[13,  1971] loss: 0.79516, train_accuracy: 75.00
[13,  1981] loss: 1.76232, train_accuracy: 50.00
[13,  1991] loss: 2.39420, train_accuracy: 25.00
[13,  2001] loss: 1.19485, train_accuracy: 50.00
[13,  2011] loss: 1.57326, train_accuracy: 50.00
[13,  2021] loss: 0.83274, train_accuracy: 75.00
[13,  2031] loss: 0.59683, train_accuracy: 100.00
[13,  2041] loss: 1.00653, train_accuracy: 50.00
[13,  2051] loss: 1.19753, train_accuracy: 75.00
[13,  2061] loss: 1.02542, train_accuracy: 75.00
[13,  2071] loss: 1.51882, train_accuracy: 75.00
[13,  2081] loss: 2.35109, train_accuracy: 0.00
[13,  2091] loss: 1.37702, train_accuracy: 25.00
[13,  2101] loss: 0.94859, train_accuracy: 75.00
[13,  2111] loss: 1.03147, train_accuracy: 75.00
[13,  2121] loss: 1.54473, train_accuracy: 25.00
[13,  2131] loss: 1.07675, train_accuracy: 50.00
[13,  2141] loss: 0.86237, train_accuracy: 75.00
[13,  2151] loss: 2.15588, train_accuracy: 25.00
[13,  2161] loss: 1

[14,  1241] loss: 0.80311, train_accuracy: 75.00
[14,  1251] loss: 1.24811, train_accuracy: 50.00
[14,  1261] loss: 1.02181, train_accuracy: 50.00
[14,  1271] loss: 1.50692, train_accuracy: 50.00
[14,  1281] loss: 1.41863, train_accuracy: 75.00
[14,  1291] loss: 2.47688, train_accuracy: 25.00
[14,  1301] loss: 0.79722, train_accuracy: 100.00
[14,  1311] loss: 0.66246, train_accuracy: 100.00
[14,  1321] loss: 0.89849, train_accuracy: 75.00
[14,  1331] loss: 1.00616, train_accuracy: 50.00
[14,  1341] loss: 0.92053, train_accuracy: 75.00
[14,  1351] loss: 1.13705, train_accuracy: 50.00
[14,  1361] loss: 2.36265, train_accuracy: 50.00
[14,  1371] loss: 0.43628, train_accuracy: 100.00
[14,  1381] loss: 1.72821, train_accuracy: 50.00
[14,  1391] loss: 2.38313, train_accuracy: 0.00
[14,  1401] loss: 0.60044, train_accuracy: 75.00
[14,  1411] loss: 0.95011, train_accuracy: 75.00
[14,  1421] loss: 1.29641, train_accuracy: 50.00
[14,  1431] loss: 1.75742, train_accuracy: 50.00
[14,  1441] loss: 

[15,   521] loss: 2.08321, train_accuracy: 50.00
[15,   531] loss: 1.23095, train_accuracy: 50.00
[15,   541] loss: 1.43777, train_accuracy: 25.00
[15,   551] loss: 1.83609, train_accuracy: 0.00
[15,   561] loss: 1.51754, train_accuracy: 75.00
[15,   571] loss: 1.05165, train_accuracy: 50.00
[15,   581] loss: 0.48840, train_accuracy: 75.00
[15,   591] loss: 1.18683, train_accuracy: 50.00
[15,   601] loss: 0.84333, train_accuracy: 100.00
[15,   611] loss: 2.17054, train_accuracy: 50.00
[15,   621] loss: 0.70568, train_accuracy: 100.00
[15,   631] loss: 0.80485, train_accuracy: 75.00
[15,   641] loss: 3.19759, train_accuracy: 0.00
[15,   651] loss: 1.12163, train_accuracy: 50.00
[15,   661] loss: 1.20995, train_accuracy: 50.00
[15,   671] loss: 1.15095, train_accuracy: 50.00
[15,   681] loss: 1.86502, train_accuracy: 25.00
[15,   691] loss: 1.83452, train_accuracy: 50.00
[15,   701] loss: 0.94171, train_accuracy: 75.00
[15,   711] loss: 0.90438, train_accuracy: 75.00
[15,   721] loss: 1.

[15,  2191] loss: 0.98345, train_accuracy: 75.00
[15,  2201] loss: 0.88995, train_accuracy: 75.00
[15,  2211] loss: 0.59681, train_accuracy: 100.00
[15,  2221] loss: 1.82130, train_accuracy: 25.00
[15,  2231] loss: 1.07381, train_accuracy: 50.00
[15,  2241] loss: 1.85447, train_accuracy: 50.00
[15,  2251] loss: 1.02420, train_accuracy: 75.00
[15,  2261] loss: 1.33141, train_accuracy: 75.00
[15,  2271] loss: 0.61048, train_accuracy: 75.00
[15,  2281] loss: 1.27534, train_accuracy: 25.00
[15,  2291] loss: 0.32822, train_accuracy: 100.00
[15,  2301] loss: 1.11228, train_accuracy: 75.00
[15,  2311] loss: 2.10458, train_accuracy: 25.00
[15,  2321] loss: 0.97703, train_accuracy: 50.00
[15,  2331] loss: 0.78301, train_accuracy: 75.00
[15,  2341] loss: 0.20950, train_accuracy: 100.00
[15,  2351] loss: 3.20020, train_accuracy: 25.00
[15,  2361] loss: 2.77041, train_accuracy: 0.00
duration: 195 s - train loss: 1.29593 - train accuracy: 57.53 - validation loss: 1.12 - validation accuracy: 66.17 


[16,  1471] loss: 0.26259, train_accuracy: 100.00
[16,  1481] loss: 1.65278, train_accuracy: 25.00
[16,  1491] loss: 1.00252, train_accuracy: 75.00
[16,  1501] loss: 0.62024, train_accuracy: 100.00
[16,  1511] loss: 1.31704, train_accuracy: 75.00
[16,  1521] loss: 0.80020, train_accuracy: 75.00
[16,  1531] loss: 1.10738, train_accuracy: 50.00
[16,  1541] loss: 0.68860, train_accuracy: 75.00
[16,  1551] loss: 0.69409, train_accuracy: 75.00
[16,  1561] loss: 2.42394, train_accuracy: 50.00
[16,  1571] loss: 0.82491, train_accuracy: 75.00
[16,  1581] loss: 0.98057, train_accuracy: 75.00
[16,  1591] loss: 0.63047, train_accuracy: 75.00
[16,  1601] loss: 0.69351, train_accuracy: 75.00
[16,  1611] loss: 1.61666, train_accuracy: 50.00
[16,  1621] loss: 3.10327, train_accuracy: 25.00
[16,  1631] loss: 1.09185, train_accuracy: 50.00
[16,  1641] loss: 1.37692, train_accuracy: 50.00
[16,  1651] loss: 2.29938, train_accuracy: 25.00
[16,  1661] loss: 2.11656, train_accuracy: 0.00
[16,  1671] loss: 0

[17,   751] loss: 0.54065, train_accuracy: 100.00
[17,   761] loss: 0.26654, train_accuracy: 100.00
[17,   771] loss: 0.46378, train_accuracy: 100.00
[17,   781] loss: 0.58823, train_accuracy: 50.00
[17,   791] loss: 0.96039, train_accuracy: 75.00
[17,   801] loss: 0.88031, train_accuracy: 75.00
[17,   811] loss: 1.73408, train_accuracy: 25.00
[17,   821] loss: 2.09545, train_accuracy: 50.00
[17,   831] loss: 0.67871, train_accuracy: 100.00
[17,   841] loss: 0.63180, train_accuracy: 100.00
[17,   851] loss: 1.20222, train_accuracy: 75.00
[17,   861] loss: 1.09848, train_accuracy: 75.00
[17,   871] loss: 1.72252, train_accuracy: 50.00
[17,   881] loss: 0.51751, train_accuracy: 75.00
[17,   891] loss: 1.90879, train_accuracy: 50.00
[17,   901] loss: 0.83520, train_accuracy: 75.00
[17,   911] loss: 1.79977, train_accuracy: 75.00
[17,   921] loss: 1.80784, train_accuracy: 25.00
[17,   931] loss: 0.73295, train_accuracy: 75.00
[17,   941] loss: 0.63186, train_accuracy: 75.00
[17,   951] los

[18,    31] loss: 1.29502, train_accuracy: 50.00
[18,    41] loss: 1.74337, train_accuracy: 25.00
[18,    51] loss: 0.64878, train_accuracy: 75.00
[18,    61] loss: 0.48261, train_accuracy: 100.00
[18,    71] loss: 1.47997, train_accuracy: 50.00
[18,    81] loss: 3.27305, train_accuracy: 25.00
[18,    91] loss: 2.25648, train_accuracy: 25.00
[18,   101] loss: 1.95074, train_accuracy: 50.00
[18,   111] loss: 1.36822, train_accuracy: 75.00
[18,   121] loss: 1.91646, train_accuracy: 50.00
[18,   131] loss: 0.80578, train_accuracy: 75.00
[18,   141] loss: 0.81787, train_accuracy: 75.00
[18,   151] loss: 1.93533, train_accuracy: 25.00
[18,   161] loss: 1.42312, train_accuracy: 50.00
[18,   171] loss: 2.51083, train_accuracy: 25.00
[18,   181] loss: 0.72138, train_accuracy: 75.00
[18,   191] loss: 1.41862, train_accuracy: 50.00
[18,   201] loss: 1.38393, train_accuracy: 50.00
[18,   211] loss: 0.27308, train_accuracy: 100.00
[18,   221] loss: 1.97172, train_accuracy: 50.00
[18,   231] loss: 

[18,  1701] loss: 1.39519, train_accuracy: 50.00
[18,  1711] loss: 0.72967, train_accuracy: 75.00
[18,  1721] loss: 2.22860, train_accuracy: 50.00
[18,  1731] loss: 1.48558, train_accuracy: 50.00
[18,  1741] loss: 1.45588, train_accuracy: 75.00
[18,  1751] loss: 0.59442, train_accuracy: 75.00
[18,  1761] loss: 0.42924, train_accuracy: 75.00
[18,  1771] loss: 0.62977, train_accuracy: 75.00
[18,  1781] loss: 1.45847, train_accuracy: 50.00
[18,  1791] loss: 1.89712, train_accuracy: 25.00
[18,  1801] loss: 1.20550, train_accuracy: 75.00
[18,  1811] loss: 2.26530, train_accuracy: 25.00
[18,  1821] loss: 1.13341, train_accuracy: 50.00
[18,  1831] loss: 1.70136, train_accuracy: 25.00
[18,  1841] loss: 1.05319, train_accuracy: 50.00
[18,  1851] loss: 0.85161, train_accuracy: 75.00
[18,  1861] loss: 0.45291, train_accuracy: 100.00
[18,  1871] loss: 1.54262, train_accuracy: 25.00
[18,  1881] loss: 0.31373, train_accuracy: 100.00
[18,  1891] loss: 1.75747, train_accuracy: 25.00
[18,  1901] loss: 

[19,   981] loss: 2.79491, train_accuracy: 25.00
[19,   991] loss: 1.86697, train_accuracy: 25.00
[19,  1001] loss: 1.97534, train_accuracy: 50.00
[19,  1011] loss: 0.64275, train_accuracy: 75.00
[19,  1021] loss: 1.24245, train_accuracy: 75.00
[19,  1031] loss: 0.85988, train_accuracy: 50.00
[19,  1041] loss: 1.40149, train_accuracy: 50.00
[19,  1051] loss: 0.76900, train_accuracy: 75.00
[19,  1061] loss: 1.76273, train_accuracy: 50.00
[19,  1071] loss: 0.72306, train_accuracy: 75.00
[19,  1081] loss: 0.51754, train_accuracy: 75.00
[19,  1091] loss: 0.81855, train_accuracy: 75.00
[19,  1101] loss: 0.94461, train_accuracy: 75.00
[19,  1111] loss: 1.05932, train_accuracy: 50.00
[19,  1121] loss: 0.89134, train_accuracy: 50.00
[19,  1131] loss: 0.37203, train_accuracy: 75.00
[19,  1141] loss: 1.22950, train_accuracy: 75.00
[19,  1151] loss: 1.00503, train_accuracy: 50.00
[19,  1161] loss: 3.16775, train_accuracy: 25.00
[19,  1171] loss: 0.45134, train_accuracy: 75.00
[19,  1181] loss: 1.

[20,   261] loss: 0.66822, train_accuracy: 50.00
[20,   271] loss: 1.33752, train_accuracy: 50.00
[20,   281] loss: 1.53971, train_accuracy: 50.00
[20,   291] loss: 1.20128, train_accuracy: 50.00
[20,   301] loss: 1.50802, train_accuracy: 50.00
[20,   311] loss: 1.15857, train_accuracy: 25.00
[20,   321] loss: 1.22485, train_accuracy: 50.00
[20,   331] loss: 0.34848, train_accuracy: 100.00
[20,   341] loss: 0.54635, train_accuracy: 100.00
[20,   351] loss: 0.18080, train_accuracy: 100.00
[20,   361] loss: 0.51430, train_accuracy: 100.00
[20,   371] loss: 0.86438, train_accuracy: 75.00
[20,   381] loss: 0.76898, train_accuracy: 50.00
[20,   391] loss: 2.26720, train_accuracy: 25.00
[20,   401] loss: 0.94138, train_accuracy: 50.00
[20,   411] loss: 0.67093, train_accuracy: 75.00
[20,   421] loss: 0.24555, train_accuracy: 100.00
[20,   431] loss: 0.65715, train_accuracy: 75.00
[20,   441] loss: 0.22547, train_accuracy: 100.00
[20,   451] loss: 2.90530, train_accuracy: 50.00
[20,   461] lo

[20,  1931] loss: 1.95216, train_accuracy: 50.00
[20,  1941] loss: 1.11215, train_accuracy: 75.00
[20,  1951] loss: 1.52311, train_accuracy: 75.00
[20,  1961] loss: 0.66061, train_accuracy: 75.00
[20,  1971] loss: 1.96197, train_accuracy: 75.00
[20,  1981] loss: 0.31330, train_accuracy: 100.00
[20,  1991] loss: 1.51952, train_accuracy: 50.00
[20,  2001] loss: 1.39401, train_accuracy: 50.00
[20,  2011] loss: 1.76439, train_accuracy: 25.00
[20,  2021] loss: 1.20347, train_accuracy: 50.00
[20,  2031] loss: 0.80608, train_accuracy: 100.00
[20,  2041] loss: 1.57763, train_accuracy: 50.00
[20,  2051] loss: 0.63361, train_accuracy: 50.00
[20,  2061] loss: 1.58614, train_accuracy: 50.00
[20,  2071] loss: 1.20612, train_accuracy: 75.00
[20,  2081] loss: 0.35277, train_accuracy: 75.00
[20,  2091] loss: 3.31061, train_accuracy: 25.00
[20,  2101] loss: 1.43337, train_accuracy: 75.00
[20,  2111] loss: 0.68457, train_accuracy: 75.00
[20,  2121] loss: 1.82506, train_accuracy: 25.00
[20,  2131] loss: 

[21,  1211] loss: 1.70249, train_accuracy: 50.00
[21,  1221] loss: 1.39318, train_accuracy: 50.00
[21,  1231] loss: 0.21778, train_accuracy: 100.00
[21,  1241] loss: 1.92667, train_accuracy: 25.00
[21,  1251] loss: 0.53899, train_accuracy: 100.00
[21,  1261] loss: 0.41395, train_accuracy: 75.00
[21,  1271] loss: 1.34018, train_accuracy: 50.00
[21,  1281] loss: 0.84591, train_accuracy: 50.00
[21,  1291] loss: 1.15734, train_accuracy: 75.00
[21,  1301] loss: 0.99124, train_accuracy: 50.00
[21,  1311] loss: 1.61698, train_accuracy: 0.00
[21,  1321] loss: 0.41633, train_accuracy: 100.00
[21,  1331] loss: 1.92137, train_accuracy: 50.00
[21,  1341] loss: 0.68674, train_accuracy: 75.00
[21,  1351] loss: 1.33791, train_accuracy: 25.00
[21,  1361] loss: 1.73680, train_accuracy: 25.00
[21,  1371] loss: 1.11273, train_accuracy: 50.00
[21,  1381] loss: 1.25734, train_accuracy: 75.00
[21,  1391] loss: 1.93724, train_accuracy: 25.00
[21,  1401] loss: 2.08822, train_accuracy: 50.00
[21,  1411] loss: 

[22,   491] loss: 0.44439, train_accuracy: 100.00
[22,   501] loss: 0.88314, train_accuracy: 50.00
[22,   511] loss: 0.86276, train_accuracy: 75.00
[22,   521] loss: 0.28835, train_accuracy: 100.00
[22,   531] loss: 1.05894, train_accuracy: 50.00
[22,   541] loss: 1.75077, train_accuracy: 0.00
[22,   551] loss: 0.39571, train_accuracy: 100.00
[22,   561] loss: 0.97572, train_accuracy: 50.00
[22,   571] loss: 0.61593, train_accuracy: 100.00
[22,   581] loss: 1.73601, train_accuracy: 50.00
[22,   591] loss: 1.25293, train_accuracy: 25.00
[22,   601] loss: 0.36766, train_accuracy: 100.00
[22,   611] loss: 0.86295, train_accuracy: 75.00
[22,   621] loss: 0.99127, train_accuracy: 50.00
[22,   631] loss: 0.65686, train_accuracy: 75.00
[22,   641] loss: 0.41303, train_accuracy: 100.00
[22,   651] loss: 0.33891, train_accuracy: 100.00
[22,   661] loss: 0.23342, train_accuracy: 100.00
[22,   671] loss: 1.66013, train_accuracy: 75.00
[22,   681] loss: 1.79470, train_accuracy: 50.00
[22,   691] l

[22,  2161] loss: 0.99997, train_accuracy: 75.00
[22,  2171] loss: 1.06310, train_accuracy: 75.00
[22,  2181] loss: 0.51636, train_accuracy: 100.00
[22,  2191] loss: 0.48578, train_accuracy: 100.00
[22,  2201] loss: 1.24845, train_accuracy: 50.00
[22,  2211] loss: 0.33211, train_accuracy: 100.00
[22,  2221] loss: 0.42281, train_accuracy: 75.00
[22,  2231] loss: 1.34181, train_accuracy: 75.00
[22,  2241] loss: 0.62959, train_accuracy: 75.00
[22,  2251] loss: 1.07303, train_accuracy: 75.00
[22,  2261] loss: 0.15764, train_accuracy: 100.00
[22,  2271] loss: 0.90206, train_accuracy: 75.00
[22,  2281] loss: 1.26609, train_accuracy: 75.00
[22,  2291] loss: 1.47688, train_accuracy: 50.00
[22,  2301] loss: 0.80588, train_accuracy: 75.00
[22,  2311] loss: 1.10949, train_accuracy: 50.00
[22,  2321] loss: 0.57951, train_accuracy: 75.00
[22,  2331] loss: 0.30137, train_accuracy: 100.00
[22,  2341] loss: 2.84707, train_accuracy: 25.00
[22,  2351] loss: 0.37834, train_accuracy: 100.00
[22,  2361] lo

[23,  1441] loss: 1.04006, train_accuracy: 75.00
[23,  1451] loss: 2.66952, train_accuracy: 25.00
[23,  1461] loss: 1.06781, train_accuracy: 75.00
[23,  1471] loss: 0.73605, train_accuracy: 75.00
[23,  1481] loss: 2.16592, train_accuracy: 50.00
[23,  1491] loss: 1.68553, train_accuracy: 75.00
[23,  1501] loss: 0.53983, train_accuracy: 100.00
[23,  1511] loss: 1.25893, train_accuracy: 50.00
[23,  1521] loss: 0.65636, train_accuracy: 75.00
[23,  1531] loss: 1.10949, train_accuracy: 50.00
[23,  1541] loss: 1.45246, train_accuracy: 50.00
[23,  1551] loss: 0.34161, train_accuracy: 75.00
[23,  1561] loss: 0.77614, train_accuracy: 75.00
[23,  1571] loss: 2.07434, train_accuracy: 50.00
[23,  1581] loss: 1.51816, train_accuracy: 25.00
[23,  1591] loss: 2.02334, train_accuracy: 25.00
[23,  1601] loss: 0.24580, train_accuracy: 100.00
[23,  1611] loss: 0.31738, train_accuracy: 75.00
[23,  1621] loss: 1.22959, train_accuracy: 75.00
[23,  1631] loss: 1.18483, train_accuracy: 75.00
[23,  1641] loss: 

[24,   721] loss: 1.44743, train_accuracy: 75.00
[24,   731] loss: 1.15660, train_accuracy: 50.00
[24,   741] loss: 1.66306, train_accuracy: 25.00
[24,   751] loss: 0.09651, train_accuracy: 100.00
[24,   761] loss: 0.96085, train_accuracy: 50.00
[24,   771] loss: 0.66886, train_accuracy: 100.00
[24,   781] loss: 2.38747, train_accuracy: 25.00
[24,   791] loss: 0.39366, train_accuracy: 100.00
[24,   801] loss: 1.44020, train_accuracy: 50.00
[24,   811] loss: 3.21952, train_accuracy: 25.00
[24,   821] loss: 2.53178, train_accuracy: 25.00
[24,   831] loss: 0.86566, train_accuracy: 50.00
[24,   841] loss: 1.58528, train_accuracy: 50.00
[24,   851] loss: 1.63973, train_accuracy: 50.00
[24,   861] loss: 0.65620, train_accuracy: 100.00
[24,   871] loss: 2.35964, train_accuracy: 0.00
[24,   881] loss: 0.65073, train_accuracy: 75.00
[24,   891] loss: 1.00166, train_accuracy: 25.00
[24,   901] loss: 1.85366, train_accuracy: 50.00
[24,   911] loss: 1.19213, train_accuracy: 50.00
[24,   921] loss:

[25,    11] loss: 0.88183, train_accuracy: 75.00
[25,    21] loss: 1.58380, train_accuracy: 50.00
[25,    31] loss: 0.56064, train_accuracy: 100.00
[25,    41] loss: 1.21349, train_accuracy: 75.00
[25,    51] loss: 1.60905, train_accuracy: 25.00
[25,    61] loss: 2.16892, train_accuracy: 50.00
[25,    71] loss: 2.01386, train_accuracy: 50.00
[25,    81] loss: 1.20961, train_accuracy: 50.00
[25,    91] loss: 1.56123, train_accuracy: 50.00
[25,   101] loss: 0.94206, train_accuracy: 75.00
[25,   111] loss: 1.24560, train_accuracy: 50.00
[25,   121] loss: 0.71203, train_accuracy: 75.00
[25,   131] loss: 1.32113, train_accuracy: 50.00
[25,   141] loss: 0.66068, train_accuracy: 100.00
[25,   151] loss: 0.48120, train_accuracy: 100.00
[25,   161] loss: 2.06703, train_accuracy: 0.00
[25,   171] loss: 0.60074, train_accuracy: 75.00
[25,   181] loss: 0.95654, train_accuracy: 75.00
[25,   191] loss: 0.82354, train_accuracy: 75.00
[25,   201] loss: 0.54398, train_accuracy: 75.00
[25,   211] loss: 

[25,  1681] loss: 3.82656, train_accuracy: 25.00
[25,  1691] loss: 1.19777, train_accuracy: 50.00
[25,  1701] loss: 0.45073, train_accuracy: 75.00
[25,  1711] loss: 2.64261, train_accuracy: 25.00
[25,  1721] loss: 0.96326, train_accuracy: 50.00
[25,  1731] loss: 2.15805, train_accuracy: 25.00
[25,  1741] loss: 0.31408, train_accuracy: 100.00
[25,  1751] loss: 0.91296, train_accuracy: 50.00
[25,  1761] loss: 1.31156, train_accuracy: 50.00
[25,  1771] loss: 0.86730, train_accuracy: 75.00
[25,  1781] loss: 1.32144, train_accuracy: 75.00
[25,  1791] loss: 0.36013, train_accuracy: 100.00
[25,  1801] loss: 1.29158, train_accuracy: 75.00
[25,  1811] loss: 1.29481, train_accuracy: 25.00
[25,  1821] loss: 0.90951, train_accuracy: 75.00
[25,  1831] loss: 2.91313, train_accuracy: 25.00
[25,  1841] loss: 1.21117, train_accuracy: 75.00
[25,  1851] loss: 1.09317, train_accuracy: 50.00
[25,  1861] loss: 0.58587, train_accuracy: 100.00
[25,  1871] loss: 0.46853, train_accuracy: 100.00
[25,  1881] loss