In [1]:
# imports
import utils # from The Google Research Authors
import torch as t, torch.nn as nn, torch.nn.functional as tnnF, torch.distributions as tdist
from torch.utils.data import DataLoader, Dataset
import torchvision as tv, torchvision.transforms as tr
import os
import sys
import argparse
#import ipdb
import numpy as np
import wideresnet # from The Google Research Authors
import json
import re

In [2]:
# Sampling
from tqdm import tqdm
t.backends.cudnn.benchmark = True
t.backends.cudnn.enabled = True
seed = 5555

# images RGB 32x32
im_sz = 32
n_ch = 3

In [3]:
# get random subset of data
class DataSubset(Dataset):
    def __init__(self, base_dataset, inds=None, size=-1):
        self.base_dataset = base_dataset
        if inds is None:
            inds = np.random.choice(list(range(len(base_dataset))), size, replace=False)
        self.inds = inds

    def __getitem__(self, index):
        base_ind = self.inds[index]
        return self.base_dataset[base_ind]

    def __len__(self):
        return len(self.inds)

In [4]:
# setup Wide_ResNet
# Uses The Google Research Authors, file wideresnet.py
class F(nn.Module):
    def __init__(self, depth=28, width=2, norm=None, dropout_rate=0.0, n_classes=10):
        super(F, self).__init__()
        self.f = wideresnet.Wide_ResNet(depth, width, norm=norm, dropout_rate=dropout_rate)
        self.energy_output = nn.Linear(self.f.last_dim, 1)
        self.class_output = nn.Linear(self.f.last_dim, n_classes)

    def forward(self, x, y=None):
        penult_z = self.f(x)
        return self.energy_output(penult_z).squeeze()

    def classify(self, x):
        penult_z = self.f(x)
        return self.class_output(penult_z).squeeze()

In [5]:
# Energies if y=none
# EBM energy calculated as logsumexp of logits
class CCF(F):
    def __init__(self, depth=28, width=2, norm=None, dropout_rate=0.0, n_classes=10):
        super(CCF, self).__init__(depth, width, norm=norm, dropout_rate=dropout_rate, n_classes=n_classes)

    def forward(self, x, y=None):
        logits = self.classify(x)
        if y is None:
            return logits.logsumexp(1)
        else:
            # gathers the logits along dim 1 with indeces y
            return t.gather(logits, 1, y[:, None])

In [6]:
# various utilities
def cycle(loader):
    while True:
        for data in loader:
            yield data

def grad_norm(m):
    total_norm = 0
    for p in m.parameters():
        param_grad = p.grad
        if param_grad is not None:
            param_norm = param_grad.data.norm(2) ** 2
            total_norm += param_norm
    total_norm = total_norm ** (1. / 2)
    return total_norm.item()

def grad_vals(m):
    ps = []
    for p in m.parameters():
        if p.grad is not None:
            ps.append(p.grad.data.view(-1))
    ps = t.cat(ps)
    return ps.mean().item(), ps.std(), ps.abs().mean(), ps.abs().std(), ps.abs().min(), ps.abs().max()

def init_random(args, bs):
    return t.FloatTensor(bs, n_ch, im_sz, im_sz).uniform_(-1, 1)

In [7]:
# Setup SGLD model and data/replay buffer
# Images generated are added to a buffer and sampled with a probability (1-\rho) for efficiency
def get_model_and_buffer(args, device, sample_q):
    model_cls = F if args.uncond else CCF
    f = model_cls(args.depth, args.width, args.norm, dropout_rate=args.dropout_rate, n_classes=args.n_classes)
    if not args.uncond:
        assert args.buffer_size % args.n_classes == 0, "Buffer size must be divisible by args.n_classes"
    if args.load_path is None:
        # make replay buffer
        replay_buffer = init_random(args, args.buffer_size)
        epoch=-1 #Because it needs to start at 0
    else:
        print(f"loading model from {args.load_path}")
        ckpt_dict = t.load(args.load_path)
        f.load_state_dict(ckpt_dict["model_state_dict"])
        replay_buffer = ckpt_dict["replay_buffer"]
        epoch = ckpt_dict["epoch"]

    f = f.to(device)
    return f, replay_buffer, epoch

In [8]:
# Load in chosen dataset from svhn, cifar10, cifar100
def get_data(args):
    if args.dataset == "svhn":
        transform_train = tr.Compose(
            [tr.Pad(4, padding_mode="reflect"),
             tr.RandomCrop(im_sz),
             tr.ToTensor(),
             tr.Normalize((.5, .5, .5), (.5, .5, .5)),
             lambda x: x + args.sigma * t.randn_like(x)]
        )
    else:
        transform_train = tr.Compose(
            [tr.Pad(4, padding_mode="reflect"),
             tr.RandomCrop(im_sz),
             tr.RandomHorizontalFlip(),
             tr.ToTensor(),
             tr.Normalize((.5, .5, .5), (.5, .5, .5)),
             lambda x: x + args.sigma * t.randn_like(x)]
        )
        #transform_train = tr.Compose(
        #    [tr.ToTensor()]
        #)
    transform_test = tr.Compose(
        [tr.ToTensor(),
         tr.Normalize((.5, .5, .5), (.5, .5, .5)),
         lambda x: x + args.sigma * t.randn_like(x)]
    )
    def dataset_fn(train, transform):
        if args.dataset == "cifar10":
            return tv.datasets.CIFAR10(root=args.data_root, transform=transform, download=True, train=train)
        elif args.dataset == "cifar100":
            return tv.datasets.CIFAR100(root=args.data_root, transform=transform, download=True, train=train)
        else:
            return tv.datasets.SVHN(root=args.data_root, transform=transform, download=True,
                                    split="train" if train else "test")

    # get all training inds
    full_train = dataset_fn(True, transform_train)
    all_inds = list(range(len(full_train)))
    # set seed
    np.random.seed(5555)
    # shuffle
    np.random.shuffle(all_inds)
    # seperate out validation set
    if args.n_valid is not None:
        valid_inds, train_inds = all_inds[:args.n_valid], all_inds[args.n_valid:]
    else:
        valid_inds, train_inds = [], all_inds
    train_inds = np.array(train_inds)
    train_labeled_inds = []
    other_inds = []
    train_labels = np.array([full_train[ind][1] for ind in train_inds])
    if args.labels_per_class > 0:
        for i in range(args.n_classes):
            print(i)
            train_labeled_inds.extend(train_inds[train_labels == i][:args.labels_per_class])
            other_inds.extend(train_inds[train_labels == i][args.labels_per_class:])
    else:
        train_labeled_inds = train_inds

    dset_train = DataSubset(
        dataset_fn(True, transform_train),
        inds=train_inds)
    dset_train_labeled = DataSubset(
        dataset_fn(True, transform_train),
        inds=train_labeled_inds)
    dset_valid = DataSubset(
        dataset_fn(True, transform_test),
        inds=valid_inds)
    dload_train = DataLoader(dset_train, batch_size=args.batch_size, shuffle=True, num_workers=4, drop_last=True)
    dload_train_labeled = DataLoader(dset_train_labeled, batch_size=args.batch_size, shuffle=True, num_workers=4, drop_last=True)
    dload_train_labeled = cycle(dload_train_labeled)
    dset_test = dataset_fn(False, transform_test)
    dload_valid = DataLoader(dset_valid, batch_size=100, shuffle=False, num_workers=4, drop_last=False)
    dload_test = DataLoader(dset_test, batch_size=100, shuffle=False, num_workers=4, drop_last=False)
    return dload_train, dload_train_labeled, dload_valid,dload_test

In [9]:
# Routine for SGLD generation of fake images
def get_sample_q(args, device):
    # setup initial data/buffers
    def sample_p_0(replay_buffer, bs, y=None):
        if len(replay_buffer) == 0:
            return init_random(args, bs), []
        buffer_size = len(replay_buffer) if y is None else len(replay_buffer) // args.n_classes
        inds = t.randint(0, buffer_size, (bs,))
        # if cond, convert inds to class conditional inds
        if y is not None:
            inds = y.cpu() * buffer_size + inds
            assert not args.uncond, "Can't drawn conditional samples without giving me y"
        buffer_samples = replay_buffer[inds]
        random_samples = init_random(args, bs)
        choose_random = (t.rand(bs) < args.reinit_freq).float()[:, None, None, None]
        samples = choose_random * random_samples + (1 - choose_random) * buffer_samples
        return samples.to(device), inds

    # actual SGLD
    def sample_q(f, replay_buffer, y=None, n_steps=args.n_steps):
        """this func takes in replay_buffer now so we have the option to sample from
        scratch (i.e. replay_buffer==[]).  See test_wrn_ebm.py for example.
        """
        # here f is CCF to calculate energies
        # evaluate model, must set train back on later (TODO:but I dont need to train energies?)
        f.eval()
        # get batch size
        bs = args.batch_size if y is None else y.size(0)
        # generate initial samples and buffer inds of those samples (if buffer is used)
        init_sample, buffer_inds = sample_p_0(replay_buffer, bs=bs, y=y)
        x_k = t.autograd.Variable(init_sample, requires_grad=True)
        # sgld
        for k in range(n_steps):
            # calculate \parial E/\partial x_{k-1}
            f_prime = t.autograd.grad(f(x_k, y=y).sum(), [x_k], retain_graph=True)[0]
            # x_k = x_{k-1} + \alpha*\parial E/\partial x_{k-1} + \theta * N
            x_k.data += args.sgld_lr * f_prime + args.sgld_std * t.randn_like(x_k)
        
        # set self.training = True
        f.train()
        
        # Returns a new Tensor, detached from the current graph
        final_samples = x_k.detach()
        
        # update replay buffer
        if len(replay_buffer) > 0:
            replay_buffer[buffer_inds] = final_samples.cpu()
        return final_samples
    return sample_q

In [10]:
#To avoid repeat code and maintanence. This is for the evaluations
def eval_classification_inner(f,dload,device):
    softmax=nn.Softmax(dim=1)
    corrects, losses, logits_all = [], [], []
    for x_p_d, y_p_d in dload:
        x_p_d, y_p_d = x_p_d.to(device), y_p_d.to(device)
        logits = f.classify(x_p_d)
        logits_all.extend(logits)

        loss = nn.CrossEntropyLoss(reduce=False)(logits, y_p_d).cpu().numpy()
        losses.extend(loss)

        correct = (logits.max(1)[1] == y_p_d).float().cpu().numpy()
        corrects.extend(correct)

    logits_all=t.stack(logits_all)
    logits=softmax(logits_all)
    sms = logits.max(1)[0]
    cali_vals=[(a,b.item()) for a,b in zip(corrects,sms)]
    return corrects, losses, cali_vals

In [11]:
# calculate loss and accuracy for periodic printout
def eval_classification(f, dload, device):
    corrects, losses, _ = eval_classification_inner(f,dload,device)
    loss = np.mean(losses)
    correct = np.mean(corrects)
    return correct, loss

In [12]:
#save the calibration data to a file
def save_calibration(filename,cali_vals):
    with open(filename,"w") as f:
        f.write("correct,softmax\n")
        for i in cali_vals:
            f.write("{},{}\n".format(i[0],i[1]))

In [13]:
#calculate loss and accuracy for calibration
def eval_with_calibration(f, dload, device):
    corrects, losses, cali_vals = eval_classification_inner(f,dload,device)
    loss = np.mean(losses)
    correct = np.mean(corrects)
    save_calibration(os.path.join(args.save_dir,f'cali_{ev}.csv'),cali_vals)
    return correct, loss

In [14]:
#Track loss for convergence
def loss_tracker(filename,epoch,loss,correct):
    if not os.path.isfile(os.path.join(args.save_dir,filename)):
        with open(os.path.join(args.save_dir,filename),'w') as of:
            of.write("Epoch,Loss,Acc\n")
            of.write("{},{},{}\n".format(epoch,loss,correct))
    else:
        with open(os.path.join(args.save_dir,filename),'a') as of:
            of.write("{},{},{}\n".format(epoch,loss,correct))

In [15]:
# save checkpoint data
def checkpoint(f, opt, buffer, epoch_no, tag, args, device):
    f.cpu()
    ckpt_dict = {
        "model_state_dict": f.state_dict(),
        'optimizer_state_dict': opt.state_dict(),
        'epoch': epoch_no,
        "replay_buffer": buffer
    }
    t.save(ckpt_dict, os.path.join(args.save_dir, tag))
    t.save(ckpt_dict, os.path.join(args.save_dir,'most_recent.pt'))
    f.to(device)

In [16]:
#Track loss for convergence
def loss_tracker(filename,save_dir,epoch,loss,correct):
    if not os.path.isfile(os.path.join(args.save_dir,filename)):
        with open(os.path.join(args.save_dir,filename),'w') as f:
            f.write("Epoch,Loss,Acc\n")
            f.write("{},{},{}\n".format(epoch,loss,correct))
    else:
        with open(os.path.join(args.save_dir,filename),'a') as f:
            f.write("{},{},{}\n".format(epoch,loss,correct))

In [17]:
#get the newest ckpt if not using the "most_recent.pt" file
def nat_keys(word):
    def atoi(c):
        return int(c) if c.isdigit() else c
    return [atoi(c) for c in re.split('(\d+)',word)]

def get_most_recent_ckpt(dir):
    ckpt=sorted([i for i in os.listdir(dir) if 'ckpt'==i[:4]],key=nat_keys)[-1]
    return os.path.join(dir,ckpt)

In [18]:
#This function adds or overwrites a file to the output dir named '0_readme.txt'
#That file contains what we were hoping to do with that experiment
def exp_purpose(words,filename='0_readme.txt'):
    with open(os.path.join(args.save_dir,filename),'w') as f:
        f.write(words)

In [19]:
def get_optimizer(args,f):
    params = f.class_output.parameters() if args.clf_only else f.parameters()
    if args.optimizer == "adam":
        optim = t.optim.Adam(params, lr=args.lr, betas=[.9, .999], weight_decay=args.weight_decay)
    else:
        optim = t.optim.SGD(params, lr=args.lr, momentum=.9, weight_decay=args.weight_decay)
    return optim

In [20]:
def set_up_experiment(args,seed):
    utils.makedirs(args.save_dir)
    with open(f'{args.save_dir}/params.txt', 'w') as f:
        json.dump(args.__dict__, f)
    if args.print_to_log:
        sys.stdout = open(f'{args.save_dir}/log.txt', 'w')

    t.manual_seed(seed)
    if t.cuda.is_available():
        t.cuda.manual_seed_all(seed)
        
    # store purpose of experiment
    exp_purpose("Get SGLD to train using the CCF model and simplest parameters.")

In [21]:
def iterify(var):
    if type(var)==str:
        return [var]
    try:
        iter(var)
    except:
        var=[var]
    return var

In [22]:
#I tested this, it does not need to return to update the optimizer
def decay_epoch(optim):
    for param_group in optim.param_groups:
        new_lr = param_group['lr'] * args.decay_rate
        param_group['lr'] = new_lr
    print("Decaying lr to {}".format(new_lr))

In [23]:
def warmup_epoch(optim,cur_iter):
    lr = args.lr * cur_iter / float(args.warmup_iters)
    for param_group in optim.param_groups:
        param_group['lr'] = lr

In [24]:
def x_ent(f,x_lab,y_lab,epoch,cur_iter):
    logits = f.classify(x_lab)
    l_p_y_given_x = nn.CrossEntropyLoss()(logits, y_lab)
    if cur_iter % args.print_every == 0:
        acc = (logits.max(1)[1] == y_lab).float().mean()
        print('P(y|x) {}:{:>d} loss={:>14.9f}, acc={:>14.9f}'.format(epoch,cur_iter,
                                                                     l_p_y_given_x.item(),acc.item()))
    return l_p_y_given_x

In [25]:
def not_paper(sample_q,f,replay_buffer,y_lab,x_lab):
    assert not args.uncond, "this objective can only be trained for class-conditional EBM DUUUUUUUUHHHH!!!"
    x_q_lab = sample_q(f, replay_buffer, y=y_lab)
    fp, fq = f(x_lab, y_lab).mean(), f(x_q_lab, y_lab).mean()
    l_p_x_y = -(fp - fq)
    if cur_iter % args.print_every == 0:
        print('P(x, y) | {}:{:>d} f(x_p_d)={:>14.9f} f(x_q)={:>14.9f} d={:>14.9f}'.format(epoch, i, fp, fq,fp-fq))
    return l_p_x_y

In [26]:
# main function for training
# Uses args from class below
def main(args):
    ######################################################
    ###                                                ###
    ###               Closure functions                ###
    ###                                                ###
    ######################################################
    
    #Three functions for the evaluation.              
    def basic_eval(eval_func,dls,evs=None,with_tracker=False):
        f.eval()
        with t.no_grad():
            for ev,dl in zip(iterify(evs),iterify(dls)):
                print('ev: ',ev)
                correct, loss = eval_func(f, dload_test, device)    
                if with_tracker:
                    loss_tracker(f'track_{ev}.csv',args.save_dir,epoch,loss,correct)
        print(f"{ev}: Epoch {epoch}: Valid Loss {loss}, Valid Acc {correct}")
        f.train()
        return correct
    
    def eval_all_3(eval_func,with_tracker=False):
        evs=['test', 'train', 'valid']
        dls=[dload_test,dload_train,dload_valid]
        return basic_eval(eval_func,dls,evs,with_tracker)
    
    def update_best():
        print("Best Valid!: {}".format(correct))
        checkpoint(f, optim, replay_buffer, epoch, f'best_valid_ckpt.pt', args, device)
    
    #Loss options
    def sgld():
        if args.class_cond_p_x_sample:
            assert not args.uncond, "can only draw class-conditional samples if EBM is class-cond"
            y_q = t.randint(0, args.n_classes, (args.batch_size,)).to(device)
            x_q = sample_q(f, replay_buffer, y=y_q)
        else:
            # get data generated by SGLD
            # In paper x_q_shape torch.Size([64, 3, 32, 32])
            # Batch rgb 32x32
            x_q = sample_q(f, replay_buffer)  # sample from log-sumexp
            #print("x_q_shape",x_q.shape)

        # calculate energy for training data
        fp_all = f(x_p_d)

        # calculate energy for SGLD generated sample
        fq_all = f(x_q)

        # get means
        fp = fp_all.mean()
        fq = fq_all.mean()

        # surrogate for the difference of expected value of \partial Energy/\partial x
        # and \partial Energy/\partial x
        # Need to maximize this, so preceded by minus
        l_p_x = -(fp - fq)
        if cur_iter % args.print_every == 0:
            print('P(x) | {}:{:>d} f(x_p_d)={:>14.9f} f(x_q)={:>14.9f} d={:>14.9f}'.format(epoch, i, fp, fq,fp - fq))

        return l_p_x    
    
    #Two functions for the adaptive learning
    def retry_epoch():
        bad_epoch=epoch
        args.sgld_lr/=2
        args.load_path=os.path.join(args.save_dir,f'ckpt_{(epoch-1)}.pt')
        f, replay_buffer, _ = get_model_and_buffer(args, device, sample_q)
        print(f'Diverged: Using adaptive learning: ckpt_{(epoch-1)}.pt')
        print(f'New sgld_lr: {args.sgld_lr}')
    
    def restore_lr():
        args.sgld_lr=org_sgld_lr
        print("Adaptive learning over, restored original lrs.")
        
    #I just moved this code wholesale to get it out of my way
    def handle_plots():
        if cur_iter % 100 == 0:
            if args.plot_uncond:
                if args.class_cond_p_x_sample:
                    assert not args.uncond, "can only draw class-conditional samples if EBM is class-cond"
                    y_q = t.randint(0, args.n_classes, (args.batch_size,)).to(device)
                    x_q = sample_q(f, replay_buffer, y=y_q)
                else:
                    x_q = sample_q(f, replay_buffer)
                plot('{}/x_q_{}_{:>06d}.png'.format(args.save_dir, epoch, i), x_q)
            if args.plot_cond:  # generate class-conditional samples
                y = t.arange(0, args.n_classes)[None].repeat(args.n_classes, 1).transpose(1, 0).contiguous().view(-1).to(device)
                x_q_y = sample_q(f, replay_buffer, y=y)
                plot('{}/x_q_y{}_{:>06d}.png'.format(args.save_dir, epoch, i), x_q_y)
      
    
    
    ######################################################
    ###                                                ###
    ###                  Start main                    ###
    ###                                                ###
    ######################################################
    
    
    set_up_experiment(args, seed)
    
    # datasets
    dload_train, dload_train_labeled, dload_valid, dload_test = get_data(args)

    # device
    device = t.device('cuda' if t.cuda.is_available() else 'cpu')

    # MODEL
    sample_q = get_sample_q(args, device)
    f, replay_buffer, epoch = get_model_and_buffer(args, device, sample_q)

    sqrt = lambda x: int(t.sqrt(t.Tensor([x])))
    plot = lambda p, x: tv.utils.save_image(t.clamp(x, -1, 1), p, normalize=True, nrow=sqrt(x.size(0)))

    # optimizer
    optim=get_optimizer(args,f)

    # Quick eval of imported model
    basic_eval(eval_classification,dload_valid,'valid')
    
    #Set variables for the while loop
    bad_epoch=-1
    best_valid_acc = 0.0
    cur_iter = 0
    epoch+=1
    final_epoch=args.n_epochs+epoch
    diverged=False
    org_lr=args.lr
    org_sgld_lr=args.sgld_lr
    
    # loop over epochs -> While loop so we can go back epochs
    while epoch<final_epoch:
        
        # decaying learning rate?
        if (epoch in args.decay_epochs) and (not diverged): decay_epoch(optim)
                       
        # loop over data in batches
        # x_p_d sample from dataset
        for i, (x_p_d, _) in tqdm(enumerate(dload_train)):
            # scale up lr to full over warmup time
            if cur_iter <= args.warmup_iters: warmup_epoch(optim,cur_iter)
                
            x_p_d = x_p_d.to(device)
            x_lab, y_lab = dload_train_labeled.__next__()
            x_lab, y_lab = x_lab.to(device), y_lab.to(device)

            # initialize loss
            L = 0.
            
            # this maximizes log p(x) using SGLD
            if args.p_x_weight > 0:  # maximize log p(x)
                l_p_x = sgld()                                                                                  
                # add to loss
                L += args.p_x_weight * l_p_x
                
            # No SGLD energy
            if args.new_energy > 0:
                logits = f.classify(x_lab)

                ####################################################
                # Maximize entropy by assuming equal probabilities #
                ####################################################
                energy = logits.logsumexp(dim=1, keepdim=False) #.cpu().detach().numpy()

                e_mean = t.mean(energy)
                #print('Energy shape',energy.size())

                energy_loss = t.sum((e_mean - energy)**2)
                L += energy_loss

            # normal cross entropy loss function
            if args.p_y_given_x_weight > 0:  # maximize log p(y | x)
                l_p_y_given_x=x_ent(f,x_lab,y_lab,epoch,cur_iter)
                # add to loss
                L += args.p_y_given_x_weight * l_p_y_given_x
            
            #The code not for the paper
            if args.p_x_y_weight > 0:  # maximize log p(x, y)
                l_p_x_y = not_paper(sample_q,f,replay_buffer,y_lab,x_lab)
                # add to loss
                L += args.p_x_y_weight * l_p_x_y

            # Handle Loss divergence
            if L.abs().item() > 1e8:
                retry_epoch()
                diverged=True
                break
            
            # Optimize network using our loss function L
            optim.zero_grad()            
            L.backward()
            optim.step()
            cur_iter += 1

            # Plot outputs
            handle_plots()

        ####### END FOR LOOP 
        
        # restore after bad epoch
        if diverged and epoch>bad_epoch:
            restore_lr()
            diverged=False
            
        # If it diverged, then skip the evaluation and don't increment epoch
        if not diverged:        
            # Checkpoint
            if epoch % args.ckpt_every == 0: 
                checkpoint(f, optim, replay_buffer, epoch, f'ckpt_{epoch}.pt', args, device)

            # Performance assesment 
            if epoch % args.eval_every == 0 and (args.p_y_given_x_weight > 0 or args.p_x_y_weight > 0):
                correct = eval_all_3(eval_classification,with_tracker=True)
                if correct > best_valid_acc: 
                    best_valid_acc = correct
                    update_best()
 
            epoch+=1

    ####### END WHILE LOOP

In [27]:
# Setup parameters
# defaults for paper
# --lr .0001 --dataset cifar10 --optimizer adam --p_x_weight 1.0 --p_y_given_x_weight 1.0 
# --p_x_y_weight 0.0 --sigma .03 --width 10 --depth 28 --save_dir /YOUR/SAVE/DIR 
# --plot_uncond --warmup_iters 1000
#
# Regression
# {"dataset": "cifar10", "data_root": "../data", "lr": 0.0001, "decay_epochs": [160, 180], 
# "decay_rate": 0.3, "clf_only": false, "labels_per_class": -1, "optimizer": "adam", 
# "batch_size": 64, "n_epochs": 200, "warmup_iters": 1000, "p_x_weight": 1.0, 
# "p_y_given_x_weight": 1.0, "p_x_y_weight": 0.0, "dropout_rate": 0.0, "sigma": 0.03, 
# "weight_decay": 0.0, "norm": null, "n_steps": 20, "width": 10, "depth": 28, "uncond": false, 
# "class_cond_p_x_sample": false, "buffer_size": 10000, "reinit_freq": 0.05, "sgld_lr": 1.0, 
# "sgld_std": 0.01, "save_dir": "./savedir", "ckpt_every": 10, "eval_every": 1, 
# "print_every": 100, "load_path": null, "print_to_log": false, "plot_cond": false, 
#"plot_uncond": true, "n_valid": 5000, "n_classes": 10}
class train_args():
    def __init__(self, param_dict):
        # set defaults
        self.dataset = "cifar10" #, choices=["cifar10", "svhn", "cifar100"])
        self.n_classes = 100 if self.dataset == "cifar100" else 10
        self.data_root = "../data" 
        # optimization
        self.lr = 1e-4
        self.decay_epochs = [160, 180] # help="decay learning rate by decay_rate at these epochs")
        self.decay_rate = .3 # help="learning rate decay multiplier")
        self.clf_only = False #action="store_true", help="If set, then only train the classifier")
        self.labels_per_class = -1# help="number of labeled examples per class, if zero then use all labels")
        self.optimizer = "adam" #choices=["adam", "sgd"], default="adam")
        self.batch_size = 64
        self.n_epochs = 200
        self.warmup_iters = -1 # help="number of iters to linearly increase learning rate, if -1 then no warmmup")
        # loss weighting
        self.p_x_weight = 1.
        self.p_y_given_x_weight = 1.
        self.p_x_y_weight = 0.
        # regularization
        self.dropout_rate = 0.0
        self.sigma = 3e-2 # help="stddev of gaussian noise to add to input, .03 works but .1 is more stable")
        self.weight_decay = 0.0
        # network
        self.norm = None # choices=[None, "norm", "batch", "instance", "layer", "act"], help="norm to add to weights, none works fine")
        # EBM specific
        self.n_steps = 20 # help="number of steps of SGLD per iteration, 100 works for short-run, 20 works for PCD")
        self.width = 10 # help="WRN width parameter")
        self.depth = 28 # help="WRN depth parameter")
        self.uncond = False # "store_true" # help="If set, then the EBM is unconditional")
        self.class_cond_p_x_sample = False #, action="store_true", help="If set we sample from p(y)p(x|y), othewise sample from p(x)," "Sample quality higher if set, but classification accuracy better if not.")
        self.buffer_size = 10000
        self.reinit_freq = .05
        self.sgld_lr = 1.0
        self.sgld_std = 1e-2
        # logging + evaluation
        self.save_dir = './experiment'
        self.ckpt_every = 10 # help="Epochs between checkpoint save")
        self.eval_every = 1 # help="Epochs between evaluation")
        self.print_every = 100 # help="Iterations between print")
        self.load_path = None # path for checkpoint to load
        self.print_to_log = False #", action="store_true", help="If true, directs std-out to log file")
        self.plot_cond = False #", action="store_true", help="If set, save class-conditional samples")
        self.plot_uncond = False #", action="store_true", help="If set, save unconditional samples")
        self.n_valid = 5000
        self.new_energy = 0
        
        # set from inline dict
        for key in param_dict:
            #print(key, '->', param_dict[key])
            setattr(self, key, param_dict[key])

In [28]:
# setup change from defaults
inline_parms = {"lr": .0001, "dataset": "cifar100", "optimizer": "adam", 
                "save_dir": './cf100test', \
                "p_x_weight": 0, "p_y_given_x_weight": 1.0, "p_x_y_weight": 0.0, \
                "new_energy": 1.0, \
                "sigma": .03, "width": 10, "depth": 28, "plot_uncond": False, \
                "uncond": False, "decay_epochs": [], \
                "ckpt_every": 10, \
                 "n_epochs": 150} 

# instantiate
args = train_args(inline_parms)
args.n_classes = 100 if args.dataset == "cifar100" else 10

# run
main(args)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
| Wide-Resnet 28x10
ev:  valid




valid: Epoch -1: Valid Loss 4.6053972244262695, Valid Acc 0.012400000356137753


0it [00:00, ?it/s]

P(y|x) 0:0 loss=   4.599646091, acc=   0.031250000


101it [00:26,  3.95it/s]

P(y|x) 0:100 loss=   4.484840393, acc=   0.000000000


201it [00:52,  3.93it/s]

P(y|x) 0:200 loss=   4.304376125, acc=   0.031250000


301it [01:17,  3.93it/s]

P(y|x) 0:300 loss=   4.292606354, acc=   0.046875000


401it [01:43,  3.91it/s]

P(y|x) 0:400 loss=   4.167390347, acc=   0.078125000


501it [02:08,  3.89it/s]

P(y|x) 0:500 loss=   4.033817291, acc=   0.093750000


601it [02:34,  3.89it/s]

P(y|x) 0:600 loss=   4.154963017, acc=   0.093750000


701it [03:00,  3.89it/s]

P(y|x) 0:700 loss=   3.935567617, acc=   0.078125000


703it [03:00,  3.89it/s]


ev:  test
ev:  train
ev:  valid
valid: Epoch 0: Valid Loss 3.9539413452148438, Valid Acc 0.09059999883174896
Best Valid!: 0.09059999883174896


98it [00:25,  3.89it/s]

P(y|x) 1:800 loss=   4.045700550, acc=   0.062500000


198it [00:51,  3.87it/s]

P(y|x) 1:900 loss=   3.925425053, acc=   0.109375000


298it [01:17,  3.87it/s]

P(y|x) 1:1000 loss=   3.817583084, acc=   0.109375000


398it [01:43,  3.86it/s]

P(y|x) 1:1100 loss=   3.731023550, acc=   0.156250000


498it [02:08,  3.86it/s]

P(y|x) 1:1200 loss=   3.687613964, acc=   0.125000000


598it [02:34,  3.86it/s]

P(y|x) 1:1300 loss=   3.770144939, acc=   0.093750000


698it [03:00,  3.85it/s]

P(y|x) 1:1400 loss=   3.490277767, acc=   0.203125000


703it [03:02,  3.86it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 1: Valid Loss 3.639521598815918, Valid Acc 0.15240000188350677
Best Valid!: 0.15240000188350677


95it [00:25,  3.87it/s]

P(y|x) 2:1500 loss=   3.692127943, acc=   0.078125000


195it [00:50,  3.86it/s]

P(y|x) 2:1600 loss=   3.606184721, acc=   0.140625000


295it [01:16,  3.86it/s]

P(y|x) 2:1700 loss=   3.472673655, acc=   0.187500000


395it [01:42,  3.86it/s]

P(y|x) 2:1800 loss=   3.465282440, acc=   0.171875000


495it [02:08,  3.87it/s]

P(y|x) 2:1900 loss=   3.266919613, acc=   0.234375000


595it [02:34,  3.86it/s]

P(y|x) 2:2000 loss=   3.328949690, acc=   0.234375000


695it [03:00,  3.87it/s]

P(y|x) 2:2100 loss=   3.131244183, acc=   0.281250000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 2: Valid Loss 3.2104876041412354, Valid Acc 0.23430000245571136
Best Valid!: 0.23430000245571136


92it [00:24,  3.88it/s]

P(y|x) 3:2200 loss=   2.938931465, acc=   0.359375000


192it [00:50,  3.87it/s]

P(y|x) 3:2300 loss=   2.946305752, acc=   0.328125000


292it [01:15,  3.87it/s]

P(y|x) 3:2400 loss=   3.251701355, acc=   0.171875000


392it [01:41,  3.87it/s]

P(y|x) 3:2500 loss=   3.092939615, acc=   0.281250000


492it [02:07,  3.87it/s]

P(y|x) 3:2600 loss=   2.980464697, acc=   0.296875000


592it [02:33,  3.87it/s]

P(y|x) 3:2700 loss=   3.063040257, acc=   0.218750000


692it [02:59,  3.87it/s]

P(y|x) 3:2800 loss=   3.002085447, acc=   0.281250000


703it [03:02,  3.86it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 3: Valid Loss 2.9307057857513428, Valid Acc 0.27880001068115234
Best Valid!: 0.27880001068115234


89it [00:23,  3.87it/s]

P(y|x) 4:2900 loss=   2.834283113, acc=   0.250000000


189it [00:49,  3.86it/s]

P(y|x) 4:3000 loss=   2.723660469, acc=   0.359375000


289it [01:15,  3.86it/s]

P(y|x) 4:3100 loss=   3.010651350, acc=   0.281250000


389it [01:40,  3.87it/s]

P(y|x) 4:3200 loss=   2.743125916, acc=   0.328125000


489it [02:06,  3.87it/s]

P(y|x) 4:3300 loss=   2.727685690, acc=   0.312500000


589it [02:32,  3.87it/s]

P(y|x) 4:3400 loss=   2.901103497, acc=   0.281250000


689it [02:58,  3.86it/s]

P(y|x) 4:3500 loss=   2.700746298, acc=   0.328125000


703it [03:02,  3.86it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 4: Valid Loss 2.8233964443206787, Valid Acc 0.3133000135421753
Best Valid!: 0.3133000135421753


86it [00:22,  3.86it/s]

P(y|x) 5:3600 loss=   2.921193838, acc=   0.250000000


186it [00:48,  3.86it/s]

P(y|x) 5:3700 loss=   2.694098234, acc=   0.328125000


286it [01:14,  3.86it/s]

P(y|x) 5:3800 loss=   2.997656345, acc=   0.343750000


386it [01:40,  3.85it/s]

P(y|x) 5:3900 loss=   2.649906397, acc=   0.281250000


486it [02:06,  3.85it/s]

P(y|x) 5:4000 loss=   2.933452129, acc=   0.312500000


586it [02:32,  3.86it/s]

P(y|x) 5:4100 loss=   2.858906269, acc=   0.281250000


686it [02:57,  3.86it/s]

P(y|x) 5:4200 loss=   2.760485411, acc=   0.328125000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 5: Valid Loss 2.6234214305877686, Valid Acc 0.3605000078678131
Best Valid!: 0.3605000078678131


83it [00:21,  3.85it/s]

P(y|x) 6:4300 loss=   2.587633610, acc=   0.359375000


183it [00:47,  3.86it/s]

P(y|x) 6:4400 loss=   2.650614262, acc=   0.390625000


283it [01:13,  3.85it/s]

P(y|x) 6:4500 loss=   2.668899775, acc=   0.296875000


383it [01:39,  3.85it/s]

P(y|x) 6:4600 loss=   2.612883568, acc=   0.375000000


483it [02:05,  3.86it/s]

P(y|x) 6:4700 loss=   2.428915024, acc=   0.453125000


583it [02:31,  3.85it/s]

P(y|x) 6:4800 loss=   2.447144747, acc=   0.453125000


683it [02:57,  3.85it/s]

P(y|x) 6:4900 loss=   2.232503653, acc=   0.468750000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 6: Valid Loss 2.4784762859344482, Valid Acc 0.397599995136261
Best Valid!: 0.397599995136261


80it [00:21,  3.86it/s]

P(y|x) 7:5000 loss=   2.437766075, acc=   0.437500000


180it [00:47,  3.84it/s]

P(y|x) 7:5100 loss=   2.468589783, acc=   0.468750000


280it [01:12,  3.86it/s]

P(y|x) 7:5200 loss=   2.476087809, acc=   0.468750000


380it [01:38,  3.84it/s]

P(y|x) 7:5300 loss=   2.555784702, acc=   0.421875000


480it [02:04,  3.85it/s]

P(y|x) 7:5400 loss=   2.361431837, acc=   0.500000000


580it [02:30,  3.86it/s]

P(y|x) 7:5500 loss=   2.336679459, acc=   0.437500000


680it [02:56,  3.85it/s]

P(y|x) 7:5600 loss=   2.319407463, acc=   0.484375000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 7: Valid Loss 2.424191474914551, Valid Acc 0.40549999475479126
Best Valid!: 0.40549999475479126


77it [00:20,  3.86it/s]

P(y|x) 8:5700 loss=   2.525135279, acc=   0.359375000


177it [00:46,  3.85it/s]

P(y|x) 8:5800 loss=   2.531471014, acc=   0.296875000


277it [01:12,  3.87it/s]

P(y|x) 8:5900 loss=   2.426172733, acc=   0.406250000


377it [01:37,  3.86it/s]

P(y|x) 8:6000 loss=   2.187738657, acc=   0.500000000


477it [02:03,  3.86it/s]

P(y|x) 8:6100 loss=   2.168245077, acc=   0.515625000


577it [02:29,  3.87it/s]

P(y|x) 8:6200 loss=   2.215174675, acc=   0.437500000


677it [02:55,  3.87it/s]

P(y|x) 8:6300 loss=   2.258200884, acc=   0.343750000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 8: Valid Loss 2.3409483432769775, Valid Acc 0.435699999332428
Best Valid!: 0.435699999332428


74it [00:19,  3.86it/s]

P(y|x) 9:6400 loss=   2.317859650, acc=   0.343750000


174it [00:45,  3.86it/s]

P(y|x) 9:6500 loss=   2.666694164, acc=   0.343750000


274it [01:11,  3.86it/s]

P(y|x) 9:6600 loss=   2.256363630, acc=   0.437500000


374it [01:37,  3.86it/s]

P(y|x) 9:6700 loss=   2.445895910, acc=   0.437500000


474it [02:03,  3.86it/s]

P(y|x) 9:6800 loss=   2.382975817, acc=   0.421875000


574it [02:29,  3.86it/s]

P(y|x) 9:6900 loss=   2.168658257, acc=   0.484375000


674it [02:54,  3.85it/s]

P(y|x) 9:7000 loss=   2.201116800, acc=   0.453125000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 9: Valid Loss 2.3178632259368896, Valid Acc 0.44839999079704285
Best Valid!: 0.44839999079704285


71it [00:18,  3.87it/s]

P(y|x) 10:7100 loss=   2.102508068, acc=   0.484375000


171it [00:44,  3.87it/s]

P(y|x) 10:7200 loss=   2.263842821, acc=   0.406250000


271it [01:10,  3.86it/s]

P(y|x) 10:7300 loss=   2.058984995, acc=   0.546875000


371it [01:36,  3.86it/s]

P(y|x) 10:7400 loss=   1.977419853, acc=   0.468750000


471it [02:02,  3.87it/s]

P(y|x) 10:7500 loss=   2.169488192, acc=   0.390625000


571it [02:28,  3.87it/s]

P(y|x) 10:7600 loss=   2.269270420, acc=   0.406250000


671it [02:53,  3.87it/s]

P(y|x) 10:7700 loss=   2.205634594, acc=   0.406250000


703it [03:02,  3.86it/s]


ev:  test
ev:  train
ev:  valid
valid: Epoch 10: Valid Loss 2.139014959335327, Valid Acc 0.4814000129699707
Best Valid!: 0.4814000129699707


68it [00:17,  3.86it/s]

P(y|x) 11:7800 loss=   1.999369860, acc=   0.484375000


168it [00:43,  3.87it/s]

P(y|x) 11:7900 loss=   1.966028452, acc=   0.500000000


268it [01:09,  3.86it/s]

P(y|x) 11:8000 loss=   2.219027042, acc=   0.437500000


368it [01:35,  3.86it/s]

P(y|x) 11:8100 loss=   2.071799040, acc=   0.531250000


468it [02:01,  3.87it/s]

P(y|x) 11:8200 loss=   2.214517593, acc=   0.500000000


568it [02:27,  3.87it/s]

P(y|x) 11:8300 loss=   2.057438612, acc=   0.546875000


668it [02:53,  3.86it/s]

P(y|x) 11:8400 loss=   2.350553989, acc=   0.453125000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 11: Valid Loss 2.1178650856018066, Valid Acc 0.47620001435279846


65it [00:17,  3.84it/s]

P(y|x) 12:8500 loss=   2.268051863, acc=   0.484375000


165it [00:43,  3.85it/s]

P(y|x) 12:8600 loss=   2.044133186, acc=   0.562500000


265it [01:09,  3.86it/s]

P(y|x) 12:8700 loss=   2.069287062, acc=   0.468750000


365it [01:35,  3.86it/s]

P(y|x) 12:8800 loss=   1.873128176, acc=   0.640625000


465it [02:01,  3.86it/s]

P(y|x) 12:8900 loss=   2.201498985, acc=   0.453125000


565it [02:26,  3.86it/s]

P(y|x) 12:9000 loss=   2.052884340, acc=   0.515625000


665it [02:52,  3.86it/s]

P(y|x) 12:9100 loss=   2.112526894, acc=   0.515625000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 12: Valid Loss 2.1044700145721436, Valid Acc 0.48669999837875366
Best Valid!: 0.48669999837875366


62it [00:16,  3.87it/s]

P(y|x) 13:9200 loss=   1.861656785, acc=   0.593750000


162it [00:42,  3.86it/s]

P(y|x) 13:9300 loss=   2.007452488, acc=   0.578125000


262it [01:08,  3.86it/s]

P(y|x) 13:9400 loss=   1.956529975, acc=   0.484375000


362it [01:34,  3.86it/s]

P(y|x) 13:9500 loss=   1.837924719, acc=   0.531250000


462it [02:00,  3.85it/s]

P(y|x) 13:9600 loss=   1.961441636, acc=   0.437500000


562it [02:26,  3.86it/s]

P(y|x) 13:9700 loss=   1.929265857, acc=   0.500000000


662it [02:51,  3.87it/s]

P(y|x) 13:9800 loss=   1.908538222, acc=   0.593750000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 13: Valid Loss 2.015876054763794, Valid Acc 0.5180000066757202
Best Valid!: 0.5180000066757202


59it [00:15,  3.86it/s]

P(y|x) 14:9900 loss=   1.841628194, acc=   0.625000000


159it [00:41,  3.87it/s]

P(y|x) 14:10000 loss=   2.214739799, acc=   0.421875000


259it [01:07,  3.85it/s]

P(y|x) 14:10100 loss=   1.937313795, acc=   0.453125000


359it [01:33,  3.86it/s]

P(y|x) 14:10200 loss=   2.016310692, acc=   0.453125000


459it [01:59,  3.86it/s]

P(y|x) 14:10300 loss=   2.063193798, acc=   0.515625000


559it [02:25,  3.87it/s]

P(y|x) 14:10400 loss=   1.751492858, acc=   0.609375000


659it [02:51,  3.86it/s]

P(y|x) 14:10500 loss=   2.192257404, acc=   0.453125000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 14: Valid Loss 1.9358015060424805, Valid Acc 0.5307999849319458
Best Valid!: 0.5307999849319458


56it [00:15,  3.86it/s]

P(y|x) 15:10600 loss=   1.940161347, acc=   0.546875000


156it [00:40,  3.86it/s]

P(y|x) 15:10700 loss=   2.069231510, acc=   0.437500000


256it [01:06,  3.87it/s]

P(y|x) 15:10800 loss=   2.083495855, acc=   0.468750000


356it [01:32,  3.87it/s]

P(y|x) 15:10900 loss=   2.087997913, acc=   0.484375000


456it [01:58,  3.87it/s]

P(y|x) 15:11000 loss=   2.103842020, acc=   0.375000000


556it [02:24,  3.87it/s]

P(y|x) 15:11100 loss=   1.971638560, acc=   0.484375000


656it [02:50,  3.87it/s]

P(y|x) 15:11200 loss=   1.841265678, acc=   0.625000000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 15: Valid Loss 1.930056095123291, Valid Acc 0.5231000185012817


53it [00:14,  3.86it/s]

P(y|x) 16:11300 loss=   1.911155939, acc=   0.468750000


153it [00:39,  3.87it/s]

P(y|x) 16:11400 loss=   1.661454797, acc=   0.531250000


253it [01:05,  3.86it/s]

P(y|x) 16:11500 loss=   1.759185553, acc=   0.500000000


353it [01:31,  3.87it/s]

P(y|x) 16:11600 loss=   1.793286562, acc=   0.515625000


453it [01:57,  3.85it/s]

P(y|x) 16:11700 loss=   1.896799088, acc=   0.593750000


553it [02:23,  3.87it/s]

P(y|x) 16:11800 loss=   1.721570969, acc=   0.703125000


653it [02:49,  3.86it/s]

P(y|x) 16:11900 loss=   1.835356236, acc=   0.609375000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 16: Valid Loss 1.8562158346176147, Valid Acc 0.5406000018119812
Best Valid!: 0.5406000018119812


50it [00:13,  3.86it/s]

P(y|x) 17:12000 loss=   1.536382318, acc=   0.609375000


150it [00:39,  3.85it/s]

P(y|x) 17:12100 loss=   1.722128868, acc=   0.562500000


250it [01:05,  3.86it/s]

P(y|x) 17:12200 loss=   1.759774566, acc=   0.578125000


350it [01:31,  3.86it/s]

P(y|x) 17:12300 loss=   1.837338209, acc=   0.531250000


450it [01:57,  3.85it/s]

P(y|x) 17:12400 loss=   1.781353951, acc=   0.578125000


550it [02:22,  3.86it/s]

P(y|x) 17:12500 loss=   1.975055695, acc=   0.515625000


650it [02:48,  3.87it/s]

P(y|x) 17:12600 loss=   1.664369941, acc=   0.640625000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 17: Valid Loss 1.8262107372283936, Valid Acc 0.552299976348877
Best Valid!: 0.552299976348877


47it [00:12,  3.86it/s]

P(y|x) 18:12700 loss=   1.887119293, acc=   0.531250000


147it [00:38,  3.86it/s]

P(y|x) 18:12800 loss=   1.657486200, acc=   0.500000000


247it [01:04,  3.85it/s]

P(y|x) 18:12900 loss=   1.668317080, acc=   0.562500000


347it [01:30,  3.86it/s]

P(y|x) 18:13000 loss=   1.828679085, acc=   0.609375000


447it [01:56,  3.86it/s]

P(y|x) 18:13100 loss=   1.907768369, acc=   0.578125000


547it [02:22,  3.86it/s]

P(y|x) 18:13200 loss=   1.809144020, acc=   0.578125000


647it [02:48,  3.85it/s]

P(y|x) 18:13300 loss=   1.756117344, acc=   0.578125000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 18: Valid Loss 1.8146023750305176, Valid Acc 0.5612999796867371
Best Valid!: 0.5612999796867371


44it [00:11,  3.84it/s]

P(y|x) 19:13400 loss=   1.343904376, acc=   0.687500000


144it [00:37,  3.87it/s]

P(y|x) 19:13500 loss=   1.802242041, acc=   0.578125000


244it [01:03,  3.86it/s]

P(y|x) 19:13600 loss=   1.582767844, acc=   0.609375000


344it [01:29,  3.86it/s]

P(y|x) 19:13700 loss=   2.016669273, acc=   0.500000000


444it [01:55,  3.87it/s]

P(y|x) 19:13800 loss=   1.525191426, acc=   0.671875000


544it [02:21,  3.86it/s]

P(y|x) 19:13900 loss=   1.792425632, acc=   0.500000000


644it [02:46,  3.87it/s]

P(y|x) 19:14000 loss=   1.823049068, acc=   0.484375000


703it [03:02,  3.86it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 19: Valid Loss 1.777417778968811, Valid Acc 0.5586000084877014


41it [00:11,  3.85it/s]

P(y|x) 20:14100 loss=   1.779782772, acc=   0.562500000


141it [00:37,  3.84it/s]

P(y|x) 20:14200 loss=   1.677129507, acc=   0.609375000


241it [01:03,  3.86it/s]

P(y|x) 20:14300 loss=   1.475612521, acc=   0.703125000


341it [01:28,  3.86it/s]

P(y|x) 20:14400 loss=   1.468178988, acc=   0.687500000


441it [01:54,  3.86it/s]

P(y|x) 20:14500 loss=   1.568040133, acc=   0.656250000


541it [02:20,  3.86it/s]

P(y|x) 20:14600 loss=   1.516267657, acc=   0.625000000


641it [02:46,  3.87it/s]

P(y|x) 20:14700 loss=   1.853888869, acc=   0.531250000


703it [03:02,  3.85it/s]


ev:  test
ev:  train
ev:  valid
valid: Epoch 20: Valid Loss 1.753759741783142, Valid Acc 0.5758000016212463
Best Valid!: 0.5758000016212463


38it [00:10,  3.86it/s]

P(y|x) 21:14800 loss=   1.543182969, acc=   0.656250000


138it [00:36,  3.85it/s]

P(y|x) 21:14900 loss=   1.442690611, acc=   0.593750000


238it [01:02,  3.86it/s]

P(y|x) 21:15000 loss=   1.700911999, acc=   0.562500000


338it [01:28,  3.86it/s]

P(y|x) 21:15100 loss=   1.751361251, acc=   0.609375000


438it [01:53,  3.86it/s]

P(y|x) 21:15200 loss=   1.746355057, acc=   0.578125000


538it [02:19,  3.86it/s]

P(y|x) 21:15300 loss=   1.793790936, acc=   0.546875000


638it [02:45,  3.86it/s]

P(y|x) 21:15400 loss=   1.593014240, acc=   0.593750000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 21: Valid Loss 1.7009152173995972, Valid Acc 0.5892000198364258
Best Valid!: 0.5892000198364258


35it [00:09,  3.86it/s]

P(y|x) 22:15500 loss=   1.632041216, acc=   0.640625000


135it [00:35,  3.86it/s]

P(y|x) 22:15600 loss=   1.561617851, acc=   0.625000000


235it [01:01,  3.85it/s]

P(y|x) 22:15700 loss=   1.579899192, acc=   0.734375000


335it [01:27,  3.86it/s]

P(y|x) 22:15800 loss=   1.542582512, acc=   0.671875000


435it [01:53,  3.86it/s]

P(y|x) 22:15900 loss=   1.558224678, acc=   0.625000000


535it [02:19,  3.86it/s]

P(y|x) 22:16000 loss=   1.431404948, acc=   0.703125000


635it [02:45,  3.86it/s]

P(y|x) 22:16100 loss=   1.478892326, acc=   0.640625000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 22: Valid Loss 1.7151812314987183, Valid Acc 0.5763000249862671


32it [00:08,  3.86it/s]

P(y|x) 23:16200 loss=   1.405479908, acc=   0.718750000


132it [00:34,  3.87it/s]

P(y|x) 23:16300 loss=   1.590447903, acc=   0.687500000


232it [01:00,  3.86it/s]

P(y|x) 23:16400 loss=   1.508146286, acc=   0.640625000


332it [01:26,  3.85it/s]

P(y|x) 23:16500 loss=   1.792066216, acc=   0.437500000


432it [01:52,  3.85it/s]

P(y|x) 23:16600 loss=   1.629430652, acc=   0.609375000


532it [02:18,  3.85it/s]

P(y|x) 23:16700 loss=   1.657998681, acc=   0.562500000


632it [02:44,  3.85it/s]

P(y|x) 23:16800 loss=   1.650426269, acc=   0.609375000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 23: Valid Loss 1.6447854042053223, Valid Acc 0.6011999845504761
Best Valid!: 0.6011999845504761


29it [00:07,  3.86it/s]

P(y|x) 24:16900 loss=   1.283354759, acc=   0.687500000


129it [00:33,  3.85it/s]

P(y|x) 24:17000 loss=   1.460373163, acc=   0.687500000


229it [00:59,  3.85it/s]

P(y|x) 24:17100 loss=   1.370454073, acc=   0.625000000


329it [01:25,  3.85it/s]

P(y|x) 24:17200 loss=   1.474144697, acc=   0.671875000


429it [01:51,  3.86it/s]

P(y|x) 24:17300 loss=   1.414974570, acc=   0.640625000


529it [02:17,  3.85it/s]

P(y|x) 24:17400 loss=   1.352930784, acc=   0.734375000


629it [02:43,  3.86it/s]

P(y|x) 24:17500 loss=   1.570530295, acc=   0.593750000


703it [03:02,  3.84it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 24: Valid Loss 1.645080327987671, Valid Acc 0.6068999767303467
Best Valid!: 0.6068999767303467


26it [00:07,  3.86it/s]

P(y|x) 25:17600 loss=   1.329123974, acc=   0.750000000


126it [00:33,  3.87it/s]

P(y|x) 25:17700 loss=   1.327937365, acc=   0.781250000


226it [00:58,  3.85it/s]

P(y|x) 25:17800 loss=   1.515450835, acc=   0.656250000


326it [01:24,  3.86it/s]

P(y|x) 25:17900 loss=   1.275386572, acc=   0.687500000


426it [01:50,  3.85it/s]

P(y|x) 25:18000 loss=   1.348699808, acc=   0.671875000


526it [02:16,  3.85it/s]

P(y|x) 25:18100 loss=   1.454783440, acc=   0.640625000


626it [02:42,  3.85it/s]

P(y|x) 25:18200 loss=   1.581269979, acc=   0.625000000


703it [03:02,  3.84it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 25: Valid Loss 1.6137346029281616, Valid Acc 0.6100000143051147
Best Valid!: 0.6100000143051147


23it [00:06,  3.88it/s]

P(y|x) 26:18300 loss=   1.480441093, acc=   0.640625000


123it [00:32,  3.85it/s]

P(y|x) 26:18400 loss=   1.433011770, acc=   0.656250000


223it [00:58,  3.86it/s]

P(y|x) 26:18500 loss=   1.446976662, acc=   0.656250000


323it [01:24,  3.86it/s]

P(y|x) 26:18600 loss=   1.272966266, acc=   0.750000000


423it [01:50,  3.85it/s]

P(y|x) 26:18700 loss=   1.448083639, acc=   0.625000000


523it [02:16,  3.84it/s]

P(y|x) 26:18800 loss=   1.481725216, acc=   0.593750000


623it [02:41,  3.84it/s]

P(y|x) 26:18900 loss=   1.380092025, acc=   0.640625000


703it [03:02,  3.84it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 26: Valid Loss 1.5731242895126343, Valid Acc 0.6051999926567078


20it [00:05,  3.85it/s]

P(y|x) 27:19000 loss=   1.335165501, acc=   0.703125000


120it [00:31,  3.83it/s]

P(y|x) 27:19100 loss=   1.400745392, acc=   0.609375000


220it [00:57,  3.86it/s]

P(y|x) 27:19200 loss=   1.386926889, acc=   0.671875000


320it [01:23,  3.85it/s]

P(y|x) 27:19300 loss=   1.435583472, acc=   0.703125000


420it [01:49,  3.85it/s]

P(y|x) 27:19400 loss=   1.493075252, acc=   0.671875000


520it [02:15,  3.85it/s]

P(y|x) 27:19500 loss=   1.218312860, acc=   0.796875000


620it [02:41,  3.85it/s]

P(y|x) 27:19600 loss=   1.348308444, acc=   0.734375000


703it [03:03,  3.84it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 27: Valid Loss 1.6273465156555176, Valid Acc 0.6026999950408936


17it [00:04,  3.83it/s]

P(y|x) 28:19700 loss=   1.351013541, acc=   0.765625000


117it [00:30,  3.86it/s]

P(y|x) 28:19800 loss=   1.510923862, acc=   0.687500000


217it [00:56,  3.85it/s]

P(y|x) 28:19900 loss=   1.237861037, acc=   0.703125000


317it [01:22,  3.87it/s]

P(y|x) 28:20000 loss=   1.054390073, acc=   0.796875000


417it [01:48,  3.86it/s]

P(y|x) 28:20100 loss=   1.561848998, acc=   0.656250000


517it [02:14,  3.87it/s]

P(y|x) 28:20200 loss=   1.279292941, acc=   0.718750000


617it [02:40,  3.87it/s]

P(y|x) 28:20300 loss=   1.178907156, acc=   0.750000000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 28: Valid Loss 1.6097978353500366, Valid Acc 0.5946000218391418


14it [00:04,  3.83it/s]

P(y|x) 29:20400 loss=   1.434704900, acc=   0.640625000


114it [00:29,  3.84it/s]

P(y|x) 29:20500 loss=   1.167073250, acc=   0.718750000


214it [00:55,  3.86it/s]

P(y|x) 29:20600 loss=   1.251175404, acc=   0.750000000


314it [01:21,  3.85it/s]

P(y|x) 29:20700 loss=   1.557582974, acc=   0.640625000


414it [01:47,  3.85it/s]

P(y|x) 29:20800 loss=   1.282021523, acc=   0.718750000


514it [02:13,  3.85it/s]

P(y|x) 29:20900 loss=   1.228571177, acc=   0.765625000


614it [02:39,  3.84it/s]

P(y|x) 29:21000 loss=   1.165889025, acc=   0.718750000


703it [03:03,  3.84it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 29: Valid Loss 1.5171403884887695, Valid Acc 0.6243000030517578
Best Valid!: 0.6243000030517578


11it [00:03,  3.82it/s]

P(y|x) 30:21100 loss=   1.251165748, acc=   0.750000000


111it [00:29,  3.87it/s]

P(y|x) 30:21200 loss=   1.222701311, acc=   0.718750000


211it [00:55,  3.87it/s]

P(y|x) 30:21300 loss=   1.323420405, acc=   0.671875000


311it [01:20,  3.87it/s]

P(y|x) 30:21400 loss=   1.150354147, acc=   0.765625000


411it [01:46,  3.87it/s]

P(y|x) 30:21500 loss=   1.382223129, acc=   0.687500000


511it [02:12,  3.87it/s]

P(y|x) 30:21600 loss=   1.178226113, acc=   0.656250000


611it [02:38,  3.87it/s]

P(y|x) 30:21700 loss=   1.317034006, acc=   0.625000000


703it [03:02,  3.85it/s]


ev:  test
ev:  train
ev:  valid
valid: Epoch 30: Valid Loss 1.5293173789978027, Valid Acc 0.623199999332428


8it [00:02,  3.73it/s]

P(y|x) 31:21800 loss=   1.351155639, acc=   0.671875000


108it [00:28,  3.86it/s]

P(y|x) 31:21900 loss=   1.246185660, acc=   0.718750000


208it [00:54,  3.85it/s]

P(y|x) 31:22000 loss=   1.216895103, acc=   0.765625000


308it [01:20,  3.86it/s]

P(y|x) 31:22100 loss=   1.584110141, acc=   0.562500000


408it [01:46,  3.86it/s]

P(y|x) 31:22200 loss=   1.251858711, acc=   0.718750000


508it [02:11,  3.85it/s]

P(y|x) 31:22300 loss=   1.334847450, acc=   0.703125000


608it [02:37,  3.83it/s]

P(y|x) 31:22400 loss=   1.451857448, acc=   0.765625000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 31: Valid Loss 1.5276011228561401, Valid Acc 0.6288999915122986
Best Valid!: 0.6288999915122986


5it [00:01,  3.34it/s]

P(y|x) 32:22500 loss=   1.297370315, acc=   0.718750000


105it [00:27,  3.84it/s]

P(y|x) 32:22600 loss=   1.243438601, acc=   0.671875000


205it [00:53,  3.85it/s]

P(y|x) 32:22700 loss=   1.087593436, acc=   0.781250000


305it [01:19,  3.86it/s]

P(y|x) 32:22800 loss=   1.259362698, acc=   0.750000000


405it [01:45,  3.85it/s]

P(y|x) 32:22900 loss=   1.186129570, acc=   0.718750000


505it [02:11,  3.86it/s]

P(y|x) 32:23000 loss=   1.249648690, acc=   0.687500000


605it [02:37,  3.86it/s]

P(y|x) 32:23100 loss=   1.189528108, acc=   0.703125000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 32: Valid Loss 1.546297311782837, Valid Acc 0.6119999885559082


2it [00:00,  2.24it/s]

P(y|x) 33:23200 loss=   1.414382100, acc=   0.703125000


102it [00:26,  3.86it/s]

P(y|x) 33:23300 loss=   1.148466706, acc=   0.765625000


202it [00:52,  3.86it/s]

P(y|x) 33:23400 loss=   1.134742498, acc=   0.750000000


302it [01:18,  3.84it/s]

P(y|x) 33:23500 loss=   1.217616200, acc=   0.656250000


402it [01:44,  3.85it/s]

P(y|x) 33:23600 loss=   1.172966242, acc=   0.718750000


502it [02:10,  3.85it/s]

P(y|x) 33:23700 loss=   1.039559007, acc=   0.796875000


602it [02:36,  3.85it/s]

P(y|x) 33:23800 loss=   1.417770624, acc=   0.656250000


702it [03:02,  3.86it/s]

P(y|x) 33:23900 loss=   1.368712187, acc=   0.578125000


703it [03:02,  3.84it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 33: Valid Loss 1.5020865201950073, Valid Acc 0.6233000159263611


99it [00:26,  3.85it/s]

P(y|x) 34:24000 loss=   1.200539827, acc=   0.750000000


199it [00:52,  3.85it/s]

P(y|x) 34:24100 loss=   1.161836028, acc=   0.781250000


299it [01:18,  3.85it/s]

P(y|x) 34:24200 loss=   1.341526151, acc=   0.687500000


399it [01:43,  3.86it/s]

P(y|x) 34:24300 loss=   1.167903304, acc=   0.718750000


499it [02:09,  3.86it/s]

P(y|x) 34:24400 loss=   1.024001598, acc=   0.781250000


599it [02:35,  3.85it/s]

P(y|x) 34:24500 loss=   1.114147305, acc=   0.750000000


699it [03:01,  3.86it/s]

P(y|x) 34:24600 loss=   1.261210084, acc=   0.812500000


703it [03:02,  3.84it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 34: Valid Loss 1.463304042816162, Valid Acc 0.6342999935150146
Best Valid!: 0.6342999935150146


96it [00:25,  3.86it/s]

P(y|x) 35:24700 loss=   1.189855337, acc=   0.718750000


196it [00:51,  3.86it/s]

P(y|x) 35:24800 loss=   1.274633527, acc=   0.656250000


296it [01:17,  3.86it/s]

P(y|x) 35:24900 loss=   1.024738193, acc=   0.796875000


396it [01:42,  3.86it/s]

P(y|x) 35:25000 loss=   1.206036687, acc=   0.765625000


496it [02:08,  3.85it/s]

P(y|x) 35:25100 loss=   1.178143024, acc=   0.625000000


596it [02:34,  3.85it/s]

P(y|x) 35:25200 loss=   1.137218237, acc=   0.718750000


696it [03:00,  3.84it/s]

P(y|x) 35:25300 loss=   1.148291588, acc=   0.765625000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 35: Valid Loss 1.497987151145935, Valid Acc 0.6255999803543091


93it [00:24,  3.85it/s]

P(y|x) 36:25400 loss=   1.113485932, acc=   0.750000000


193it [00:50,  3.85it/s]

P(y|x) 36:25500 loss=   1.103509903, acc=   0.703125000


293it [01:16,  3.85it/s]

P(y|x) 36:25600 loss=   1.136860609, acc=   0.656250000


393it [01:42,  3.86it/s]

P(y|x) 36:25700 loss=   1.190143108, acc=   0.703125000


493it [02:08,  3.85it/s]

P(y|x) 36:25800 loss=   1.379639626, acc=   0.703125000


593it [02:34,  3.85it/s]

P(y|x) 36:25900 loss=   0.962895989, acc=   0.859375000


693it [03:00,  3.86it/s]

P(y|x) 36:26000 loss=   1.079730392, acc=   0.734375000


703it [03:02,  3.84it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 36: Valid Loss 1.4401533603668213, Valid Acc 0.650600016117096
Best Valid!: 0.650600016117096


90it [00:23,  3.86it/s]

P(y|x) 37:26100 loss=   1.139684558, acc=   0.703125000


190it [00:49,  3.84it/s]

P(y|x) 37:26200 loss=   1.311776161, acc=   0.671875000


290it [01:15,  3.86it/s]

P(y|x) 37:26300 loss=   1.032775760, acc=   0.718750000


390it [01:41,  3.86it/s]

P(y|x) 37:26400 loss=   1.282628775, acc=   0.718750000


490it [02:07,  3.86it/s]

P(y|x) 37:26500 loss=   1.124227166, acc=   0.765625000


590it [02:33,  3.85it/s]

P(y|x) 37:26600 loss=   0.956611156, acc=   0.750000000


690it [02:59,  3.86it/s]

P(y|x) 37:26700 loss=   1.179973125, acc=   0.765625000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 37: Valid Loss 1.429261326789856, Valid Acc 0.6491000056266785


87it [00:23,  3.85it/s]

P(y|x) 38:26800 loss=   0.869058013, acc=   0.828125000


187it [00:48,  3.86it/s]

P(y|x) 38:26900 loss=   1.021138430, acc=   0.781250000


287it [01:14,  3.86it/s]

P(y|x) 38:27000 loss=   0.957162440, acc=   0.765625000


387it [01:40,  3.87it/s]

P(y|x) 38:27100 loss=   1.031087875, acc=   0.781250000


487it [02:06,  3.87it/s]

P(y|x) 38:27200 loss=   1.127094626, acc=   0.781250000


587it [02:32,  3.87it/s]

P(y|x) 38:27300 loss=   1.018877149, acc=   0.734375000


687it [02:58,  3.86it/s]

P(y|x) 38:27400 loss=   1.233655691, acc=   0.687500000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 38: Valid Loss 1.4384465217590332, Valid Acc 0.6446999907493591


84it [00:22,  3.85it/s]

P(y|x) 39:27500 loss=   1.042771220, acc=   0.781250000


184it [00:48,  3.86it/s]

P(y|x) 39:27600 loss=   1.078070283, acc=   0.765625000


284it [01:13,  3.85it/s]

P(y|x) 39:27700 loss=   0.900600195, acc=   0.828125000


384it [01:39,  3.85it/s]

P(y|x) 39:27800 loss=   0.890189409, acc=   0.875000000


484it [02:05,  3.85it/s]

P(y|x) 39:27900 loss=   1.069468260, acc=   0.796875000


584it [02:31,  3.86it/s]

P(y|x) 39:28000 loss=   1.136058569, acc=   0.781250000


684it [02:57,  3.86it/s]

P(y|x) 39:28100 loss=   1.259063363, acc=   0.734375000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 39: Valid Loss 1.3991903066635132, Valid Acc 0.6449000239372253


81it [00:21,  3.84it/s]

P(y|x) 40:28200 loss=   0.875890076, acc=   0.890625000


181it [00:47,  3.86it/s]

P(y|x) 40:28300 loss=   0.998317182, acc=   0.781250000


281it [01:13,  3.87it/s]

P(y|x) 40:28400 loss=   0.943525851, acc=   0.828125000


381it [01:39,  3.87it/s]

P(y|x) 40:28500 loss=   1.035362482, acc=   0.828125000


481it [02:04,  3.87it/s]

P(y|x) 40:28600 loss=   1.058713794, acc=   0.781250000


581it [02:30,  3.87it/s]

P(y|x) 40:28700 loss=   0.890505791, acc=   0.796875000


681it [02:56,  3.88it/s]

P(y|x) 40:28800 loss=   0.893397808, acc=   0.843750000


703it [03:02,  3.85it/s]


ev:  test
ev:  train
ev:  valid
valid: Epoch 40: Valid Loss 1.3794909715652466, Valid Acc 0.6503000259399414


78it [00:20,  3.85it/s]

P(y|x) 41:28900 loss=   1.161010146, acc=   0.750000000


178it [00:46,  3.85it/s]

P(y|x) 41:29000 loss=   1.293663383, acc=   0.671875000


278it [01:12,  3.85it/s]

P(y|x) 41:29100 loss=   0.910365880, acc=   0.796875000


378it [01:38,  3.85it/s]

P(y|x) 41:29200 loss=   1.089865446, acc=   0.812500000


478it [02:04,  3.84it/s]

P(y|x) 41:29300 loss=   1.082173586, acc=   0.828125000


578it [02:30,  3.84it/s]

P(y|x) 41:29400 loss=   0.929327011, acc=   0.812500000


678it [02:56,  3.85it/s]

P(y|x) 41:29500 loss=   1.058165669, acc=   0.750000000


703it [03:03,  3.84it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 41: Valid Loss 1.3998686075210571, Valid Acc 0.6489999890327454


75it [00:19,  3.86it/s]

P(y|x) 42:29600 loss=   0.810867071, acc=   0.890625000


175it [00:45,  3.85it/s]

P(y|x) 42:29700 loss=   1.023315072, acc=   0.765625000


275it [01:11,  3.86it/s]

P(y|x) 42:29800 loss=   1.026719093, acc=   0.812500000


375it [01:37,  3.87it/s]

P(y|x) 42:29900 loss=   0.875226796, acc=   0.812500000


475it [02:03,  3.87it/s]

P(y|x) 42:30000 loss=   1.035631537, acc=   0.781250000


575it [02:29,  3.87it/s]

P(y|x) 42:30100 loss=   1.093537569, acc=   0.687500000


675it [02:54,  3.87it/s]

P(y|x) 42:30200 loss=   1.152758121, acc=   0.734375000


703it [03:02,  3.86it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 42: Valid Loss 1.3944425582885742, Valid Acc 0.6481999754905701


72it [00:19,  3.83it/s]

P(y|x) 43:30300 loss=   0.954701424, acc=   0.734375000


172it [00:44,  3.86it/s]

P(y|x) 43:30400 loss=   1.019351125, acc=   0.750000000


272it [01:10,  3.86it/s]

P(y|x) 43:30500 loss=   1.176701069, acc=   0.765625000


372it [01:36,  3.86it/s]

P(y|x) 43:30600 loss=   0.964321911, acc=   0.750000000


472it [02:02,  3.85it/s]

P(y|x) 43:30700 loss=   1.085577726, acc=   0.781250000


572it [02:28,  3.86it/s]

P(y|x) 43:30800 loss=   1.126240849, acc=   0.687500000


672it [02:54,  3.86it/s]

P(y|x) 43:30900 loss=   1.122811198, acc=   0.703125000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 43: Valid Loss 1.347421407699585, Valid Acc 0.6592000126838684
Best Valid!: 0.6592000126838684


69it [00:18,  3.86it/s]

P(y|x) 44:31000 loss=   0.890353203, acc=   0.781250000


169it [00:44,  3.85it/s]

P(y|x) 44:31100 loss=   1.050178289, acc=   0.734375000


269it [01:10,  3.86it/s]

P(y|x) 44:31200 loss=   0.965242982, acc=   0.843750000


369it [01:35,  3.86it/s]

P(y|x) 44:31300 loss=   0.985950828, acc=   0.812500000


469it [02:01,  3.85it/s]

P(y|x) 44:31400 loss=   0.941279829, acc=   0.765625000


569it [02:27,  3.86it/s]

P(y|x) 44:31500 loss=   1.066795111, acc=   0.687500000


669it [02:53,  3.86it/s]

P(y|x) 44:31600 loss=   1.052116036, acc=   0.718750000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 44: Valid Loss 1.414549708366394, Valid Acc 0.6403999924659729


66it [00:17,  3.85it/s]

P(y|x) 45:31700 loss=   1.015266299, acc=   0.781250000


166it [00:43,  3.85it/s]

P(y|x) 45:31800 loss=   1.001642227, acc=   0.734375000


266it [01:09,  3.84it/s]

P(y|x) 45:31900 loss=   0.840173662, acc=   0.781250000


366it [01:35,  3.85it/s]

P(y|x) 45:32000 loss=   0.935906112, acc=   0.812500000


466it [02:01,  3.86it/s]

P(y|x) 45:32100 loss=   1.035374880, acc=   0.765625000


566it [02:27,  3.84it/s]

P(y|x) 45:32200 loss=   0.862004578, acc=   0.828125000


666it [02:53,  3.84it/s]

P(y|x) 45:32300 loss=   1.025116682, acc=   0.796875000


703it [03:03,  3.84it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 45: Valid Loss 1.4001153707504272, Valid Acc 0.6474000215530396


63it [00:16,  3.86it/s]

P(y|x) 46:32400 loss=   0.960521817, acc=   0.750000000


163it [00:42,  3.86it/s]

P(y|x) 46:32500 loss=   0.895793557, acc=   0.796875000


263it [01:08,  3.85it/s]

P(y|x) 46:32600 loss=   0.820612788, acc=   0.812500000


363it [01:34,  3.87it/s]

P(y|x) 46:32700 loss=   0.752783537, acc=   0.828125000


463it [02:00,  3.86it/s]

P(y|x) 46:32800 loss=   0.800599813, acc=   0.812500000


563it [02:26,  3.86it/s]

P(y|x) 46:32900 loss=   0.978515208, acc=   0.796875000


663it [02:52,  3.86it/s]

P(y|x) 46:33000 loss=   0.733692527, acc=   0.843750000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 46: Valid Loss 1.310713291168213, Valid Acc 0.6629999876022339
Best Valid!: 0.6629999876022339


60it [00:15,  3.86it/s]

P(y|x) 47:33100 loss=   0.974899590, acc=   0.765625000


160it [00:41,  3.86it/s]

P(y|x) 47:33200 loss=   0.771334589, acc=   0.828125000


260it [01:07,  3.85it/s]

P(y|x) 47:33300 loss=   0.832991719, acc=   0.875000000


360it [01:33,  3.85it/s]

P(y|x) 47:33400 loss=   0.900476456, acc=   0.796875000


460it [01:59,  3.87it/s]

P(y|x) 47:33500 loss=   0.864300609, acc=   0.812500000


560it [02:25,  3.85it/s]

P(y|x) 47:33600 loss=   0.906395435, acc=   0.828125000


660it [02:51,  3.86it/s]

P(y|x) 47:33700 loss=   1.098219752, acc=   0.718750000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 47: Valid Loss 1.3306605815887451, Valid Acc 0.6556000113487244


57it [00:15,  3.85it/s]

P(y|x) 48:33800 loss=   0.827933133, acc=   0.828125000


157it [00:41,  3.84it/s]

P(y|x) 48:33900 loss=   0.809886277, acc=   0.875000000


257it [01:07,  3.86it/s]

P(y|x) 48:34000 loss=   0.667915761, acc=   0.921875000


357it [01:33,  3.86it/s]

P(y|x) 48:34100 loss=   0.811205447, acc=   0.812500000


457it [01:59,  3.86it/s]

P(y|x) 48:34200 loss=   0.957395494, acc=   0.796875000


557it [02:24,  3.85it/s]

P(y|x) 48:34300 loss=   1.001637578, acc=   0.781250000


657it [02:50,  3.87it/s]

P(y|x) 48:34400 loss=   0.823461175, acc=   0.828125000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 48: Valid Loss 1.3095746040344238, Valid Acc 0.6689000129699707
Best Valid!: 0.6689000129699707


54it [00:14,  3.87it/s]

P(y|x) 49:34500 loss=   0.765704691, acc=   0.828125000


154it [00:40,  3.86it/s]

P(y|x) 49:34600 loss=   0.855673492, acc=   0.828125000


254it [01:06,  3.86it/s]

P(y|x) 49:34700 loss=   0.825066209, acc=   0.828125000


354it [01:32,  3.86it/s]

P(y|x) 49:34800 loss=   0.925358415, acc=   0.781250000


454it [01:57,  3.86it/s]

P(y|x) 49:34900 loss=   0.831928551, acc=   0.859375000


554it [02:23,  3.86it/s]

P(y|x) 49:35000 loss=   0.956749082, acc=   0.781250000


654it [02:49,  3.87it/s]

P(y|x) 49:35100 loss=   0.900833488, acc=   0.796875000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 49: Valid Loss 1.3703778982162476, Valid Acc 0.6542999744415283


51it [00:13,  3.86it/s]

P(y|x) 50:35200 loss=   0.730649948, acc=   0.843750000


151it [00:39,  3.86it/s]

P(y|x) 50:35300 loss=   0.845468819, acc=   0.828125000


251it [01:05,  3.85it/s]

P(y|x) 50:35400 loss=   0.840190113, acc=   0.765625000


351it [01:31,  3.85it/s]

P(y|x) 50:35500 loss=   0.698889852, acc=   0.875000000


451it [01:57,  3.86it/s]

P(y|x) 50:35600 loss=   0.652995288, acc=   0.937500000


551it [02:23,  3.86it/s]

P(y|x) 50:35700 loss=   0.871623278, acc=   0.812500000


651it [02:49,  3.85it/s]

P(y|x) 50:35800 loss=   0.734700024, acc=   0.875000000


703it [03:02,  3.85it/s]


ev:  test
ev:  train
ev:  valid
valid: Epoch 50: Valid Loss 1.3452261686325073, Valid Acc 0.663100004196167


48it [00:12,  3.86it/s]

P(y|x) 51:35900 loss=   0.719376683, acc=   0.906250000


148it [00:38,  3.84it/s]

P(y|x) 51:36000 loss=   0.704680920, acc=   0.875000000


248it [01:04,  3.85it/s]

P(y|x) 51:36100 loss=   0.812333167, acc=   0.859375000


348it [01:30,  3.85it/s]

P(y|x) 51:36200 loss=   0.657348156, acc=   0.875000000


448it [01:56,  3.85it/s]

P(y|x) 51:36300 loss=   0.920252681, acc=   0.812500000


548it [02:22,  3.85it/s]

P(y|x) 51:36400 loss=   0.934542179, acc=   0.781250000


648it [02:48,  3.86it/s]

P(y|x) 51:36500 loss=   0.865194499, acc=   0.765625000


703it [03:02,  3.84it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 51: Valid Loss 1.2726163864135742, Valid Acc 0.6762999892234802
Best Valid!: 0.6762999892234802


45it [00:12,  3.86it/s]

P(y|x) 52:36600 loss=   0.684158266, acc=   0.859375000


145it [00:37,  3.84it/s]

P(y|x) 52:36700 loss=   0.706363022, acc=   0.937500000


245it [01:03,  3.85it/s]

P(y|x) 52:36800 loss=   0.809124470, acc=   0.843750000


345it [01:29,  3.86it/s]

P(y|x) 52:36900 loss=   0.988233566, acc=   0.734375000


445it [01:55,  3.84it/s]

P(y|x) 52:37000 loss=   0.831007957, acc=   0.796875000


545it [02:21,  3.85it/s]

P(y|x) 52:37100 loss=   0.861087382, acc=   0.859375000


645it [02:47,  3.86it/s]

P(y|x) 52:37200 loss=   0.679435611, acc=   0.890625000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 52: Valid Loss 1.3264254331588745, Valid Acc 0.6586999893188477


42it [00:11,  3.85it/s]

P(y|x) 53:37300 loss=   0.630620062, acc=   0.906250000


142it [00:37,  3.86it/s]

P(y|x) 53:37400 loss=   0.835867822, acc=   0.828125000


242it [01:03,  3.86it/s]

P(y|x) 53:37500 loss=   0.626030147, acc=   0.937500000


342it [01:28,  3.85it/s]

P(y|x) 53:37600 loss=   0.688719869, acc=   0.906250000


442it [01:54,  3.86it/s]

P(y|x) 53:37700 loss=   0.882711828, acc=   0.796875000


542it [02:20,  3.86it/s]

P(y|x) 53:37800 loss=   0.813791156, acc=   0.843750000


642it [02:46,  3.86it/s]

P(y|x) 53:37900 loss=   0.902265429, acc=   0.750000000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 53: Valid Loss 1.3096308708190918, Valid Acc 0.6654999852180481


39it [00:10,  3.86it/s]

P(y|x) 54:38000 loss=   0.785450399, acc=   0.828125000


139it [00:36,  3.85it/s]

P(y|x) 54:38100 loss=   0.924891531, acc=   0.796875000


239it [01:02,  3.85it/s]

P(y|x) 54:38200 loss=   0.726829231, acc=   0.859375000


339it [01:28,  3.85it/s]

P(y|x) 54:38300 loss=   0.687141418, acc=   0.859375000


439it [01:54,  3.85it/s]

P(y|x) 54:38400 loss=   0.659076929, acc=   0.875000000


539it [02:20,  3.85it/s]

P(y|x) 54:38500 loss=   0.810039103, acc=   0.812500000


639it [02:46,  3.85it/s]

P(y|x) 54:38600 loss=   0.820548773, acc=   0.812500000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 54: Valid Loss 1.2641922235488892, Valid Acc 0.6748999953269958


36it [00:09,  3.86it/s]

P(y|x) 55:38700 loss=   0.653660417, acc=   0.828125000


136it [00:35,  3.86it/s]

P(y|x) 55:38800 loss=   0.705472469, acc=   0.843750000


236it [01:01,  3.85it/s]

P(y|x) 55:38900 loss=   0.675267458, acc=   0.921875000


336it [01:27,  3.85it/s]

P(y|x) 55:39000 loss=   0.819272161, acc=   0.765625000


436it [01:53,  3.85it/s]

P(y|x) 55:39100 loss=   0.809910536, acc=   0.812500000


536it [02:19,  3.87it/s]

P(y|x) 55:39200 loss=   0.769898355, acc=   0.859375000


636it [02:45,  3.87it/s]

P(y|x) 55:39300 loss=   0.742359459, acc=   0.843750000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 55: Valid Loss 1.299078106880188, Valid Acc 0.6690000295639038


33it [00:08,  3.87it/s]

P(y|x) 56:39400 loss=   0.922665417, acc=   0.781250000


133it [00:34,  3.86it/s]

P(y|x) 56:39500 loss=   0.642448246, acc=   0.890625000


233it [01:00,  3.86it/s]

P(y|x) 56:39600 loss=   0.761615574, acc=   0.781250000


333it [01:26,  3.85it/s]

P(y|x) 56:39700 loss=   0.630797029, acc=   0.890625000


433it [01:52,  3.85it/s]

P(y|x) 56:39800 loss=   0.763215005, acc=   0.812500000


533it [02:18,  3.87it/s]

P(y|x) 56:39900 loss=   0.534400284, acc=   0.937500000


633it [02:44,  3.86it/s]

P(y|x) 56:40000 loss=   0.680851340, acc=   0.906250000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 56: Valid Loss 1.2960362434387207, Valid Acc 0.6653000116348267


30it [00:08,  3.85it/s]

P(y|x) 57:40100 loss=   0.692043662, acc=   0.859375000


130it [00:34,  3.85it/s]

P(y|x) 57:40200 loss=   0.590920746, acc=   0.921875000


230it [01:00,  3.86it/s]

P(y|x) 57:40300 loss=   0.529539347, acc=   0.921875000


330it [01:26,  3.86it/s]

P(y|x) 57:40400 loss=   0.777864218, acc=   0.781250000


430it [01:51,  3.86it/s]

P(y|x) 57:40500 loss=   0.741867542, acc=   0.828125000


530it [02:17,  3.86it/s]

P(y|x) 57:40600 loss=   0.765527308, acc=   0.859375000


630it [02:43,  3.86it/s]

P(y|x) 57:40700 loss=   0.746624172, acc=   0.875000000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 57: Valid Loss 1.2851797342300415, Valid Acc 0.6700999736785889


27it [00:07,  3.86it/s]

P(y|x) 58:40800 loss=   0.680329919, acc=   0.890625000


127it [00:33,  3.86it/s]

P(y|x) 58:40900 loss=   0.732975066, acc=   0.828125000


227it [00:59,  3.86it/s]

P(y|x) 58:41000 loss=   0.675868928, acc=   0.875000000


327it [01:25,  3.84it/s]

P(y|x) 58:41100 loss=   0.828526497, acc=   0.812500000


427it [01:51,  3.86it/s]

P(y|x) 58:41200 loss=   0.820347548, acc=   0.812500000


527it [02:17,  3.87it/s]

P(y|x) 58:41300 loss=   0.612432301, acc=   0.859375000


627it [02:42,  3.85it/s]

P(y|x) 58:41400 loss=   0.729453325, acc=   0.828125000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 58: Valid Loss 1.2651193141937256, Valid Acc 0.6708999872207642


24it [00:06,  3.87it/s]

P(y|x) 59:41500 loss=   0.759733856, acc=   0.859375000


124it [00:32,  3.85it/s]

P(y|x) 59:41600 loss=   0.632450044, acc=   0.890625000


224it [00:58,  3.84it/s]

P(y|x) 59:41700 loss=   0.655544460, acc=   0.859375000


324it [01:24,  3.85it/s]

P(y|x) 59:41800 loss=   0.721932232, acc=   0.843750000


424it [01:50,  3.86it/s]

P(y|x) 59:41900 loss=   0.569417179, acc=   0.906250000


524it [02:16,  3.85it/s]

P(y|x) 59:42000 loss=   0.639448345, acc=   0.875000000


624it [02:42,  3.86it/s]

P(y|x) 59:42100 loss=   0.752417922, acc=   0.828125000


703it [03:02,  3.84it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 59: Valid Loss 1.2950444221496582, Valid Acc 0.6665999889373779


21it [00:05,  3.85it/s]

P(y|x) 60:42200 loss=   0.619020224, acc=   0.890625000


121it [00:31,  3.85it/s]

P(y|x) 60:42300 loss=   0.513042033, acc=   0.875000000


221it [00:57,  3.86it/s]

P(y|x) 60:42400 loss=   0.703173757, acc=   0.843750000


321it [01:23,  3.87it/s]

P(y|x) 60:42500 loss=   0.642369390, acc=   0.875000000


421it [01:49,  3.86it/s]

P(y|x) 60:42600 loss=   0.732590675, acc=   0.859375000


521it [02:15,  3.86it/s]

P(y|x) 60:42700 loss=   0.643556952, acc=   0.921875000


621it [02:41,  3.86it/s]

P(y|x) 60:42800 loss=   0.763864875, acc=   0.812500000


703it [03:02,  3.85it/s]


ev:  test
ev:  train
ev:  valid
valid: Epoch 60: Valid Loss 1.2785909175872803, Valid Acc 0.6686999797821045


18it [00:05,  3.85it/s]

P(y|x) 61:42900 loss=   0.524881363, acc=   0.921875000


118it [00:30,  3.85it/s]

P(y|x) 61:43000 loss=   0.668703020, acc=   0.875000000


218it [00:56,  3.86it/s]

P(y|x) 61:43100 loss=   0.672350764, acc=   0.796875000


318it [01:22,  3.87it/s]

P(y|x) 61:43200 loss=   0.580362439, acc=   0.906250000


418it [01:48,  3.87it/s]

P(y|x) 61:43300 loss=   0.647404671, acc=   0.828125000


518it [02:14,  3.86it/s]

P(y|x) 61:43400 loss=   0.756932676, acc=   0.828125000


618it [02:40,  3.86it/s]

P(y|x) 61:43500 loss=   0.708601117, acc=   0.843750000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 61: Valid Loss 1.3027963638305664, Valid Acc 0.6582000255584717


15it [00:04,  3.85it/s]

P(y|x) 62:43600 loss=   0.506695151, acc=   0.921875000


115it [00:30,  3.85it/s]

P(y|x) 62:43700 loss=   0.766147852, acc=   0.859375000


215it [00:56,  3.85it/s]

P(y|x) 62:43800 loss=   0.613646924, acc=   0.875000000


315it [01:22,  3.85it/s]

P(y|x) 62:43900 loss=   0.582094967, acc=   0.906250000


415it [01:48,  3.86it/s]

P(y|x) 62:44000 loss=   0.666205943, acc=   0.859375000


515it [02:13,  3.86it/s]

P(y|x) 62:44100 loss=   0.664010882, acc=   0.859375000


615it [02:39,  3.86it/s]

P(y|x) 62:44200 loss=   0.778639436, acc=   0.843750000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 62: Valid Loss 1.227405309677124, Valid Acc 0.6801000237464905
Best Valid!: 0.6801000237464905


12it [00:03,  3.84it/s]

P(y|x) 63:44300 loss=   0.798012555, acc=   0.890625000


112it [00:29,  3.86it/s]

P(y|x) 63:44400 loss=   0.529067755, acc=   0.921875000


212it [00:55,  3.85it/s]

P(y|x) 63:44500 loss=   0.630202711, acc=   0.859375000


312it [01:21,  3.85it/s]

P(y|x) 63:44600 loss=   0.601382017, acc=   0.859375000


412it [01:47,  3.86it/s]

P(y|x) 63:44700 loss=   0.530425608, acc=   0.921875000


512it [02:13,  3.86it/s]

P(y|x) 63:44800 loss=   0.864291608, acc=   0.796875000


612it [02:39,  3.86it/s]

P(y|x) 63:44900 loss=   0.522845387, acc=   0.921875000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 63: Valid Loss 1.2982990741729736, Valid Acc 0.6654000282287598


9it [00:02,  3.74it/s]

P(y|x) 64:45000 loss=   0.739271283, acc=   0.843750000


109it [00:28,  3.86it/s]

P(y|x) 64:45100 loss=   0.563052952, acc=   0.906250000


209it [00:54,  3.85it/s]

P(y|x) 64:45200 loss=   0.595911324, acc=   0.906250000


309it [01:20,  3.86it/s]

P(y|x) 64:45300 loss=   0.576570630, acc=   0.828125000


409it [01:46,  3.85it/s]

P(y|x) 64:45400 loss=   0.552577913, acc=   0.859375000


509it [02:12,  3.84it/s]

P(y|x) 64:45500 loss=   0.562740982, acc=   0.906250000


609it [02:38,  3.85it/s]

P(y|x) 64:45600 loss=   0.648152232, acc=   0.875000000


703it [03:02,  3.84it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 64: Valid Loss 1.3341748714447021, Valid Acc 0.6557000279426575


6it [00:02,  3.48it/s]

P(y|x) 65:45700 loss=   0.724916577, acc=   0.859375000


106it [00:28,  3.85it/s]

P(y|x) 65:45800 loss=   0.779133379, acc=   0.859375000


206it [00:53,  3.84it/s]

P(y|x) 65:45900 loss=   0.785517931, acc=   0.812500000


306it [01:19,  3.85it/s]

P(y|x) 65:46000 loss=   0.576280832, acc=   0.921875000


406it [01:45,  3.86it/s]

P(y|x) 65:46100 loss=   0.744279683, acc=   0.781250000


506it [02:11,  3.86it/s]

P(y|x) 65:46200 loss=   0.570867360, acc=   0.890625000


606it [02:37,  3.85it/s]

P(y|x) 65:46300 loss=   0.624783456, acc=   0.812500000


703it [03:02,  3.84it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 65: Valid Loss 1.3132760524749756, Valid Acc 0.6567000150680542


3it [00:01,  2.74it/s]

P(y|x) 66:46400 loss=   0.673199296, acc=   0.859375000


103it [00:27,  3.87it/s]

P(y|x) 66:46500 loss=   0.492287815, acc=   0.921875000


203it [00:52,  3.87it/s]

P(y|x) 66:46600 loss=   0.795295119, acc=   0.796875000


303it [01:18,  3.87it/s]

P(y|x) 66:46700 loss=   0.568998575, acc=   0.875000000


403it [01:44,  3.87it/s]

P(y|x) 66:46800 loss=   0.594021559, acc=   0.906250000


503it [02:10,  3.87it/s]

P(y|x) 66:46900 loss=   0.598233461, acc=   0.890625000


603it [02:36,  3.86it/s]

P(y|x) 66:47000 loss=   0.749901175, acc=   0.781250000


703it [03:02,  3.86it/s]

P(y|x) 66:47100 loss=   0.525358975, acc=   0.937500000
ev:  test





ev:  train
ev:  valid
valid: Epoch 66: Valid Loss 1.2100651264190674, Valid Acc 0.6794999837875366


100it [00:26,  3.84it/s]

P(y|x) 67:47200 loss=   0.551523030, acc=   0.875000000


200it [00:52,  3.85it/s]

P(y|x) 67:47300 loss=   0.538362384, acc=   0.890625000


300it [01:18,  3.85it/s]

P(y|x) 67:47400 loss=   0.571833193, acc=   0.859375000


400it [01:44,  3.86it/s]

P(y|x) 67:47500 loss=   0.492957860, acc=   0.953125000


500it [02:10,  3.86it/s]

P(y|x) 67:47600 loss=   0.500632942, acc=   0.953125000


600it [02:36,  3.85it/s]

P(y|x) 67:47700 loss=   0.555923998, acc=   0.875000000


700it [03:01,  3.86it/s]

P(y|x) 67:47800 loss=   0.539168477, acc=   0.906250000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 67: Valid Loss 1.2211273908615112, Valid Acc 0.6822999715805054
Best Valid!: 0.6822999715805054


97it [00:25,  3.86it/s]

P(y|x) 68:47900 loss=   0.538620472, acc=   0.937500000


197it [00:51,  3.84it/s]

P(y|x) 68:48000 loss=   0.593487144, acc=   0.859375000


297it [01:17,  3.84it/s]

P(y|x) 68:48100 loss=   0.622847378, acc=   0.921875000


397it [01:43,  3.86it/s]

P(y|x) 68:48200 loss=   0.645855308, acc=   0.828125000


497it [02:09,  3.86it/s]

P(y|x) 68:48300 loss=   0.495183140, acc=   0.906250000


597it [02:35,  3.86it/s]

P(y|x) 68:48400 loss=   0.537918270, acc=   0.937500000


697it [03:01,  3.85it/s]

P(y|x) 68:48500 loss=   0.588515222, acc=   0.921875000


703it [03:02,  3.84it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 68: Valid Loss 1.2235169410705566, Valid Acc 0.6801000237464905


94it [00:24,  3.86it/s]

P(y|x) 69:48600 loss=   0.683905602, acc=   0.843750000


194it [00:50,  3.86it/s]

P(y|x) 69:48700 loss=   0.560641170, acc=   0.906250000


294it [01:16,  3.85it/s]

P(y|x) 69:48800 loss=   0.468012005, acc=   0.953125000


394it [01:42,  3.86it/s]

P(y|x) 69:48900 loss=   0.494133413, acc=   0.906250000


494it [02:08,  3.86it/s]

P(y|x) 69:49000 loss=   0.612187266, acc=   0.906250000


594it [02:34,  3.86it/s]

P(y|x) 69:49100 loss=   0.509129405, acc=   0.906250000


694it [03:00,  3.86it/s]

P(y|x) 69:49200 loss=   0.533203125, acc=   0.890625000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 69: Valid Loss 1.2561511993408203, Valid Acc 0.6777999997138977


91it [00:24,  3.86it/s]

P(y|x) 70:49300 loss=   0.534024537, acc=   0.906250000


191it [00:49,  3.86it/s]

P(y|x) 70:49400 loss=   0.582856238, acc=   0.906250000


291it [01:15,  3.86it/s]

P(y|x) 70:49500 loss=   0.659126759, acc=   0.859375000


391it [01:41,  3.85it/s]

P(y|x) 70:49600 loss=   0.590025723, acc=   0.843750000


491it [02:07,  3.85it/s]

P(y|x) 70:49700 loss=   0.597252667, acc=   0.875000000


591it [02:33,  3.86it/s]

P(y|x) 70:49800 loss=   0.651979029, acc=   0.875000000


691it [02:59,  3.86it/s]

P(y|x) 70:49900 loss=   0.539987326, acc=   0.921875000


703it [03:02,  3.85it/s]


ev:  test
ev:  train
ev:  valid
valid: Epoch 70: Valid Loss 1.2171834707260132, Valid Acc 0.6819000244140625


88it [00:23,  3.86it/s]

P(y|x) 71:50000 loss=   0.477592826, acc=   0.968750000


188it [00:49,  3.85it/s]

P(y|x) 71:50100 loss=   0.412921965, acc=   0.937500000


288it [01:15,  3.85it/s]

P(y|x) 71:50200 loss=   0.632690370, acc=   0.875000000


388it [01:41,  3.85it/s]

P(y|x) 71:50300 loss=   0.496984035, acc=   0.859375000


488it [02:07,  3.84it/s]

P(y|x) 71:50400 loss=   0.499158949, acc=   0.921875000


588it [02:33,  3.84it/s]

P(y|x) 71:50500 loss=   0.684045434, acc=   0.859375000


688it [02:59,  3.84it/s]

P(y|x) 71:50600 loss=   0.652709424, acc=   0.875000000


703it [03:03,  3.84it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 71: Valid Loss 1.2251160144805908, Valid Acc 0.6815000176429749


85it [00:22,  3.85it/s]

P(y|x) 72:50700 loss=   0.527431965, acc=   0.937500000


185it [00:48,  3.85it/s]

P(y|x) 72:50800 loss=   0.507998347, acc=   0.890625000


285it [01:14,  3.86it/s]

P(y|x) 72:50900 loss=   0.459497601, acc=   0.937500000


385it [01:40,  3.87it/s]

P(y|x) 72:51000 loss=   0.497148156, acc=   0.921875000


485it [02:06,  3.87it/s]

P(y|x) 72:51100 loss=   0.435742974, acc=   0.953125000


585it [02:31,  3.88it/s]

P(y|x) 72:51200 loss=   0.485376030, acc=   0.921875000


685it [02:57,  3.87it/s]

P(y|x) 72:51300 loss=   0.607849121, acc=   0.875000000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 72: Valid Loss 1.2817859649658203, Valid Acc 0.6740000247955322


82it [00:21,  3.86it/s]

P(y|x) 73:51400 loss=   0.439945996, acc=   0.968750000


182it [00:47,  3.85it/s]

P(y|x) 73:51500 loss=   0.453273237, acc=   0.921875000


282it [01:13,  3.86it/s]

P(y|x) 73:51600 loss=   0.449993014, acc=   0.921875000


382it [01:39,  3.86it/s]

P(y|x) 73:51700 loss=   0.645722270, acc=   0.875000000


482it [02:05,  3.86it/s]

P(y|x) 73:51800 loss=   0.541775763, acc=   0.890625000


582it [02:31,  3.86it/s]

P(y|x) 73:51900 loss=   0.499127865, acc=   0.875000000


682it [02:57,  3.86it/s]

P(y|x) 73:52000 loss=   0.508249044, acc=   0.906250000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 73: Valid Loss 1.2354816198349, Valid Acc 0.6787999868392944


79it [00:20,  3.86it/s]

P(y|x) 74:52100 loss=   0.469587564, acc=   0.953125000


179it [00:46,  3.85it/s]

P(y|x) 74:52200 loss=   0.528706610, acc=   0.859375000


279it [01:12,  3.86it/s]

P(y|x) 74:52300 loss=   0.540543497, acc=   0.921875000


379it [01:38,  3.86it/s]

P(y|x) 74:52400 loss=   0.432635635, acc=   0.906250000


479it [02:04,  3.85it/s]

P(y|x) 74:52500 loss=   0.432750791, acc=   0.921875000


579it [02:30,  3.85it/s]

P(y|x) 74:52600 loss=   0.482707828, acc=   0.937500000


679it [02:56,  3.86it/s]

P(y|x) 74:52700 loss=   0.594969571, acc=   0.890625000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 74: Valid Loss 1.2627787590026855, Valid Acc 0.6672999858856201


76it [00:20,  3.85it/s]

P(y|x) 75:52800 loss=   0.475429356, acc=   0.921875000


176it [00:46,  3.86it/s]

P(y|x) 75:52900 loss=   0.398065388, acc=   0.937500000


276it [01:11,  3.86it/s]

P(y|x) 75:53000 loss=   0.423100621, acc=   0.906250000


376it [01:37,  3.87it/s]

P(y|x) 75:53100 loss=   0.633021355, acc=   0.843750000


476it [02:03,  3.87it/s]

P(y|x) 75:53200 loss=   0.470644951, acc=   0.937500000


576it [02:29,  3.86it/s]

P(y|x) 75:53300 loss=   0.521104574, acc=   0.906250000


676it [02:55,  3.86it/s]

P(y|x) 75:53400 loss=   0.509435236, acc=   0.875000000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 75: Valid Loss 1.2903802394866943, Valid Acc 0.6669999957084656


73it [00:19,  3.89it/s]

P(y|x) 76:53500 loss=   0.550096631, acc=   0.906250000


173it [00:44,  3.86it/s]

P(y|x) 76:53600 loss=   0.493149757, acc=   0.875000000


273it [01:10,  3.85it/s]

P(y|x) 76:53700 loss=   0.561577380, acc=   0.859375000


373it [01:36,  3.84it/s]

P(y|x) 76:53800 loss=   0.598174334, acc=   0.859375000


473it [02:02,  3.86it/s]

P(y|x) 76:53900 loss=   0.443389595, acc=   0.890625000


573it [02:28,  3.86it/s]

P(y|x) 76:54000 loss=   0.408163935, acc=   0.921875000


673it [02:54,  3.86it/s]

P(y|x) 76:54100 loss=   0.497157723, acc=   0.890625000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 76: Valid Loss 1.2369943857192993, Valid Acc 0.6816999912261963


70it [00:18,  3.85it/s]

P(y|x) 77:54200 loss=   0.567493498, acc=   0.890625000


170it [00:44,  3.85it/s]

P(y|x) 77:54300 loss=   0.477709174, acc=   0.890625000


270it [01:10,  3.85it/s]

P(y|x) 77:54400 loss=   0.385361165, acc=   0.921875000


370it [01:36,  3.85it/s]

P(y|x) 77:54500 loss=   0.481267065, acc=   0.890625000


470it [02:02,  3.86it/s]

P(y|x) 77:54600 loss=   0.372911036, acc=   0.984375000


570it [02:28,  3.86it/s]

P(y|x) 77:54700 loss=   0.506529331, acc=   0.921875000


670it [02:53,  3.87it/s]

P(y|x) 77:54800 loss=   0.374097139, acc=   0.937500000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 77: Valid Loss 1.218261480331421, Valid Acc 0.6875
Best Valid!: 0.6875


67it [00:17,  3.87it/s]

P(y|x) 78:54900 loss=   0.402629763, acc=   0.953125000


167it [00:43,  3.87it/s]

P(y|x) 78:55000 loss=   0.467627555, acc=   0.937500000


267it [01:09,  3.86it/s]

P(y|x) 78:55100 loss=   0.539342523, acc=   0.921875000


367it [01:35,  3.85it/s]

P(y|x) 78:55200 loss=   0.649731994, acc=   0.875000000


467it [02:01,  3.86it/s]

P(y|x) 78:55300 loss=   0.397982568, acc=   0.953125000


567it [02:27,  3.84it/s]

P(y|x) 78:55400 loss=   0.539624929, acc=   0.906250000


667it [02:53,  3.87it/s]

P(y|x) 78:55500 loss=   0.529937983, acc=   0.890625000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 78: Valid Loss 1.211910605430603, Valid Acc 0.6855999827384949


64it [00:16,  3.86it/s]

P(y|x) 79:55600 loss=   0.451382875, acc=   0.921875000


164it [00:42,  3.85it/s]

P(y|x) 79:55700 loss=   0.455019414, acc=   0.968750000


264it [01:08,  3.85it/s]

P(y|x) 79:55800 loss=   0.356347889, acc=   0.953125000


364it [01:34,  3.86it/s]

P(y|x) 79:55900 loss=   0.537502527, acc=   0.890625000


464it [02:00,  3.85it/s]

P(y|x) 79:56000 loss=   0.441820025, acc=   0.890625000


564it [02:26,  3.86it/s]

P(y|x) 79:56100 loss=   0.332219571, acc=   0.953125000


664it [02:52,  3.85it/s]

P(y|x) 79:56200 loss=   0.569452405, acc=   0.890625000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 79: Valid Loss 1.2438942193984985, Valid Acc 0.6735000014305115


61it [00:16,  3.85it/s]

P(y|x) 80:56300 loss=   0.501731634, acc=   0.890625000


161it [00:42,  3.86it/s]

P(y|x) 80:56400 loss=   0.362773538, acc=   0.968750000


261it [01:08,  3.87it/s]

P(y|x) 80:56500 loss=   0.384981871, acc=   0.906250000


361it [01:33,  3.87it/s]

P(y|x) 80:56600 loss=   0.509733975, acc=   0.921875000


461it [01:59,  3.86it/s]

P(y|x) 80:56700 loss=   0.693138719, acc=   0.859375000


561it [02:25,  3.86it/s]

P(y|x) 80:56800 loss=   0.476097405, acc=   0.937500000


661it [02:51,  3.86it/s]

P(y|x) 80:56900 loss=   0.374354571, acc=   0.984375000


703it [03:02,  3.85it/s]


ev:  test
ev:  train
ev:  valid
valid: Epoch 80: Valid Loss 1.223584532737732, Valid Acc 0.684499979019165


58it [00:15,  3.85it/s]

P(y|x) 81:57000 loss=   0.497076362, acc=   0.890625000


158it [00:41,  3.84it/s]

P(y|x) 81:57100 loss=   0.396163791, acc=   0.937500000


258it [01:07,  3.84it/s]

P(y|x) 81:57200 loss=   0.525573909, acc=   0.875000000


358it [01:33,  3.85it/s]

P(y|x) 81:57300 loss=   0.473057032, acc=   0.921875000


458it [01:59,  3.85it/s]

P(y|x) 81:57400 loss=   0.390748084, acc=   0.968750000


558it [02:25,  3.83it/s]

P(y|x) 81:57500 loss=   0.515256941, acc=   0.890625000


658it [02:51,  3.86it/s]

P(y|x) 81:57600 loss=   0.560407043, acc=   0.890625000


703it [03:03,  3.84it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 81: Valid Loss 1.2271201610565186, Valid Acc 0.6807000041007996


55it [00:14,  3.86it/s]

P(y|x) 82:57700 loss=   0.383059949, acc=   0.953125000


155it [00:40,  3.85it/s]

P(y|x) 82:57800 loss=   0.550951481, acc=   0.921875000


255it [01:06,  3.85it/s]

P(y|x) 82:57900 loss=   0.364420563, acc=   0.953125000


355it [01:32,  3.86it/s]

P(y|x) 82:58000 loss=   0.515662253, acc=   0.890625000


455it [01:58,  3.85it/s]

P(y|x) 82:58100 loss=   0.515371084, acc=   0.890625000


555it [02:24,  3.84it/s]

P(y|x) 82:58200 loss=   0.578929126, acc=   0.906250000


655it [02:50,  3.85it/s]

P(y|x) 82:58300 loss=   0.492517173, acc=   0.906250000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 82: Valid Loss 1.1792993545532227, Valid Acc 0.6912999749183655
Best Valid!: 0.6912999749183655


52it [00:13,  3.86it/s]

P(y|x) 83:58400 loss=   0.426116824, acc=   0.906250000


152it [00:39,  3.85it/s]

P(y|x) 83:58500 loss=   0.429271907, acc=   0.906250000


252it [01:05,  3.85it/s]

P(y|x) 83:58600 loss=   0.570503652, acc=   0.859375000


352it [01:31,  3.85it/s]

P(y|x) 83:58700 loss=   0.541113257, acc=   0.859375000


452it [01:57,  3.84it/s]

P(y|x) 83:58800 loss=   0.366610765, acc=   0.984375000


552it [02:23,  3.84it/s]

P(y|x) 83:58900 loss=   0.644691765, acc=   0.843750000


652it [02:49,  3.84it/s]

P(y|x) 83:59000 loss=   0.495316416, acc=   0.937500000


703it [03:03,  3.84it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 83: Valid Loss 1.2181017398834229, Valid Acc 0.6815999746322632


49it [00:13,  3.85it/s]

P(y|x) 84:59100 loss=   0.352339864, acc=   0.968750000


149it [00:38,  3.86it/s]

P(y|x) 84:59200 loss=   0.466678053, acc=   0.890625000


249it [01:04,  3.87it/s]

P(y|x) 84:59300 loss=   0.429696560, acc=   0.953125000


349it [01:30,  3.84it/s]

P(y|x) 84:59400 loss=   0.380556613, acc=   0.937500000


449it [01:56,  3.86it/s]

P(y|x) 84:59500 loss=   0.362327784, acc=   0.968750000


549it [02:22,  3.86it/s]

P(y|x) 84:59600 loss=   0.461213171, acc=   0.937500000


649it [02:48,  3.86it/s]

P(y|x) 84:59700 loss=   0.522602737, acc=   0.875000000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 84: Valid Loss 1.2482552528381348, Valid Acc 0.6786999702453613


46it [00:12,  3.86it/s]

P(y|x) 85:59800 loss=   0.400633395, acc=   0.937500000


146it [00:38,  3.85it/s]

P(y|x) 85:59900 loss=   0.456767887, acc=   0.875000000


246it [01:04,  3.86it/s]

P(y|x) 85:60000 loss=   0.463262916, acc=   0.937500000


346it [01:30,  3.86it/s]

P(y|x) 85:60100 loss=   0.442686409, acc=   0.906250000


446it [01:56,  3.86it/s]

P(y|x) 85:60200 loss=   0.531185567, acc=   0.859375000


546it [02:22,  3.86it/s]

P(y|x) 85:60300 loss=   0.531070054, acc=   0.875000000


646it [02:48,  3.85it/s]

P(y|x) 85:60400 loss=   0.375381440, acc=   0.937500000


703it [03:03,  3.84it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 85: Valid Loss 1.2326772212982178, Valid Acc 0.6771000027656555


43it [00:11,  3.84it/s]

P(y|x) 86:60500 loss=   0.369933605, acc=   0.921875000


143it [00:37,  3.85it/s]

P(y|x) 86:60600 loss=   0.325618148, acc=   0.953125000


243it [01:03,  3.86it/s]

P(y|x) 86:60700 loss=   0.655433416, acc=   0.843750000


343it [01:29,  3.86it/s]

P(y|x) 86:60800 loss=   0.360727161, acc=   0.937500000


443it [01:55,  3.83it/s]

P(y|x) 86:60900 loss=   0.272691220, acc=   1.000000000


543it [02:21,  3.85it/s]

P(y|x) 86:61000 loss=   0.415240228, acc=   0.953125000


643it [02:47,  3.86it/s]

P(y|x) 86:61100 loss=   0.408106089, acc=   0.921875000


703it [03:02,  3.84it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 86: Valid Loss 1.2283580303192139, Valid Acc 0.6808000206947327


40it [00:10,  3.87it/s]

P(y|x) 87:61200 loss=   0.325637996, acc=   0.953125000


140it [00:36,  3.86it/s]

P(y|x) 87:61300 loss=   0.423427641, acc=   0.906250000


240it [01:02,  3.86it/s]

P(y|x) 87:61400 loss=   0.394291520, acc=   0.953125000


340it [01:28,  3.85it/s]

P(y|x) 87:61500 loss=   0.445839316, acc=   0.875000000


440it [01:54,  3.87it/s]

P(y|x) 87:61600 loss=   0.374232471, acc=   0.921875000


540it [02:20,  3.87it/s]

P(y|x) 87:61700 loss=   0.356300980, acc=   0.937500000


640it [02:46,  3.86it/s]

P(y|x) 87:61800 loss=   0.433533162, acc=   0.906250000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 87: Valid Loss 1.2188432216644287, Valid Acc 0.6843000054359436


37it [00:10,  3.84it/s]

P(y|x) 88:61900 loss=   0.542452753, acc=   0.859375000


137it [00:35,  3.86it/s]

P(y|x) 88:62000 loss=   0.369450003, acc=   0.906250000


237it [01:01,  3.86it/s]

P(y|x) 88:62100 loss=   0.340532601, acc=   0.937500000


337it [01:27,  3.86it/s]

P(y|x) 88:62200 loss=   0.454875231, acc=   0.875000000


437it [01:53,  3.86it/s]

P(y|x) 88:62300 loss=   0.342775404, acc=   0.953125000


537it [02:19,  3.86it/s]

P(y|x) 88:62400 loss=   0.332422554, acc=   0.968750000


637it [02:45,  3.86it/s]

P(y|x) 88:62500 loss=   0.423011839, acc=   0.953125000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 88: Valid Loss 1.2304054498672485, Valid Acc 0.675000011920929


34it [00:09,  3.86it/s]

P(y|x) 89:62600 loss=   0.316185296, acc=   0.953125000


134it [00:35,  3.85it/s]

P(y|x) 89:62700 loss=   0.424843997, acc=   0.906250000


234it [01:00,  3.83it/s]

P(y|x) 89:62800 loss=   0.371568233, acc=   0.921875000


334it [01:26,  3.85it/s]

P(y|x) 89:62900 loss=   0.470552057, acc=   0.921875000


434it [01:52,  3.85it/s]

P(y|x) 89:63000 loss=   0.409990191, acc=   0.921875000


534it [02:18,  3.85it/s]

P(y|x) 89:63100 loss=   0.690910995, acc=   0.750000000


634it [02:44,  3.86it/s]

P(y|x) 89:63200 loss=   0.446806192, acc=   0.937500000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 89: Valid Loss 1.1809732913970947, Valid Acc 0.6920999884605408
Best Valid!: 0.6920999884605408


31it [00:08,  3.86it/s]

P(y|x) 90:63300 loss=   0.370354414, acc=   0.953125000


131it [00:34,  3.85it/s]

P(y|x) 90:63400 loss=   0.531470120, acc=   0.859375000


231it [01:00,  3.86it/s]

P(y|x) 90:63500 loss=   0.349437177, acc=   0.937500000


331it [01:26,  3.85it/s]

P(y|x) 90:63600 loss=   0.334072769, acc=   0.937500000


431it [01:52,  3.85it/s]

P(y|x) 90:63700 loss=   0.349930078, acc=   0.937500000


531it [02:18,  3.85it/s]

P(y|x) 90:63800 loss=   0.397650868, acc=   0.953125000


631it [02:43,  3.85it/s]

P(y|x) 90:63900 loss=   0.358671159, acc=   0.984375000


703it [03:02,  3.85it/s]


ev:  test
ev:  train
ev:  valid
valid: Epoch 90: Valid Loss 1.2474318742752075, Valid Acc 0.684499979019165


28it [00:07,  3.87it/s]

P(y|x) 91:64000 loss=   0.359655917, acc=   0.921875000


128it [00:33,  3.86it/s]

P(y|x) 91:64100 loss=   0.315536588, acc=   0.953125000


228it [00:59,  3.86it/s]

P(y|x) 91:64200 loss=   0.372073263, acc=   0.937500000


328it [01:25,  3.86it/s]

P(y|x) 91:64300 loss=   0.244953141, acc=   1.000000000


428it [01:51,  3.86it/s]

P(y|x) 91:64400 loss=   0.265481800, acc=   0.968750000


528it [02:17,  3.86it/s]

P(y|x) 91:64500 loss=   0.356650859, acc=   0.921875000


628it [02:43,  3.86it/s]

P(y|x) 91:64600 loss=   0.534951866, acc=   0.843750000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 91: Valid Loss 1.1854444742202759, Valid Acc 0.6899999976158142


25it [00:06,  3.86it/s]

P(y|x) 92:64700 loss=   0.440155655, acc=   0.875000000


125it [00:32,  3.85it/s]

P(y|x) 92:64800 loss=   0.248214155, acc=   0.968750000


225it [00:58,  3.86it/s]

P(y|x) 92:64900 loss=   0.347019285, acc=   0.953125000


325it [01:24,  3.85it/s]

P(y|x) 92:65000 loss=   0.299907863, acc=   0.937500000


425it [01:50,  3.86it/s]

P(y|x) 92:65100 loss=   0.437744677, acc=   0.906250000


525it [02:16,  3.84it/s]

P(y|x) 92:65200 loss=   0.384804636, acc=   0.968750000


625it [02:42,  3.85it/s]

P(y|x) 92:65300 loss=   0.462293983, acc=   0.906250000


703it [03:02,  3.84it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 92: Valid Loss 1.263861894607544, Valid Acc 0.6697999835014343


22it [00:06,  3.86it/s]

P(y|x) 93:65400 loss=   0.409209460, acc=   0.921875000


122it [00:32,  3.85it/s]

P(y|x) 93:65500 loss=   0.389737606, acc=   0.937500000


222it [00:57,  3.84it/s]

P(y|x) 93:65600 loss=   0.463538736, acc=   0.921875000


322it [01:23,  3.84it/s]

P(y|x) 93:65700 loss=   0.371207476, acc=   0.937500000


422it [01:49,  3.85it/s]

P(y|x) 93:65800 loss=   0.358824104, acc=   0.968750000


522it [02:15,  3.86it/s]

P(y|x) 93:65900 loss=   0.332156688, acc=   0.968750000


622it [02:41,  3.85it/s]

P(y|x) 93:66000 loss=   0.347718149, acc=   0.937500000


703it [03:02,  3.84it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 93: Valid Loss 1.2525182962417603, Valid Acc 0.6791999936103821


19it [00:05,  3.85it/s]

P(y|x) 94:66100 loss=   0.301981956, acc=   0.953125000


119it [00:31,  3.85it/s]

P(y|x) 94:66200 loss=   0.432582229, acc=   0.937500000


219it [00:57,  3.87it/s]

P(y|x) 94:66300 loss=   0.426125854, acc=   0.906250000


319it [01:23,  3.85it/s]

P(y|x) 94:66400 loss=   0.390514284, acc=   0.921875000


419it [01:49,  3.86it/s]

P(y|x) 94:66500 loss=   0.308438152, acc=   0.968750000


519it [02:14,  3.84it/s]

P(y|x) 94:66600 loss=   0.294427902, acc=   0.968750000


619it [02:40,  3.84it/s]

P(y|x) 94:66700 loss=   0.439833969, acc=   0.875000000


703it [03:02,  3.84it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 94: Valid Loss 1.1999895572662354, Valid Acc 0.685699999332428


16it [00:04,  3.85it/s]

P(y|x) 95:66800 loss=   0.280152172, acc=   0.953125000


116it [00:30,  3.84it/s]

P(y|x) 95:66900 loss=   0.366791248, acc=   0.937500000


216it [00:56,  3.85it/s]

P(y|x) 95:67000 loss=   0.352836460, acc=   0.953125000


316it [01:22,  3.86it/s]

P(y|x) 95:67100 loss=   0.361420929, acc=   0.968750000


416it [01:48,  3.85it/s]

P(y|x) 95:67200 loss=   0.366681069, acc=   0.953125000


516it [02:14,  3.85it/s]

P(y|x) 95:67300 loss=   0.332047164, acc=   0.937500000


616it [02:40,  3.86it/s]

P(y|x) 95:67400 loss=   0.427025080, acc=   0.937500000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 95: Valid Loss 1.3473399877548218, Valid Acc 0.6547999978065491


13it [00:03,  3.82it/s]

P(y|x) 96:67500 loss=   0.320022911, acc=   0.953125000


113it [00:29,  3.84it/s]

P(y|x) 96:67600 loss=   0.454987705, acc=   0.921875000


213it [00:55,  3.86it/s]

P(y|x) 96:67700 loss=   0.315349102, acc=   0.953125000


313it [01:21,  3.86it/s]

P(y|x) 96:67800 loss=   0.302119523, acc=   0.953125000


413it [01:47,  3.84it/s]

P(y|x) 96:67900 loss=   0.274919033, acc=   0.968750000


513it [02:13,  3.85it/s]

P(y|x) 96:68000 loss=   0.674158871, acc=   0.828125000


613it [02:39,  3.83it/s]

P(y|x) 96:68100 loss=   0.416637361, acc=   0.937500000


703it [03:02,  3.84it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 96: Valid Loss 1.3098324537277222, Valid Acc 0.6647999882698059


10it [00:03,  3.77it/s]

P(y|x) 97:68200 loss=   0.355656594, acc=   0.937500000


110it [00:29,  3.86it/s]

P(y|x) 97:68300 loss=   0.324821264, acc=   0.937500000


210it [00:54,  3.86it/s]

P(y|x) 97:68400 loss=   0.244687915, acc=   0.984375000


310it [01:20,  3.86it/s]

P(y|x) 97:68500 loss=   0.331158131, acc=   0.953125000


410it [01:46,  3.86it/s]

P(y|x) 97:68600 loss=   0.322734356, acc=   0.953125000


510it [02:12,  3.86it/s]

P(y|x) 97:68700 loss=   0.248335749, acc=   0.984375000


610it [02:38,  3.86it/s]

P(y|x) 97:68800 loss=   0.426596224, acc=   0.890625000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 97: Valid Loss 1.2163231372833252, Valid Acc 0.6743999719619751


7it [00:02,  3.65it/s]

P(y|x) 98:68900 loss=   0.300506622, acc=   0.968750000


107it [00:28,  3.85it/s]

P(y|x) 98:69000 loss=   0.319699943, acc=   0.953125000


207it [00:54,  3.86it/s]

P(y|x) 98:69100 loss=   0.362623662, acc=   0.953125000


307it [01:20,  3.86it/s]

P(y|x) 98:69200 loss=   0.356918752, acc=   0.937500000


407it [01:45,  3.86it/s]

P(y|x) 98:69300 loss=   0.331467956, acc=   0.953125000


507it [02:11,  3.86it/s]

P(y|x) 98:69400 loss=   0.393360615, acc=   0.890625000


607it [02:37,  3.86it/s]

P(y|x) 98:69500 loss=   0.425493270, acc=   0.937500000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 98: Valid Loss 1.232542872428894, Valid Acc 0.6815000176429749


4it [00:01,  3.08it/s]

P(y|x) 99:69600 loss=   0.247584283, acc=   0.968750000


104it [00:27,  3.84it/s]

P(y|x) 99:69700 loss=   0.424293101, acc=   0.953125000


204it [00:53,  3.87it/s]

P(y|x) 99:69800 loss=   0.270789593, acc=   0.984375000


304it [01:19,  3.85it/s]

P(y|x) 99:69900 loss=   0.300072014, acc=   0.937500000


404it [01:45,  3.85it/s]

P(y|x) 99:70000 loss=   0.322897017, acc=   0.953125000


504it [02:11,  3.86it/s]

P(y|x) 99:70100 loss=   0.232567638, acc=   0.968750000


604it [02:37,  3.85it/s]

P(y|x) 99:70200 loss=   0.408716589, acc=   0.968750000


703it [03:02,  3.84it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 99: Valid Loss 1.2343097925186157, Valid Acc 0.6783999800682068


1it [00:00,  1.36it/s]

P(y|x) 100:70300 loss=   0.389908254, acc=   0.937500000


101it [00:26,  3.85it/s]

P(y|x) 100:70400 loss=   0.283329129, acc=   0.953125000


201it [00:52,  3.85it/s]

P(y|x) 100:70500 loss=   0.419852346, acc=   0.906250000


301it [01:18,  3.85it/s]

P(y|x) 100:70600 loss=   0.475899130, acc=   0.906250000


401it [01:44,  3.84it/s]

P(y|x) 100:70700 loss=   0.409143358, acc=   0.906250000


501it [02:10,  3.86it/s]

P(y|x) 100:70800 loss=   0.275003046, acc=   0.968750000


601it [02:36,  3.85it/s]

P(y|x) 100:70900 loss=   0.262934029, acc=   0.953125000


701it [03:02,  3.86it/s]

P(y|x) 100:71000 loss=   0.249600813, acc=   0.968750000


703it [03:03,  3.84it/s]


ev:  test
ev:  train
ev:  valid
valid: Epoch 100: Valid Loss 1.1992309093475342, Valid Acc 0.6863999962806702


98it [00:25,  3.89it/s]

P(y|x) 101:71100 loss=   0.215866894, acc=   1.000000000


198it [00:51,  3.87it/s]

P(y|x) 101:71200 loss=   0.379978746, acc=   0.937500000


298it [01:17,  3.86it/s]

P(y|x) 101:71300 loss=   0.232684538, acc=   0.984375000


398it [01:43,  3.86it/s]

P(y|x) 101:71400 loss=   0.293662339, acc=   0.984375000


498it [02:09,  3.85it/s]

P(y|x) 101:71500 loss=   0.340580344, acc=   0.921875000


598it [02:35,  3.85it/s]

P(y|x) 101:71600 loss=   0.329936683, acc=   0.937500000


698it [03:01,  3.85it/s]

P(y|x) 101:71700 loss=   0.374188453, acc=   0.890625000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 101: Valid Loss 1.1744762659072876, Valid Acc 0.6937999725341797
Best Valid!: 0.6937999725341797


95it [00:25,  3.86it/s]

P(y|x) 102:71800 loss=   0.356796831, acc=   0.937500000


195it [00:51,  3.86it/s]

P(y|x) 102:71900 loss=   0.266315728, acc=   0.968750000


295it [01:17,  3.83it/s]

P(y|x) 102:72000 loss=   0.351910710, acc=   0.906250000


395it [01:43,  3.86it/s]

P(y|x) 102:72100 loss=   0.313106805, acc=   0.968750000


495it [02:08,  3.85it/s]

P(y|x) 102:72200 loss=   0.389000356, acc=   0.937500000


595it [02:34,  3.86it/s]

P(y|x) 102:72300 loss=   0.228621259, acc=   0.968750000


695it [03:00,  3.86it/s]

P(y|x) 102:72400 loss=   0.397145838, acc=   0.921875000


703it [03:03,  3.84it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 102: Valid Loss 1.2573926448822021, Valid Acc 0.6726999878883362


92it [00:24,  3.85it/s]

P(y|x) 103:72500 loss=   0.396527737, acc=   0.937500000


192it [00:50,  3.85it/s]

P(y|x) 103:72600 loss=   0.345279306, acc=   0.953125000


292it [01:16,  3.86it/s]

P(y|x) 103:72700 loss=   0.331806183, acc=   0.937500000


392it [01:42,  3.85it/s]

P(y|x) 103:72800 loss=   0.275955319, acc=   0.937500000


492it [02:08,  3.85it/s]

P(y|x) 103:72900 loss=   0.463350296, acc=   0.906250000


592it [02:33,  3.87it/s]

P(y|x) 103:73000 loss=   0.396432310, acc=   0.953125000


692it [02:59,  3.85it/s]

P(y|x) 103:73100 loss=   0.346979499, acc=   0.937500000


703it [03:02,  3.84it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 103: Valid Loss 1.2701919078826904, Valid Acc 0.6676999926567078


89it [00:23,  3.85it/s]

P(y|x) 104:73200 loss=   0.317587703, acc=   0.937500000


189it [00:49,  3.85it/s]

P(y|x) 104:73300 loss=   0.395308644, acc=   0.890625000


289it [01:15,  3.85it/s]

P(y|x) 104:73400 loss=   0.314946532, acc=   0.921875000


389it [01:41,  3.86it/s]

P(y|x) 104:73500 loss=   0.349159062, acc=   0.921875000


489it [02:07,  3.86it/s]

P(y|x) 104:73600 loss=   0.251600087, acc=   1.000000000


589it [02:33,  3.86it/s]

P(y|x) 104:73700 loss=   0.355992824, acc=   0.906250000


689it [02:59,  3.85it/s]

P(y|x) 104:73800 loss=   0.277589351, acc=   0.937500000


703it [03:02,  3.84it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 104: Valid Loss 1.2046277523040771, Valid Acc 0.6912000179290771


86it [00:22,  3.86it/s]

P(y|x) 105:73900 loss=   0.357029319, acc=   0.890625000


186it [00:48,  3.86it/s]

P(y|x) 105:74000 loss=   0.302886903, acc=   0.953125000


286it [01:14,  3.86it/s]

P(y|x) 105:74100 loss=   0.342072546, acc=   0.937500000


386it [01:40,  3.86it/s]

P(y|x) 105:74200 loss=   0.211997673, acc=   0.984375000


486it [02:06,  3.85it/s]

P(y|x) 105:74300 loss=   0.217909023, acc=   1.000000000


586it [02:32,  3.87it/s]

P(y|x) 105:74400 loss=   0.404784381, acc=   0.921875000


686it [02:58,  3.86it/s]

P(y|x) 105:74500 loss=   0.372589737, acc=   0.968750000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 105: Valid Loss 1.3345688581466675, Valid Acc 0.6585999727249146


83it [00:21,  3.86it/s]

P(y|x) 106:74600 loss=   0.275665909, acc=   0.953125000


183it [00:47,  3.87it/s]

P(y|x) 106:74700 loss=   0.314156741, acc=   0.953125000


283it [01:13,  3.87it/s]

P(y|x) 106:74800 loss=   0.382380247, acc=   0.890625000


383it [01:39,  3.87it/s]

P(y|x) 106:74900 loss=   0.300902218, acc=   0.937500000


483it [02:05,  3.87it/s]

P(y|x) 106:75000 loss=   0.457899243, acc=   0.921875000


583it [02:31,  3.87it/s]

P(y|x) 106:75100 loss=   0.318457037, acc=   0.953125000


683it [02:56,  3.87it/s]

P(y|x) 106:75200 loss=   0.309488773, acc=   0.953125000


703it [03:02,  3.86it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 106: Valid Loss 1.224524974822998, Valid Acc 0.6759999990463257


80it [00:21,  3.86it/s]

P(y|x) 107:75300 loss=   0.243726268, acc=   0.921875000


180it [00:47,  3.86it/s]

P(y|x) 107:75400 loss=   0.235671341, acc=   0.968750000


280it [01:12,  3.85it/s]

P(y|x) 107:75500 loss=   0.235050723, acc=   1.000000000


380it [01:38,  3.85it/s]

P(y|x) 107:75600 loss=   0.273297101, acc=   0.984375000


480it [02:04,  3.85it/s]

P(y|x) 107:75700 loss=   0.265384674, acc=   0.921875000


580it [02:30,  3.85it/s]

P(y|x) 107:75800 loss=   0.510841012, acc=   0.875000000


680it [02:56,  3.85it/s]

P(y|x) 107:75900 loss=   0.403456926, acc=   0.906250000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 107: Valid Loss 1.234686017036438, Valid Acc 0.684499979019165


77it [00:20,  3.86it/s]

P(y|x) 108:76000 loss=   0.276455075, acc=   0.937500000


177it [00:46,  3.85it/s]

P(y|x) 108:76100 loss=   0.260874003, acc=   0.984375000


277it [01:12,  3.85it/s]

P(y|x) 108:76200 loss=   0.347379744, acc=   0.953125000


377it [01:38,  3.85it/s]

P(y|x) 108:76300 loss=   0.277858377, acc=   0.953125000


477it [02:04,  3.85it/s]

P(y|x) 108:76400 loss=   0.387349486, acc=   0.921875000


577it [02:30,  3.85it/s]

P(y|x) 108:76500 loss=   0.239777178, acc=   0.984375000


677it [02:56,  3.85it/s]

P(y|x) 108:76600 loss=   0.282493293, acc=   0.953125000


703it [03:03,  3.84it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 108: Valid Loss 1.2444496154785156, Valid Acc 0.6802999973297119


74it [00:19,  3.86it/s]

P(y|x) 109:76700 loss=   0.381752521, acc=   0.906250000


174it [00:45,  3.86it/s]

P(y|x) 109:76800 loss=   0.366203278, acc=   0.937500000


274it [01:11,  3.86it/s]

P(y|x) 109:76900 loss=   0.395953327, acc=   0.890625000


374it [01:37,  3.86it/s]

P(y|x) 109:77000 loss=   0.321144730, acc=   0.968750000


474it [02:03,  3.86it/s]

P(y|x) 109:77100 loss=   0.341675252, acc=   0.937500000


574it [02:28,  3.87it/s]

P(y|x) 109:77200 loss=   0.293026686, acc=   0.984375000


674it [02:54,  3.87it/s]

P(y|x) 109:77300 loss=   0.372100949, acc=   0.937500000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 109: Valid Loss 1.226880669593811, Valid Acc 0.683899998664856


71it [00:18,  3.86it/s]

P(y|x) 110:77400 loss=   0.339598238, acc=   0.921875000


171it [00:44,  3.85it/s]

P(y|x) 110:77500 loss=   0.300474048, acc=   0.953125000


271it [01:10,  3.86it/s]

P(y|x) 110:77600 loss=   0.315874130, acc=   0.953125000


371it [01:36,  3.84it/s]

P(y|x) 110:77700 loss=   0.318851292, acc=   0.968750000


471it [02:02,  3.85it/s]

P(y|x) 110:77800 loss=   0.206043109, acc=   1.000000000


571it [02:28,  3.85it/s]

P(y|x) 110:77900 loss=   0.242506176, acc=   0.953125000


671it [02:54,  3.85it/s]

P(y|x) 110:78000 loss=   0.226770371, acc=   1.000000000


703it [03:02,  3.84it/s]


ev:  test
ev:  train
ev:  valid
valid: Epoch 110: Valid Loss 1.2332520484924316, Valid Acc 0.6786999702453613


68it [00:18,  3.85it/s]

P(y|x) 111:78100 loss=   0.180209830, acc=   0.984375000


168it [00:43,  3.85it/s]

P(y|x) 111:78200 loss=   0.291010112, acc=   0.937500000


268it [01:09,  3.85it/s]

P(y|x) 111:78300 loss=   0.244228557, acc=   0.953125000


368it [01:35,  3.86it/s]

P(y|x) 111:78400 loss=   0.394228369, acc=   0.921875000


468it [02:01,  3.86it/s]

P(y|x) 111:78500 loss=   0.374771595, acc=   0.953125000


568it [02:27,  3.86it/s]

P(y|x) 111:78600 loss=   0.328689784, acc=   0.968750000


668it [02:53,  3.86it/s]

P(y|x) 111:78700 loss=   0.408438504, acc=   0.875000000


703it [03:02,  3.84it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 111: Valid Loss 1.2340680360794067, Valid Acc 0.6843000054359436


65it [00:17,  3.87it/s]

P(y|x) 112:78800 loss=   0.246108323, acc=   0.984375000


165it [00:43,  3.87it/s]

P(y|x) 112:78900 loss=   0.346181005, acc=   0.953125000


265it [01:08,  3.86it/s]

P(y|x) 112:79000 loss=   0.348781288, acc=   0.906250000


365it [01:34,  3.87it/s]

P(y|x) 112:79100 loss=   0.189812779, acc=   0.984375000


465it [02:00,  3.86it/s]

P(y|x) 112:79200 loss=   0.314502060, acc=   0.953125000


565it [02:26,  3.86it/s]

P(y|x) 112:79300 loss=   0.224914476, acc=   0.968750000


665it [02:52,  3.86it/s]

P(y|x) 112:79400 loss=   0.206682041, acc=   0.984375000


703it [03:02,  3.86it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 112: Valid Loss 1.4243265390396118, Valid Acc 0.6365000009536743


62it [00:16,  3.87it/s]

P(y|x) 113:79500 loss=   0.464664549, acc=   0.875000000


162it [00:42,  3.85it/s]

P(y|x) 113:79600 loss=   0.308710515, acc=   0.921875000


262it [01:08,  3.85it/s]

P(y|x) 113:79700 loss=   0.323594660, acc=   0.937500000


362it [01:34,  3.86it/s]

P(y|x) 113:79800 loss=   0.337340415, acc=   0.937500000


462it [02:00,  3.87it/s]

P(y|x) 113:79900 loss=   0.409885943, acc=   0.890625000


562it [02:25,  3.86it/s]

P(y|x) 113:80000 loss=   0.263402402, acc=   0.953125000


662it [02:51,  3.87it/s]

P(y|x) 113:80100 loss=   0.210439205, acc=   0.968750000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 113: Valid Loss 1.1908228397369385, Valid Acc 0.6929000020027161


59it [00:15,  3.86it/s]

P(y|x) 114:80200 loss=   0.175821215, acc=   1.000000000


159it [00:41,  3.85it/s]

P(y|x) 114:80300 loss=   0.230989039, acc=   0.984375000


259it [01:07,  3.86it/s]

P(y|x) 114:80400 loss=   0.249597520, acc=   1.000000000


359it [01:33,  3.86it/s]

P(y|x) 114:80500 loss=   0.191041857, acc=   0.968750000


459it [01:59,  3.86it/s]

P(y|x) 114:80600 loss=   0.186243743, acc=   0.984375000


559it [02:25,  3.86it/s]

P(y|x) 114:80700 loss=   0.382034391, acc=   0.953125000


659it [02:51,  3.86it/s]

P(y|x) 114:80800 loss=   0.350222766, acc=   0.906250000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 114: Valid Loss 1.1939892768859863, Valid Acc 0.6904000043869019


56it [00:15,  3.85it/s]

P(y|x) 115:80900 loss=   0.183589995, acc=   0.984375000


156it [00:41,  3.85it/s]

P(y|x) 115:81000 loss=   0.238469228, acc=   0.968750000


256it [01:06,  3.86it/s]

P(y|x) 115:81100 loss=   0.255069852, acc=   0.953125000


356it [01:32,  3.85it/s]

P(y|x) 115:81200 loss=   0.268516034, acc=   0.953125000


456it [01:58,  3.84it/s]

P(y|x) 115:81300 loss=   0.355733633, acc=   0.953125000


556it [02:24,  3.85it/s]

P(y|x) 115:81400 loss=   0.376963764, acc=   0.875000000


656it [02:50,  3.85it/s]

P(y|x) 115:81500 loss=   0.221136287, acc=   0.984375000


703it [03:03,  3.84it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 115: Valid Loss 1.181304931640625, Valid Acc 0.6926000118255615


53it [00:14,  3.86it/s]

P(y|x) 116:81600 loss=   0.233186632, acc=   0.984375000


153it [00:40,  3.85it/s]

P(y|x) 116:81700 loss=   0.250342488, acc=   0.953125000


253it [01:06,  3.84it/s]

P(y|x) 116:81800 loss=   0.241180599, acc=   0.968750000


353it [01:31,  3.85it/s]

P(y|x) 116:81900 loss=   0.351887405, acc=   0.937500000


453it [01:57,  3.85it/s]

P(y|x) 116:82000 loss=   0.252535880, acc=   0.921875000


553it [02:23,  3.85it/s]

P(y|x) 116:82100 loss=   0.345820785, acc=   0.937500000


653it [02:49,  3.86it/s]

P(y|x) 116:82200 loss=   0.304333359, acc=   0.937500000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 116: Valid Loss 1.156938076019287, Valid Acc 0.698199987411499
Best Valid!: 0.698199987411499


50it [00:13,  3.86it/s]

P(y|x) 117:82300 loss=   0.325419486, acc=   0.937500000


150it [00:39,  3.85it/s]

P(y|x) 117:82400 loss=   0.262482136, acc=   0.968750000


250it [01:05,  3.86it/s]

P(y|x) 117:82500 loss=   0.272668958, acc=   0.968750000


350it [01:31,  3.86it/s]

P(y|x) 117:82600 loss=   0.491196722, acc=   0.890625000


450it [01:57,  3.85it/s]

P(y|x) 117:82700 loss=   0.289335698, acc=   0.937500000


550it [02:23,  3.86it/s]

P(y|x) 117:82800 loss=   0.286870003, acc=   0.953125000


650it [02:48,  3.85it/s]

P(y|x) 117:82900 loss=   0.226379424, acc=   0.984375000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 117: Valid Loss 1.242269515991211, Valid Acc 0.6771000027656555


47it [00:12,  3.86it/s]

P(y|x) 118:83000 loss=   0.237097487, acc=   0.968750000


147it [00:38,  3.86it/s]

P(y|x) 118:83100 loss=   0.260374129, acc=   0.968750000


247it [01:04,  3.86it/s]

P(y|x) 118:83200 loss=   0.201166987, acc=   0.968750000


347it [01:30,  3.87it/s]

P(y|x) 118:83300 loss=   0.262352288, acc=   0.953125000


447it [01:56,  3.86it/s]

P(y|x) 118:83400 loss=   0.271003157, acc=   0.984375000


547it [02:22,  3.85it/s]

P(y|x) 118:83500 loss=   0.198974177, acc=   0.968750000


647it [02:48,  3.87it/s]

P(y|x) 118:83600 loss=   0.244367585, acc=   0.968750000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 118: Valid Loss 1.2462115287780762, Valid Acc 0.6820999979972839


44it [00:11,  3.86it/s]

P(y|x) 119:83700 loss=   0.359615266, acc=   0.890625000


144it [00:37,  3.86it/s]

P(y|x) 119:83800 loss=   0.283191562, acc=   0.906250000


244it [01:03,  3.86it/s]

P(y|x) 119:83900 loss=   0.247843459, acc=   0.937500000


344it [01:29,  3.85it/s]

P(y|x) 119:84000 loss=   0.227950990, acc=   0.984375000


444it [01:55,  3.86it/s]

P(y|x) 119:84100 loss=   0.236035377, acc=   0.984375000


544it [02:21,  3.85it/s]

P(y|x) 119:84200 loss=   0.248284519, acc=   0.906250000


644it [02:47,  3.86it/s]

P(y|x) 119:84300 loss=   0.228340164, acc=   0.968750000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 119: Valid Loss 1.2498176097869873, Valid Acc 0.6783999800682068


41it [00:11,  3.86it/s]

P(y|x) 120:84400 loss=   0.361082345, acc=   0.921875000


141it [00:36,  3.86it/s]

P(y|x) 120:84500 loss=   0.231310636, acc=   0.968750000


241it [01:02,  3.86it/s]

P(y|x) 120:84600 loss=   0.283569366, acc=   0.953125000


341it [01:28,  3.86it/s]

P(y|x) 120:84700 loss=   0.149672374, acc=   1.000000000


441it [01:54,  3.86it/s]

P(y|x) 120:84800 loss=   0.203217030, acc=   0.984375000


541it [02:20,  3.86it/s]

P(y|x) 120:84900 loss=   0.259893805, acc=   0.921875000


641it [02:46,  3.85it/s]

P(y|x) 120:85000 loss=   0.263628721, acc=   0.953125000


703it [03:02,  3.84it/s]


ev:  test
ev:  train
ev:  valid
valid: Epoch 120: Valid Loss 1.1641578674316406, Valid Acc 0.6930000185966492


38it [00:10,  3.85it/s]

P(y|x) 121:85100 loss=   0.168945134, acc=   0.984375000


138it [00:36,  3.84it/s]

P(y|x) 121:85200 loss=   0.277245641, acc=   0.953125000


238it [01:02,  3.86it/s]

P(y|x) 121:85300 loss=   0.248055995, acc=   0.968750000


338it [01:28,  3.86it/s]

P(y|x) 121:85400 loss=   0.311484247, acc=   0.921875000


438it [01:53,  3.86it/s]

P(y|x) 121:85500 loss=   0.307988524, acc=   0.937500000


538it [02:19,  3.86it/s]

P(y|x) 121:85600 loss=   0.351920724, acc=   0.906250000


638it [02:45,  3.86it/s]

P(y|x) 121:85700 loss=   0.310114264, acc=   0.968750000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 121: Valid Loss 1.2098217010498047, Valid Acc 0.6823999881744385


35it [00:09,  3.86it/s]

P(y|x) 122:85800 loss=   0.274688303, acc=   0.937500000


135it [00:35,  3.87it/s]

P(y|x) 122:85900 loss=   0.171455473, acc=   1.000000000


235it [01:01,  3.86it/s]

P(y|x) 122:86000 loss=   0.263652802, acc=   0.953125000


335it [01:27,  3.87it/s]

P(y|x) 122:86100 loss=   0.359318137, acc=   0.906250000


435it [01:52,  3.87it/s]

P(y|x) 122:86200 loss=   0.266499043, acc=   0.984375000


535it [02:18,  3.86it/s]

P(y|x) 122:86300 loss=   0.264703780, acc=   0.968750000


635it [02:44,  3.86it/s]

P(y|x) 122:86400 loss=   0.261185706, acc=   0.968750000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 122: Valid Loss 1.1952834129333496, Valid Acc 0.6934999823570251


32it [00:08,  3.86it/s]

P(y|x) 123:86500 loss=   0.365149498, acc=   0.968750000


132it [00:34,  3.85it/s]

P(y|x) 123:86600 loss=   0.316982418, acc=   0.890625000


232it [01:00,  3.85it/s]

P(y|x) 123:86700 loss=   0.254211396, acc=   0.968750000


332it [01:26,  3.84it/s]

P(y|x) 123:86800 loss=   0.288563967, acc=   0.968750000


432it [01:52,  3.85it/s]

P(y|x) 123:86900 loss=   0.234171107, acc=   0.953125000


532it [02:18,  3.85it/s]

P(y|x) 123:87000 loss=   0.301578820, acc=   0.968750000


632it [02:44,  3.86it/s]

P(y|x) 123:87100 loss=   0.229421407, acc=   0.968750000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 123: Valid Loss 1.1716545820236206, Valid Acc 0.6940000057220459


29it [00:07,  3.86it/s]

P(y|x) 124:87200 loss=   0.285256892, acc=   0.937500000


129it [00:33,  3.86it/s]

P(y|x) 124:87300 loss=   0.219427049, acc=   0.968750000


229it [00:59,  3.86it/s]

P(y|x) 124:87400 loss=   0.219369978, acc=   0.984375000


329it [01:25,  3.85it/s]

P(y|x) 124:87500 loss=   0.211331382, acc=   0.968750000


429it [01:51,  3.86it/s]

P(y|x) 124:87600 loss=   0.221151680, acc=   0.953125000


529it [02:17,  3.87it/s]

P(y|x) 124:87700 loss=   0.198303327, acc=   0.968750000


629it [02:43,  3.87it/s]

P(y|x) 124:87800 loss=   0.300749660, acc=   0.953125000


703it [03:02,  3.86it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 124: Valid Loss 1.2780407667160034, Valid Acc 0.6687999963760376


26it [00:07,  3.85it/s]

P(y|x) 125:87900 loss=   0.220003337, acc=   0.953125000


126it [00:33,  3.85it/s]

P(y|x) 125:88000 loss=   0.224668175, acc=   0.968750000


226it [00:58,  3.84it/s]

P(y|x) 125:88100 loss=   0.213844061, acc=   0.968750000


326it [01:24,  3.84it/s]

P(y|x) 125:88200 loss=   0.302596480, acc=   0.937500000


426it [01:50,  3.86it/s]

P(y|x) 125:88300 loss=   0.233701617, acc=   0.968750000


526it [02:16,  3.85it/s]

P(y|x) 125:88400 loss=   0.243940681, acc=   0.953125000


626it [02:42,  3.86it/s]

P(y|x) 125:88500 loss=   0.325024188, acc=   0.937500000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 125: Valid Loss 1.3420798778533936, Valid Acc 0.6626999974250793


23it [00:06,  3.84it/s]

P(y|x) 126:88600 loss=   0.462007344, acc=   0.906250000


123it [00:32,  3.85it/s]

P(y|x) 126:88700 loss=   0.253847450, acc=   0.968750000


223it [00:58,  3.84it/s]

P(y|x) 126:88800 loss=   0.304521769, acc=   0.921875000


323it [01:24,  3.85it/s]

P(y|x) 126:88900 loss=   0.236290008, acc=   0.968750000


423it [01:50,  3.85it/s]

P(y|x) 126:89000 loss=   0.235083789, acc=   0.968750000


523it [02:16,  3.85it/s]

P(y|x) 126:89100 loss=   0.222954050, acc=   0.953125000


623it [02:41,  3.85it/s]

P(y|x) 126:89200 loss=   0.322248459, acc=   0.953125000


703it [03:02,  3.84it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 126: Valid Loss 1.416581392288208, Valid Acc 0.6351000070571899


20it [00:05,  3.83it/s]

P(y|x) 127:89300 loss=   0.276754916, acc=   0.984375000


120it [00:31,  3.85it/s]

P(y|x) 127:89400 loss=   0.235499054, acc=   0.968750000


220it [00:57,  3.86it/s]

P(y|x) 127:89500 loss=   0.255846947, acc=   0.953125000


320it [01:23,  3.87it/s]

P(y|x) 127:89600 loss=   0.206436738, acc=   1.000000000


420it [01:49,  3.86it/s]

P(y|x) 127:89700 loss=   0.176981956, acc=   0.968750000


520it [02:15,  3.87it/s]

P(y|x) 127:89800 loss=   0.286870450, acc=   0.937500000


620it [02:40,  3.87it/s]

P(y|x) 127:89900 loss=   0.252524972, acc=   0.968750000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 127: Valid Loss 1.229722023010254, Valid Acc 0.6823999881744385


17it [00:04,  3.85it/s]

P(y|x) 128:90000 loss=   0.157663643, acc=   0.984375000


117it [00:30,  3.86it/s]

P(y|x) 128:90100 loss=   0.249491274, acc=   0.968750000


217it [00:56,  3.86it/s]

P(y|x) 128:90200 loss=   0.223990813, acc=   0.984375000


317it [01:22,  3.86it/s]

P(y|x) 128:90300 loss=   0.262619764, acc=   0.968750000


417it [01:48,  3.83it/s]

P(y|x) 128:90400 loss=   0.245505989, acc=   0.984375000


517it [02:14,  3.86it/s]

P(y|x) 128:90500 loss=   0.390483022, acc=   0.906250000


617it [02:40,  3.86it/s]

P(y|x) 128:90600 loss=   0.260756969, acc=   0.953125000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 128: Valid Loss 1.2889351844787598, Valid Acc 0.669700026512146


14it [00:04,  3.84it/s]

P(y|x) 129:90700 loss=   0.319212824, acc=   0.921875000


114it [00:29,  3.86it/s]

P(y|x) 129:90800 loss=   0.266128242, acc=   0.968750000


214it [00:55,  3.86it/s]

P(y|x) 129:90900 loss=   0.160938278, acc=   0.984375000


314it [01:21,  3.87it/s]

P(y|x) 129:91000 loss=   0.225350142, acc=   0.968750000


414it [01:47,  3.86it/s]

P(y|x) 129:91100 loss=   0.217713907, acc=   0.984375000


514it [02:13,  3.86it/s]

P(y|x) 129:91200 loss=   0.249922901, acc=   0.921875000


614it [02:39,  3.86it/s]

P(y|x) 129:91300 loss=   0.328967780, acc=   0.968750000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 129: Valid Loss 1.2334768772125244, Valid Acc 0.6818000078201294


11it [00:03,  3.83it/s]

P(y|x) 130:91400 loss=   0.215087563, acc=   0.968750000


111it [00:29,  3.86it/s]

P(y|x) 130:91500 loss=   0.207529843, acc=   0.968750000


211it [00:55,  3.85it/s]

P(y|x) 130:91600 loss=   0.223211899, acc=   0.968750000


311it [01:20,  3.85it/s]

P(y|x) 130:91700 loss=   0.197231516, acc=   0.984375000


411it [01:46,  3.87it/s]

P(y|x) 130:91800 loss=   0.173422918, acc=   0.984375000


511it [02:12,  3.86it/s]

P(y|x) 130:91900 loss=   0.217289880, acc=   0.968750000


611it [02:38,  3.85it/s]

P(y|x) 130:92000 loss=   0.233862460, acc=   0.984375000


703it [03:02,  3.85it/s]


ev:  test
ev:  train
ev:  valid
valid: Epoch 130: Valid Loss 1.216654658317566, Valid Acc 0.6866999864578247


8it [00:02,  3.72it/s]

P(y|x) 131:92100 loss=   0.222545832, acc=   0.953125000


108it [00:28,  3.85it/s]

P(y|x) 131:92200 loss=   0.196286470, acc=   0.953125000


208it [00:54,  3.86it/s]

P(y|x) 131:92300 loss=   0.203986317, acc=   0.984375000


308it [01:20,  3.85it/s]

P(y|x) 131:92400 loss=   0.255337805, acc=   0.968750000


408it [01:46,  3.87it/s]

P(y|x) 131:92500 loss=   0.237153143, acc=   0.953125000


508it [02:11,  3.87it/s]

P(y|x) 131:92600 loss=   0.262952179, acc=   0.953125000


608it [02:37,  3.87it/s]

P(y|x) 131:92700 loss=   0.219032139, acc=   0.953125000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 131: Valid Loss 1.2139816284179688, Valid Acc 0.6881999969482422


5it [00:01,  3.41it/s]

P(y|x) 132:92800 loss=   0.182500958, acc=   0.968750000


105it [00:27,  3.86it/s]

P(y|x) 132:92900 loss=   0.307349026, acc=   0.984375000


205it [00:53,  3.85it/s]

P(y|x) 132:93000 loss=   0.235908210, acc=   0.968750000


305it [01:19,  3.86it/s]

P(y|x) 132:93100 loss=   0.242398307, acc=   0.953125000


405it [01:45,  3.86it/s]

P(y|x) 132:93200 loss=   0.268961847, acc=   0.953125000


505it [02:11,  3.86it/s]

P(y|x) 132:93300 loss=   0.305751026, acc=   0.953125000


605it [02:37,  3.86it/s]

P(y|x) 132:93400 loss=   0.235882610, acc=   0.968750000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 132: Valid Loss 1.2169640064239502, Valid Acc 0.6872000098228455


2it [00:00,  2.36it/s]

P(y|x) 133:93500 loss=   0.236823753, acc=   0.953125000


102it [00:26,  3.84it/s]

P(y|x) 133:93600 loss=   0.179754660, acc=   0.984375000


202it [00:52,  3.85it/s]

P(y|x) 133:93700 loss=   0.286723733, acc=   0.937500000


302it [01:18,  3.86it/s]

P(y|x) 133:93800 loss=   0.302371651, acc=   0.937500000


402it [01:44,  3.87it/s]

P(y|x) 133:93900 loss=   0.278652012, acc=   0.921875000


502it [02:10,  3.86it/s]

P(y|x) 133:94000 loss=   0.238062724, acc=   0.953125000


602it [02:36,  3.88it/s]

P(y|x) 133:94100 loss=   0.293735415, acc=   0.937500000


702it [03:02,  3.88it/s]

P(y|x) 133:94200 loss=   0.220760256, acc=   0.953125000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 133: Valid Loss 1.210947871208191, Valid Acc 0.6884999871253967


99it [00:25,  3.87it/s]

P(y|x) 134:94300 loss=   0.197221026, acc=   0.968750000


199it [00:51,  3.85it/s]

P(y|x) 134:94400 loss=   0.293961883, acc=   0.921875000


299it [01:17,  3.86it/s]

P(y|x) 134:94500 loss=   0.234085560, acc=   0.953125000


399it [01:43,  3.85it/s]

P(y|x) 134:94600 loss=   0.324150026, acc=   0.921875000


499it [02:09,  3.85it/s]

P(y|x) 134:94700 loss=   0.150529280, acc=   1.000000000


599it [02:35,  3.86it/s]

P(y|x) 134:94800 loss=   0.190998197, acc=   0.968750000


699it [03:01,  3.86it/s]

P(y|x) 134:94900 loss=   0.273633212, acc=   0.953125000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 134: Valid Loss 1.2263542413711548, Valid Acc 0.6905999779701233


96it [00:25,  3.85it/s]

P(y|x) 135:95000 loss=   0.192845374, acc=   1.000000000


196it [00:51,  3.87it/s]

P(y|x) 135:95100 loss=   0.180384994, acc=   0.984375000


296it [01:17,  3.86it/s]

P(y|x) 135:95200 loss=   0.499784827, acc=   0.843750000


396it [01:42,  3.87it/s]

P(y|x) 135:95300 loss=   0.270053267, acc=   0.937500000


496it [02:08,  3.86it/s]

P(y|x) 135:95400 loss=   0.241354868, acc=   0.984375000


596it [02:34,  3.86it/s]

P(y|x) 135:95500 loss=   0.307393730, acc=   0.921875000


696it [03:00,  3.86it/s]

P(y|x) 135:95600 loss=   0.223662063, acc=   0.984375000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 135: Valid Loss 1.246396541595459, Valid Acc 0.6769999861717224


93it [00:24,  3.86it/s]

P(y|x) 136:95700 loss=   0.268932194, acc=   0.968750000


193it [00:50,  3.86it/s]

P(y|x) 136:95800 loss=   0.258179456, acc=   0.953125000


293it [01:16,  3.87it/s]

P(y|x) 136:95900 loss=   0.242436618, acc=   0.968750000


393it [01:42,  3.86it/s]

P(y|x) 136:96000 loss=   0.170739338, acc=   0.984375000


493it [02:08,  3.87it/s]

P(y|x) 136:96100 loss=   0.235664308, acc=   0.953125000


593it [02:33,  3.87it/s]

P(y|x) 136:96200 loss=   0.277612656, acc=   0.953125000


693it [02:59,  3.87it/s]

P(y|x) 136:96300 loss=   0.325028509, acc=   0.921875000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 136: Valid Loss 1.249295711517334, Valid Acc 0.6858000159263611


90it [00:23,  3.86it/s]

P(y|x) 137:96400 loss=   0.186645269, acc=   0.984375000


190it [00:49,  3.86it/s]

P(y|x) 137:96500 loss=   0.224942416, acc=   0.968750000


290it [01:15,  3.85it/s]

P(y|x) 137:96600 loss=   0.210206896, acc=   0.984375000


390it [01:41,  3.86it/s]

P(y|x) 137:96700 loss=   0.249952808, acc=   0.968750000


490it [02:07,  3.86it/s]

P(y|x) 137:96800 loss=   0.270148307, acc=   0.937500000


590it [02:33,  3.85it/s]

P(y|x) 137:96900 loss=   0.209207460, acc=   0.953125000


690it [02:59,  3.87it/s]

P(y|x) 137:97000 loss=   0.234563634, acc=   0.937500000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 137: Valid Loss 1.2066709995269775, Valid Acc 0.692300021648407


87it [00:22,  3.86it/s]

P(y|x) 138:97100 loss=   0.198368356, acc=   0.984375000


187it [00:48,  3.86it/s]

P(y|x) 138:97200 loss=   0.179595828, acc=   0.984375000


287it [01:14,  3.85it/s]

P(y|x) 138:97300 loss=   0.256945640, acc=   0.953125000


387it [01:40,  3.86it/s]

P(y|x) 138:97400 loss=   0.135029793, acc=   0.984375000


487it [02:06,  3.86it/s]

P(y|x) 138:97500 loss=   0.150305450, acc=   1.000000000


587it [02:32,  3.86it/s]

P(y|x) 138:97600 loss=   0.265735060, acc=   0.984375000


687it [02:58,  3.87it/s]

P(y|x) 138:97700 loss=   0.301303029, acc=   0.953125000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 138: Valid Loss 1.263466477394104, Valid Acc 0.6751000285148621


84it [00:22,  3.84it/s]

P(y|x) 139:97800 loss=   0.241264716, acc=   0.968750000


184it [00:48,  3.86it/s]

P(y|x) 139:97900 loss=   0.207120702, acc=   0.953125000


284it [01:14,  3.86it/s]

P(y|x) 139:98000 loss=   0.253136545, acc=   0.953125000


384it [01:39,  3.86it/s]

P(y|x) 139:98100 loss=   0.143156305, acc=   0.984375000


484it [02:05,  3.85it/s]

P(y|x) 139:98200 loss=   0.162247717, acc=   0.968750000


584it [02:31,  3.86it/s]

P(y|x) 139:98300 loss=   0.277460754, acc=   0.953125000


684it [02:57,  3.86it/s]

P(y|x) 139:98400 loss=   0.263182819, acc=   0.906250000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 139: Valid Loss 1.1794360876083374, Valid Acc 0.6937999725341797


81it [00:21,  3.86it/s]

P(y|x) 140:98500 loss=   0.193715602, acc=   0.984375000


181it [00:47,  3.84it/s]

P(y|x) 140:98600 loss=   0.219856277, acc=   0.953125000


281it [01:13,  3.86it/s]

P(y|x) 140:98700 loss=   0.286097914, acc=   0.953125000


381it [01:39,  3.86it/s]

P(y|x) 140:98800 loss=   0.264564574, acc=   0.968750000


480it [02:04,  3.87it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

78it [00:20,  3.93it/s]

P(y|x) 141:99200 loss=   0.137776792, acc=   1.000000000


178it [00:45,  3.92it/s]

P(y|x) 141:99300 loss=   0.236034930, acc=   0.984375000


278it [01:11,  3.91it/s]

P(y|x) 141:99400 loss=   0.165303484, acc=   0.984375000


378it [01:36,  3.89it/s]

P(y|x) 141:99500 loss=   0.288930058, acc=   0.968750000


478it [02:02,  3.89it/s]

P(y|x) 141:99600 loss=   0.383028418, acc=   0.906250000


578it [02:28,  3.88it/s]

P(y|x) 141:99700 loss=   0.241616175, acc=   0.953125000


678it [02:54,  3.87it/s]

P(y|x) 141:99800 loss=   0.158921465, acc=   0.984375000


703it [03:00,  3.89it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 141: Valid Loss 1.180078148841858, Valid Acc 0.6930999755859375


75it [00:19,  3.88it/s]

P(y|x) 142:99900 loss=   0.264173836, acc=   0.937500000


175it [00:45,  3.87it/s]

P(y|x) 142:100000 loss=   0.163841918, acc=   0.984375000


275it [01:11,  3.87it/s]

P(y|x) 142:100100 loss=   0.198771447, acc=   0.953125000


375it [01:37,  3.86it/s]

P(y|x) 142:100200 loss=   0.139695376, acc=   1.000000000


475it [02:03,  3.86it/s]

P(y|x) 142:100300 loss=   0.181187257, acc=   0.984375000


575it [02:29,  3.85it/s]

P(y|x) 142:100400 loss=   0.278378218, acc=   0.968750000


675it [02:54,  3.86it/s]

P(y|x) 142:100500 loss=   0.263117582, acc=   0.953125000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 142: Valid Loss 1.240844488143921, Valid Acc 0.6830999851226807


72it [00:19,  3.83it/s]

P(y|x) 143:100600 loss=   0.166135252, acc=   0.984375000


172it [00:45,  3.85it/s]

P(y|x) 143:100700 loss=   0.191655800, acc=   0.953125000


272it [01:11,  3.86it/s]

P(y|x) 143:100800 loss=   0.250483096, acc=   0.968750000


372it [01:36,  3.86it/s]

P(y|x) 143:100900 loss=   0.110678248, acc=   0.984375000


472it [02:02,  3.87it/s]

P(y|x) 143:101000 loss=   0.275169969, acc=   0.937500000


572it [02:28,  3.87it/s]

P(y|x) 143:101100 loss=   0.162439838, acc=   1.000000000


672it [02:54,  3.87it/s]

P(y|x) 143:101200 loss=   0.179046199, acc=   0.968750000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 143: Valid Loss 1.198093295097351, Valid Acc 0.6962000131607056


69it [00:18,  3.85it/s]

P(y|x) 144:101300 loss=   0.213925302, acc=   0.953125000


169it [00:44,  3.85it/s]

P(y|x) 144:101400 loss=   0.245969713, acc=   0.968750000


269it [01:10,  3.86it/s]

P(y|x) 144:101500 loss=   0.239191234, acc=   0.937500000


369it [01:36,  3.87it/s]

P(y|x) 144:101600 loss=   0.208716795, acc=   0.968750000


469it [02:01,  3.87it/s]

P(y|x) 144:101700 loss=   0.642289877, acc=   0.859375000


569it [02:27,  3.87it/s]

P(y|x) 144:101800 loss=   0.289018244, acc=   0.953125000


669it [02:53,  3.86it/s]

P(y|x) 144:101900 loss=   0.219135433, acc=   0.968750000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 144: Valid Loss 1.1832245588302612, Valid Acc 0.6948000192642212


66it [00:17,  3.85it/s]

P(y|x) 145:102000 loss=   0.220892295, acc=   0.953125000


166it [00:43,  3.86it/s]

P(y|x) 145:102100 loss=   0.131647095, acc=   1.000000000


266it [01:09,  3.86it/s]

P(y|x) 145:102200 loss=   0.134172767, acc=   0.984375000


366it [01:35,  3.87it/s]

P(y|x) 145:102300 loss=   0.123647973, acc=   0.984375000


466it [02:01,  3.86it/s]

P(y|x) 145:102400 loss=   0.122533306, acc=   1.000000000


566it [02:26,  3.86it/s]

P(y|x) 145:102500 loss=   0.369029641, acc=   0.921875000


666it [02:52,  3.87it/s]

P(y|x) 145:102600 loss=   0.186812952, acc=   0.968750000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 145: Valid Loss 1.1711223125457764, Valid Acc 0.6955999732017517


63it [00:16,  3.86it/s]

P(y|x) 146:102700 loss=   0.261308551, acc=   0.953125000


163it [00:42,  3.86it/s]

P(y|x) 146:102800 loss=   0.264002651, acc=   0.984375000


263it [01:08,  3.86it/s]

P(y|x) 146:102900 loss=   0.202478230, acc=   0.968750000


363it [01:34,  3.84it/s]

P(y|x) 146:103000 loss=   0.142626241, acc=   0.984375000


463it [02:00,  3.85it/s]

P(y|x) 146:103100 loss=   0.134391874, acc=   1.000000000


563it [02:26,  3.87it/s]

P(y|x) 146:103200 loss=   0.156378001, acc=   0.984375000


663it [02:52,  3.87it/s]

P(y|x) 146:103300 loss=   0.208151817, acc=   0.953125000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 146: Valid Loss 1.2542349100112915, Valid Acc 0.6765000224113464


60it [00:15,  3.86it/s]

P(y|x) 147:103400 loss=   0.254272223, acc=   0.937500000


160it [00:41,  3.87it/s]

P(y|x) 147:103500 loss=   0.314646125, acc=   0.953125000


260it [01:07,  3.87it/s]

P(y|x) 147:103600 loss=   0.252699852, acc=   0.968750000


360it [01:33,  3.87it/s]

P(y|x) 147:103700 loss=   0.118022025, acc=   1.000000000


460it [01:59,  3.88it/s]

P(y|x) 147:103800 loss=   0.090473577, acc=   1.000000000


560it [02:25,  3.86it/s]

P(y|x) 147:103900 loss=   0.331432402, acc=   0.953125000


660it [02:50,  3.87it/s]

P(y|x) 147:104000 loss=   0.169412524, acc=   1.000000000


703it [03:02,  3.86it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 147: Valid Loss 1.194793701171875, Valid Acc 0.6891999840736389


57it [00:15,  3.87it/s]

P(y|x) 148:104100 loss=   0.284904689, acc=   0.953125000


157it [00:40,  3.87it/s]

P(y|x) 148:104200 loss=   0.193758354, acc=   0.953125000


257it [01:06,  3.87it/s]

P(y|x) 148:104300 loss=   0.153410673, acc=   1.000000000


357it [01:32,  3.87it/s]

P(y|x) 148:104400 loss=   0.159325987, acc=   0.984375000


457it [01:58,  3.87it/s]

P(y|x) 148:104500 loss=   0.163751900, acc=   1.000000000


557it [02:24,  3.86it/s]

P(y|x) 148:104600 loss=   0.292095751, acc=   0.953125000


657it [02:50,  3.86it/s]

P(y|x) 148:104700 loss=   0.262178600, acc=   0.953125000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 148: Valid Loss 1.2738741636276245, Valid Acc 0.6761000156402588


54it [00:14,  3.85it/s]

P(y|x) 149:104800 loss=   0.203762323, acc=   0.968750000


154it [00:40,  3.86it/s]

P(y|x) 149:104900 loss=   0.230144486, acc=   0.953125000


254it [01:06,  3.86it/s]

P(y|x) 149:105000 loss=   0.126060158, acc=   0.984375000


354it [01:32,  3.86it/s]

P(y|x) 149:105100 loss=   0.335095048, acc=   0.906250000


454it [01:58,  3.87it/s]

P(y|x) 149:105200 loss=   0.234558403, acc=   0.953125000


554it [02:24,  3.86it/s]

P(y|x) 149:105300 loss=   0.207388088, acc=   0.968750000


654it [02:49,  3.86it/s]

P(y|x) 149:105400 loss=   0.207312077, acc=   0.984375000


703it [03:02,  3.85it/s]

ev:  test





ev:  train
ev:  valid
valid: Epoch 149: Valid Loss 1.2247169017791748, Valid Acc 0.6917999982833862
