In [13]:
# imports
import utils # from The Google Research Authors
import torch as t, torch.nn as nn, torch.nn.functional as tnnF, torch.distributions as tdist
from torch.utils.data import DataLoader, Dataset
import torchvision as tv, torchvision.transforms as tr
import os
import sys
import argparse
#import ipdb
import numpy as np
import wideresnet # from The Google Research Authors
import json

In [14]:
# Sampling
#from tqdm import tqdm
# Global run configuration shared by the cells below.

# Side length, in pixels, of the square RGB input images (32x32).
im_sz = 32

# Seed that main() hands to torch's CPU and CUDA random generators.
seed = 1

# Turn on cuDNN together with its benchmark mode.
t.backends.cudnn.enabled = True
t.backends.cudnn.benchmark = True

In [15]:
# get random subset of data
class DataSubset(Dataset):
    """Expose only selected positions of another dataset.

    Args:
        base_dataset: Indexable dataset that also supports ``len()``.
        inds: Positions of ``base_dataset`` to expose, in the order they should
            appear. When ``None``, ``size`` distinct positions are drawn with
            ``np.random.choice`` (global NumPy random state).
        size: Number of positions to draw when ``inds`` is ``None``.
            NOTE(review): the default of -1 looks like a placeholder and is
            presumably rejected by ``np.random.choice`` -- pass it explicitly.
    """

    def __init__(self, base_dataset, inds=None, size=-1):
        self.base_dataset = base_dataset
        if inds is not None:
            self.inds = inds
        else:
            # Sample without replacement from every valid position.
            population = list(range(len(base_dataset)))
            self.inds = np.random.choice(population, size, replace=False)

    def __len__(self):
        """Number of exposed items."""
        return len(self.inds)

    def __getitem__(self, index):
        """Return the base-dataset item stored at the ``index``-th selected position."""
        return self.base_dataset[self.inds[index]]

In [16]:
# setup Wide_ResNet
# Uses The Google Research Authors, file wideresnet.py
class F(nn.Module):
    """Wide-ResNet backbone with two linear heads on its output features.

    Args:
        depth: Depth passed to ``wideresnet.Wide_ResNet``.
        width: Widen factor passed to ``wideresnet.Wide_ResNet``.
        norm: Normalisation option forwarded to the backbone.
        dropout_rate: Dropout rate forwarded to the backbone.
        n_classes: Output size of the classification head.

    Attributes:
        f: The backbone network.
        energy_output: Linear head mapping backbone features to one value
            (not used by ``classify``).
        class_output: Linear head mapping backbone features to ``n_classes`` logits.
    """

    def __init__(self, depth=28, width=2, norm=None, dropout_rate=0.0, n_classes=10):
        super(F, self).__init__()
        self.f = wideresnet.Wide_ResNet(depth, width, norm=norm, dropout_rate=dropout_rate)
        self.energy_output = nn.Linear(self.f.last_dim, 1)
        self.class_output = nn.Linear(self.f.last_dim, n_classes)

    def classify(self, x):
        """Return the class logits for the batch ``x``.

        The logits are returned exactly as produced by the linear head.
        Earlier versions applied a dimension-less ``squeeze()``, which also
        removed the batch dimension for a batch of one and made
        ``logits.max(1)`` / ``CrossEntropyLoss`` in the callers fail.
        Assumes the backbone returns ``(batch, last_dim)`` features, so the
        result is ``(batch, n_classes)`` -- TODO confirm against wideresnet.py.
        """
        penult_z = self.f(x)
        return self.class_output(penult_z)

In [17]:
# various utilities
def cycle(loader):
    """Yield the items of ``loader`` forever, restarting it whenever it is exhausted."""
    while True:
        yield from loader

In [18]:
# Load in chosen dataset cifar10
def get_data(args):
    """Build the CIFAR-10 loaders used for training and evaluation.

    Reads ``args.sigma``, ``args.data_root``, ``args.n_valid``,
    ``args.labels_per_class``, ``args.n_classes`` and ``args.batch_size``.

    The official training split is shuffled with a fixed NumPy seed (1234);
    the first ``args.n_valid`` shuffled indices become the validation set
    (none when ``args.n_valid`` is ``None``) and the rest the training set.
    When ``args.labels_per_class > 0`` the labeled loader only sees that many
    examples of each class; otherwise it sees the whole training set.

    Side effect: reseeds the global NumPy random state.

    Returns:
        ``(dload_train, dload_train_labeled, dload_valid, dload_test)``.
        ``dload_train_labeled`` is wrapped in ``cycle`` and therefore an
        endless generator; the other three are ``DataLoader`` objects.
    """
    def add_noise(x):
        # Gaussian noise with standard deviation args.sigma.
        return x + args.sigma * t.randn_like(x)

    transform_train = tr.Compose(
        [tr.Pad(4, padding_mode="reflect"),
         tr.RandomCrop(im_sz),
         tr.RandomHorizontalFlip(),
         tr.ToTensor(),
         tr.Normalize((.5, .5, .5), (.5, .5, .5)),
         add_noise]
    )
    transform_test = tr.Compose(
        [tr.ToTensor(),
         tr.Normalize((.5, .5, .5), (.5, .5, .5)),
         add_noise]
    )

    def dataset_fn(train, transform):
        return tv.datasets.CIFAR10(root=args.data_root, transform=transform, download=True, train=train)

    # One augmented view of the training split, shared by both training subsets.
    full_train = dataset_fn(True, transform_train)

    # Fixed-seed shuffle so the train/validation split is identical across runs.
    all_inds = list(range(len(full_train)))
    np.random.seed(1234)
    np.random.shuffle(all_inds)

    # Separate out the validation set.
    if args.n_valid is not None:
        valid_inds, train_inds = all_inds[:args.n_valid], all_inds[args.n_valid:]
    else:
        valid_inds, train_inds = [], all_inds
    train_inds = np.array(train_inds, dtype=int)

    if args.labels_per_class > 0:
        # Prefer the dataset's label list: indexing the dataset would decode
        # and augment every training image just to read its label.
        targets = getattr(full_train, "targets", None)
        if targets is not None:
            train_labels = np.asarray(targets)[train_inds]
        else:
            train_labels = np.array([full_train[ind][1] for ind in train_inds])

        train_labeled_inds = []
        for i in range(args.n_classes):
            class_inds = train_inds[train_labels == i]
            train_labeled_inds.extend(class_inds[:args.labels_per_class])
    else:
        train_labeled_inds = train_inds

    dset_train = DataSubset(full_train, inds=train_inds)
    dset_train_labeled = DataSubset(full_train, inds=train_labeled_inds)
    # Validation images come from the training split but use the test transform.
    dset_valid = DataSubset(dataset_fn(True, transform_test), inds=valid_inds)
    dset_test = dataset_fn(False, transform_test)

    dload_train = DataLoader(dset_train, batch_size=args.batch_size, shuffle=True, num_workers=4, drop_last=True)
    dload_train_labeled = DataLoader(dset_train_labeled, batch_size=args.batch_size, shuffle=True, num_workers=4, drop_last=True)
    # The labeled loader is drawn from with next(), so make it endless.
    dload_train_labeled = cycle(dload_train_labeled)
    dload_valid = DataLoader(dset_valid, batch_size=100, shuffle=False, num_workers=4, drop_last=False)
    dload_test = DataLoader(dset_test, batch_size=100, shuffle=False, num_workers=4, drop_last=False)
    return dload_train, dload_train_labeled, dload_valid,dload_test

In [19]:
# calculate loss and accuracy for periodic printout
def eval_classification(f, dload, device):
    """Compute mean accuracy and mean cross-entropy loss of ``f`` over ``dload``.

    Args:
        f: Model exposing ``classify(x)`` that returns per-class logits.
        dload: Iterable of ``(inputs, labels)`` batches.
        device: Device the batches are moved to before classification.

    Returns:
        ``(accuracy, loss)``, each averaged over every example seen.

    Gradients are disabled inside the function, so it can be called without
    an outer ``no_grad`` block. The model's train/eval mode is not changed
    here; that remains the caller's responsibility.
    """
    # reduction='none' keeps one loss per example (replaces deprecated reduce=False).
    criterion = nn.CrossEntropyLoss(reduction='none')
    corrects, losses = [], []
    with t.no_grad():
        for x_p_d, y_p_d in dload:
            x_p_d, y_p_d = x_p_d.to(device), y_p_d.to(device)
            logits = f.classify(x_p_d)
            loss = criterion(logits, y_p_d).cpu().numpy()
            losses.extend(loss)
            correct = (logits.max(1)[1] == y_p_d).float().cpu().numpy()
            corrects.extend(correct)
    loss = np.mean(losses)
    correct = np.mean(corrects)
    return correct, loss

In [20]:
# save checkpoint data
def checkpoint(f, opt, epoch_no, tag, args, device):
    """Save model and optimizer state to ``args.save_dir``.

    The same dictionary is written twice: once as ``tag`` and once as
    ``most_recent.pt``.

    Args:
        f: Model to save; moved to the CPU for saving and back afterwards.
        opt: Optimizer whose ``state_dict()`` is stored.
        epoch_no: Epoch number stored under the ``'epoch'`` key.
        tag: File name of the checkpoint inside ``args.save_dir``.
        args: Object providing ``save_dir``.
        device: Device the model is moved back to once saving is done.

    Raises:
        Whatever ``t.save`` raises; the model is still moved back to
        ``device`` in that case so training state is not left on the CPU.
    """
    f.cpu()
    try:
        ckpt_dict = {
            "model_state_dict": f.state_dict(),
            'optimizer_state_dict': opt.state_dict(),
            'epoch': epoch_no,
        }
        t.save(ckpt_dict, os.path.join(args.save_dir, tag))
        t.save(ckpt_dict, os.path.join(args.save_dir, "most_recent.pt"))
    finally:
        # Always restore the model's device, even if saving failed.
        f.to(device)

In [21]:
#Track loss for convergence
def var_tracker(filename,save_dir,epoch,loss,correct):
    """Append one ``epoch,loss,accuracy`` row to a CSV file in ``save_dir``.

    A header row ``Epoch,Loss,Acc`` is written first when the file does not
    exist yet.

    Args:
        filename: Name of the CSV file.
        save_dir: Directory containing the CSV file. This argument is now
            honoured; previously the global ``args.save_dir`` was used instead.
        epoch: Value for the ``Epoch`` column.
        loss: Value for the ``Loss`` column.
        correct: Value for the ``Acc`` column.
    """
    path = os.path.join(save_dir, filename)
    needs_header = not os.path.isfile(path)
    # Append mode creates the file when it is missing.
    with open(path, 'a') as f:
        if needs_header:
            f.write("Epoch,Loss,Acc\n")
        f.write("{},{},{}\n".format(epoch,loss,correct))

In [22]:
# main function for training
# Uses args from class below
def _train(args):
    """Run the supervised training loop configured by ``args`` (helper for ``main``)."""
    t.manual_seed(seed)
    if t.cuda.is_available():
        t.cuda.manual_seed_all(seed)

    # datasets
    dload_train, dload_train_labeled, dload_valid, dload_test = get_data(args)

    device = t.device('cuda' if t.cuda.is_available() else 'cpu')

    # setup Wide_ResNet and push it to the device
    f = F(args.depth, args.width, args.norm, dropout_rate=args.dropout_rate, n_classes=args.n_classes)
    f = f.to(device)

    # optimizer: only the classification head when clf_only is set
    params = f.class_output.parameters() if args.clf_only else f.parameters()
    optim = t.optim.Adam(params, lr=args.lr, betas=[.9, .999], weight_decay=args.weight_decay)

    epoch_start = 0

    # load checkpoint?
    if args.load_path:
        print(f"loading model from {args.load_path}")
        # map_location lets a checkpoint written on a GPU machine load on a CPU-only one.
        ckpt_dict = t.load(args.load_path, map_location=device)
        f.load_state_dict(ckpt_dict["model_state_dict"])
        optim.load_state_dict(ckpt_dict['optimizer_state_dict'])
        # NOTE(review): this resumes at the stored epoch number itself, so that
        # epoch number is used again -- confirm this is intended.
        epoch_start = ckpt_dict['epoch']

    # Show train set loss/accuracy before training (or after reload)
    f.eval()
    with t.no_grad():
        correct, loss = eval_classification(f, dload_train, device)
        print("Epoch {}: Train Loss {}, Train Acc {}".format(epoch_start, loss, correct))
    f.train()

    cur_iter = 0

    # loop over epochs
    for epoch in range(epoch_start, epoch_start + args.n_epochs):
        # dload_train only sets the number of iterations per epoch; the loss
        # below is computed on batches drawn from the labeled loader.
        for _unlabeled_batch in dload_train:
            x_lab, y_lab = next(dload_train_labeled)
            x_lab, y_lab = x_lab.to(device), y_lab.to(device)

            # normal cross entropy loss function
            # maximize log p(y | x)
            logits = f.classify(x_lab)
            l_p_y_given_x = nn.CrossEntropyLoss()(logits, y_lab)
            if cur_iter % args.print_every == 0:
                acc = (logits.max(1)[1] == y_lab).float().mean()
                print('P(y|x) {}:{:>d} loss={:>14.9f}, acc={:>14.9f}'.format(epoch,
                                                                             cur_iter,
                                                                             l_p_y_given_x.item(),
                                                                             acc.item()))
            L = l_p_y_given_x

            # Abort if the loss diverged. "not <=" is also true for NaN,
            # which a plain "> 1e8" comparison would let through.
            if not L.abs().item() <= 1e8:
                print("Divergwence error")
                # Same exception type the former `1/0` abort produced.
                raise ZeroDivisionError("training diverged: loss magnitude is NaN or above 1e8")

            # Optimize network using our loss function L
            optim.zero_grad()
            L.backward()
            optim.step()
            cur_iter += 1

        # do checkpointing
        if epoch % args.ckpt_every == 0:
            checkpoint(f, optim, epoch, f'ckpt_{epoch}.pt', args, device)

        # Print performance assessment
        if epoch % args.eval_every == 0:
            f.eval()
            with t.no_grad():
                # train set
                correct, loss = eval_classification(f, dload_train, device)
                print("Epoch {}: Train Loss {}, Train Acc {}".format(epoch, loss, correct))
                var_tracker('train.csv',args.save_dir,epoch,loss,correct)

                # test set
                correct, loss = eval_classification(f, dload_test, device)
                print("Epoch {}: Test Loss {}, Test Acc {}".format(epoch, loss, correct))
                var_tracker('test.csv',args.save_dir,epoch,loss,correct)

                # validation set
                correct, loss = eval_classification(f, dload_valid, device)
                print("Epoch {}: Valid Loss {}, Valid Acc {}".format(epoch, loss, correct))
                var_tracker('valid.csv',args.save_dir,epoch,loss,correct)

            f.train()

        # do "last" checkpoint
        checkpoint(f, optim, epoch, "last_ckpt.pt", args, device)


def main(args):
    """Train the classifier described by ``args`` and log its progress.

    Creates ``args.save_dir``, writes the run parameters to ``params.txt``
    there, then trains for ``args.n_epochs`` epochs. Checkpoints are written
    every ``args.ckpt_every`` epochs (plus ``last_ckpt.pt`` after every
    epoch) and train/test/validation metrics are printed and appended to CSV
    files every ``args.eval_every`` epochs.

    When ``args.print_to_log`` is true, standard output is redirected to
    ``log.txt`` in ``args.save_dir`` for the duration of the run; the
    previous ``sys.stdout`` is restored and the log file closed afterwards,
    even if training raises.

    Args:
        args: Configuration object such as ``train_args``.

    Raises:
        ZeroDivisionError: If the training loss becomes NaN or its magnitude
            exceeds 1e8.
    """
    utils.makedirs(args.save_dir)
    with open(f'{args.save_dir}/params.txt', 'w') as params_file:
        json.dump(args.__dict__, params_file)

    if not args.print_to_log:
        _train(args)
        return

    previous_stdout = sys.stdout
    with open(f'{args.save_dir}/log.txt', 'w') as log_file:
        sys.stdout = log_file
        try:
            _train(args)
        finally:
            sys.stdout = previous_stdout

In [23]:
# Setup parameters
# defaults for paper
# --lr .0001 --dataset cifar10 --optimizer adam --p_x_weight 1.0 --p_y_given_x_weight 1.0 
# --p_x_y_weight 0.0 --sigma .03 --width 10 --depth 28 --save_dir /YOUR/SAVE/DIR 
# --plot_uncond --warmup_iters 1000
#
class train_args():
    """Attribute container holding the training configuration.

    Every default below becomes an instance attribute; entries of
    ``param_dict`` are then applied on top, overriding defaults or adding
    new attributes.

    Args:
        param_dict: Mapping of attribute name to value.
    """

    def __init__(self, param_dict):
        defaults = {
            "dataset": "cifar10",
            "n_classes": 10,
            "width": 10,             # wide-resnet widen_factor
            "depth": 28,             # wide-resnet depth
            "sigma": 3e-2,           # stddev of the gaussian noise added to inputs
            "data_root": "../data",
            # optimization
            "lr": 1e-4,
            "clf_only": False,       # if set, only the classifier head is trained
            "labels_per_class": -1,  # labeled examples per class; values <= 0 use all labels
            "batch_size": 64,
            "n_epochs": 200,
            # regularization
            "dropout_rate": 0.0,
            "weight_decay": 0.0,
            # network
            "norm": None,            # one of None, "norm", "batch", "instance", "layer", "act"
            # logging + evaluation
            "save_dir": './experiment',
            "ckpt_every": 10,        # epochs between checkpoint saves
            "eval_every": 1,         # epochs between evaluations
            "print_every": 100,      # iterations between prints
            "load_path": None,       # path of a checkpoint to load
            "print_to_log": False,   # if true, std-out is directed to a log file
            "n_valid": 5000,         # number of validation images
        }
        for name, value in defaults.items():
            setattr(self, name, value)

        # apply the inline overrides
        for key in param_dict:
            setattr(self, key, param_dict[key])

In [24]:
# Overrides applied on top of the train_args defaults for this run.
# To resume from a checkpoint, also set "load_path", e.g. './all1/last_ckpt.pt'.
inline_parms = dict(lr=.0001, save_dir='./simple')

# Build the run configuration and start training.
args = train_args(inline_parms)
main(args)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
| Wide-Resnet 28x10




Epoch 0: Train Loss 2.3045055866241455, Train Acc 0.1001289114356041
P(y|x) 0:0 loss=   2.293083906, acc=   0.140625000
P(y|x) 0:100 loss=   2.067859650, acc=   0.250000000
P(y|x) 0:200 loss=   1.676035047, acc=   0.390625000
P(y|x) 0:300 loss=   1.938641429, acc=   0.359375000
P(y|x) 0:400 loss=   1.510731697, acc=   0.406250000
P(y|x) 0:500 loss=   1.555539608, acc=   0.468750000
P(y|x) 0:600 loss=   1.499374866, acc=   0.406250000
P(y|x) 0:700 loss=   1.326265335, acc=   0.484375000
Epoch 0: Train Loss 1.402660608291626, Train Acc 0.484375
Epoch 0: Test Loss 2.0056633949279785, Test Acc 0.323199987411499
Epoch 0: Valid Loss 2.0113980770111084, Valid Acc 0.3199999928474426
P(y|x) 1:800 loss=   1.211954117, acc=   0.562500000
P(y|x) 1:900 loss=   1.248008728, acc=   0.546875000
P(y|x) 1:1000 loss=   1.069668174, acc=   0.578125000
P(y|x) 1:1100 loss=   1.296680212, acc=   0.515625000
P(y|x) 1:1200 loss=   1.090669513, acc=   0.562500000
P(y|x) 1:1300 loss=   1.049031973, acc=   0.6562

Epoch 13: Valid Loss 2.5216879844665527, Valid Acc 0.6516000032424927
P(y|x) 14:9900 loss=   0.057797864, acc=   0.984375000
P(y|x) 14:10000 loss=   0.048307516, acc=   0.984375000
P(y|x) 14:10100 loss=   0.117119297, acc=   0.953125000
P(y|x) 14:10200 loss=   0.035069220, acc=   1.000000000
P(y|x) 14:10300 loss=   0.169159561, acc=   0.953125000
P(y|x) 14:10400 loss=   0.025823370, acc=   1.000000000
P(y|x) 14:10500 loss=   0.083008364, acc=   0.968750000
Epoch 14: Train Loss 0.06955934315919876, Train Acc 0.9755289554595947
Epoch 14: Test Loss 2.2111682891845703, Test Acc 0.6581000089645386
Epoch 14: Valid Loss 2.190364122390747, Valid Acc 0.6538000106811523
P(y|x) 15:10600 loss=   0.122984476, acc=   0.953125000
P(y|x) 15:10700 loss=   0.064904720, acc=   0.968750000
P(y|x) 15:10800 loss=   0.038121834, acc=   0.984375000
P(y|x) 15:10900 loss=   0.184650540, acc=   0.921875000
P(y|x) 15:11000 loss=   0.132802397, acc=   0.968750000
P(y|x) 15:11100 loss=   0.142477557, acc=   0.93750

P(y|x) 27:19600 loss=   0.010567948, acc=   1.000000000
Epoch 27: Train Loss 0.03227405995130539, Train Acc 0.9888868927955627
Epoch 27: Test Loss 2.2982265949249268, Test Acc 0.6820999979972839
Epoch 27: Valid Loss 2.2344906330108643, Valid Acc 0.6868000030517578
P(y|x) 28:19700 loss=   0.029833689, acc=   0.984375000
P(y|x) 28:19800 loss=   0.025109932, acc=   1.000000000
P(y|x) 28:19900 loss=   0.294038236, acc=   0.937500000
P(y|x) 28:20000 loss=   0.053386912, acc=   0.984375000
P(y|x) 28:20100 loss=   0.146725059, acc=   0.968750000
P(y|x) 28:20200 loss=   0.099382885, acc=   0.984375000
P(y|x) 28:20300 loss=   0.055488899, acc=   0.984375000
Epoch 28: Train Loss 0.05834679678082466, Train Acc 0.9803298115730286
Epoch 28: Test Loss 3.555069923400879, Test Acc 0.6028000116348267
Epoch 28: Valid Loss 3.518015146255493, Valid Acc 0.6051999926567078
P(y|x) 29:20400 loss=   0.047374800, acc=   0.984375000
P(y|x) 29:20500 loss=   0.038620740, acc=   0.984375000
P(y|x) 29:20600 loss=   

P(y|x) 41:29100 loss=   0.130228266, acc=   0.984375000
P(y|x) 41:29200 loss=   0.073913738, acc=   0.968750000
P(y|x) 41:29300 loss=   0.005908385, acc=   1.000000000
P(y|x) 41:29400 loss=   0.070913754, acc=   0.968750000
P(y|x) 41:29500 loss=   0.062868133, acc=   0.968750000
Epoch 41: Train Loss 0.04639952629804611, Train Acc 0.9847972989082336
Epoch 41: Test Loss 4.586594581604004, Test Acc 0.5152999758720398
Epoch 41: Valid Loss 4.596183776855469, Valid Acc 0.5072000026702881
P(y|x) 42:29600 loss=   0.003638335, acc=   1.000000000
P(y|x) 42:29700 loss=   0.064792350, acc=   0.984375000
P(y|x) 42:29800 loss=   0.000372544, acc=   1.000000000
P(y|x) 42:29900 loss=   0.012418598, acc=   1.000000000
P(y|x) 42:30000 loss=   0.004446760, acc=   1.000000000
P(y|x) 42:30100 loss=   0.003569633, acc=   1.000000000
P(y|x) 42:30200 loss=   0.074992225, acc=   0.984375000
Epoch 42: Train Loss 0.025889698415994644, Train Acc 0.9919096827507019
Epoch 42: Test Loss 3.346628427505493, Test Acc 0

Epoch 54: Valid Loss 3.0502209663391113, Valid Acc 0.5956000089645386
P(y|x) 55:38700 loss=   0.134048879, acc=   0.968750000
P(y|x) 55:38800 loss=   0.077293016, acc=   0.984375000
P(y|x) 55:38900 loss=   0.266104013, acc=   0.984375000
P(y|x) 55:39000 loss=   0.071012013, acc=   0.984375000
P(y|x) 55:39100 loss=   0.001402840, acc=   1.000000000
P(y|x) 55:39200 loss=   0.001939088, acc=   1.000000000
P(y|x) 55:39300 loss=   0.006458580, acc=   1.000000000
Epoch 55: Train Loss 0.03360585868358612, Train Acc 0.9896426200866699
Epoch 55: Test Loss 3.1071858406066895, Test Acc 0.6699000000953674
Epoch 55: Valid Loss 3.0565309524536133, Valid Acc 0.6832000017166138
P(y|x) 56:39400 loss=   0.009987354, acc=   1.000000000
P(y|x) 56:39500 loss=   0.040996313, acc=   0.984375000
P(y|x) 56:39600 loss=   0.000305623, acc=   1.000000000
P(y|x) 56:39700 loss=   0.026022308, acc=   0.984375000
P(y|x) 56:39800 loss=   0.087051168, acc=   0.984375000
P(y|x) 56:39900 loss=   0.005958214, acc=   1.000

P(y|x) 68:48400 loss=   0.023784935, acc=   0.984375000
P(y|x) 68:48500 loss=   0.018069237, acc=   1.000000000
Epoch 68: Train Loss 0.02005959115922451, Train Acc 0.9933988451957703
Epoch 68: Test Loss 3.451092481613159, Test Acc 0.6309000253677368
Epoch 68: Valid Loss 3.3839592933654785, Valid Acc 0.6266000270843506
P(y|x) 69:48600 loss=   0.003205925, acc=   1.000000000
P(y|x) 69:48700 loss=   0.006431654, acc=   1.000000000
P(y|x) 69:48800 loss=   0.002412558, acc=   1.000000000
P(y|x) 69:48900 loss=   0.012527369, acc=   1.000000000
P(y|x) 69:49000 loss=   0.001886152, acc=   1.000000000
P(y|x) 69:49100 loss=   0.009852365, acc=   1.000000000
P(y|x) 69:49200 loss=   0.011054844, acc=   1.000000000
Epoch 69: Train Loss 0.005109682679176331, Train Acc 0.9980663061141968
Epoch 69: Test Loss 3.153864622116089, Test Acc 0.6866999864578247
Epoch 69: Valid Loss 3.089103937149048, Valid Acc 0.6851999759674072
P(y|x) 70:49300 loss=   0.059477210, acc=   0.984375000
P(y|x) 70:49400 loss=   

P(y|x) 82:57900 loss=   0.007913366, acc=   1.000000000
P(y|x) 82:58000 loss=   0.039095238, acc=   0.968750000
P(y|x) 82:58100 loss=   0.087493829, acc=   0.968750000
P(y|x) 82:58200 loss=   0.063651994, acc=   0.968750000
P(y|x) 82:58300 loss=   0.035328373, acc=   0.984375000
Epoch 82: Train Loss 0.005447121802717447, Train Acc 0.9983552694320679
Epoch 82: Test Loss 3.40054988861084, Test Acc 0.6524999737739563
Epoch 82: Valid Loss 3.2754275798797607, Valid Acc 0.6531999707221985
P(y|x) 83:58400 loss=   0.001003176, acc=   1.000000000
P(y|x) 83:58500 loss=   0.000084028, acc=   1.000000000
P(y|x) 83:58600 loss=   0.000178158, acc=   1.000000000
P(y|x) 83:58700 loss=   0.096319124, acc=   0.968750000
P(y|x) 83:58800 loss=   0.002905890, acc=   1.000000000
P(y|x) 83:58900 loss=   0.014448121, acc=   0.984375000
P(y|x) 83:59000 loss=   0.000111654, acc=   1.000000000
Epoch 83: Train Loss 0.013431290164589882, Train Acc 0.9957103729248047
Epoch 83: Test Loss 3.4359867572784424, Test Acc

Epoch 95: Valid Loss 4.776041507720947, Valid Acc 0.614799976348877
P(y|x) 96:67500 loss=   0.043493375, acc=   0.984375000
P(y|x) 96:67600 loss=   0.001173228, acc=   1.000000000
P(y|x) 96:67700 loss=   0.002660766, acc=   1.000000000
P(y|x) 96:67800 loss=   0.001746073, acc=   1.000000000
P(y|x) 96:67900 loss=   0.001993880, acc=   1.000000000
P(y|x) 96:68000 loss=   0.000516295, acc=   1.000000000
P(y|x) 96:68100 loss=   0.037223309, acc=   0.984375000
Epoch 96: Train Loss 0.024189596995711327, Train Acc 0.9935099482536316
Epoch 96: Test Loss 8.502336502075195, Test Acc 0.48890000581741333
Epoch 96: Valid Loss 8.558280944824219, Valid Acc 0.4968000054359436
P(y|x) 97:68200 loss=   0.140289441, acc=   0.984375000
P(y|x) 97:68300 loss=   0.018375635, acc=   0.984375000
P(y|x) 97:68400 loss=   0.032949373, acc=   0.984375000
P(y|x) 97:68500 loss=   0.075421646, acc=   0.984375000
P(y|x) 97:68600 loss=   0.000077650, acc=   1.000000000
P(y|x) 97:68700 loss=   0.177434802, acc=   0.95312

P(y|x) 109:77100 loss=   0.027418941, acc=   0.984375000
P(y|x) 109:77200 loss=   0.003262654, acc=   1.000000000
P(y|x) 109:77300 loss=   0.005221516, acc=   1.000000000
Epoch 109: Train Loss 0.013413704931735992, Train Acc 0.9957992434501648
Epoch 109: Test Loss 4.065317630767822, Test Acc 0.6340000033378601
Epoch 109: Valid Loss 4.242635250091553, Valid Acc 0.6273999810218811
P(y|x) 110:77400 loss=   0.000105143, acc=   1.000000000
P(y|x) 110:77500 loss=   0.000361085, acc=   1.000000000
P(y|x) 110:77600 loss=   0.014255390, acc=   0.984375000
P(y|x) 110:77700 loss=   0.035071164, acc=   0.984375000
P(y|x) 110:77800 loss=   0.007591426, acc=   1.000000000
P(y|x) 110:77900 loss=   0.000252992, acc=   1.000000000
P(y|x) 110:78000 loss=   0.008924499, acc=   1.000000000
Epoch 110: Train Loss 0.04732212796807289, Train Acc 0.9871976971626282
Epoch 110: Test Loss 6.664834499359131, Test Acc 0.578000009059906
Epoch 110: Valid Loss 6.843672752380371, Valid Acc 0.579200029373169
P(y|x) 111:

Epoch 122: Valid Loss 5.074207782745361, Valid Acc 0.5788000226020813
P(y|x) 123:86500 loss=   0.001362100, acc=   1.000000000
P(y|x) 123:86600 loss=   0.003524482, acc=   1.000000000
P(y|x) 123:86700 loss=   0.019817352, acc=   0.984375000
P(y|x) 123:86800 loss=   0.006163724, acc=   1.000000000
P(y|x) 123:86900 loss=   0.004398733, acc=   1.000000000
P(y|x) 123:87000 loss=   0.000097975, acc=   1.000000000
P(y|x) 123:87100 loss=   0.000835583, acc=   1.000000000
Epoch 123: Train Loss 0.026217903941869736, Train Acc 0.9916651844978333
Epoch 123: Test Loss 3.970231533050537, Test Acc 0.5946000218391418
Epoch 123: Valid Loss 3.907576560974121, Valid Acc 0.5920000076293945
P(y|x) 124:87200 loss=   0.015395455, acc=   1.000000000
P(y|x) 124:87300 loss=   0.000307992, acc=   1.000000000
P(y|x) 124:87400 loss=   0.032706589, acc=   0.984375000
P(y|x) 124:87500 loss=   0.105894536, acc=   0.984375000
P(y|x) 124:87600 loss=   0.000140175, acc=   1.000000000
P(y|x) 124:87700 loss=   0.08347998

P(y|x) 136:96000 loss=   0.046924815, acc=   0.968750000
P(y|x) 136:96100 loss=   0.000030622, acc=   1.000000000
P(y|x) 136:96200 loss=   0.004213035, acc=   1.000000000
P(y|x) 136:96300 loss=   0.073786914, acc=   0.984375000
Epoch 136: Train Loss 0.022557105869054794, Train Acc 0.9933988451957703
Epoch 136: Test Loss 5.747498989105225, Test Acc 0.5803999900817871
Epoch 136: Valid Loss 5.816659450531006, Valid Acc 0.5763999819755554
P(y|x) 137:96400 loss=   0.021820098, acc=   0.984375000
P(y|x) 137:96500 loss=   0.006935552, acc=   1.000000000
P(y|x) 137:96600 loss=   0.000325993, acc=   1.000000000
P(y|x) 137:96700 loss=   0.000053599, acc=   1.000000000
P(y|x) 137:96800 loss=   0.000906482, acc=   1.000000000
P(y|x) 137:96900 loss=   0.000181414, acc=   1.000000000
P(y|x) 137:97000 loss=   0.000542641, acc=   1.000000000
Epoch 137: Train Loss 0.011776795610785484, Train Acc 0.9961104393005371
Epoch 137: Test Loss 4.594161510467529, Test Acc 0.6115999817848206
Epoch 137: Valid Loss

Epoch 149: Train Loss 0.005150127690285444, Train Acc 0.998444139957428
Epoch 149: Test Loss 4.241751194000244, Test Acc 0.670799970626831
Epoch 149: Valid Loss 4.385958671569824, Valid Acc 0.6657999753952026
P(y|x) 150:105500 loss=   0.000373960, acc=   1.000000000
P(y|x) 150:105600 loss=   0.001749158, acc=   1.000000000
P(y|x) 150:105700 loss=   0.000978887, acc=   1.000000000
P(y|x) 150:105800 loss=   0.053050064, acc=   0.984375000
P(y|x) 150:105900 loss=   0.004938304, acc=   1.000000000
P(y|x) 150:106000 loss=   0.003539026, acc=   1.000000000
P(y|x) 150:106100 loss=   0.120416865, acc=   0.984375000
Epoch 150: Train Loss 0.03484192490577698, Train Acc 0.9909317493438721
Epoch 150: Test Loss 6.325661659240723, Test Acc 0.628600001335144
Epoch 150: Valid Loss 6.551670551300049, Valid Acc 0.6222000122070312
P(y|x) 151:106200 loss=   0.000066444, acc=   1.000000000
P(y|x) 151:106300 loss=   0.000554815, acc=   1.000000000
P(y|x) 151:106400 loss=   0.000446662, acc=   1.000000000
P(

P(y|x) 163:114600 loss=   0.010846555, acc=   1.000000000
P(y|x) 163:114700 loss=   0.000249162, acc=   1.000000000
P(y|x) 163:114800 loss=   0.000128388, acc=   1.000000000
P(y|x) 163:114900 loss=   0.003728703, acc=   1.000000000
P(y|x) 163:115000 loss=   0.000006467, acc=   1.000000000
P(y|x) 163:115100 loss=   0.001447782, acc=   1.000000000
P(y|x) 163:115200 loss=   0.076747775, acc=   0.984375000
Epoch 163: Train Loss 0.014874996617436409, Train Acc 0.9955769777297974
Epoch 163: Test Loss 8.242241859436035, Test Acc 0.5074999928474426
Epoch 163: Valid Loss 8.062402725219727, Valid Acc 0.5123999714851379
P(y|x) 164:115300 loss=   0.005738631, acc=   1.000000000
P(y|x) 164:115400 loss=   0.001613028, acc=   1.000000000
P(y|x) 164:115500 loss=   0.002645999, acc=   1.000000000
P(y|x) 164:115600 loss=   0.065442622, acc=   0.984375000
P(y|x) 164:115700 loss=   0.035587966, acc=   0.968750000
P(y|x) 164:115800 loss=   0.000703901, acc=   1.000000000
P(y|x) 164:115900 loss=   0.0039419

P(y|x) 176:124100 loss=   0.134173796, acc=   0.984375000
P(y|x) 176:124200 loss=   0.000453323, acc=   1.000000000
P(y|x) 176:124300 loss=   0.126074463, acc=   0.984375000
P(y|x) 176:124400 loss=   0.004893824, acc=   1.000000000
Epoch 176: Train Loss 0.02050800435245037, Train Acc 0.9938877820968628
Epoch 176: Test Loss 8.68869400024414, Test Acc 0.535099983215332
Epoch 176: Valid Loss 8.687908172607422, Valid Acc 0.5242000222206116
P(y|x) 177:124500 loss=   0.008306772, acc=   1.000000000
P(y|x) 177:124600 loss=   0.000024915, acc=   1.000000000
P(y|x) 177:124700 loss=   0.001394853, acc=   1.000000000
P(y|x) 177:124800 loss=   0.000016779, acc=   1.000000000
P(y|x) 177:124900 loss=   0.000010967, acc=   1.000000000
P(y|x) 177:125000 loss=   0.021097347, acc=   0.984375000
P(y|x) 177:125100 loss=   0.193116024, acc=   0.984375000
Epoch 177: Train Loss 0.015515586361289024, Train Acc 0.9955547451972961
Epoch 177: Test Loss 13.509988784790039, Test Acc 0.487199991941452
Epoch 177: Va

P(y|x) 189:133500 loss=   0.000145659, acc=   1.000000000
Epoch 189: Train Loss 0.032819442451000214, Train Acc 0.9916874170303345
Epoch 189: Test Loss 7.4201812744140625, Test Acc 0.5756999850273132
Epoch 189: Valid Loss 7.58468770980835, Valid Acc 0.5735999941825867
P(y|x) 190:133600 loss=   0.007489994, acc=   1.000000000
P(y|x) 190:133700 loss=   0.004062906, acc=   1.000000000
P(y|x) 190:133800 loss=   0.000017554, acc=   1.000000000
P(y|x) 190:133900 loss=   0.016912729, acc=   1.000000000
P(y|x) 190:134000 loss=   0.012678772, acc=   1.000000000
P(y|x) 190:134100 loss=   0.001039118, acc=   1.000000000
P(y|x) 190:134200 loss=   0.000015184, acc=   1.000000000
Epoch 190: Train Loss 0.01818772964179516, Train Acc 0.9948657751083374
Epoch 190: Test Loss 14.190863609313965, Test Acc 0.3433000147342682
Epoch 190: Valid Loss 14.099028587341309, Valid Acc 0.35100001096725464
P(y|x) 191:134300 loss=   0.000001520, acc=   1.000000000
P(y|x) 191:134400 loss=   0.000011399, acc=   1.000000